diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 21:12:47 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2015-11-04 21:12:47 -0800 |
commit | ac322de6bf5416cb145b58599297b8be73cd86ac (patch) | |
tree | 1a1be9f8b9241159fb4cde14a548eba9a4155b28 /drivers/md/raid5.c | |
parent | ccf21b69a83afaee4d5499e0d03eacf23946e08c (diff) | |
parent | 339421def582abb14c2217aa8c8f28bb2e299174 (diff) |
Merge tag 'md/4.4' of git://neil.brown.name/md
Pull md updates from Neil Brown:
"Two major components to this update.
1) The clustered-raid1 support from SUSE is nearly complete. There
are a few outstanding issues being worked on. Maybe half a dozen
patches will bring this to a usable state.
2) The first stage of journalled-raid5 support from Facebook makes an
appearance. With a journal device configured (typically NVRAM or
SSD), the "RAID5 write hole" should be closed - a crash during
degraded operations cannot result in data corruption.
The next stage will be to use the journal as a write-behind cache
so that latency can be reduced and in some cases throughput
increased by performing more full-stripe writes."
* tag 'md/4.4' of git://neil.brown.name/md: (66 commits)
MD: when RAID journal is missing/faulty, block RESTART_ARRAY_RW
MD: set journal disk ->raid_disk
MD: kick out journal disk if it's not fresh
raid5-cache: start raid5 readonly if journal is missing
MD: add new bit to indicate raid array with journal
raid5-cache: IO error handling
raid5: journal disk can't be removed
raid5-cache: add trim support for log
MD: fix info output for journal disk
raid5-cache: use bio chaining
raid5-cache: small log->seq cleanup
raid5-cache: new helper: r5_reserve_log_entry
raid5-cache: inline r5l_alloc_io_unit into r5l_new_meta
raid5-cache: take rdev->data_offset into account early on
raid5-cache: refactor bio allocation
raid5-cache: clean up r5l_get_meta
raid5-cache: simplify state machine when caches flushes are not needed
raid5-cache: factor out a helper to run all stripes for an I/O unit
raid5-cache: rename flushed_ios to finished_ios
raid5-cache: free I/O units earlier
...
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 189 |
1 files changed, 132 insertions, 57 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 45933c160697..704ef7fcfbf8 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -353,7 +353,7 @@ static void release_inactive_stripe_list(struct r5conf *conf, struct list_head *list = &temp_inactive_list[size - 1]; /* - * We don't hold any lock here yet, get_active_stripe() might + * We don't hold any lock here yet, raid5_get_active_stripe() might * remove stripes from the list */ if (!list_empty_careful(list)) { @@ -413,7 +413,7 @@ static int release_stripe_list(struct r5conf *conf, return count; } -static void release_stripe(struct stripe_head *sh) +void raid5_release_stripe(struct stripe_head *sh) { struct r5conf *conf = sh->raid_conf; unsigned long flags; @@ -658,9 +658,9 @@ static int has_failed(struct r5conf *conf) return 0; } -static struct stripe_head * -get_active_stripe(struct r5conf *conf, sector_t sector, - int previous, int noblock, int noquiesce) +struct stripe_head * +raid5_get_active_stripe(struct r5conf *conf, sector_t sector, + int previous, int noblock, int noquiesce) { struct stripe_head *sh; int hash = stripe_hash_locks_hash(sector); @@ -755,6 +755,10 @@ static void unlock_two_stripes(struct stripe_head *sh1, struct stripe_head *sh2) /* Only freshly new full stripe normal write stripe can be added to a batch list */ static bool stripe_can_batch(struct stripe_head *sh) { + struct r5conf *conf = sh->raid_conf; + + if (conf->log) + return false; return test_bit(STRIPE_BATCH_READY, &sh->state) && !test_bit(STRIPE_BITMAP_PENDING, &sh->state) && is_full_stripe_write(sh); @@ -858,7 +862,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh unlock_out: unlock_two_stripes(head, sh); out: - release_stripe(head); + raid5_release_stripe(head); } /* Determine if 'data_offset' or 'new_data_offset' should be used @@ -895,6 +899,8 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) might_sleep(); + if (r5l_write_stripe(conf->log, sh) == 
0) + return; for (i = disks; i--; ) { int rw; int replace_only = 0; @@ -1208,7 +1214,7 @@ static void ops_complete_biofill(void *stripe_head_ref) return_io(&return_bi); set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); } static void ops_run_biofill(struct stripe_head *sh) @@ -1271,7 +1277,7 @@ static void ops_complete_compute(void *stripe_head_ref) if (sh->check_state == check_state_compute_run) sh->check_state = check_state_compute_result; set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); } /* return a pointer to the address conversion region of the scribble buffer */ @@ -1697,7 +1703,7 @@ static void ops_complete_reconstruct(void *stripe_head_ref) } set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); } static void @@ -1855,7 +1861,7 @@ static void ops_complete_check(void *stripe_head_ref) sh->check_state = check_state_check_result; set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); } static void ops_run_check_p(struct stripe_head *sh, struct raid5_percpu *percpu) @@ -2017,7 +2023,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) /* we just created an active stripe so... 
*/ atomic_inc(&conf->active_stripes); - release_stripe(sh); + raid5_release_stripe(sh); conf->max_nr_stripes++; return 1; } @@ -2236,7 +2242,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) if (!p) err = -ENOMEM; } - release_stripe(nsh); + raid5_release_stripe(nsh); } /* critical section pass, GFP_NOIO no longer needed */ @@ -2394,7 +2400,7 @@ static void raid5_end_read_request(struct bio * bi) rdev_dec_pending(rdev, conf->mddev); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); } static void raid5_end_write_request(struct bio *bi) @@ -2468,14 +2474,12 @@ static void raid5_end_write_request(struct bio *bi) if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); if (sh->batch_head && sh != sh->batch_head) - release_stripe(sh->batch_head); + raid5_release_stripe(sh->batch_head); } -static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous); - static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; @@ -2491,7 +2495,7 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous) dev->rreq.bi_private = sh; dev->flags = 0; - dev->sector = compute_blocknr(sh, i, previous); + dev->sector = raid5_compute_blocknr(sh, i, previous); } static void error(struct mddev *mddev, struct md_rdev *rdev) @@ -2524,9 +2528,9 @@ static void error(struct mddev *mddev, struct md_rdev *rdev) * Input: a 'big' sector number, * Output: index of the data and parity disk, and the sector # in them. 
*/ -static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, - int previous, int *dd_idx, - struct stripe_head *sh) +sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, + int previous, int *dd_idx, + struct stripe_head *sh) { sector_t stripe, stripe2; sector_t chunk_number; @@ -2726,7 +2730,7 @@ static sector_t raid5_compute_sector(struct r5conf *conf, sector_t r_sector, return new_sector; } -static sector_t compute_blocknr(struct stripe_head *sh, int i, int previous) +sector_t raid5_compute_blocknr(struct stripe_head *sh, int i, int previous) { struct r5conf *conf = sh->raid_conf; int raid_disks = sh->disks; @@ -3098,6 +3102,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, if (bi) bitmap_end = 1; + r5l_stripe_write_finished(sh); + if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags)) wake_up(&conf->wait_for_overlap); @@ -3141,6 +3147,7 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh, * the data has not reached the cache yet. */ if (!test_bit(R5_Wantfill, &sh->dev[i].flags) && + s->failed > conf->max_degraded && (!test_bit(R5_Insync, &sh->dev[i].flags) || test_bit(R5_ReadError, &sh->dev[i].flags))) { spin_lock_irq(&sh->stripe_lock); @@ -3497,6 +3504,9 @@ returnbi: WARN_ON(test_bit(R5_SkipCopy, &dev->flags)); WARN_ON(dev->page != dev->orig_page); } + + r5l_stripe_write_finished(sh); + if (!discard_pending && test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) { int hash; @@ -3939,10 +3949,10 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) struct stripe_head *sh2; struct async_submit_ctl submit; - sector_t bn = compute_blocknr(sh, i, 1); + sector_t bn = raid5_compute_blocknr(sh, i, 1); sector_t s = raid5_compute_sector(conf, bn, 0, &dd_idx, NULL); - sh2 = get_active_stripe(conf, s, 0, 1, 1); + sh2 = raid5_get_active_stripe(conf, s, 0, 1, 1); if (sh2 == NULL) /* so far only the early blocks of this stripe * have been requested. 
When later blocks @@ -3952,7 +3962,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) if (!test_bit(STRIPE_EXPANDING, &sh2->state) || test_bit(R5_Expanded, &sh2->dev[dd_idx].flags)) { /* must have already done this block */ - release_stripe(sh2); + raid5_release_stripe(sh2); continue; } @@ -3973,7 +3983,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh) set_bit(STRIPE_EXPAND_READY, &sh2->state); set_bit(STRIPE_HANDLE, &sh2->state); } - release_stripe(sh2); + raid5_release_stripe(sh2); } /* done submitting copies, wait for them to complete */ @@ -4008,6 +4018,7 @@ static void analyse_stripe(struct stripe_head *sh, struct stripe_head_state *s) s->expanded = test_bit(STRIPE_EXPAND_READY, &sh->state) && !sh->batch_head; s->failed_num[0] = -1; s->failed_num[1] = -1; + s->log_failed = r5l_log_disk_error(conf); /* Now to look around and see what can be done */ rcu_read_lock(); @@ -4259,7 +4270,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh, if (handle_flags == 0 || sh->state & handle_flags) set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); } spin_lock_irq(&head_sh->stripe_lock); head_sh->batch_head = NULL; @@ -4320,6 +4331,9 @@ static void handle_stripe(struct stripe_head *sh) analyse_stripe(sh, &s); + if (test_bit(STRIPE_LOG_TRAPPED, &sh->state)) + goto finish; + if (s.handle_bad_blocks) { set_bit(STRIPE_HANDLE, &sh->state); goto finish; @@ -4348,7 +4362,7 @@ static void handle_stripe(struct stripe_head *sh) /* check if the array has lost more than max_degraded devices and, * if so, some requests might need to be failed. 
*/ - if (s.failed > conf->max_degraded) { + if (s.failed > conf->max_degraded || s.log_failed) { sh->check_state = 0; sh->reconstruct_state = 0; break_stripe_batch_list(sh, 0); @@ -4506,7 +4520,7 @@ static void handle_stripe(struct stripe_head *sh) /* Finish reconstruct operations initiated by the expansion process */ if (sh->reconstruct_state == reconstruct_state_result) { struct stripe_head *sh_src - = get_active_stripe(conf, sh->sector, 1, 1, 1); + = raid5_get_active_stripe(conf, sh->sector, 1, 1, 1); if (sh_src && test_bit(STRIPE_EXPAND_SOURCE, &sh_src->state)) { /* sh cannot be written until sh_src has been read. * so arrange for sh to be delayed a little @@ -4516,11 +4530,11 @@ static void handle_stripe(struct stripe_head *sh) if (!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh_src->state)) atomic_inc(&conf->preread_active_stripes); - release_stripe(sh_src); + raid5_release_stripe(sh_src); goto finish; } if (sh_src) - release_stripe(sh_src); + raid5_release_stripe(sh_src); sh->reconstruct_state = reconstruct_state_idle; clear_bit(STRIPE_EXPANDING, &sh->state); @@ -5012,7 +5026,7 @@ static void release_stripe_plug(struct mddev *mddev, struct raid5_plug_cb *cb; if (!blk_cb) { - release_stripe(sh); + raid5_release_stripe(sh); return; } @@ -5028,7 +5042,7 @@ static void release_stripe_plug(struct mddev *mddev, if (!test_and_set_bit(STRIPE_ON_UNPLUG_LIST, &sh->state)) list_add_tail(&sh->lru, &cb->list); else - release_stripe(sh); + raid5_release_stripe(sh); } static void make_discard_request(struct mddev *mddev, struct bio *bi) @@ -5063,12 +5077,12 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) DEFINE_WAIT(w); int d; again: - sh = get_active_stripe(conf, logical_sector, 0, 0, 0); + sh = raid5_get_active_stripe(conf, logical_sector, 0, 0, 0); prepare_to_wait(&conf->wait_for_overlap, &w, TASK_UNINTERRUPTIBLE); set_bit(R5_Overlap, &sh->dev[sh->pd_idx].flags); if (test_bit(STRIPE_SYNCING, &sh->state)) { - release_stripe(sh); + 
raid5_release_stripe(sh); schedule(); goto again; } @@ -5080,7 +5094,7 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi) if (sh->dev[d].towrite || sh->dev[d].toread) { set_bit(R5_Overlap, &sh->dev[d].flags); spin_unlock_irq(&sh->stripe_lock); - release_stripe(sh); + raid5_release_stripe(sh); schedule(); goto again; } @@ -5136,8 +5150,15 @@ static void make_request(struct mddev *mddev, struct bio * bi) bool do_prepare; if (unlikely(bi->bi_rw & REQ_FLUSH)) { - md_flush_request(mddev, bi); - return; + int ret = r5l_handle_flush_request(conf->log, bi); + + if (ret == 0) + return; + if (ret == -ENODEV) { + md_flush_request(mddev, bi); + return; + } + /* ret == -EAGAIN, fallback */ } md_write_start(mddev, bi); @@ -5210,7 +5231,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) (unsigned long long)new_sector, (unsigned long long)logical_sector); - sh = get_active_stripe(conf, new_sector, previous, + sh = raid5_get_active_stripe(conf, new_sector, previous, (bi->bi_rw&RWA_MASK), 0); if (sh) { if (unlikely(previous)) { @@ -5231,7 +5252,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) must_retry = 1; spin_unlock_irq(&conf->device_lock); if (must_retry) { - release_stripe(sh); + raid5_release_stripe(sh); schedule(); do_prepare = true; goto retry; @@ -5241,14 +5262,14 @@ static void make_request(struct mddev *mddev, struct bio * bi) /* Might have got the wrong stripe_head * by accident */ - release_stripe(sh); + raid5_release_stripe(sh); goto retry; } if (rw == WRITE && logical_sector >= mddev->suspend_lo && logical_sector < mddev->suspend_hi) { - release_stripe(sh); + raid5_release_stripe(sh); /* As the suspend_* range is controlled by * userspace, we want an interruptible * wait. 
@@ -5271,7 +5292,7 @@ static void make_request(struct mddev *mddev, struct bio * bi) * and wait a while */ md_wakeup_thread(mddev->thread); - release_stripe(sh); + raid5_release_stripe(sh); schedule(); do_prepare = true; goto retry; @@ -5458,7 +5479,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk for (i = 0; i < reshape_sectors; i += STRIPE_SECTORS) { int j; int skipped_disk = 0; - sh = get_active_stripe(conf, stripe_addr+i, 0, 0, 1); + sh = raid5_get_active_stripe(conf, stripe_addr+i, 0, 0, 1); set_bit(STRIPE_EXPANDING, &sh->state); atomic_inc(&conf->reshape_stripes); /* If any of this stripe is beyond the end of the old @@ -5471,7 +5492,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk if (conf->level == 6 && j == sh->qd_idx) continue; - s = compute_blocknr(sh, j, 0); + s = raid5_compute_blocknr(sh, j, 0); if (s < raid5_size(mddev, 0, 0)) { skipped_disk = 1; continue; @@ -5507,10 +5528,10 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk if (last_sector >= mddev->dev_sectors) last_sector = mddev->dev_sectors - 1; while (first_sector <= last_sector) { - sh = get_active_stripe(conf, first_sector, 1, 0, 1); + sh = raid5_get_active_stripe(conf, first_sector, 1, 0, 1); set_bit(STRIPE_EXPAND_SOURCE, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); first_sector += STRIPE_SECTORS; } /* Now that the sources are clearly marked, we can release @@ -5519,7 +5540,7 @@ static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr, int *sk while (!list_empty(&stripes)) { sh = list_entry(stripes.next, struct stripe_head, lru); list_del_init(&sh->lru); - release_stripe(sh); + raid5_release_stripe(sh); } /* If this takes us to the resync_max point where we have to pause, * then we need to write out the superblock. 
@@ -5615,11 +5636,11 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int return sync_blocks * STRIPE_SECTORS; /* keep things rounded to whole stripes */ } - bitmap_cond_end_sync(mddev->bitmap, sector_nr); + bitmap_cond_end_sync(mddev->bitmap, sector_nr, false); - sh = get_active_stripe(conf, sector_nr, 0, 1, 0); + sh = raid5_get_active_stripe(conf, sector_nr, 0, 1, 0); if (sh == NULL) { - sh = get_active_stripe(conf, sector_nr, 0, 0, 0); + sh = raid5_get_active_stripe(conf, sector_nr, 0, 0, 0); /* make sure we don't swamp the stripe cache if someone else * is trying to get access */ @@ -5643,7 +5664,7 @@ static inline sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int set_bit(STRIPE_SYNC_REQUESTED, &sh->state); set_bit(STRIPE_HANDLE, &sh->state); - release_stripe(sh); + raid5_release_stripe(sh); return STRIPE_SECTORS; } @@ -5682,7 +5703,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) /* already done this stripe */ continue; - sh = get_active_stripe(conf, sector, 0, 1, 1); + sh = raid5_get_active_stripe(conf, sector, 0, 1, 1); if (!sh) { /* failed to get a stripe - must wait */ @@ -5692,7 +5713,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) } if (!add_stripe_bio(sh, raid_bio, dd_idx, 0, 0)) { - release_stripe(sh); + raid5_release_stripe(sh); raid5_set_bi_processed_stripes(raid_bio, scnt); conf->retry_read_aligned = raid_bio; return handled; @@ -5700,7 +5721,7 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio) set_bit(R5_ReadNoMerge, &sh->dev[dd_idx].flags); handle_stripe(sh); - release_stripe(sh); + raid5_release_stripe(sh); handled++; } remaining = raid5_dec_bi_active_stripes(raid_bio); @@ -5730,8 +5751,12 @@ static int handle_active_stripes(struct r5conf *conf, int group, for (i = 0; i < NR_STRIPE_HASH_LOCKS; i++) if (!list_empty(temp_inactive_list + i)) break; - if (i == NR_STRIPE_HASH_LOCKS) + if (i == NR_STRIPE_HASH_LOCKS) { + 
spin_unlock_irq(&conf->device_lock); + r5l_flush_stripe_to_raid(conf->log); + spin_lock_irq(&conf->device_lock); return batch_size; + } release_inactive = true; } spin_unlock_irq(&conf->device_lock); @@ -5739,6 +5764,7 @@ static int handle_active_stripes(struct r5conf *conf, int group, release_inactive_stripe_list(conf, temp_inactive_list, NR_STRIPE_HASH_LOCKS); + r5l_flush_stripe_to_raid(conf->log); if (release_inactive) { spin_lock_irq(&conf->device_lock); return 0; @@ -5746,6 +5772,7 @@ static int handle_active_stripes(struct r5conf *conf, int group, for (i = 0; i < batch_size; i++) handle_stripe(batch[i]); + r5l_write_stripe_run(conf->log); cond_resched(); @@ -5879,6 +5906,8 @@ static void raid5d(struct md_thread *thread) mutex_unlock(&conf->cache_size_mutex); } + r5l_flush_stripe_to_raid(conf->log); + async_tx_issue_pending_all(); blk_finish_plug(&plug); @@ -6316,8 +6345,11 @@ static void raid5_free_percpu(struct r5conf *conf) static void free_conf(struct r5conf *conf) { + if (conf->log) + r5l_exit_log(conf->log); if (conf->shrinker.seeks) unregister_shrinker(&conf->shrinker); + free_thread_groups(conf); shrink_stripes(conf); raid5_free_percpu(conf); @@ -6530,7 +6562,7 @@ static struct r5conf *setup_conf(struct mddev *mddev) rdev_for_each(rdev, mddev) { raid_disk = rdev->raid_disk; if (raid_disk >= max_disks - || raid_disk < 0) + || raid_disk < 0 || test_bit(Journal, &rdev->flags)) continue; disk = conf->disks + raid_disk; @@ -6650,6 +6682,7 @@ static int run(struct mddev *mddev) int working_disks = 0; int dirty_parity_disks = 0; struct md_rdev *rdev; + struct md_rdev *journal_dev = NULL; sector_t reshape_offset = 0; int i; long long min_offset_diff = 0; @@ -6662,6 +6695,11 @@ static int run(struct mddev *mddev) rdev_for_each(rdev, mddev) { long long diff; + + if (test_bit(Journal, &rdev->flags)) { + journal_dev = rdev; + continue; + } if (rdev->raid_disk < 0) continue; diff = (rdev->new_data_offset - rdev->data_offset); @@ -6695,6 +6733,12 @@ static int 
run(struct mddev *mddev) int chunk_sectors; int new_data_disks; + if (journal_dev) { + printk(KERN_ERR "md/raid:%s: don't support reshape with journal - aborting.\n", + mdname(mddev)); + return -EINVAL; + } + if (mddev->new_level != mddev->level) { printk(KERN_ERR "md/raid:%s: unsupported reshape " "required - aborting.\n", @@ -6770,6 +6814,13 @@ static int run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); + if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) { + printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n", + mdname(mddev)); + mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); + } + conf->min_offset_diff = min_offset_diff; mddev->thread = conf->thread; conf->thread = NULL; @@ -6973,6 +7024,14 @@ static int run(struct mddev *mddev) mddev->queue); } + if (journal_dev) { + char b[BDEVNAME_SIZE]; + + printk(KERN_INFO"md/raid:%s: using device %s as journal\n", + mdname(mddev), bdevname(journal_dev->bdev, b)); + r5l_init_log(conf, journal_dev); + } + return 0; abort: md_unregister_thread(&mddev->thread); @@ -7082,6 +7141,15 @@ static int raid5_remove_disk(struct mddev *mddev, struct md_rdev *rdev) struct disk_info *p = conf->disks + number; print_raid5_conf(conf); + if (test_bit(Journal, &rdev->flags)) { + /* + * journal disk is not removable, but we need give a chance to + * update superblock of other disks. 
Otherwise journal disk + * will be considered as 'fresh' + */ + set_bit(MD_CHANGE_DEVS, &mddev->flags); + return -EINVAL; + } if (rdev == p->rdev) rdevp = &p->rdev; else if (rdev == p->replacement) @@ -7144,6 +7212,8 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev) int first = 0; int last = conf->raid_disks - 1; + if (test_bit(Journal, &rdev->flags)) + return -EINVAL; if (mddev->recovery_disabled == conf->recovery_disabled) return -EBUSY; @@ -7205,6 +7275,8 @@ static int raid5_resize(struct mddev *mddev, sector_t sectors) sector_t newsize; struct r5conf *conf = mddev->private; + if (conf->log) + return -EINVAL; sectors &= ~((sector_t)conf->chunk_sectors - 1); newsize = raid5_size(mddev, sectors, mddev->raid_disks); if (mddev->external_size && @@ -7256,6 +7328,8 @@ static int check_reshape(struct mddev *mddev) { struct r5conf *conf = mddev->private; + if (conf->log) + return -EINVAL; if (mddev->delta_disks == 0 && mddev->new_layout == mddev->layout && mddev->new_chunk_sectors == mddev->chunk_sectors) @@ -7532,6 +7606,7 @@ static void raid5_quiesce(struct mddev *mddev, int state) unlock_all_device_hash_locks_irq(conf); break; } + r5l_quiesce(conf->log, state); } static void *raid45_takeover_raid0(struct mddev *mddev, int level) |