diff options
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 424 |
1 files changed, 227 insertions, 197 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 4df78e30b76a..afaf36b2f6ab 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -368,28 +368,6 @@ EXPORT_SYMBOL_GPL(md_new_event); static LIST_HEAD(all_mddevs); static DEFINE_SPINLOCK(all_mddevs_lock); -/* - * iterates through all used mddevs in the system. - * We take care to grab the all_mddevs_lock whenever navigating - * the list, and to always hold a refcount when unlocked. - * Any code which breaks out of this loop while own - * a reference to the current mddev and must mddev_put it. - */ -#define for_each_mddev(_mddev,_tmp) \ - \ - for (({ spin_lock(&all_mddevs_lock); \ - _tmp = all_mddevs.next; \ - _mddev = NULL;}); \ - ({ if (_tmp != &all_mddevs) \ - mddev_get(list_entry(_tmp, struct mddev, all_mddevs));\ - spin_unlock(&all_mddevs_lock); \ - if (_mddev) mddev_put(_mddev); \ - _mddev = list_entry(_tmp, struct mddev, all_mddevs); \ - _tmp != &all_mddevs;}); \ - ({ spin_lock(&all_mddevs_lock); \ - _tmp = _tmp->next;}) \ - ) - /* Rather than calling directly into the personality make_request function, * IO requests come here first so that we can check if the device is * being suspended pending a reconfiguration. @@ -464,7 +442,7 @@ static void md_submit_bio(struct bio *bio) return; } - blk_queue_split(&bio); + bio = bio_split_to_limits(bio); if (mddev->ro == 1 && unlikely(rw == WRITE)) { if (bio_sectors(bio) != 0) @@ -647,13 +625,17 @@ EXPORT_SYMBOL(md_flush_request); static inline struct mddev *mddev_get(struct mddev *mddev) { + lockdep_assert_held(&all_mddevs_lock); + + if (test_bit(MD_DELETED, &mddev->flags)) + return NULL; atomic_inc(&mddev->active); return mddev; } static void mddev_delayed_delete(struct work_struct *ws); -static void mddev_put(struct mddev *mddev) +void mddev_put(struct mddev *mddev) { if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock)) return; @@ -661,7 +643,7 @@ static void mddev_put(struct mddev *mddev) mddev->ctime == 0 && !mddev->hold_active) { /* Array is not configured at all, and not held active, * so destroy it */ - list_del_init(&mddev->all_mddevs); + set_bit(MD_DELETED, &mddev->flags); /* * Call queue_work inside the spinlock so that @@ -678,7 +660,6 @@ static void md_safemode_timeout(struct timer_list *t); void mddev_init(struct mddev *mddev) { - kobject_init(&mddev->kobj, &md_ktype); mutex_init(&mddev->open_mutex); mutex_init(&mddev->reconfig_mutex); mutex_init(&mddev->bitmap_info.mutex); @@ -733,22 +714,6 @@ static dev_t mddev_alloc_unit(void) return dev; } -static struct mddev *mddev_find(dev_t unit) -{ - struct mddev *mddev; - - if (MAJOR(unit) != MD_MAJOR) - unit &= ~((1 << MdpMinorShift) - 1); - - spin_lock(&all_mddevs_lock); - mddev = mddev_find_locked(unit); - if (mddev) - mddev_get(mddev); - spin_unlock(&all_mddevs_lock); - - return mddev; -} - static struct mddev *mddev_alloc(dev_t unit) { struct mddev *new; @@ -791,6 +756,15 @@ out_free_new: return ERR_PTR(error); } +static void mddev_free(struct mddev *mddev) +{ + spin_lock(&all_mddevs_lock); + list_del(&mddev->all_mddevs); + spin_unlock(&all_mddevs_lock); + + kfree(mddev); +} + static const struct attribute_group md_redundancy_group; void mddev_unlock(struct mddev *mddev) @@ -3335,14 +3309,35 @@ rdev_size_show(struct md_rdev *rdev, char *page) return sprintf(page, "%llu\n", (unsigned long long)rdev->sectors / 2); } -static int overlaps(sector_t s1, sector_t l1, sector_t s2, sector_t l2) +static int md_rdevs_overlap(struct md_rdev *a, struct md_rdev *b) { /* check if two start/length pairs overlap */ - if (s1+l1 <= s2) - return 0; - if (s2+l2 <= s1) - return 0; - return 1; + if (a->data_offset + a->sectors <= b->data_offset) + return false; + if (b->data_offset + b->sectors <= a->data_offset) + return false; + return true; +} + +static bool md_rdev_overlaps(struct md_rdev *rdev) +{ + struct mddev *mddev; + struct md_rdev *rdev2; + + spin_lock(&all_mddevs_lock); + list_for_each_entry(mddev, &all_mddevs, all_mddevs) { + if (test_bit(MD_DELETED, &mddev->flags)) + continue; + rdev_for_each(rdev2, mddev) { + if (rdev != rdev2 && rdev->bdev == rdev2->bdev && + md_rdevs_overlap(rdev, rdev2)) { + spin_unlock(&all_mddevs_lock); + return true; + } + } + } + spin_unlock(&all_mddevs_lock); + return false; } static int strict_blocks_to_sectors(const char *buf, sector_t *sectors) @@ -3394,46 +3389,21 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) return -EINVAL; /* component must fit device */ rdev->sectors = sectors; - if (sectors > oldsectors && my_mddev->external) { - /* Need to check that all other rdevs with the same - * ->bdev do not overlap. 'rcu' is sufficient to walk - * the rdev lists safely. - * This check does not provide a hard guarantee, it - * just helps avoid dangerous mistakes. - */ - struct mddev *mddev; - int overlap = 0; - struct list_head *tmp; - - rcu_read_lock(); - for_each_mddev(mddev, tmp) { - struct md_rdev *rdev2; - rdev_for_each(rdev2, mddev) - if (rdev->bdev == rdev2->bdev && - rdev != rdev2 && - overlaps(rdev->data_offset, rdev->sectors, - rdev2->data_offset, - rdev2->sectors)) { - overlap = 1; - break; - } - if (overlap) { - mddev_put(mddev); - break; - } - } - rcu_read_unlock(); - if (overlap) { - /* Someone else could have slipped in a size - * change here, but doing so is just silly. - * We put oldsectors back because we *know* it is - * safe, and trust userspace not to race with - * itself - */ - rdev->sectors = oldsectors; - return -EBUSY; - } + /* + * Check that all other rdevs with the same bdev do not overlap. This + * check does not provide a hard guarantee, it just helps avoid + * dangerous mistakes. + */ + if (sectors > oldsectors && my_mddev->external && + md_rdev_overlaps(rdev)) { + /* + * Someone else could have slipped in a size change here, but + * doing so is just silly. We put oldsectors back because we + * know it is safe, and trust userspace not to race with itself. + */ + rdev->sectors = oldsectors; + return -EBUSY; } return len; } @@ -4830,6 +4800,19 @@ action_store(struct mddev *mddev, const char *page, size_t len) if (work_pending(&mddev->del_work)) flush_workqueue(md_misc_wq); if (mddev->sync_thread) { + sector_t save_rp = mddev->reshape_position; + + mddev_unlock(mddev); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_unregister_thread(&mddev->sync_thread); + mddev_lock_nointr(mddev); + /* + * set RECOVERY_INTR again and restore reshape + * position in case others changed them after + * got lock, eg, reshape_position_store and + * md_check_recovery. + */ + mddev->reshape_position = save_rp; set_bit(MD_RECOVERY_INTR, &mddev->recovery); md_reap_sync_thread(mddev); } @@ -5001,7 +4984,7 @@ static ssize_t sync_speed_show(struct mddev *mddev, char *page) { unsigned long resync, dt, db; - if (mddev->curr_resync == 0) + if (mddev->curr_resync == MD_RESYNC_NONE) return sprintf(page, "none\n"); resync = mddev->curr_mark_cnt - atomic_read(&mddev->recovery_active); dt = (jiffies - mddev->resync_mark) / HZ; @@ -5020,8 +5003,8 @@ sync_completed_show(struct mddev *mddev, char *page) if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) return sprintf(page, "none\n"); - if (mddev->curr_resync == 1 || - mddev->curr_resync == 2) + if (mddev->curr_resync == MD_RESYNC_YIELDED || + mddev->curr_resync == MD_RESYNC_DELAYED) return sprintf(page, "delayed\n"); if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) || @@ -5532,11 +5515,10 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page) if (!entry->show) return -EIO; spin_lock(&all_mddevs_lock); - if (list_empty(&mddev->all_mddevs)) { + if (!mddev_get(mddev)) { spin_unlock(&all_mddevs_lock); return -EBUSY; } - mddev_get(mddev); spin_unlock(&all_mddevs_lock); rv = entry->show(mddev, page); @@ -5557,18 +5539,17 @@ md_attr_store(struct kobject *kobj, struct attribute *attr, if (!capable(CAP_SYS_ADMIN)) return -EACCES; spin_lock(&all_mddevs_lock); - if (list_empty(&mddev->all_mddevs)) { + if (!mddev_get(mddev)) { spin_unlock(&all_mddevs_lock); return -EBUSY; } - mddev_get(mddev); spin_unlock(&all_mddevs_lock); rv = entry->store(mddev, page, length); mddev_put(mddev); return rv; } -static void md_free(struct kobject *ko) +static void md_kobj_release(struct kobject *ko) { struct mddev *mddev = container_of(ko, struct mddev, kobj); @@ -5577,15 +5558,8 @@ static void md_free(struct kobject *ko) if (mddev->sysfs_level) sysfs_put(mddev->sysfs_level); - if (mddev->gendisk) { - del_gendisk(mddev->gendisk); - put_disk(mddev->gendisk); - } - percpu_ref_exit(&mddev->writes_pending); - - bioset_exit(&mddev->bio_set); - bioset_exit(&mddev->sync_set); - kfree(mddev); + del_gendisk(mddev->gendisk); + put_disk(mddev->gendisk); } static const struct sysfs_ops md_sysfs_ops = { @@ -5593,7 +5567,7 @@ static const struct sysfs_ops md_sysfs_ops = { .store = md_attr_store, }; static struct kobj_type md_ktype = { - .release = md_free, + .release = md_kobj_release, .sysfs_ops = &md_sysfs_ops, .default_groups = md_attr_groups, }; @@ -5604,7 +5578,6 @@ static void mddev_delayed_delete(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); - kobject_del(&mddev->kobj); kobject_put(&mddev->kobj); } @@ -5623,7 +5596,7 @@ int mddev_init_writes_pending(struct mddev *mddev) } EXPORT_SYMBOL_GPL(mddev_init_writes_pending); -static int md_alloc(dev_t dev, char *name) +struct mddev *md_alloc(dev_t dev, char *name) { /* * If dev is zero, name is the name of a device to allocate with @@ -5651,8 +5624,8 @@ static int md_alloc(dev_t dev, char *name) mutex_lock(&disks_mutex); mddev = mddev_alloc(dev); if (IS_ERR(mddev)) { - mutex_unlock(&disks_mutex); - return PTR_ERR(mddev); + error = PTR_ERR(mddev); + goto out_unlock; } partitioned = (MAJOR(mddev->unit) != MD_MAJOR); @@ -5670,7 +5643,7 @@ static int md_alloc(dev_t dev, char *name) strcmp(mddev2->gendisk->disk_name, name) == 0) { spin_unlock(&all_mddevs_lock); error = -EEXIST; - goto out_unlock_disks_mutex; + goto out_free_mddev; } spin_unlock(&all_mddevs_lock); } @@ -5683,7 +5656,7 @@ static int md_alloc(dev_t dev, char *name) error = -ENOMEM; disk = blk_alloc_disk(NUMA_NO_NODE); if (!disk) - goto out_unlock_disks_mutex; + goto out_free_mddev; disk->major = MAJOR(mddev->unit); disk->first_minor = unit << shift; @@ -5704,25 +5677,45 @@ static int md_alloc(dev_t dev, char *name) mddev->gendisk = disk; error = add_disk(disk); if (error) - goto out_cleanup_disk; + goto out_put_disk; + kobject_init(&mddev->kobj, &md_ktype); error = kobject_add(&mddev->kobj, &disk_to_dev(disk)->kobj, "%s", "md"); - if (error) - goto out_del_gendisk; + if (error) { + /* + * The disk is already live at this point. Clear the hold flag + * and let mddev_put take care of the deletion, as it isn't any + * different from a normal close on last release now. + */ + mddev->hold_active = 0; + mutex_unlock(&disks_mutex); + mddev_put(mddev); + return ERR_PTR(error); + } kobject_uevent(&mddev->kobj, KOBJ_ADD); mddev->sysfs_state = sysfs_get_dirent_safe(mddev->kobj.sd, "array_state"); mddev->sysfs_level = sysfs_get_dirent_safe(mddev->kobj.sd, "level"); - goto out_unlock_disks_mutex; + mutex_unlock(&disks_mutex); + return mddev; -out_del_gendisk: - del_gendisk(disk); -out_cleanup_disk: +out_put_disk: put_disk(disk); -out_unlock_disks_mutex: +out_free_mddev: + mddev_free(mddev); +out_unlock: mutex_unlock(&disks_mutex); + return ERR_PTR(error); +} + +static int md_alloc_and_put(dev_t dev, char *name) +{ + struct mddev *mddev = md_alloc(dev, name); + + if (IS_ERR(mddev)) + return PTR_ERR(mddev); mddev_put(mddev); - return error; + return 0; } static void md_probe(dev_t dev) @@ -5730,7 +5723,7 @@ static void md_probe(dev_t dev) if (MAJOR(dev) == MD_MAJOR && MINOR(dev) >= 512) return; if (create_on_open) - md_alloc(dev, NULL); + md_alloc_and_put(dev, NULL); } static int add_named_array(const char *val, const struct kernel_param *kp) @@ -5752,12 +5745,12 @@ static int add_named_array(const char *val, const struct kernel_param *kp) return -E2BIG; strscpy(buf, val, len+1); if (strncmp(buf, "md_", 3) == 0) - return md_alloc(0, buf); + return md_alloc_and_put(0, buf); if (strncmp(buf, "md", 2) == 0 && isdigit(buf[2]) && kstrtoul(buf+2, 10, &devnum) == 0 && devnum <= MINORMASK) - return md_alloc(MKDEV(MD_MAJOR, devnum), NULL); + return md_alloc_and_put(MKDEV(MD_MAJOR, devnum), NULL); return -EINVAL; } @@ -6197,6 +6190,7 @@ static void __md_stop_writes(struct mddev *mddev) flush_workqueue(md_misc_wq); if (mddev->sync_thread) { set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_unregister_thread(&mddev->sync_thread); md_reap_sync_thread(mddev); } @@ -6244,11 +6238,11 @@ static void mddev_detach(struct mddev *mddev) static void __md_stop(struct mddev *mddev) { struct md_personality *pers = mddev->pers; - md_bitmap_destroy(mddev); mddev_detach(mddev); /* Ensure ->event_work is done */ if (mddev->event_work.func) flush_workqueue(md_misc_wq); + md_bitmap_destroy(mddev); spin_lock(&mddev->lock); mddev->pers = NULL; spin_unlock(&mddev->lock); @@ -6497,9 +6491,8 @@ static void autorun_devices(int part) break; } - md_probe(dev); - mddev = mddev_find(dev); - if (!mddev) + mddev = md_alloc(dev, NULL); + if (IS_ERR(mddev)) break; if (mddev_lock(mddev)) @@ -7782,45 +7775,33 @@ out_unlock: static int md_open(struct block_device *bdev, fmode_t mode) { - /* - * Succeed if we can lock the mddev, which confirms that - * it isn't being stopped right now. - */ - struct mddev *mddev = mddev_find(bdev->bd_dev); + struct mddev *mddev; int err; + spin_lock(&all_mddevs_lock); + mddev = mddev_get(bdev->bd_disk->private_data); + spin_unlock(&all_mddevs_lock); if (!mddev) return -ENODEV; - if (mddev->gendisk != bdev->bd_disk) { - /* we are racing with mddev_put which is discarding this - * bd_disk. - */ - mddev_put(mddev); - /* Wait until bdev->bd_disk is definitely gone */ - if (work_pending(&mddev->del_work)) - flush_workqueue(md_misc_wq); - return -EBUSY; - } - BUG_ON(mddev != bdev->bd_disk->private_data); - - if ((err = mutex_lock_interruptible(&mddev->open_mutex))) + err = mutex_lock_interruptible(&mddev->open_mutex); + if (err) goto out; - if (test_bit(MD_CLOSING, &mddev->flags)) { - mutex_unlock(&mddev->open_mutex); - err = -ENODEV; - goto out; - } + err = -ENODEV; + if (test_bit(MD_CLOSING, &mddev->flags)) + goto out_unlock; - err = 0; atomic_inc(&mddev->openers); mutex_unlock(&mddev->open_mutex); bdev_check_media_change(bdev); - out: - if (err) - mddev_put(mddev); + return 0; + +out_unlock: + mutex_unlock(&mddev->open_mutex); +out: + mddev_put(mddev); return err; } @@ -7844,6 +7825,17 @@ static unsigned int md_check_events(struct gendisk *disk, unsigned int clearing) return ret; } +static void md_free_disk(struct gendisk *disk) +{ + struct mddev *mddev = disk->private_data; + + percpu_ref_exit(&mddev->writes_pending); + bioset_exit(&mddev->bio_set); + bioset_exit(&mddev->sync_set); + + mddev_free(mddev); +} + const struct block_device_operations md_fops = { .owner = THIS_MODULE, @@ -7857,6 +7849,7 @@ const struct block_device_operations md_fops = .getgeo = md_getgeo, .check_events = md_check_events, .set_read_only = md_set_read_only, + .free_disk = md_free_disk, }; static int md_thread(void *arg) @@ -8018,16 +8011,26 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) max_sectors = mddev->dev_sectors; resync = mddev->curr_resync; - if (resync <= 3) { + if (resync < MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_DONE, &mddev->recovery)) /* Still cleaning up */ resync = max_sectors; - } else if (resync > max_sectors) + } else if (resync > max_sectors) { resync = max_sectors; - else + } else { resync -= atomic_read(&mddev->recovery_active); + if (resync < MD_RESYNC_ACTIVE) { + /* + * Resync has started, but the subtraction has + * yielded one of the special values. Force it + * to active to ensure the status reports an + * active resync. + */ + resync = MD_RESYNC_ACTIVE; + } + } - if (resync == 0) { + if (resync == MD_RESYNC_NONE) { if (test_bit(MD_RESYNCING_REMOTE, &mddev->recovery)) { struct md_rdev *rdev; @@ -8051,7 +8054,7 @@ static int status_resync(struct seq_file *seq, struct mddev *mddev) } return 0; } - if (resync < 3) { + if (resync < MD_RESYNC_ACTIVE) { seq_printf(seq, "\tresync=DELAYED"); return 1; } @@ -8152,6 +8155,8 @@ static void *md_seq_start(struct seq_file *seq, loff_t *pos) if (!l--) { mddev = list_entry(tmp, struct mddev, all_mddevs); mddev_get(mddev); + if (!mddev_get(mddev)) + continue; spin_unlock(&all_mddevs_lock); return mddev; } @@ -8165,25 +8170,35 @@ static void *md_seq_next(struct seq_file *seq, void *v, loff_t *pos) { struct list_head *tmp; struct mddev *next_mddev, *mddev = v; + struct mddev *to_put = NULL; ++*pos; if (v == (void*)2) return NULL; spin_lock(&all_mddevs_lock); - if (v == (void*)1) + if (v == (void*)1) { tmp = all_mddevs.next; - else + } else { + to_put = mddev; + tmp = mddev->all_mddevs.next; + } + + for (;;) { + if (tmp == &all_mddevs) { + next_mddev = (void*)2; + *pos = 0x10000; + break; + } + next_mddev = list_entry(tmp, struct mddev, all_mddevs); + if (mddev_get(next_mddev)) + break; + mddev = next_mddev; tmp = mddev->all_mddevs.next; - if (tmp != &all_mddevs) - next_mddev = mddev_get(list_entry(tmp,struct mddev,all_mddevs)); - else { - next_mddev = (void*)2; - *pos = 0x10000; } spin_unlock(&all_mddevs_lock); - if (v != (void*)1) + if (to_put) mddev_put(mddev); return next_mddev; @@ -8682,7 +8697,6 @@ void md_do_sync(struct md_thread *thread) unsigned long update_time; sector_t mark_cnt[SYNC_MARKS]; int last_mark,m; - struct list_head *tmp; sector_t last_check; int skipped = 0; struct md_rdev *rdev; @@ -8729,13 +8743,7 @@ void md_do_sync(struct md_thread *thread) mddev->last_sync_action = action ?: desc; - /* we overload curr_resync somewhat here. - * 0 == not engaged in resync at all - * 2 == checking that there is no conflict with another sync - * 1 == like 2, but have yielded to allow conflicting resync to - * commence - * other == active in resync - this many blocks - * + /* * Before starting a resync we must have set curr_resync to * 2, and then checked that every "conflicting" array has curr_resync * less than ours. When we find one that is the same or higher @@ -8747,24 +8755,29 @@ void md_do_sync(struct md_thread *thread) do { int mddev2_minor = -1; - mddev->curr_resync = 2; + mddev->curr_resync = MD_RESYNC_DELAYED; try_again: if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) goto skip; - for_each_mddev(mddev2, tmp) { + spin_lock(&all_mddevs_lock); + list_for_each_entry(mddev2, &all_mddevs, all_mddevs) { + if (test_bit(MD_DELETED, &mddev2->flags)) + continue; if (mddev2 == mddev) continue; if (!mddev->parallel_resync && mddev2->curr_resync && match_mddev_units(mddev, mddev2)) { DEFINE_WAIT(wq); - if (mddev < mddev2 && mddev->curr_resync == 2) { + if (mddev < mddev2 && + mddev->curr_resync == MD_RESYNC_DELAYED) { /* arbitrarily yield */ - mddev->curr_resync = 1; + mddev->curr_resync = MD_RESYNC_YIELDED; wake_up(&resync_wait); } - if (mddev > mddev2 && mddev->curr_resync == 1) + if (mddev > mddev2 && + mddev->curr_resync == MD_RESYNC_YIELDED) /* no need to wait here, we can wait the next * time 'round when curr_resync == 2 */ @@ -8782,7 +8795,8 @@ void md_do_sync(struct md_thread *thread) desc, mdname(mddev), mdname(mddev2)); } - mddev_put(mddev2); + spin_unlock(&all_mddevs_lock); + if (signal_pending(current)) flush_signals(current); schedule(); @@ -8792,7 +8806,8 @@ void md_do_sync(struct md_thread *thread) finish_wait(&resync_wait, &wq); } } - } while (mddev->curr_resync < 2); + spin_unlock(&all_mddevs_lock); + } while (mddev->curr_resync < MD_RESYNC_DELAYED); j = 0; if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { @@ -8876,7 +8891,7 @@ void md_do_sync(struct md_thread *thread) desc, mdname(mddev)); mddev->curr_resync = j; } else - mddev->curr_resync = 3; /* no longer delayed */ + mddev->curr_resync = MD_RESYNC_ACTIVE; /* no longer delayed */ mddev->curr_resync_completed = j; sysfs_notify_dirent_safe(mddev->sysfs_completed); md_new_event(); @@ -9011,14 +9026,14 @@ void md_do_sync(struct md_thread *thread) if (!test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) && !test_bit(MD_RECOVERY_INTR, &mddev->recovery) && - mddev->curr_resync > 3) { + mddev->curr_resync >= MD_RESYNC_ACTIVE) { mddev->curr_resync_completed = mddev->curr_resync; sysfs_notify_dirent_safe(mddev->sysfs_completed); } mddev->pers->sync_request(mddev, max_sectors, &skipped); if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) && - mddev->curr_resync > 3) { + mddev->curr_resync >= MD_RESYNC_ACTIVE) { if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) { if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) { if (mddev->curr_resync >= mddev->recovery_cp) { @@ -9082,7 +9097,7 @@ void md_do_sync(struct md_thread *thread) } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) mddev->resync_min = mddev->curr_resync_completed; set_bit(MD_RECOVERY_DONE, &mddev->recovery); - mddev->curr_resync = 0; + mddev->curr_resync = MD_RESYNC_NONE; spin_unlock(&mddev->lock); wake_up(&resync_wait); @@ -9303,6 +9318,7 @@ void md_check_recovery(struct mddev *mddev) * ->spare_active and clear saved_raid_disk */ set_bit(MD_RECOVERY_INTR, &mddev->recovery); + md_unregister_thread(&mddev->sync_thread); md_reap_sync_thread(mddev); clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery); clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -9338,6 +9354,7 @@ void md_check_recovery(struct mddev *mddev) goto unlock; } if (mddev->sync_thread) { + md_unregister_thread(&mddev->sync_thread); md_reap_sync_thread(mddev); goto unlock; } @@ -9417,8 +9434,7 @@ void md_reap_sync_thread(struct mddev *mddev) sector_t old_dev_sectors = mddev->dev_sectors; bool is_reshaped = false; - /* resync has finished, collect result */ - md_unregister_thread(&mddev->sync_thread); + /* sync_thread should be unregistered, collect result */ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) && mddev->degraded != mddev->raid_disks) { @@ -9466,6 +9482,7 @@ void md_reap_sync_thread(struct mddev *mddev) wake_up(&resync_wait); /* flag recovery needed just to double check */ set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); + sysfs_notify_dirent_safe(mddev->sysfs_completed); sysfs_notify_dirent_safe(mddev->sysfs_action); md_new_event(); if (mddev->event_work.func) @@ -9544,11 +9561,14 @@ EXPORT_SYMBOL_GPL(rdev_clear_badblocks); static int md_notify_reboot(struct notifier_block *this, unsigned long code, void *x) { - struct list_head *tmp; - struct mddev *mddev; + struct mddev *mddev, *n; int need_delay = 0; - for_each_mddev(mddev, tmp) { + spin_lock(&all_mddevs_lock); + list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { + if (!mddev_get(mddev)) + continue; + spin_unlock(&all_mddevs_lock); if (mddev_trylock(mddev)) { if (mddev->pers) __md_stop_writes(mddev); @@ -9557,7 +9577,11 @@ static int md_notify_reboot(struct notifier_block *this, mddev_unlock(mddev); } need_delay = 1; + mddev_put(mddev); + spin_lock(&all_mddevs_lock); } + spin_unlock(&all_mddevs_lock); + /* * certain more exotic SCSI devices are known to be * volatile wrt too early system reboots. While the @@ -9876,8 +9900,7 @@ void md_autostart_arrays(int part) static __exit void md_exit(void) { - struct mddev *mddev; - struct list_head *tmp; + struct mddev *mddev, *n; int delay = 1; unregister_blkdev(MD_MAJOR,"md"); @@ -9897,17 +9920,24 @@ static __exit void md_exit(void) } remove_proc_entry("mdstat", NULL); - for_each_mddev(mddev, tmp) { + spin_lock(&all_mddevs_lock); + list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) { + if (!mddev_get(mddev)) + continue; + spin_unlock(&all_mddevs_lock); export_array(mddev); mddev->ctime = 0; mddev->hold_active = 0; /* - * for_each_mddev() will call mddev_put() at the end of each - * iteration. As the mddev is now fully clear, this will - * schedule the mddev for destruction by a workqueue, and the + * As the mddev is now fully clear, mddev_put will schedule + * the mddev for destruction by a workqueue, and the * destroy_workqueue() below will wait for that to complete. */ + mddev_put(mddev); + spin_lock(&all_mddevs_lock); } + spin_unlock(&all_mddevs_lock); + destroy_workqueue(md_rdev_misc_wq); destroy_workqueue(md_misc_wq); destroy_workqueue(md_wq); |