Diffstat (limited to 'drivers/md/md.c')
-rw-r--r--	drivers/md/md.c	95
1 file changed, 60 insertions, 35 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c068f171b4eb..866825f10b4c 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -284,6 +284,8 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
 	 * go away inside make_request
 	 */
 	sectors = bio_sectors(bio);
+	/* bio could be mergeable after passing to underlayer */
+	bio->bi_rw &= ~REQ_NOMERGE;
 	mddev->pers->make_request(mddev, bio);
 
 	cpu = part_stat_lock();
@@ -305,7 +307,7 @@ static blk_qc_t md_make_request(struct request_queue *q, struct bio *bio)
  */
 void mddev_suspend(struct mddev *mddev)
 {
-	WARN_ON_ONCE(current == mddev->thread->tsk);
+	WARN_ON_ONCE(mddev->thread && current == mddev->thread->tsk);
 	if (mddev->suspended++)
 		return;
 	synchronize_rcu();
@@ -718,6 +720,7 @@ static void super_written(struct bio *bio)
 
 	if (atomic_dec_and_test(&mddev->pending_writes))
 		wake_up(&mddev->sb_wait);
+	rdev_dec_pending(rdev, mddev);
 	bio_put(bio);
 }
 
@@ -732,6 +735,8 @@ void md_super_write(struct mddev *mddev, struct md_rdev *rdev,
 	 */
 	struct bio *bio = bio_alloc_mddev(GFP_NOIO, 1, mddev);
 
+	atomic_inc(&rdev->nr_pending);
+
 	bio->bi_bdev = rdev->meta_bdev ? rdev->meta_bdev : rdev->bdev;
 	bio->bi_iter.bi_sector = sector;
 	bio_add_page(bio, page, size, 0);
@@ -2286,19 +2291,24 @@ void md_update_sb(struct mddev *mddev, int force_change)
 		return;
 	}
 
+repeat:
 	if (mddev_is_clustered(mddev)) {
 		if (test_and_clear_bit(MD_CHANGE_DEVS, &mddev->flags))
 			force_change = 1;
+		if (test_and_clear_bit(MD_CHANGE_CLEAN, &mddev->flags))
+			nospares = 1;
 		ret = md_cluster_ops->metadata_update_start(mddev);
 		/* Has someone else has updated the sb */
 		if (!does_sb_need_changing(mddev)) {
 			if (ret == 0)
 				md_cluster_ops->metadata_update_cancel(mddev);
-			clear_bit(MD_CHANGE_PENDING, &mddev->flags);
+			bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+							 BIT(MD_CHANGE_DEVS) |
+							 BIT(MD_CHANGE_CLEAN));
 			return;
 		}
 	}
-repeat:
+
 	/* First make sure individual recovery_offsets are correct */
 	rdev_for_each(rdev, mddev) {
 		if (rdev->raid_disk >= 0 &&
@@ -2425,15 +2435,14 @@ repeat:
 	md_super_wait(mddev);
 	/* if there was a failure, MD_CHANGE_DEVS was set, and we re-write super */
 
-	spin_lock(&mddev->lock);
+	if (mddev_is_clustered(mddev) && ret == 0)
+		md_cluster_ops->metadata_update_finish(mddev);
+
 	if (mddev->in_sync != sync_req ||
-	    test_bit(MD_CHANGE_DEVS, &mddev->flags)) {
+	    !bit_clear_unless(&mddev->flags, BIT(MD_CHANGE_PENDING),
+			       BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_CLEAN)))
 		/* have to write it out again */
-		spin_unlock(&mddev->lock);
 		goto repeat;
-	}
-	clear_bit(MD_CHANGE_PENDING, &mddev->flags);
-	spin_unlock(&mddev->lock);
 	wake_up(&mddev->sb_wait);
 	if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery))
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
@@ -2447,9 +2456,6 @@ repeat:
 		clear_bit(BlockedBadBlocks, &rdev->flags);
 		wake_up(&rdev->blocked_wait);
 	}
-
-	if (mddev_is_clustered(mddev) && ret == 0)
-		md_cluster_ops->metadata_update_finish(mddev);
 }
 EXPORT_SYMBOL(md_update_sb);
 
@@ -4811,6 +4817,10 @@ array_size_store(struct mddev *mddev, const char *buf, size_t len)
 	if (err)
 		return err;
 
+	/* cluster raid doesn't support change array_sectors */
+	if (mddev_is_clustered(mddev))
+		return -EINVAL;
+
 	if (strncmp(buf, "default", 7) == 0) {
 		if (mddev->pers)
 			sectors = mddev->pers->size(mddev, 0, 0);
@@ -5034,7 +5044,7 @@ static int md_alloc(dev_t dev, char *name)
 	disk->fops = &md_fops;
 	disk->private_data = mddev;
 	disk->queue = mddev->queue;
-	blk_queue_flush(mddev->queue, REQ_FLUSH | REQ_FUA);
+	blk_queue_write_cache(mddev->queue, true, true);
 	/* Allow extended partitions.  This makes the
 	 * 'mdp' device redundant, but we can't really
 	 * remove it now.
@@ -6432,6 +6442,10 @@ static int update_size(struct mddev *mddev, sector_t num_sectors)
 	int rv;
 	int fit = (num_sectors == 0);
 
+	/* cluster raid doesn't support update size */
+	if (mddev_is_clustered(mddev))
+		return -EINVAL;
+
 	if (mddev->pers->resize == NULL)
 		return -EINVAL;
 	/* The "num_sectors" is the number of sectors of each device that
@@ -6883,7 +6897,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
 
 	case ADD_NEW_DISK:
 		/* We can support ADD_NEW_DISK on read-only arrays
-		 * on if we are re-adding a preexisting device.
+		 * only if we are re-adding a preexisting device.
 		 * So require mddev->pers and MD_DISK_SYNC.
 		 */
 		if (mddev->pers) {
@@ -7780,7 +7794,7 @@ void md_do_sync(struct md_thread *thread)
 	struct md_rdev *rdev;
 	char *desc, *action = NULL;
 	struct blk_plug plug;
-	bool cluster_resync_finished = false;
+	int ret;
 
 	/* just incase thread restarts... */
 	if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7790,6 +7804,19 @@ void md_do_sync(struct md_thread *thread)
 		return;
 	}
 
+	if (mddev_is_clustered(mddev)) {
+		ret = md_cluster_ops->resync_start(mddev);
+		if (ret)
+			goto skip;
+
+		if (!(test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
+			test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) ||
+			test_bit(MD_RECOVERY_RECOVER, &mddev->recovery))
+		     && ((unsigned long long)mddev->curr_resync_completed
+			 < (unsigned long long)mddev->resync_max_sectors))
+			goto skip;
+	}
+
 	if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
 		if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
 			desc = "data-check";
@@ -8084,11 +8111,6 @@ void md_do_sync(struct md_thread *thread)
 		mddev->curr_resync_completed = mddev->curr_resync;
 		sysfs_notify(&mddev->kobj, NULL, "sync_completed");
 	}
-	/* tell personality and other nodes that we are finished */
-	if (mddev_is_clustered(mddev)) {
-		md_cluster_ops->resync_finish(mddev);
-		cluster_resync_finished = true;
-	}
 	mddev->pers->sync_request(mddev, max_sectors, &skipped);
 
 	if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
@@ -8125,12 +8147,18 @@ void md_do_sync(struct md_thread *thread)
 		}
 	}
  skip:
-	set_bit(MD_CHANGE_DEVS, &mddev->flags);
-
 	if (mddev_is_clustered(mddev) &&
-	    test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
-	    !cluster_resync_finished)
+	    ret == 0) {
+		/* set CHANGE_PENDING here since maybe another
+		 * update is needed, so other nodes are informed */
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_PENDING) | BIT(MD_CHANGE_DEVS));
+		md_wakeup_thread(mddev->thread);
+		wait_event(mddev->sb_wait,
+			   !test_bit(MD_CHANGE_PENDING, &mddev->flags));
 		md_cluster_ops->resync_finish(mddev);
+	} else
+		set_bit(MD_CHANGE_DEVS, &mddev->flags);
 
 	spin_lock(&mddev->lock);
 	if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -8221,18 +8249,9 @@ static void md_start_sync(struct work_struct *ws)
 	struct mddev *mddev = container_of(ws, struct mddev, del_work);
 	int ret = 0;
 
-	if (mddev_is_clustered(mddev)) {
-		ret = md_cluster_ops->resync_start(mddev);
-		if (ret) {
-			mddev->sync_thread = NULL;
-			goto out;
-		}
-	}
-
 	mddev->sync_thread = md_register_thread(md_do_sync,
						mddev,
						"resync");
-out:
 	if (!mddev->sync_thread) {
 		if (!(mddev_is_clustered(mddev) && ret == -EAGAIN))
 			printk(KERN_ERR "%s: could not start resync"
@@ -8531,6 +8550,7 @@ EXPORT_SYMBOL(md_finish_reshape);
 int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 		       int is_new)
 {
+	struct mddev *mddev = rdev->mddev;
 	int rv;
 	if (is_new)
 		s += rdev->new_data_offset;
@@ -8540,8 +8560,8 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
 	if (rv == 0) {
 		/* Make sure they get written out promptly */
 		sysfs_notify_dirent_safe(rdev->sysfs_state);
-		set_bit(MD_CHANGE_CLEAN, &rdev->mddev->flags);
-		set_bit(MD_CHANGE_PENDING, &rdev->mddev->flags);
+		set_mask_bits(&mddev->flags, 0,
+			      BIT(MD_CHANGE_CLEAN) | BIT(MD_CHANGE_PENDING));
 		md_wakeup_thread(rdev->mddev->thread);
 		return 1;
 	} else
@@ -8675,6 +8695,11 @@ static void check_sb_changes(struct mddev *mddev, struct md_rdev *rdev)
 				ret = remove_and_add_spares(mddev, rdev2);
 				pr_info("Activated spare: %s\n",
 						bdevname(rdev2->bdev,b));
+				/* wakeup mddev->thread here, so array could
+				 * perform resync with the new activated disk */
+				set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+				md_wakeup_thread(mddev->thread);
+
 			}
 			/* device faulty
 			 * We just want to do the minimum to mark the disk
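Note: the pattern that recurs above is replacing lock-protected set_bit()/clear_bit() sequences on mddev->flags with atomic compound updates (set_mask_bits() and bit_clear_unless(), both built on cmpxchg()), so md_update_sb() can tell whether MD_CHANGE_DEVS or MD_CHANGE_CLEAN were raised again while the superblock write was in flight and loop back to "repeat" if so. The sketch below is a rough userspace-only illustration of that compare-and-swap semantics using GCC __atomic builtins; the CHANGE_* flag numbers and the set_bits()/clear_bits_unless() helpers are illustrative stand-ins, not the kernel API.

/*
 * Userspace sketch of "clear these flag bits unless any of those bits
 * got set meanwhile", mimicking the semantics the patch relies on.
 * Not kernel code; assumed GCC/clang __atomic builtins.
 */
#include <stdbool.h>
#include <stdio.h>

#define BIT(n) (1UL << (n))

/* Hypothetical stand-ins for the MD_CHANGE_* flag numbers. */
enum { CHANGE_DEVS = 0, CHANGE_CLEAN = 1, CHANGE_PENDING = 2 };

/* Atomically OR bits into *flags, like set_mask_bits(ptr, 0, bits). */
static void set_bits(unsigned long *flags, unsigned long bits)
{
	__atomic_fetch_or(flags, bits, __ATOMIC_SEQ_CST);
}

/*
 * Clear 'clear' in *flags unless any bit in 'test' is set; return true
 * if the clear happened (no 'test' bit was observed at any retry).
 */
static bool clear_bits_unless(unsigned long *flags, unsigned long clear,
			      unsigned long test)
{
	unsigned long old = __atomic_load_n(flags, __ATOMIC_SEQ_CST);

	do {
		if (old & test)
			return false;	/* superblock was re-dirtied meanwhile */
	} while (!__atomic_compare_exchange_n(flags, &old, old & ~clear,
					      false, __ATOMIC_SEQ_CST,
					      __ATOMIC_SEQ_CST));
	return true;
}

int main(void)
{
	unsigned long flags = 0;

	/* A writer marks an update pending, as md_do_sync() does above. */
	set_bits(&flags, BIT(CHANGE_PENDING) | BIT(CHANGE_DEVS));

	/* The updater consumes DEVS, then tries to drop PENDING. */
	flags &= ~BIT(CHANGE_DEVS);	/* stand-in for test_and_clear_bit() */
	if (!clear_bits_unless(&flags, BIT(CHANGE_PENDING),
			       BIT(CHANGE_DEVS) | BIT(CHANGE_CLEAN)))
		printf("flags changed under us: would goto repeat\n");
	else
		printf("PENDING cleared, flags=%#lx\n", flags);
	return 0;
}

With no concurrent writer the demo prints "PENDING cleared, flags=0"; if CHANGE_DEVS or CHANGE_CLEAN were set again between the superblock write and the clear attempt, clear_bits_unless() refuses and the caller would rewrite the superblock, which is the behaviour the repositioned "repeat:" label in md_update_sb() provides.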
