diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/exofs/Kconfig.ore | 2 | ||||
| -rw-r--r-- | fs/exofs/ore.c | 100 | ||||
| -rw-r--r-- | fs/exofs/ore_raid.c | 54 | ||||
| -rw-r--r-- | fs/exofs/ore_raid.h | 21 | 
4 files changed, 97 insertions, 80 deletions
diff --git a/fs/exofs/Kconfig.ore b/fs/exofs/Kconfig.ore index 1ca7fb7b6ba8..2daf2329c28d 100644 --- a/fs/exofs/Kconfig.ore +++ b/fs/exofs/Kconfig.ore @@ -9,4 +9,6 @@ config ORE  	tristate  	depends on EXOFS_FS || PNFS_OBJLAYOUT  	select ASYNC_XOR +	select RAID6_PQ +	select ASYNC_PQ  	default SCSI_OSD_ULD diff --git a/fs/exofs/ore.c b/fs/exofs/ore.c index dae884694bd9..cfc0205d62c4 100644 --- a/fs/exofs/ore.c +++ b/fs/exofs/ore.c @@ -58,9 +58,12 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)  		layout->parity = 1;  		break;  	case PNFS_OSD_RAID_PQ: +		layout->parity = 2; +		break;  	case PNFS_OSD_RAID_4:  	default: -		ORE_ERR("Only RAID_0/5 for now\n"); +		ORE_ERR("Only RAID_0/5/6 for now received-enum=%d\n", +			layout->raid_algorithm);  		return -EINVAL;  	}  	if (0 != (layout->stripe_unit & ~PAGE_MASK)) { @@ -112,6 +115,8 @@ int ore_verify_layout(unsigned total_comps, struct ore_layout *layout)  		layout->max_io_length /= stripe_length;  		layout->max_io_length *= stripe_length;  	} +	ORE_DBGMSG("max_io_length=0x%lx\n", layout->max_io_length); +  	return 0;  }  EXPORT_SYMBOL(ore_verify_layout); @@ -545,21 +550,24 @@ void ore_calc_stripe_info(struct ore_layout *layout, u64 file_offset,  	/* "H - (N * U)" is just "H % U" so it's bound to u32 */  	u32	C = (u32)(H - (N * U)) / stripe_unit + G * group_width; +	u32 first_dev = C - C % group_width;  	div_u64_rem(file_offset, stripe_unit, &si->unit_off);  	si->obj_offset = si->unit_off + (N * stripe_unit) +  				  (M * group_depth * stripe_unit); +	si->cur_comp = C - first_dev; +	si->cur_pg = si->unit_off / PAGE_SIZE;  	if (parity) {  		u32 LCMdP = lcm(group_width, parity) / parity;  		/* R     = N % LCMdP; */  		u32 RxP   = (N % LCMdP) * parity; -		u32 first_dev = C - C % group_width;  		si->par_dev = (group_width + group_width - parity - RxP) %  			      group_width + first_dev; -		si->dev = (group_width + C - RxP) % group_width + first_dev; +		si->dev = (group_width + group_width + C - RxP) % +			  group_width + first_dev;  		si->bytes_in_stripe = U;  		si->first_stripe_start = M * S + G * T + N * U;  	} else { @@ -649,6 +657,43 @@ out:	/* we fail the complete unit on an error eg don't advance  	return ret;  } +static int _add_parity_units(struct ore_io_state *ios, +			     struct ore_striping_info *si, +			     unsigned dev, unsigned first_dev, +			     unsigned mirrors_p1, unsigned devs_in_group, +			     unsigned cur_len) +{ +	unsigned do_parity; +	int ret = 0; + +	for (do_parity = ios->layout->parity; do_parity; --do_parity) { +		struct ore_per_dev_state *per_dev; + +		per_dev = &ios->per_dev[dev - first_dev]; +		if (!per_dev->length && !per_dev->offset) { +			/* Only/always the parity unit of the first +			 * stripe will be empty. So this is a chance to +			 * initialize the per_dev info. +			 */ +			per_dev->dev = dev; +			per_dev->offset = si->obj_offset - si->unit_off; +		} + +		ret = _ore_add_parity_unit(ios, si, per_dev, cur_len, +					   do_parity == 1); +		if (unlikely(ret)) +				break; + +		if (do_parity != 1) { +			dev = ((dev + mirrors_p1) % devs_in_group) + first_dev; +			si->cur_comp = (si->cur_comp + 1) % +						       ios->layout->group_width; +		} +	} + +	return ret; +} +  static int _prepare_for_striping(struct ore_io_state *ios)  {  	struct ore_striping_info *si = &ios->si; @@ -658,7 +703,6 @@ static int _prepare_for_striping(struct ore_io_state *ios)  	unsigned devs_in_group = group_width * mirrors_p1;  	unsigned dev = si->dev;  	unsigned first_dev = dev - (dev % devs_in_group); -	unsigned dev_order;  	unsigned cur_pg = ios->pages_consumed;  	u64 length = ios->length;  	int ret = 0; @@ -670,16 +714,13 @@ static int _prepare_for_striping(struct ore_io_state *ios)  	BUG_ON(length > si->length); -	dev_order = _dev_order(devs_in_group, mirrors_p1, si->par_dev, dev); -	si->cur_comp = dev_order; -	si->cur_pg = si->unit_off / PAGE_SIZE; -  	while (length) { -		unsigned comp = dev - first_dev; -		struct ore_per_dev_state *per_dev = &ios->per_dev[comp]; +		struct ore_per_dev_state *per_dev = +						&ios->per_dev[dev - first_dev];  		unsigned cur_len, page_off = 0; -		if (!per_dev->length) { +		if (!per_dev->length && !per_dev->offset) { +			/* First time initialize the per_dev info. */  			per_dev->dev = dev;  			if (dev == si->dev) {  				WARN_ON(dev == si->par_dev); @@ -688,13 +729,7 @@ static int _prepare_for_striping(struct ore_io_state *ios)  				page_off = si->unit_off & ~PAGE_MASK;  				BUG_ON(page_off && (page_off != ios->pgbase));  			} else { -				if (si->cur_comp > dev_order) -					per_dev->offset = -						si->obj_offset - si->unit_off; -				else /* si->cur_comp < dev_order */ -					per_dev->offset = -						si->obj_offset + stripe_unit - -								   si->unit_off; +				per_dev->offset = si->obj_offset - si->unit_off;  				cur_len = stripe_unit;  			}  		} else { @@ -708,11 +743,9 @@ static int _prepare_for_striping(struct ore_io_state *ios)  		if (unlikely(ret))  			goto out; -		dev += mirrors_p1; -		dev = (dev % devs_in_group) + first_dev; -  		length -= cur_len; +		dev = ((dev + mirrors_p1) % devs_in_group) + first_dev;  		si->cur_comp = (si->cur_comp + 1) % group_width;  		if (unlikely((dev == si->par_dev) || (!length && ios->sp2d))) {  			if (!length && ios->sp2d) { @@ -720,23 +753,16 @@ static int _prepare_for_striping(struct ore_io_state *ios)  				 * stripe. then operate on parity dev.  				 */  				dev = si->par_dev; -			} -			if (ios->sp2d) -				/* In writes cur_len just means if it's the -				 * last one. See _ore_add_parity_unit. -				 */ -				cur_len = length; -			per_dev = &ios->per_dev[dev - first_dev]; -			if (!per_dev->length) { -				/* Only/always the parity unit of the first -				 * stripe will be empty. So this is a chance to -				 * initialize the per_dev info. -				 */ -				per_dev->dev = dev; -				per_dev->offset = si->obj_offset - si->unit_off; +				/* If last stripe operate on parity comp */ +				si->cur_comp = group_width - ios->layout->parity;  			} -			ret = _ore_add_parity_unit(ios, si, per_dev, cur_len); +			/* In writes cur_len just means if it's the +			 * last one. See _ore_add_parity_unit. +			 */ +			ret = _add_parity_units(ios, si, dev, first_dev, +						mirrors_p1, devs_in_group, +						ios->sp2d ? length : cur_len);  			if (unlikely(ret))  					goto out; @@ -747,6 +773,8 @@ static int _prepare_for_striping(struct ore_io_state *ios)  			/* Next stripe, start fresh */  			si->cur_comp = 0;  			si->cur_pg = 0; +			si->obj_offset += cur_len; +			si->unit_off = 0;  		}  	}  out: diff --git a/fs/exofs/ore_raid.c b/fs/exofs/ore_raid.c index 4e2c032ab8a1..7f20f25c232c 100644 --- a/fs/exofs/ore_raid.c +++ b/fs/exofs/ore_raid.c @@ -218,22 +218,28 @@ static unsigned _sp2d_max_pg(struct __stripe_pages_2d *sp2d)  static void _gen_xor_unit(struct __stripe_pages_2d *sp2d)  {  	unsigned p; +	unsigned tx_flags = ASYNC_TX_ACK; + +	if (sp2d->parity == 1) +		tx_flags |= ASYNC_TX_XOR_ZERO_DST; +  	for (p = 0; p < sp2d->pages_in_unit; p++) {  		struct __1_page_stripe *_1ps = &sp2d->_1p_stripes[p];  		if (!_1ps->write_count)  			continue; -		init_async_submit(&_1ps->submit, -			ASYNC_TX_XOR_ZERO_DST | ASYNC_TX_ACK, -			NULL, -			NULL, NULL, -			(addr_conv_t *)_1ps->scribble); +		init_async_submit(&_1ps->submit, tx_flags, +			NULL, NULL, NULL, (addr_conv_t *)_1ps->scribble); -		/* TODO: raid6 */ -		_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], _1ps->pages, -				     0, sp2d->data_devs, PAGE_SIZE, -				     &_1ps->submit); +		if (sp2d->parity == 1) +			_1ps->tx = async_xor(_1ps->pages[sp2d->data_devs], +						_1ps->pages, 0, sp2d->data_devs, +						PAGE_SIZE, &_1ps->submit); +		else /* parity == 2 */ +			_1ps->tx = async_gen_syndrome(_1ps->pages, 0, +						sp2d->data_devs + sp2d->parity, +						PAGE_SIZE, &_1ps->submit);  	}  	for (p = 0; p < sp2d->pages_in_unit; p++) { @@ -404,9 +410,8 @@ static int _add_to_r4w_last_page(struct ore_io_state *ios, u64 *offset)  	ore_calc_stripe_info(ios->layout, *offset, 0, &si); -	p = si.unit_off / PAGE_SIZE; -	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, -		       ios->layout->mirrors_p1, si.par_dev, si.dev); +	p = si.cur_pg; +	c = si.cur_comp;  	page = ios->sp2d->_1p_stripes[p].pages[c];  	pg_len = PAGE_SIZE - (si.unit_off % PAGE_SIZE); @@ -534,9 +539,8 @@ static int _read_4_write_last_stripe(struct ore_io_state *ios)  		goto read_it;  	ore_calc_stripe_info(ios->layout, offset, 0, &read_si); -	p = read_si.unit_off / PAGE_SIZE; -	c = _dev_order(ios->layout->group_width * ios->layout->mirrors_p1, -		       ios->layout->mirrors_p1, read_si.par_dev, read_si.dev); +	p = read_si.cur_pg; +	c = read_si.cur_comp;  	if (min_p == sp2d->pages_in_unit) {  		/* Didn't do it yet */ @@ -620,7 +624,7 @@ static int _read_4_write_execute(struct ore_io_state *ios)  int _ore_add_parity_unit(struct ore_io_state *ios,  			    struct ore_striping_info *si,  			    struct ore_per_dev_state *per_dev, -			    unsigned cur_len) +			    unsigned cur_len, bool do_xor)  {  	if (ios->reading) {  		if (per_dev->cur_sg >= ios->sgs_per_dev) { @@ -640,17 +644,16 @@ int _ore_add_parity_unit(struct ore_io_state *ios,  		si->cur_pg = _sp2d_min_pg(sp2d);  		num_pages  = _sp2d_max_pg(sp2d) + 1 - si->cur_pg; -		if (!cur_len) /* If last stripe operate on parity comp */ -			si->cur_comp = sp2d->data_devs; -  		if (!per_dev->length) {  			per_dev->offset += si->cur_pg * PAGE_SIZE;  			/* If first stripe, Read in all read4write pages  			 * (if needed) before we calculate the first parity.  			 */ -			_read_4_write_first_stripe(ios); +			if (do_xor) +				_read_4_write_first_stripe(ios);  		} -		if (!cur_len) /* If last stripe r4w pages of last stripe */ +		if (!cur_len && do_xor) +			/* If last stripe r4w pages of last stripe */  			_read_4_write_last_stripe(ios);  		_read_4_write_execute(ios); @@ -662,7 +665,7 @@ int _ore_add_parity_unit(struct ore_io_state *ios,  			++(ios->cur_par_page);  		} -		BUG_ON(si->cur_comp != sp2d->data_devs); +		BUG_ON(si->cur_comp < sp2d->data_devs);  		BUG_ON(si->cur_pg + num_pages > sp2d->pages_in_unit);  		ret = _ore_add_stripe_unit(ios,  &array_start, 0, pages, @@ -670,9 +673,10 @@ int _ore_add_parity_unit(struct ore_io_state *ios,  		if (unlikely(ret))  			return ret; -		/* TODO: raid6 if (last_parity_dev) */ -		_gen_xor_unit(sp2d); -		_sp2d_reset(sp2d, ios->r4w, ios->private); +		if (do_xor) { +			_gen_xor_unit(sp2d); +			_sp2d_reset(sp2d, ios->r4w, ios->private); +		}  	}  	return 0;  } diff --git a/fs/exofs/ore_raid.h b/fs/exofs/ore_raid.h index 2ffd2c3c6e46..cf6375d82129 100644 --- a/fs/exofs/ore_raid.h +++ b/fs/exofs/ore_raid.h @@ -31,24 +31,6 @@  #define ORE_DBGMSG2(M...) do {} while (0)  /* #define ORE_DBGMSG2 ORE_DBGMSG */ -/* Calculate the component order in a stripe. eg the logical data unit - * address within the stripe of @dev given the @par_dev of this stripe. - */ -static inline unsigned _dev_order(unsigned devs_in_group, unsigned mirrors_p1, -				  unsigned par_dev, unsigned dev) -{ -	unsigned first_dev = dev - dev % devs_in_group; - -	dev -= first_dev; -	par_dev -= first_dev; - -	if (devs_in_group == par_dev) /* The raid 0 case */ -		return dev / mirrors_p1; -	/* raid4/5/6 case */ -	return ((devs_in_group + dev - par_dev - mirrors_p1) % devs_in_group) / -	       mirrors_p1; -} -  /* ios_raid.c stuff needed by ios.c */  int _ore_post_alloc_raid_stuff(struct ore_io_state *ios);  void _ore_free_raid_stuff(struct ore_io_state *ios); @@ -56,7 +38,8 @@ void _ore_free_raid_stuff(struct ore_io_state *ios);  void _ore_add_sg_seg(struct ore_per_dev_state *per_dev, unsigned cur_len,  		 bool not_last);  int _ore_add_parity_unit(struct ore_io_state *ios, struct ore_striping_info *si, -		     struct ore_per_dev_state *per_dev, unsigned cur_len); +		     struct ore_per_dev_state *per_dev, unsigned cur_len, +		     bool do_xor);  void _ore_add_stripe_page(struct __stripe_pages_2d *sp2d,  		       struct ore_striping_info *si, struct page *page);  static inline void _add_stripe_page(struct __stripe_pages_2d *sp2d,  | 
