-rw-r--r--  drivers/staging/lustre/lustre/llite/rw26.c |  92
-rw-r--r--  fs/nfs/direct.c                            | 288
-rw-r--r--  include/linux/uio.h                        |   2
-rw-r--r--  mm/iov_iter.c                              |  40
4 files changed, 166 insertions, 256 deletions
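
This commit replaces the hand-rolled get_user_pages() segment loops in the Lustre and NFS direct-I/O paths with the new iov_iter primitive it introduces, iov_iter_get_pages_alloc(), which pins the user pages backing the iterator's current position and allocates the page array itself. For orientation only (not part of the patch), a minimal sketch of the calling convention the converted callers follow; do_io() is a hypothetical consumer standing in for ll_direct_IO_26_seg() or the nfs_create_request() loop, and is assumed to drop the page references handed to it:

	/* Sketch only, under kernel-context assumptions (<linux/uio.h>, <linux/mm.h>). */
	static ssize_t consume_iter(struct iov_iter *iter, size_t maxsize)
	{
		ssize_t tot_bytes = 0;

		while (iov_iter_count(iter)) {
			struct page **pages;
			size_t start;	/* set to the payload offset within pages[0] */
			ssize_t n;	/* bytes pinned, or -errno */
			int npages;

			n = iov_iter_get_pages_alloc(iter, &pages, maxsize, &start);
			if (n < 0)
				return tot_bytes ? tot_bytes : n;
			/* n bytes of payload begin 'start' bytes into pages[0]. */
			npages = (n + start + PAGE_SIZE - 1) / PAGE_SIZE;
			do_io(pages, npages, start, n);	/* hypothetical; puts the pages */
			kvfree(pages);	/* the array came from kmalloc() or vmalloc() */
			iov_iter_advance(iter, n);
			tot_bytes += n;
		}
		return tot_bytes;
	}
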
diff --git a/drivers/staging/lustre/lustre/llite/rw26.c b/drivers/staging/lustre/lustre/llite/rw26.c
index f718585c9e08..6b5994577b6b 100644
--- a/drivers/staging/lustre/lustre/llite/rw26.c
+++ b/drivers/staging/lustre/lustre/llite/rw26.c
@@ -218,14 +218,11 @@ static void ll_free_user_pages(struct page **pages, int npages, int do_dirty)
 	int i;
 
 	for (i = 0; i < npages; i++) {
-		if (pages[i] == NULL)
-			break;
 		if (do_dirty)
 			set_page_dirty_lock(pages[i]);
 		page_cache_release(pages[i]);
 	}
-
-	OBD_FREE_LARGE(pages, npages * sizeof(*pages));
+	kvfree(pages);
 }
 
 ssize_t ll_direct_rw_pages(const struct lu_env *env, struct cl_io *io,
@@ -370,10 +367,9 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 	struct file *file = iocb->ki_filp;
 	struct inode *inode = file->f_mapping->host;
 	struct ccc_object *obj = cl_inode2ccc(inode);
-	long count = iov_iter_count(iter);
-	long tot_bytes = 0, result = 0;
+	ssize_t count = iov_iter_count(iter);
+	ssize_t tot_bytes = 0, result = 0;
 	struct ll_inode_info *lli = ll_i2info(inode);
-	unsigned long seg = 0;
 	long size = MAX_DIO_SIZE;
 	int refcheck;
 
@@ -407,63 +403,49 @@ static ssize_t ll_direct_IO_26(int rw, struct kiocb *iocb,
 		mutex_lock(&inode->i_mutex);
 
 	LASSERT(obj->cob_transient_pages == 0);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		long iov_left = iter->iov[seg].iov_len;
-		unsigned long user_addr = (unsigned long)iter->iov[seg].iov_base;
+	while (iov_iter_count(iter)) {
+		struct page **pages;
+		size_t offs;
 
+		count = min_t(size_t, iov_iter_count(iter), size);
 		if (rw == READ) {
 			if (file_offset >= i_size_read(inode))
 				break;
-			if (file_offset + iov_left > i_size_read(inode))
-				iov_left = i_size_read(inode) - file_offset;
+			if (file_offset + count > i_size_read(inode))
+				count = i_size_read(inode) - file_offset;
 		}
 
-		while (iov_left > 0) {
-			struct page **pages;
-			int page_count, max_pages = 0;
-			long bytes;
-
-			bytes = min(size, iov_left);
-			page_count = ll_get_user_pages(rw, user_addr, bytes,
-						       &pages, &max_pages);
-			if (likely(page_count > 0)) {
-				if (unlikely(page_count <  max_pages))
-					bytes = page_count << PAGE_CACHE_SHIFT;
-				result = ll_direct_IO_26_seg(env, io, rw, inode,
-							     file->f_mapping,
-							     bytes, file_offset,
-							     pages, page_count);
-				ll_free_user_pages(pages, max_pages, rw==READ);
-			} else if (page_count == 0) {
-				GOTO(out, result = -EFAULT);
-			} else {
-				result = page_count;
-			}
-			if (unlikely(result <= 0)) {
-				/* If we can't allocate a large enough buffer
-				 * for the request, shrink it to a smaller
-				 * PAGE_SIZE multiple and try again.
-				 * We should always be able to kmalloc for a
-				 * page worth of page pointers = 4MB on i386. */
-				if (result == -ENOMEM &&
-				    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
-					   PAGE_CACHE_SIZE) {
-					size = ((((size / 2) - 1) |
-						 ~CFS_PAGE_MASK) + 1) &
-						CFS_PAGE_MASK;
-					CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
-					       size);
-					continue;
-				}
-
-				GOTO(out, result);
+		result = iov_iter_get_pages_alloc(iter, &pages, count, &offs);
+		if (likely(result > 0)) {
+			int n = (result + offs + PAGE_SIZE - 1) / PAGE_SIZE;
+			result = ll_direct_IO_26_seg(env, io, rw, inode,
+						     file->f_mapping,
+						     result, file_offset,
+						     pages, n);
+			ll_free_user_pages(pages, n, rw==READ);
+		}
+		if (unlikely(result <= 0)) {
+			/* If we can't allocate a large enough buffer
+			 * for the request, shrink it to a smaller
+			 * PAGE_SIZE multiple and try again.
+			 * We should always be able to kmalloc for a
+			 * page worth of page pointers = 4MB on i386. */
+			if (result == -ENOMEM &&
+			    size > (PAGE_CACHE_SIZE / sizeof(*pages)) *
+				   PAGE_CACHE_SIZE) {
+				size = ((((size / 2) - 1) |
+					 ~CFS_PAGE_MASK) + 1) &
+					CFS_PAGE_MASK;
+				CDEBUG(D_VFSTRACE,"DIO size now %lu\n",
+				       size);
+				continue;
 			}
-			tot_bytes += result;
-			file_offset += result;
-			iov_left -= result;
-			user_addr += result;
+			GOTO(out, result);
 		}
+		iov_iter_advance(iter, result);
+		tot_bytes += result;
+		file_offset += result;
 	}
 
 out:
 	LASSERT(obj->cob_transient_pages == 0);
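
One detail recurs across both filesystems in this patch: the callers turn the helper's byte return into a page count with the ceiling division (result + offset + PAGE_SIZE - 1) / PAGE_SIZE, since a span that starts mid-page can touch one more page than its length alone suggests. A standalone illustration of that arithmetic with made-up sizes (plain C, runnable in userspace):

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	int main(void)
	{
		/* 8192 bytes fit in exactly two pages when page-aligned,
		 * but starting one byte into the first page makes the
		 * span touch three. */
		unsigned long bytes = 8192, offs = 1;
		unsigned long npages = (bytes + offs + PAGE_SIZE - 1) / PAGE_SIZE;

		printf("%lu bytes at offset %lu span %lu pages\n",
		       bytes, offs, npages);	/* prints: ... span 3 pages */
		return 0;
	}
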
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 1d34f454989e..b122fe21fea0 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -322,60 +322,37 @@ static const struct nfs_pgio_completion_ops nfs_direct_read_completion_ops = {
  * handled automatically by nfs_direct_read_result().  Otherwise, if
  * no requests have been sent, just return an error.
  */
-static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *desc,
-						const struct iovec *iov,
-						loff_t pos, bool uio)
+
+static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
+					      struct iov_iter *iter,
+					      loff_t pos)
 {
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t rsize = NFS_SERVER(inode)->rsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
+	struct nfs_pageio_descriptor desc;
+	struct inode *inode = dreq->inode;
+	ssize_t result = -EINVAL;
+	size_t requested_bytes = 0;
+	size_t rsize = max_t(size_t, NFS_SERVER(inode)->rsize, PAGE_SIZE);
 
-	do {
-		size_t bytes;
-		int i;
+	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
+			     &nfs_direct_read_completion_ops);
+	get_dreq(dreq);
+	desc.pg_dreq = dreq;
+	atomic_inc(&inode->i_dio_count);
 
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, rsize, PAGE_SIZE), count);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
 
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *),
-					  GFP_KERNEL);
-		if (!pagevec)
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  rsize, &pgbase);
+		if (result < 0)
 			break;
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-					npages, 1, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 1, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
 		for (i = 0; i < npages; i++) {
 			struct nfs_page *req;
 			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
@@ -389,55 +366,21 @@ static ssize_t nfs_direct_read_schedule_segment(struct nfs_pageio_descriptor *de
 			}
 			req->wb_index = pos >> PAGE_SHIFT;
 			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
 				nfs_release_request(req);
 				break;
 			}
 			pgbase = 0;
 			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
+			requested_bytes += req_len;
 			pos += req_len;
-			count -= req_len;
 			dreq->bytes_left -= req_len;
 		}
-		/* The nfs_page now hold references to these pages */
 		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
-static ssize_t nfs_direct_read_schedule_iovec(struct nfs_direct_req *dreq,
-					      struct iov_iter *iter,
-					      loff_t pos, bool uio)
-{
-	struct nfs_pageio_descriptor desc;
-	struct inode *inode = dreq->inode;
-	ssize_t result = -EINVAL;
-	size_t requested_bytes = 0;
-	unsigned long seg;
-
-	NFS_PROTO(dreq->inode)->read_pageio_init(&desc, dreq->inode,
-			     &nfs_direct_read_completion_ops);
-	get_dreq(dreq);
-	desc.pg_dreq = dreq;
-	atomic_inc(&inode->i_dio_count);
-
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_read_schedule_segment(&desc, vec, pos, uio);
+		kvfree(pagevec);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
-			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -521,7 +464,7 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter,
 		dreq->iocb = iocb;
 
 	NFS_I(inode)->read_io += count;
-	result = nfs_direct_read_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_read_schedule_iovec(dreq, iter, pos);
 
 	mutex_unlock(&inode->i_mutex);
@@ -677,109 +620,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode
 }
 #endif
 
-/*
- * NB: Return the value of the first error return code.  Subsequent
- *     errors after the first one are ignored.
- */
-/*
- * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
- * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
- * bail and stop sending more writes.  Write length accounting is
- * handled automatically by nfs_direct_write_result().  Otherwise, if
- * no requests have been sent, just return an error.
- */
-static ssize_t nfs_direct_write_schedule_segment(struct nfs_pageio_descriptor *desc,
-						 const struct iovec *iov,
-						 loff_t pos, bool uio)
-{
-	struct nfs_direct_req *dreq = desc->pg_dreq;
-	struct nfs_open_context *ctx = dreq->ctx;
-	struct inode *inode = ctx->dentry->d_inode;
-	unsigned long user_addr = (unsigned long)iov->iov_base;
-	size_t count = iov->iov_len;
-	size_t wsize = NFS_SERVER(inode)->wsize;
-	unsigned int pgbase;
-	int result;
-	ssize_t started = 0;
-	struct page **pagevec = NULL;
-	unsigned int npages;
-
-	do {
-		size_t bytes;
-		int i;
-
-		pgbase = user_addr & ~PAGE_MASK;
-		bytes = min(max_t(size_t, wsize, PAGE_SIZE), count);
-
-		result = -ENOMEM;
-		npages = nfs_page_array_len(pgbase, bytes);
-		if (!pagevec)
-			pagevec = kmalloc(npages * sizeof(struct page *), GFP_KERNEL);
-		if (!pagevec)
-			break;
-
-		if (uio) {
-			down_read(&current->mm->mmap_sem);
-			result = get_user_pages(current, current->mm, user_addr,
-						npages, 0, 0, pagevec, NULL);
-			up_read(&current->mm->mmap_sem);
-			if (result < 0)
-				break;
-		} else {
-			WARN_ON(npages != 1);
-			result = get_kernel_page(user_addr, 0, pagevec);
-			if (WARN_ON(result != 1))
-				break;
-		}
-
-		if ((unsigned)result < npages) {
-			bytes = result * PAGE_SIZE;
-			if (bytes <= pgbase) {
-				nfs_direct_release_pages(pagevec, result);
-				break;
-			}
-			bytes -= pgbase;
-			npages = result;
-		}
-
-		for (i = 0; i < npages; i++) {
-			struct nfs_page *req;
-			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
-
-			req = nfs_create_request(dreq->ctx, dreq->inode,
-						 pagevec[i],
-						 pgbase, req_len);
-			if (IS_ERR(req)) {
-				result = PTR_ERR(req);
-				break;
-			}
-			nfs_lock_request(req);
-			req->wb_index = pos >> PAGE_SHIFT;
-			req->wb_offset = pos & ~PAGE_MASK;
-			if (!nfs_pageio_add_request(desc, req)) {
-				result = desc->pg_error;
-				nfs_unlock_and_release_request(req);
-				break;
-			}
-			pgbase = 0;
-			bytes -= req_len;
-			started += req_len;
-			user_addr += req_len;
-			pos += req_len;
-			count -= req_len;
-			dreq->bytes_left -= req_len;
-		}
-		/* The nfs_page now hold references to these pages */
-		nfs_direct_release_pages(pagevec, npages);
-	} while (count != 0 && result >= 0);
-
-	kfree(pagevec);
-
-	if (started)
-		return started;
-	return result < 0 ? (ssize_t) result : -EFAULT;
-}
-
 static void nfs_direct_write_completion(struct nfs_pgio_header *hdr)
 {
 	struct nfs_direct_req *dreq = hdr->dreq;
@@ -859,15 +699,27 @@ static const struct nfs_pgio_completion_ops nfs_direct_write_completion_ops = {
 	.completion = nfs_direct_write_completion,
 };
 
+
+/*
+ * NB: Return the value of the first error return code.  Subsequent
+ *     errors after the first one are ignored.
+ */
+/*
+ * For each wsize'd chunk of the user's buffer, dispatch an NFS WRITE
+ * operation.  If nfs_writedata_alloc() or get_user_pages() fails,
+ * bail and stop sending more writes.  Write length accounting is
+ * handled automatically by nfs_direct_write_result().  Otherwise, if
+ * no requests have been sent, just return an error.
+ */
 static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 					       struct iov_iter *iter,
-					       loff_t pos, bool uio)
+					       loff_t pos)
 {
 	struct nfs_pageio_descriptor desc;
 	struct inode *inode = dreq->inode;
 	ssize_t result = 0;
 	size_t requested_bytes = 0;
-	unsigned long seg;
+	size_t wsize = max_t(size_t, NFS_SERVER(inode)->wsize, PAGE_SIZE);
 
 	NFS_PROTO(inode)->write_pageio_init(&desc, inode, FLUSH_COND_STABLE,
 			      &nfs_direct_write_completion_ops);
@@ -875,16 +727,50 @@ static ssize_t nfs_direct_write_schedule_iovec(struct nfs_direct_req *dreq,
 	get_dreq(dreq);
 	atomic_inc(&inode->i_dio_count);
 
-	NFS_I(dreq->inode)->write_io += iov_iter_count(iter);
-	for (seg = 0; seg < iter->nr_segs; seg++) {
-		const struct iovec *vec = &iter->iov[seg];
-		result = nfs_direct_write_schedule_segment(&desc, vec, pos, uio);
+	NFS_I(inode)->write_io += iov_iter_count(iter);
+	while (iov_iter_count(iter)) {
+		struct page **pagevec;
+		size_t bytes;
+		size_t pgbase;
+		unsigned npages, i;
+
+		result = iov_iter_get_pages_alloc(iter, &pagevec,
+						  wsize, &pgbase);
 		if (result < 0)
 			break;
-		requested_bytes += result;
-		if ((size_t)result < vec->iov_len)
+
+		bytes = result;
+		iov_iter_advance(iter, bytes);
+		npages = (result + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
+		for (i = 0; i < npages; i++) {
+			struct nfs_page *req;
+			unsigned int req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase);
+
+			req = nfs_create_request(dreq->ctx, inode,
+						 pagevec[i],
+						 pgbase, req_len);
+			if (IS_ERR(req)) {
+				result = PTR_ERR(req);
+				break;
+			}
+			nfs_lock_request(req);
+			req->wb_index = pos >> PAGE_SHIFT;
+			req->wb_offset = pos & ~PAGE_MASK;
+			if (!nfs_pageio_add_request(&desc, req)) {
+				result = desc.pg_error;
+				nfs_unlock_and_release_request(req);
+				break;
+			}
+			pgbase = 0;
+			bytes -= req_len;
+			requested_bytes += req_len;
+			pos += req_len;
+			dreq->bytes_left -= req_len;
+		}
+		nfs_direct_release_pages(pagevec, npages);
+		kvfree(pagevec);
+		if (result < 0)
 			break;
-		pos += vec->iov_len;
 	}
 
 	nfs_pageio_complete(&desc);
@@ -985,7 +871,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter,
 	if (!is_sync_kiocb(iocb))
 		dreq->iocb = iocb;
 
-	result = nfs_direct_write_schedule_iovec(dreq, iter, pos, uio);
+	result = nfs_direct_write_schedule_iovec(dreq, iter, pos);
 
 	if (mapping->nrpages) {
 		invalidate_inode_pages2_range(mapping,
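
The read and write scheduling loops in the NFS diff above carve each pinned span into per-page nfs_page requests with the same pgbase/req_len bookkeeping: the first request is shortened by the offset into the first page, and every later one starts at page offset 0. A userspace sketch of that loop with hypothetical sizes (the arithmetic, not the kernel structures, is the point):

	#include <stdio.h>

	#define PAGE_SIZE 4096UL

	static unsigned long min_ul(unsigned long a, unsigned long b)
	{
		return a < b ? a : b;
	}

	int main(void)
	{
		/* Suppose 10000 bytes were pinned, starting 300 bytes into
		 * the first page: (10000 + 300 + 4095) / 4096 == 3 pages. */
		unsigned long bytes = 10000, pgbase = 300;
		unsigned long npages = (bytes + pgbase + PAGE_SIZE - 1) / PAGE_SIZE;
		unsigned long i;

		for (i = 0; i < npages; i++) {
			/* mirrors req_len = min_t(size_t, bytes, PAGE_SIZE - pgbase) */
			unsigned long req_len = min_ul(bytes, PAGE_SIZE - pgbase);

			printf("req %lu: page offset %lu, length %lu\n",
			       i, pgbase, req_len);
			pgbase = 0;	/* later requests start at the page head */
			bytes -= req_len;
		}
		return 0;	/* prints lengths 3796, 4096, 2108 */
	}
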
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 2f8825b06680..4876e9f2a58f 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -73,6 +73,8 @@ void iov_iter_init(struct iov_iter *i, int direction, const struct iovec *iov,
 			unsigned long nr_segs, size_t count);
 ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages,
 			size_t maxsize, size_t *start);
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages,
+			size_t maxsize, size_t *start);
 int iov_iter_npages(const struct iov_iter *i, int maxpages);
 
 static inline size_t iov_iter_count(struct iov_iter *i)
diff --git a/mm/iov_iter.c b/mm/iov_iter.c
index 0b677f8f9bad..a5c691c1a283 100644
--- a/mm/iov_iter.c
+++ b/mm/iov_iter.c
@@ -1,6 +1,8 @@
 #include <linux/export.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
+#include <linux/slab.h>
+#include <linux/vmalloc.h>
 
 size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes,
 			 struct iov_iter *i)
@@ -263,6 +265,44 @@ ssize_t iov_iter_get_pages(struct iov_iter *i,
 }
 EXPORT_SYMBOL(iov_iter_get_pages);
 
+ssize_t iov_iter_get_pages_alloc(struct iov_iter *i,
+		   struct page ***pages, size_t maxsize,
+		   size_t *start)
+{
+	size_t offset = i->iov_offset;
+	const struct iovec *iov = i->iov;
+	size_t len;
+	unsigned long addr;
+	void *p;
+	int n;
+	int res;
+
+	len = iov->iov_len - offset;
+	if (len > i->count)
+		len = i->count;
+	if (len > maxsize)
+		len = maxsize;
+	addr = (unsigned long)iov->iov_base + offset;
+	len += *start = addr & (PAGE_SIZE - 1);
+	addr &= ~(PAGE_SIZE - 1);
+	n = (len + PAGE_SIZE - 1) / PAGE_SIZE;
+
+	p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
+	if (!p)
+		p = vmalloc(n * sizeof(struct page *));
+	if (!p)
+		return -ENOMEM;
+
+	res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p);
+	if (unlikely(res < 0)) {
+		kvfree(p);
+		return res;
+	}
+	*pages = p;
+	return (res == n ? len : res * PAGE_SIZE) - *start;
+}
+EXPORT_SYMBOL(iov_iter_get_pages_alloc);
+
 int iov_iter_npages(const struct iov_iter *i, int maxpages)
 {
 	size_t offset = i->iov_offset;
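
The array allocation inside iov_iter_get_pages_alloc() above uses a kmalloc()-first, vmalloc()-fallback pattern: a large pinned range can need a page-pointer array too big for a physically contiguous allocation, and kvfree() in the callers frees either kind. The same pattern in isolation, as a kernel-style restatement of the hunk above (a sketch, not additional code from the patch):

	#include <linux/mm_types.h>
	#include <linux/slab.h>
	#include <linux/vmalloc.h>

	static struct page **alloc_page_array(int n)
	{
		struct page **p;

		/* Fast path: physically contiguous, cheap to allocate and free. */
		p = kmalloc(n * sizeof(struct page *), GFP_KERNEL);
		if (!p)
			/* Fall back to virtually contiguous memory; kvfree()
			 * picks the matching free routine for the caller. */
			p = vmalloc(n * sizeof(struct page *));
		return p;	/* NULL only if both allocators failed */
	}
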
