diff options
Diffstat (limited to 'block/bio.c')
-rw-r--r-- | block/bio.c | 262 |
1 files changed, 125 insertions, 137 deletions
diff --git a/block/bio.c b/block/bio.c index 67eff5eddc49..b12966e415d3 100644 --- a/block/bio.c +++ b/block/bio.c @@ -28,9 +28,11 @@ #include <linux/mempool.h> #include <linux/workqueue.h> #include <linux/cgroup.h> +#include <linux/blk-cgroup.h> #include <trace/events/block.h> #include "blk.h" +#include "blk-rq-qos.h" /* * Test patch to inline a certain number of bi_io_vec's inside the bio @@ -156,7 +158,7 @@ out: unsigned int bvec_nr_vecs(unsigned short idx) { - return bvec_slabs[idx].nr_vecs; + return bvec_slabs[--idx].nr_vecs; } void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned int idx) @@ -645,83 +647,6 @@ struct bio *bio_clone_fast(struct bio *bio, gfp_t gfp_mask, struct bio_set *bs) EXPORT_SYMBOL(bio_clone_fast); /** - * bio_clone_bioset - clone a bio - * @bio_src: bio to clone - * @gfp_mask: allocation priority - * @bs: bio_set to allocate from - * - * Clone bio. Caller will own the returned bio, but not the actual data it - * points to. Reference count of returned bio will be one. - */ -struct bio *bio_clone_bioset(struct bio *bio_src, gfp_t gfp_mask, - struct bio_set *bs) -{ - struct bvec_iter iter; - struct bio_vec bv; - struct bio *bio; - - /* - * Pre immutable biovecs, __bio_clone() used to just do a memcpy from - * bio_src->bi_io_vec to bio->bi_io_vec. - * - * We can't do that anymore, because: - * - * - The point of cloning the biovec is to produce a bio with a biovec - * the caller can modify: bi_idx and bi_bvec_done should be 0. - * - * - The original bio could've had more than BIO_MAX_PAGES biovecs; if - * we tried to clone the whole thing bio_alloc_bioset() would fail. - * But the clone should succeed as long as the number of biovecs we - * actually need to allocate is fewer than BIO_MAX_PAGES. - * - * - Lastly, bi_vcnt should not be looked at or relied upon by code - * that does not own the bio - reason being drivers don't use it for - * iterating over the biovec anymore, so expecting it to be kept up - * to date (i.e. for clones that share the parent biovec) is just - * asking for trouble and would force extra work on - * __bio_clone_fast() anyways. - */ - - bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs); - if (!bio) - return NULL; - bio->bi_disk = bio_src->bi_disk; - bio->bi_opf = bio_src->bi_opf; - bio->bi_write_hint = bio_src->bi_write_hint; - bio->bi_iter.bi_sector = bio_src->bi_iter.bi_sector; - bio->bi_iter.bi_size = bio_src->bi_iter.bi_size; - - switch (bio_op(bio)) { - case REQ_OP_DISCARD: - case REQ_OP_SECURE_ERASE: - case REQ_OP_WRITE_ZEROES: - break; - case REQ_OP_WRITE_SAME: - bio->bi_io_vec[bio->bi_vcnt++] = bio_src->bi_io_vec[0]; - break; - default: - bio_for_each_segment(bv, bio_src, iter) - bio->bi_io_vec[bio->bi_vcnt++] = bv; - break; - } - - if (bio_integrity(bio_src)) { - int ret; - - ret = bio_integrity_clone(bio, bio_src, gfp_mask); - if (ret < 0) { - bio_put(bio); - return NULL; - } - } - - bio_clone_blkcg_association(bio, bio_src); - - return bio; -} -EXPORT_SYMBOL(bio_clone_bioset); - -/** * bio_add_pc_page - attempt to add page to bio * @q: the target queue * @bio: destination bio @@ -903,25 +828,27 @@ int bio_add_page(struct bio *bio, struct page *page, EXPORT_SYMBOL(bio_add_page); /** - * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio + * __bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio * @bio: bio to add pages to * @iter: iov iterator describing the region to be mapped * - * Pins as many pages from *iter and appends them to @bio's bvec array. The + * Pins pages from *iter and appends them to @bio's bvec array. The * pages will have to be released using put_page() when done. + * For multi-segment *iter, this function only adds pages from the + * the next non-empty segment of the iov iterator. */ -int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) +static int __bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) { - unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt; + unsigned short nr_pages = bio->bi_max_vecs - bio->bi_vcnt, idx; struct bio_vec *bv = bio->bi_io_vec + bio->bi_vcnt; struct page **pages = (struct page **)bv; - size_t offset, diff; + size_t offset; ssize_t size; size = iov_iter_get_pages(iter, pages, LONG_MAX, nr_pages, &offset); if (unlikely(size <= 0)) return size ? size : -EFAULT; - nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; + idx = nr_pages = (size + offset + PAGE_SIZE - 1) / PAGE_SIZE; /* * Deep magic below: We need to walk the pinned pages backwards @@ -934,21 +861,46 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) bio->bi_iter.bi_size += size; bio->bi_vcnt += nr_pages; - diff = (nr_pages * PAGE_SIZE - offset) - size; - while (nr_pages--) { - bv[nr_pages].bv_page = pages[nr_pages]; - bv[nr_pages].bv_len = PAGE_SIZE; - bv[nr_pages].bv_offset = 0; + while (idx--) { + bv[idx].bv_page = pages[idx]; + bv[idx].bv_len = PAGE_SIZE; + bv[idx].bv_offset = 0; } bv[0].bv_offset += offset; bv[0].bv_len -= offset; - if (diff) - bv[bio->bi_vcnt - 1].bv_len -= diff; + bv[nr_pages - 1].bv_len -= nr_pages * PAGE_SIZE - offset - size; iov_iter_advance(iter, size); return 0; } + +/** + * bio_iov_iter_get_pages - pin user or kernel pages and add them to a bio + * @bio: bio to add pages to + * @iter: iov iterator describing the region to be mapped + * + * Pins pages from *iter and appends them to @bio's bvec array. The + * pages will have to be released using put_page() when done. + * The function tries, but does not guarantee, to pin as many pages as + * fit into the bio, or are requested in *iter, whatever is smaller. + * If MM encounters an error pinning the requested pages, it stops. + * Error is returned only if 0 pages could be pinned. + */ +int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter) +{ + unsigned short orig_vcnt = bio->bi_vcnt; + + do { + int ret = __bio_iov_iter_get_pages(bio, iter); + + if (unlikely(ret)) + return bio->bi_vcnt > orig_vcnt ? 0 : ret; + + } while (iov_iter_count(iter) && !bio_full(bio)); + + return 0; +} EXPORT_SYMBOL_GPL(bio_iov_iter_get_pages); static void submit_bio_wait_endio(struct bio *bio) @@ -1634,10 +1586,8 @@ void bio_set_pages_dirty(struct bio *bio) int i; bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (page && !PageCompound(page)) - set_page_dirty_lock(page); + if (!PageCompound(bvec->bv_page)) + set_page_dirty_lock(bvec->bv_page); } } EXPORT_SYMBOL_GPL(bio_set_pages_dirty); @@ -1647,19 +1597,15 @@ static void bio_release_pages(struct bio *bio) struct bio_vec *bvec; int i; - bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (page) - put_page(page); - } + bio_for_each_segment_all(bvec, bio, i) + put_page(bvec->bv_page); } /* * bio_check_pages_dirty() will check that all the BIO's pages are still dirty. * If they are, then fine. If, however, some pages are clean then they must * have been written out during the direct-IO read. So we take another ref on - * the BIO and the offending pages and re-dirty the pages in process context. + * the BIO and re-dirty the pages in process context. * * It is expected that bio_check_pages_dirty() will wholly own the BIO from * here on. It will run one put_page() against each page and will run one @@ -1677,78 +1623,70 @@ static struct bio *bio_dirty_list; */ static void bio_dirty_fn(struct work_struct *work) { - unsigned long flags; - struct bio *bio; + struct bio *bio, *next; - spin_lock_irqsave(&bio_dirty_lock, flags); - bio = bio_dirty_list; + spin_lock_irq(&bio_dirty_lock); + next = bio_dirty_list; bio_dirty_list = NULL; - spin_unlock_irqrestore(&bio_dirty_lock, flags); + spin_unlock_irq(&bio_dirty_lock); - while (bio) { - struct bio *next = bio->bi_private; + while ((bio = next) != NULL) { + next = bio->bi_private; bio_set_pages_dirty(bio); bio_release_pages(bio); bio_put(bio); - bio = next; } } void bio_check_pages_dirty(struct bio *bio) { struct bio_vec *bvec; - int nr_clean_pages = 0; + unsigned long flags; int i; bio_for_each_segment_all(bvec, bio, i) { - struct page *page = bvec->bv_page; - - if (PageDirty(page) || PageCompound(page)) { - put_page(page); - bvec->bv_page = NULL; - } else { - nr_clean_pages++; - } + if (!PageDirty(bvec->bv_page) && !PageCompound(bvec->bv_page)) + goto defer; } - if (nr_clean_pages) { - unsigned long flags; - - spin_lock_irqsave(&bio_dirty_lock, flags); - bio->bi_private = bio_dirty_list; - bio_dirty_list = bio; - spin_unlock_irqrestore(&bio_dirty_lock, flags); - schedule_work(&bio_dirty_work); - } else { - bio_put(bio); - } + bio_release_pages(bio); + bio_put(bio); + return; +defer: + spin_lock_irqsave(&bio_dirty_lock, flags); + bio->bi_private = bio_dirty_list; + bio_dirty_list = bio; + spin_unlock_irqrestore(&bio_dirty_lock, flags); + schedule_work(&bio_dirty_work); } EXPORT_SYMBOL_GPL(bio_check_pages_dirty); -void generic_start_io_acct(struct request_queue *q, int rw, +void generic_start_io_acct(struct request_queue *q, int op, unsigned long sectors, struct hd_struct *part) { + const int sgrp = op_stat_group(op); int cpu = part_stat_lock(); part_round_stats(q, cpu, part); - part_stat_inc(cpu, part, ios[rw]); - part_stat_add(cpu, part, sectors[rw], sectors); - part_inc_in_flight(q, part, rw); + part_stat_inc(cpu, part, ios[sgrp]); + part_stat_add(cpu, part, sectors[sgrp], sectors); + part_inc_in_flight(q, part, op_is_write(op)); part_stat_unlock(); } EXPORT_SYMBOL(generic_start_io_acct); -void generic_end_io_acct(struct request_queue *q, int rw, +void generic_end_io_acct(struct request_queue *q, int req_op, struct hd_struct *part, unsigned long start_time) { unsigned long duration = jiffies - start_time; + const int sgrp = op_stat_group(req_op); int cpu = part_stat_lock(); - part_stat_add(cpu, part, ticks[rw], duration); + part_stat_add(cpu, part, ticks[sgrp], duration); part_round_stats(q, cpu, part); - part_dec_in_flight(q, part, rw); + part_dec_in_flight(q, part, op_is_write(req_op)); part_stat_unlock(); } @@ -1807,6 +1745,9 @@ again: if (!bio_integrity_endio(bio)) return; + if (bio->bi_disk) + rq_qos_done_bio(bio->bi_disk->queue, bio); + /* * Need to have a real endio function for chained bios, otherwise * various corner cases will break (like stacking block devices that @@ -1866,6 +1807,7 @@ struct bio *bio_split(struct bio *bio, int sectors, bio_integrity_trim(split); bio_advance(bio, split->bi_iter.bi_size); + bio->bi_iter.bi_done = 0; if (bio_flagged(bio, BIO_TRACE_COMPLETION)) bio_set_flag(split, BIO_TRACE_COMPLETION); @@ -2014,6 +1956,30 @@ EXPORT_SYMBOL(bioset_init_from_src); #ifdef CONFIG_BLK_CGROUP +#ifdef CONFIG_MEMCG +/** + * bio_associate_blkcg_from_page - associate a bio with the page's blkcg + * @bio: target bio + * @page: the page to lookup the blkcg from + * + * Associate @bio with the blkcg from @page's owning memcg. This works like + * every other associate function wrt references. + */ +int bio_associate_blkcg_from_page(struct bio *bio, struct page *page) +{ + struct cgroup_subsys_state *blkcg_css; + + if (unlikely(bio->bi_css)) + return -EBUSY; + if (!page->mem_cgroup) + return 0; + blkcg_css = cgroup_get_e_css(page->mem_cgroup->css.cgroup, + &io_cgrp_subsys); + bio->bi_css = blkcg_css; + return 0; +} +#endif /* CONFIG_MEMCG */ + /** * bio_associate_blkcg - associate a bio with the specified blkcg * @bio: target bio @@ -2037,6 +2003,24 @@ int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) EXPORT_SYMBOL_GPL(bio_associate_blkcg); /** + * bio_associate_blkg - associate a bio with the specified blkg + * @bio: target bio + * @blkg: the blkg to associate + * + * Associate @bio with the blkg specified by @blkg. This is the queue specific + * blkcg information associated with the @bio, a reference will be taken on the + * @blkg and will be freed when the bio is freed. + */ +int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg) +{ + if (unlikely(bio->bi_blkg)) + return -EBUSY; + blkg_get(blkg); + bio->bi_blkg = blkg; + return 0; +} + +/** * bio_disassociate_task - undo bio_associate_current() * @bio: target bio */ @@ -2050,6 +2034,10 @@ void bio_disassociate_task(struct bio *bio) css_put(bio->bi_css); bio->bi_css = NULL; } + if (bio->bi_blkg) { + blkg_put(bio->bi_blkg); + bio->bi_blkg = NULL; + } } /** |