From b41c9b0266e8370033a7799f6806bfc70b7fd75f Mon Sep 17 00:00:00 2001 From: Eric Wheeler Date: Fri, 13 Oct 2017 16:35:33 -0700 Subject: bcache: update bio->bi_opf bypass/writeback REQ_ flag hints Flag for bypass if the IO is for read-ahead or background, unless the read-ahead request is for metadata (eg, from gfs2). Bypass if: bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && !(bio->bi_opf & REQ_META)) Writeback if: op_is_sync(bio->bi_opf) || bio->bi_opf & (REQ_META|REQ_PRIO) Signed-off-by: Eric Wheeler Reviewed-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 681b4f12b05a..9ee137e8d387 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -384,6 +384,14 @@ static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) op_is_write(bio_op(bio)))) goto skip; + /* + * Flag for bypass if the IO is for read-ahead or background, + * unless the read-ahead request is for metadata (eg, for gfs2). + */ + if (bio->bi_opf & (REQ_RAHEAD|REQ_BACKGROUND) && + !(bio->bi_opf & REQ_META)) + goto skip; + if (bio->bi_iter.bi_sector & (c->sb.block_size - 1) || bio_sectors(bio) & (c->sb.block_size - 1)) { pr_debug("skipping unaligned io"); -- cgit v1.2.3-70-g09d2 From 238501027abf0386fa5f5dcaf589f406eb187bc3 Mon Sep 17 00:00:00 2001 From: Yijing Wang Date: Fri, 13 Oct 2017 16:35:34 -0700 Subject: bcache: remove unused parameter Parameter bio is no longer used, clean it. Signed-off-by: Yijing Wang Reviewed-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 9ee137e8d387..163a17a80874 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -26,12 +26,12 @@ struct kmem_cache *bch_search_cache; static void bch_data_insert_start(struct closure *); -static unsigned cache_mode(struct cached_dev *dc, struct bio *bio) +static unsigned cache_mode(struct cached_dev *dc) { return BDEV_CACHE_MODE(&dc->sb); } -static bool verify(struct cached_dev *dc, struct bio *bio) +static bool verify(struct cached_dev *dc) { return dc->verify; } @@ -369,7 +369,7 @@ static struct hlist_head *iohash(struct cached_dev *dc, uint64_t k) static bool check_should_bypass(struct cached_dev *dc, struct bio *bio) { struct cache_set *c = dc->disk.c; - unsigned mode = cache_mode(dc, bio); + unsigned mode = cache_mode(dc); unsigned sectors, congested = bch_get_congested(c); struct task_struct *task = current; struct io *i; @@ -747,7 +747,7 @@ static void cached_dev_read_done(struct closure *cl) s->cache_miss = NULL; } - if (verify(dc, &s->bio.bio) && s->recoverable && !s->read_dirty_data) + if (verify(dc) && s->recoverable && !s->read_dirty_data) bch_data_verify(dc, s->orig_bio); bio_complete(s); @@ -772,7 +772,7 @@ static void cached_dev_read_done_bh(struct closure *cl) if (s->iop.status) continue_at_nobarrier(cl, cached_dev_read_error, bcache_wq); - else if (s->iop.bio || verify(dc, &s->bio.bio)) + else if (s->iop.bio || verify(dc)) continue_at_nobarrier(cl, cached_dev_read_done, bcache_wq); else continue_at_nobarrier(cl, cached_dev_bio_complete, NULL); @@ -899,7 +899,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) s->iop.bypass = true; if (should_writeback(dc, s->orig_bio, - cache_mode(dc, bio), + cache_mode(dc), s->iop.bypass)) { s->iop.bypass = false; s->iop.writeback = true; -- cgit v1.2.3-70-g09d2 From d59b23795933678c9638fd20c942d2b4f3cd6185 Mon Sep 17 00:00:00 2001 From: Coly Li Date: Mon, 30 Oct 2017 14:46:31 -0700 Subject: bcache: only permit to recovery read error when cache device is clean When bcache does read I/Os, for example in writeback or writethrough mode, if a read request on cache device is failed, bcache will try to recovery the request by reading from cached device. If the data on cached device is not synced with cache device, then requester will get a stale data. For critical storage system like database, providing stale data from recovery may result an application level data corruption, which is unacceptible. With this patch, for a failed read request in writeback or writethrough mode, recovery a recoverable read request only happens when cache device is clean. That is to say, all data on cached device is up to update. For other cache modes in bcache, read request will never hit cached_dev_read_error(), they don't need this patch. Please note, because cache mode can be switched arbitrarily in run time, a writethrough mode might be switched from a writeback mode. Therefore checking dc->has_data in writethrough mode still makes sense. Changelog: V4: Fix parens error pointed by Michael Lyle. v3: By response from Kent Oversteet, he thinks recovering stale data is a bug to fix, and option to permit it is unnecessary. So this version the sysfs file is removed. v2: rename sysfs entry from allow_stale_data_on_failure to allow_stale_data_on_failure, and fix the confusing commit log. v1: initial patch posted. [small change to patch comment spelling by mlyle] Signed-off-by: Coly Li Signed-off-by: Michael Lyle Reported-by: Arne Wolf Reviewed-by: Michael Lyle Cc: Kent Overstreet Cc: Nix Cc: Kai Krakow Cc: Eric Wheeler Cc: Junhui Tang Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 163a17a80874..886e4b6643f1 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -705,8 +705,16 @@ static void cached_dev_read_error(struct closure *cl) { struct search *s = container_of(cl, struct search, cl); struct bio *bio = &s->bio.bio; + struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); - if (s->recoverable) { + /* + * If cache device is dirty (dc->has_dirty is non-zero), then + * recovery a failed read request from cached device may get a + * stale data back. So read failure recovery is only permitted + * when cache device is clean. + */ + if (s->recoverable && + (dc && !atomic_read(&dc->has_dirty))) { /* Retry from the backing device: */ trace_bcache_read_retry(s->orig_bio); -- cgit v1.2.3-70-g09d2 From c157313791a999646901b3e3c6888514ebc36d62 Mon Sep 17 00:00:00 2001 From: "tang.junhui" Date: Mon, 30 Oct 2017 14:46:34 -0700 Subject: bcache: fix wrong cache_misses statistics Currently, Cache missed IOs are identified by s->cache_miss, but actually, there are many situations that missed IOs are not assigned a value for s->cache_miss in cached_dev_cache_miss(), for example, a bypassed IO (s->iop.bypass = 1), or the cache_bio allocate failed. In these situations, it will go to out_put or out_submit, and s->cache_miss is null, which leads bch_mark_cache_accounting() to treat this IO as a hit IO. [ML: applied by 3-way merge] Signed-off-by: tang.junhui Reviewed-by: Michael Lyle Reviewed-by: Coly Li Signed-off-by: Jens Axboe --- drivers/md/bcache/request.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/md/bcache/request.c') diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 886e4b6643f1..597dd1e87bea 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -470,6 +470,7 @@ struct search { unsigned recoverable:1; unsigned write:1; unsigned read_dirty_data:1; + unsigned cache_missed:1; unsigned long start_time; @@ -656,6 +657,7 @@ static inline struct search *search_alloc(struct bio *bio, s->orig_bio = bio; s->cache_miss = NULL; + s->cache_missed = 0; s->d = d; s->recoverable = 1; s->write = op_is_write(bio_op(bio)); @@ -775,7 +777,7 @@ static void cached_dev_read_done_bh(struct closure *cl) struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); bch_mark_cache_accounting(s->iop.c, s->d, - !s->cache_miss, s->iop.bypass); + !s->cache_missed, s->iop.bypass); trace_bcache_read(s->orig_bio, !s->cache_miss, s->iop.bypass); if (s->iop.status) @@ -794,6 +796,8 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, struct cached_dev *dc = container_of(s->d, struct cached_dev, disk); struct bio *miss, *cache_bio; + s->cache_missed = 1; + if (s->cache_miss || s->iop.bypass) { miss = bio_next_split(bio, sectors, GFP_NOIO, s->d->bio_split); ret = miss == bio ? MAP_DONE : MAP_CONTINUE; -- cgit v1.2.3-70-g09d2