From 91af8300d9c1d7c6b6a2fd754109e08d4798b8d8 Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 13 Oct 2017 16:35:29 -0700
Subject: bcache: check ca->alloc_thread initialized before wake up it

In bcache code, sysfs entries are created before all resources get
allocated, e.g. allocation thread of a cache set.

There is posibility for NULL pointer deference if a resource is accessed
but which is not initialized yet. Indeed Jorg Bornschein catches one on
cache set allocation thread and gets a kernel oops.

The reason for this bug is, when bch_bucket_alloc() is called during
cache set registration and attaching, ca->alloc_thread is not properly
allocated and initialized yet, call wake_up_process() on ca->alloc_thread
triggers NULL pointer deference failure. A simple and fast fix is, before
waking up ca->alloc_thread, checking whether it is allocated, and only
wake up ca->alloc_thread when it is not NULL.

Signed-off-by: Coly Li <colyli@suse.de>
Reported-by: Jorg Bornschein <jb@capsec.org>
Cc: Kent Overstreet <kent.overstreet@gmail.com>
Cc: stable@vger.kernel.org
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

(limited to 'drivers/md/bcache/alloc.c')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index cacbe2dbd5c3..e1a4205caa2e 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -406,7 +406,8 @@ long bch_bucket_alloc(struct cache *ca, unsigned reserve, bool wait)
 
 	finish_wait(&ca->set->bucket_wait, &w);
 out:
-	wake_up_process(ca->alloc_thread);
+	if (ca->alloc_thread)
+		wake_up_process(ca->alloc_thread);
 
 	trace_bcache_alloc(ca, reserve);
 
-- 
cgit v1.2.3-70-g09d2


From b1e8139e48b58e3bc1234e619c750ffd1394be2f Mon Sep 17 00:00:00 2001
From: Coly Li <colyli@suse.de>
Date: Fri, 13 Oct 2017 16:35:30 -0700
Subject: bcache: fix a comments typo in bch_alloc_sectors()

Code comments in alloc.c:bch_alloc_sectors() mentions a function
name find_data_bucket(), the correct function name should be
pick_data_bucket() indeed. bch_alloc_sectors() is a quite important
function in bcache allocation code, fixing the typo may help
other people to have less confusion.

Signed-off-by: Coly Li <colyli@suse.de>
Reviewed-by: Tang Junhui <tang.junhui@zte.com.cn>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'drivers/md/bcache/alloc.c')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index e1a4205caa2e..4c40870e99f5 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -601,7 +601,7 @@ bool bch_alloc_sectors(struct cache_set *c, struct bkey *k, unsigned sectors,
 
 	/*
 	 * If we had to allocate, we might race and not need to allocate the
-	 * second time we call find_data_bucket(). If we allocated a bucket but
+	 * second time we call pick_data_bucket(). If we allocated a bucket but
 	 * didn't use it, drop the refcount bch_bucket_alloc_set() took:
 	 */
 	if (KEY_PTRS(&alloc.key))
-- 
cgit v1.2.3-70-g09d2


From d44c2f9e7cc0041f0cd88df1fe7a1fceb713ab14 Mon Sep 17 00:00:00 2001
From: Tang Junhui <tang.junhui@zte.com.cn>
Date: Mon, 30 Oct 2017 14:46:33 -0700
Subject: bcache: update bucket_in_use in real time

bucket_in_use is updated in gc thread which triggered by invalidating or
writing sectors_to_gc dirty data, It's a long interval. Therefore, when we
use it to compare with the threshold, it is often not timely, which leads
to inaccurate judgment and often results in bucket depletion.

We have send a patch before, by the means of updating bucket_in_use
periodically In gc thread, which Coly thought that would lead high
latency, In this patch, we add avail_nbuckets to record the count of
available buckets, and we calculate bucket_in_use when alloc or free
bucket in real time.

[edited by ML: eliminated some whitespace errors]

Signed-off-by: Tang Junhui <tang.junhui@zte.com.cn>
Signed-off-by: Michael Lyle <mlyle@lyle.org>
Reviewed-by: Michael Lyle <mlyle@lyle.org>
Reviewed-by: Coly Li <colyli@suse.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/md/bcache/alloc.c  | 10 ++++++++++
 drivers/md/bcache/bcache.h |  1 +
 drivers/md/bcache/btree.c  | 17 ++++++++++-------
 drivers/md/bcache/btree.h  |  2 +-
 4 files changed, 22 insertions(+), 8 deletions(-)

(limited to 'drivers/md/bcache/alloc.c')

diff --git a/drivers/md/bcache/alloc.c b/drivers/md/bcache/alloc.c
index 4c40870e99f5..8c5a626343d4 100644
--- a/drivers/md/bcache/alloc.c
+++ b/drivers/md/bcache/alloc.c
@@ -442,6 +442,11 @@ out:
 		b->prio = INITIAL_PRIO;
 	}
 
+	if (ca->set->avail_nbuckets > 0) {
+		ca->set->avail_nbuckets--;
+		bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+	}
+
 	return r;
 }
 
@@ -449,6 +454,11 @@ void __bch_bucket_free(struct cache *ca, struct bucket *b)
 {
 	SET_GC_MARK(b, 0);
 	SET_GC_SECTORS_USED(b, 0);
+
+	if (ca->set->avail_nbuckets < ca->set->nbuckets) {
+		ca->set->avail_nbuckets++;
+		bch_update_bucket_in_use(ca->set, &ca->set->gc_stats);
+	}
 }
 
 void bch_bucket_free(struct cache_set *c, struct bkey *k)
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 363ea6256b39..e274082330dc 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -581,6 +581,7 @@ struct cache_set {
 	uint8_t			need_gc;
 	struct gc_stat		gc_stats;
 	size_t			nbuckets;
+	size_t			avail_nbuckets;
 
 	struct task_struct	*gc_thread;
 	/* Where in the btree gc currently is */
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index 866dcf78ff8e..d8865e6ead37 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -1240,6 +1240,11 @@ void bch_initial_mark_key(struct cache_set *c, int level, struct bkey *k)
 	__bch_btree_mark_key(c, level, k);
 }
 
+void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats)
+{
+	stats->in_use = (c->nbuckets - c->avail_nbuckets) * 100 / c->nbuckets;
+}
+
 static bool btree_gc_mark_node(struct btree *b, struct gc_stat *gc)
 {
 	uint8_t stale = 0;
@@ -1651,9 +1656,8 @@ static void btree_gc_start(struct cache_set *c)
 	mutex_unlock(&c->bucket_lock);
 }
 
-static size_t bch_btree_gc_finish(struct cache_set *c)
+static void bch_btree_gc_finish(struct cache_set *c)
 {
-	size_t available = 0;
 	struct bucket *b;
 	struct cache *ca;
 	unsigned i;
@@ -1690,6 +1694,7 @@ static size_t bch_btree_gc_finish(struct cache_set *c)
 	}
 	rcu_read_unlock();
 
+	c->avail_nbuckets = 0;
 	for_each_cache(ca, c, i) {
 		uint64_t *i;
 
@@ -1711,18 +1716,16 @@ static size_t bch_btree_gc_finish(struct cache_set *c)
 			BUG_ON(!GC_MARK(b) && GC_SECTORS_USED(b));
 
 			if (!GC_MARK(b) || GC_MARK(b) == GC_MARK_RECLAIMABLE)
-				available++;
+				c->avail_nbuckets++;
 		}
 	}
 
 	mutex_unlock(&c->bucket_lock);
-	return available;
 }
 
 static void bch_btree_gc(struct cache_set *c)
 {
 	int ret;
-	unsigned long available;
 	struct gc_stat stats;
 	struct closure writes;
 	struct btree_op op;
@@ -1745,14 +1748,14 @@ static void bch_btree_gc(struct cache_set *c)
 			pr_warn("gc failed!");
 	} while (ret);
 
-	available = bch_btree_gc_finish(c);
+	bch_btree_gc_finish(c);
 	wake_up_allocators(c);
 
 	bch_time_stats_update(&c->btree_gc_time, start_time);
 
 	stats.key_bytes *= sizeof(uint64_t);
 	stats.data	<<= 9;
-	stats.in_use	= (c->nbuckets - available) * 100 / c->nbuckets;
+	bch_update_bucket_in_use(c, &stats);
 	memcpy(&c->gc_stats, &stats, sizeof(struct gc_stat));
 
 	trace_bcache_gc_end(c);
diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h
index 73da1f5626cb..4073aca09a49 100644
--- a/drivers/md/bcache/btree.h
+++ b/drivers/md/bcache/btree.h
@@ -305,5 +305,5 @@ void bch_keybuf_del(struct keybuf *, struct keybuf_key *);
 struct keybuf_key *bch_keybuf_next(struct keybuf *);
 struct keybuf_key *bch_keybuf_next_rescan(struct cache_set *, struct keybuf *,
 					  struct bkey *, keybuf_pred_fn *);
-
+void bch_update_bucket_in_use(struct cache_set *c, struct gc_stat *stats);
 #endif
-- 
cgit v1.2.3-70-g09d2