From d09e9a2bddba6c48e0fddb16c4383172ac593251 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Tue, 10 May 2022 20:31:24 +0800
Subject: ubi: fastmap: Fix high cpu usage of ubi_bgt by making sure wl_pool
 not empty

There at least 6 PEBs reserved on UBI device:
1. EBA_RESERVED_PEBS[1]
2. WL_RESERVED_PEBS[1]
3. UBI_LAYOUT_VOLUME_EBS[2]
4. MIN_FASTMAP_RESERVED_PEBS[2]

When all ubi volumes take all their PEBs, there are 3 (EBA_RESERVED_PEBS +
WL_RESERVED_PEBS + MIN_FASTMAP_RESERVED_PEBS - MIN_FASTMAP_TAKEN_PEBS[1])
free PEBs. Since commit f9c34bb529975fe ("ubi: Fix producing anchor PEBs")
and commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering
wear level rules") applied, there is only 1 (3 - FASTMAP_ANCHOR_PEBS[1] -
FASTMAP_NEXT_ANCHOR_PEBS[1]) free PEB to fill pool and wl_pool, after
filling pool, wl_pool is always empty. So, UBI could be stuck in an
infinite loop:

	ubi_thread	   system_wq
wear_leveling_worker <--------------------------------------------------
  get_peb_for_wl							|
    // fm_wl_pool, used = size = 0					|
    schedule_work(&ubi->fm_work)					|
									|
		    update_fastmap_work_fn				|
		      ubi_update_fastmap				|
			ubi_refill_pools				|
			// ubi->free_count - ubi->beb_rsvd_pebs < 5	|
			// wl_pool is not filled with any PEBs		|
			schedule_erase(old_fm_anchor)			|
			ubi_ensure_anchor_pebs				|
			  __schedule_ubi_work(wear_leveling_worker)	|
									|
__erase_worker								|
  ensure_wear_leveling							|
    __schedule_ubi_work(wear_leveling_worker) --------------------------

, which cause high cpu usage of ubi_bgt:
top - 12:10:42 up 5 min,  2 users,  load average: 1.76, 0.68, 0.27
Tasks: 123 total,   3 running,  54 sleeping,   0 stopped,   0 zombie

  PID USER PR   NI VIRT    RES    SHR S  %CPU %MEM     TIME+ COMMAND
 1589 root 20   0   0      0      0 R  45.0  0.0   0:38.86 ubi_bgt0d
  319 root 20   0   0      0      0 I  15.2  0.0   0:15.29 kworker/0:3-eve
  371 root 20   0   0      0      0 I  14.9  0.0   0:12.85 kworker/3:3-eve
   20 root 20   0   0      0      0 I  11.3  0.0   0:05.33 kworker/1:0-eve
  202 root 20   0   0      0      0 I  11.3  0.0   0:04.93 kworker/2:3-eve

In commit 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs considering
wear level rules"), there are three key changes:
  1) Choose the fastmap anchor when the most free PEBs are available.
  2) Enable anchor move within the anchor area again as it is useful
     for distributing wear.
  3) Import a candidate fm anchor and check this PEB's erase count during
     wear leveling. If the wear leveling limit is exceeded, use the used
     anchor area PEB with the lowest erase count to replace it.

The anchor candidate can be removed, we can check fm_anchor PEB's erase
count during wear leveling. Fix it by:
  1) Removing 'fm_next_anchor' and check 'fm_anchor' during wear leveling.
  2) Preferentially filling one free peb into fm_wl_pool in condition of
     ubi->free_count > ubi->beb_rsvd_pebs, then try to reserve enough
     free count for fastmap non anchor pebs after the above prerequisites
     are met.
Then, there are at least 1 PEB in pool and 1 PEB in wl_pool after calling
ubi_refill_pools() with all erase works done.

Fetch a reproducer in [Link].

Fixes: 4b68bf9a69d22dd ("ubi: Select fastmap anchor PEBs ... rules")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=215407
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/fastmap-wl.c | 69 +++++++++++++++++++++++++++++---------------
 drivers/mtd/ubi/fastmap.c    | 11 -------
 drivers/mtd/ubi/ubi.h        |  4 +--
 drivers/mtd/ubi/wl.c         | 19 ++++++------
 4 files changed, 57 insertions(+), 46 deletions(-)

(limited to 'drivers')

diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
index 28f55f9cf715..053ab52668e8 100644
--- a/drivers/mtd/ubi/fastmap-wl.c
+++ b/drivers/mtd/ubi/fastmap-wl.c
@@ -97,6 +97,33 @@ out:
 	return e;
 }
 
+/*
+ * has_enough_free_count - whether ubi has enough free pebs to fill fm pools
+ * @ubi: UBI device description object
+ * @is_wl_pool: whether UBI is filling wear leveling pool
+ *
+ * This helper function checks whether there are enough free pebs (deducted
+ * by fastmap pebs) to fill fm_pool and fm_wl_pool, above rule works after
+ * there is at least one of free pebs is filled into fm_wl_pool.
+ * For wear leveling pool, UBI should also reserve free pebs for bad pebs
+ * handling, because there maybe no enough free pebs for user volumes after
+ * producing new bad pebs.
+ */
+static bool has_enough_free_count(struct ubi_device *ubi, bool is_wl_pool)
+{
+	int fm_used = 0;	// fastmap non anchor pebs.
+	int beb_rsvd_pebs;
+
+	if (!ubi->free.rb_node)
+		return false;
+
+	beb_rsvd_pebs = is_wl_pool ? ubi->beb_rsvd_pebs : 0;
+	if (ubi->fm_wl_pool.size > 0 && !(ubi->ro_mode || ubi->fm_disabled))
+		fm_used = ubi->fm_size / ubi->leb_size - 1;
+
+	return ubi->free_count - beb_rsvd_pebs > fm_used;
+}
+
 /**
  * ubi_refill_pools - refills all fastmap PEB pools.
  * @ubi: UBI device description object
@@ -120,21 +147,17 @@ void ubi_refill_pools(struct ubi_device *ubi)
 		wl_tree_add(ubi->fm_anchor, &ubi->free);
 		ubi->free_count++;
 	}
-	if (ubi->fm_next_anchor) {
-		wl_tree_add(ubi->fm_next_anchor, &ubi->free);
-		ubi->free_count++;
-	}
 
-	/* All available PEBs are in ubi->free, now is the time to get
+	/*
+	 * All available PEBs are in ubi->free, now is the time to get
 	 * the best anchor PEBs.
 	 */
 	ubi->fm_anchor = ubi_wl_get_fm_peb(ubi, 1);
-	ubi->fm_next_anchor = ubi_wl_get_fm_peb(ubi, 1);
 
 	for (;;) {
 		enough = 0;
 		if (pool->size < pool->max_size) {
-			if (!ubi->free.rb_node)
+			if (!has_enough_free_count(ubi, false))
 				break;
 
 			e = wl_get_wle(ubi);
@@ -147,8 +170,7 @@ void ubi_refill_pools(struct ubi_device *ubi)
 			enough++;
 
 		if (wl_pool->size < wl_pool->max_size) {
-			if (!ubi->free.rb_node ||
-			   (ubi->free_count - ubi->beb_rsvd_pebs < 5))
+			if (!has_enough_free_count(ubi, true))
 				break;
 
 			e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
@@ -286,20 +308,26 @@ static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi)
 int ubi_ensure_anchor_pebs(struct ubi_device *ubi)
 {
 	struct ubi_work *wrk;
+	struct ubi_wl_entry *anchor;
 
 	spin_lock(&ubi->wl_lock);
 
-	/* Do we have a next anchor? */
-	if (!ubi->fm_next_anchor) {
-		ubi->fm_next_anchor = ubi_wl_get_fm_peb(ubi, 1);
-		if (!ubi->fm_next_anchor)
-			/* Tell wear leveling to produce a new anchor PEB */
-			ubi->fm_do_produce_anchor = 1;
+	/* Do we already have an anchor? */
+	if (ubi->fm_anchor) {
+		spin_unlock(&ubi->wl_lock);
+		return 0;
 	}
 
-	/* Do wear leveling to get a new anchor PEB or check the
-	 * existing next anchor candidate.
-	 */
+	/* See if we can find an anchor PEB on the list of free PEBs */
+	anchor = ubi_wl_get_fm_peb(ubi, 1);
+	if (anchor) {
+		ubi->fm_anchor = anchor;
+		spin_unlock(&ubi->wl_lock);
+		return 0;
+	}
+
+	ubi->fm_do_produce_anchor = 1;
+	/* No luck, trigger wear leveling to produce a new anchor PEB. */
 	if (ubi->wl_scheduled) {
 		spin_unlock(&ubi->wl_lock);
 		return 0;
@@ -381,11 +409,6 @@ static void ubi_fastmap_close(struct ubi_device *ubi)
 		ubi->fm_anchor = NULL;
 	}
 
-	if (ubi->fm_next_anchor) {
-		return_unused_peb(ubi, ubi->fm_next_anchor);
-		ubi->fm_next_anchor = NULL;
-	}
-
 	if (ubi->fm) {
 		for (i = 0; i < ubi->fm->used_blocks; i++)
 			kfree(ubi->fm->e[i]);
diff --git a/drivers/mtd/ubi/fastmap.c b/drivers/mtd/ubi/fastmap.c
index 6b5f1ffd961b..6e95c4b1473e 100644
--- a/drivers/mtd/ubi/fastmap.c
+++ b/drivers/mtd/ubi/fastmap.c
@@ -1230,17 +1230,6 @@ static int ubi_write_fastmap(struct ubi_device *ubi,
 		fm_pos += sizeof(*fec);
 		ubi_assert(fm_pos <= ubi->fm_size);
 	}
-	if (ubi->fm_next_anchor) {
-		fec = (struct ubi_fm_ec *)(fm_raw + fm_pos);
-
-		fec->pnum = cpu_to_be32(ubi->fm_next_anchor->pnum);
-		set_seen(ubi, ubi->fm_next_anchor->pnum, seen_pebs);
-		fec->ec = cpu_to_be32(ubi->fm_next_anchor->ec);
-
-		free_peb_count++;
-		fm_pos += sizeof(*fec);
-		ubi_assert(fm_pos <= ubi->fm_size);
-	}
 	fmh->free_peb_count = cpu_to_be32(free_peb_count);
 
 	ubi_for_each_used_peb(ubi, wl_e, tmp_rb) {
diff --git a/drivers/mtd/ubi/ubi.h b/drivers/mtd/ubi/ubi.h
index 7c083ad58274..078112e23dfd 100644
--- a/drivers/mtd/ubi/ubi.h
+++ b/drivers/mtd/ubi/ubi.h
@@ -489,8 +489,7 @@ struct ubi_debug_info {
  * @fm_work: fastmap work queue
  * @fm_work_scheduled: non-zero if fastmap work was scheduled
  * @fast_attach: non-zero if UBI was attached by fastmap
- * @fm_anchor: The new anchor PEB used during fastmap update
- * @fm_next_anchor: An anchor PEB candidate for the next time fastmap is updated
+ * @fm_anchor: The next anchor PEB to use for fastmap
  * @fm_do_produce_anchor: If true produce an anchor PEB in wl
  *
  * @used: RB-tree of used physical eraseblocks
@@ -601,7 +600,6 @@ struct ubi_device {
 	int fm_work_scheduled;
 	int fast_attach;
 	struct ubi_wl_entry *fm_anchor;
-	struct ubi_wl_entry *fm_next_anchor;
 	int fm_do_produce_anchor;
 
 	/* Wear-leveling sub-system's stuff */
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index 8455f1d47f3c..afcdacb9d0e9 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -689,16 +689,16 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 
 #ifdef CONFIG_MTD_UBI_FASTMAP
 	e1 = find_anchor_wl_entry(&ubi->used);
-	if (e1 && ubi->fm_next_anchor &&
-	    (ubi->fm_next_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) {
+	if (e1 && ubi->fm_anchor &&
+	    (ubi->fm_anchor->ec - e1->ec >= UBI_WL_THRESHOLD)) {
 		ubi->fm_do_produce_anchor = 1;
-		/* fm_next_anchor is no longer considered a good anchor
-		 * candidate.
+		/*
+		 * fm_anchor is no longer considered a good anchor.
 		 * NULL assignment also prevents multiple wear level checks
 		 * of this PEB.
 		 */
-		wl_tree_add(ubi->fm_next_anchor, &ubi->free);
-		ubi->fm_next_anchor = NULL;
+		wl_tree_add(ubi->fm_anchor, &ubi->free);
+		ubi->fm_anchor = NULL;
 		ubi->free_count++;
 	}
 
@@ -1085,12 +1085,13 @@ static int __erase_worker(struct ubi_device *ubi, struct ubi_work *wl_wrk)
 	if (!err) {
 		spin_lock(&ubi->wl_lock);
 
-		if (!ubi->fm_disabled && !ubi->fm_next_anchor &&
+		if (!ubi->fm_disabled && !ubi->fm_anchor &&
 		    e->pnum < UBI_FM_MAX_START) {
-			/* Abort anchor production, if needed it will be
+			/*
+			 * Abort anchor production, if needed it will be
 			 * enabled again in the wear leveling started below.
 			 */
-			ubi->fm_next_anchor = e;
+			ubi->fm_anchor = e;
 			ubi->fm_do_produce_anchor = 0;
 		} else {
 			wl_tree_add(e, &ubi->free);
-- 
cgit v1.2.3-70-g09d2


From 14072ee33d5a2a07d735b667205f99d0a95de9f2 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Tue, 10 May 2022 20:31:25 +0800
Subject: ubi: fastmap: Check wl_pool for free peb before wear leveling

UBI fetches free peb from wl_pool during wear leveling, so UBI should
check wl_pool's empty status before wear leveling. Otherwise, UBI will
miss wear leveling chances when free pebs are run out.

Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/fastmap-wl.c | 52 ++++++++++++++++++++++++++++++++++++++++++++
 drivers/mtd/ubi/wl.c         | 14 ++++++++++--
 drivers/mtd/ubi/wl.h         |  2 ++
 3 files changed, 66 insertions(+), 2 deletions(-)

(limited to 'drivers')

diff --git a/drivers/mtd/ubi/fastmap-wl.c b/drivers/mtd/ubi/fastmap-wl.c
index 053ab52668e8..0ee452275578 100644
--- a/drivers/mtd/ubi/fastmap-wl.c
+++ b/drivers/mtd/ubi/fastmap-wl.c
@@ -275,6 +275,58 @@ out:
 	return ret;
 }
 
+/**
+ * next_peb_for_wl - returns next PEB to be used internally by the
+ * WL sub-system.
+ *
+ * @ubi: UBI device description object
+ */
+static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi)
+{
+	struct ubi_fm_pool *pool = &ubi->fm_wl_pool;
+	int pnum;
+
+	if (pool->used == pool->size)
+		return NULL;
+
+	pnum = pool->pebs[pool->used];
+	return ubi->lookuptbl[pnum];
+}
+
+/**
+ * need_wear_leveling - checks whether to trigger a wear leveling work.
+ * UBI fetches free PEB from wl_pool, we check free PEBs from both 'wl_pool'
+ * and 'ubi->free', because free PEB in 'ubi->free' tree maybe moved into
+ * 'wl_pool' by ubi_refill_pools().
+ *
+ * @ubi: UBI device description object
+ */
+static bool need_wear_leveling(struct ubi_device *ubi)
+{
+	int ec;
+	struct ubi_wl_entry *e;
+
+	if (!ubi->used.rb_node)
+		return false;
+
+	e = next_peb_for_wl(ubi);
+	if (!e) {
+		if (!ubi->free.rb_node)
+			return false;
+		e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
+		ec = e->ec;
+	} else {
+		ec = e->ec;
+		if (ubi->free.rb_node) {
+			e = find_wl_entry(ubi, &ubi->free, WL_FREE_MAX_DIFF);
+			ec = max(ec, e->ec);
+		}
+	}
+	e = rb_entry(rb_first(&ubi->used), struct ubi_wl_entry, u.rb);
+
+	return ec - e->ec >= UBI_WL_THRESHOLD;
+}
+
 /* get_peb_for_wl - returns a PEB to be used internally by the WL sub-system.
  *
  * @ubi: UBI device description object
diff --git a/drivers/mtd/ubi/wl.c b/drivers/mtd/ubi/wl.c
index afcdacb9d0e9..55bae06cf408 100644
--- a/drivers/mtd/ubi/wl.c
+++ b/drivers/mtd/ubi/wl.c
@@ -670,7 +670,11 @@ static int wear_leveling_worker(struct ubi_device *ubi, struct ubi_work *wrk,
 	ubi_assert(!ubi->move_from && !ubi->move_to);
 	ubi_assert(!ubi->move_to_put);
 
+#ifdef CONFIG_MTD_UBI_FASTMAP
+	if (!next_peb_for_wl(ubi) ||
+#else
 	if (!ubi->free.rb_node ||
+#endif
 	    (!ubi->used.rb_node && !ubi->scrub.rb_node)) {
 		/*
 		 * No free physical eraseblocks? Well, they must be waiting in
@@ -1003,8 +1007,6 @@ out_cancel:
 static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
 {
 	int err = 0;
-	struct ubi_wl_entry *e1;
-	struct ubi_wl_entry *e2;
 	struct ubi_work *wrk;
 
 	spin_lock(&ubi->wl_lock);
@@ -1017,6 +1019,13 @@ static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
 	 * the WL worker has to be scheduled anyway.
 	 */
 	if (!ubi->scrub.rb_node) {
+#ifdef CONFIG_MTD_UBI_FASTMAP
+		if (!need_wear_leveling(ubi))
+			goto out_unlock;
+#else
+		struct ubi_wl_entry *e1;
+		struct ubi_wl_entry *e2;
+
 		if (!ubi->used.rb_node || !ubi->free.rb_node)
 			/* No physical eraseblocks - no deal */
 			goto out_unlock;
@@ -1032,6 +1041,7 @@ static int ensure_wear_leveling(struct ubi_device *ubi, int nested)
 
 		if (!(e2->ec - e1->ec >= UBI_WL_THRESHOLD))
 			goto out_unlock;
+#endif
 		dbg_wl("schedule wear-leveling");
 	} else
 		dbg_wl("schedule scrubbing");
diff --git a/drivers/mtd/ubi/wl.h b/drivers/mtd/ubi/wl.h
index c93a53293786..5ebe374a08ae 100644
--- a/drivers/mtd/ubi/wl.h
+++ b/drivers/mtd/ubi/wl.h
@@ -5,6 +5,8 @@
 static void update_fastmap_work_fn(struct work_struct *wrk);
 static struct ubi_wl_entry *find_anchor_wl_entry(struct rb_root *root);
 static struct ubi_wl_entry *get_peb_for_wl(struct ubi_device *ubi);
+static struct ubi_wl_entry *next_peb_for_wl(struct ubi_device *ubi);
+static bool need_wear_leveling(struct ubi_device *ubi);
 static void ubi_fastmap_close(struct ubi_device *ubi);
 static inline void ubi_fastmap_init(struct ubi_device *ubi, int *count)
 {
-- 
cgit v1.2.3-70-g09d2


From 8c03a1c21d72210f81cb369cc528e3fde4b45411 Mon Sep 17 00:00:00 2001
From: Zhihao Cheng <chengzhihao1@huawei.com>
Date: Tue, 10 May 2022 20:31:26 +0800
Subject: ubi: ubi_create_volume: Fix use-after-free when volume creation
 failed

There is an use-after-free problem for 'eba_tbl' in ubi_create_volume()'s
error handling path:

  ubi_eba_replace_table(vol, eba_tbl)
    vol->eba_tbl = tbl
out_mapping:
  ubi_eba_destroy_table(eba_tbl)   // Free 'eba_tbl'
out_unlock:
  put_device(&vol->dev)
    vol_release
      kfree(tbl->entries)	  // UAF

Fix it by removing redundant 'eba_tbl' releasing.
Fetch a reproducer in [Link].

Fixes: 493cfaeaa0c9b ("mtd: utilize new cdev_device_add helper function")
Link: https://bugzilla.kernel.org/show_bug.cgi?id=215965
Signed-off-by: Zhihao Cheng <chengzhihao1@huawei.com>
Signed-off-by: Richard Weinberger <richard@nod.at>
---
 drivers/mtd/ubi/vmt.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'drivers')

diff --git a/drivers/mtd/ubi/vmt.c b/drivers/mtd/ubi/vmt.c
index 1bc7b3a05604..6ea95ade4ca6 100644
--- a/drivers/mtd/ubi/vmt.c
+++ b/drivers/mtd/ubi/vmt.c
@@ -309,7 +309,6 @@ out_mapping:
 	ubi->volumes[vol_id] = NULL;
 	ubi->vol_count -= 1;
 	spin_unlock(&ubi->volumes_lock);
-	ubi_eba_destroy_table(eba_tbl);
 out_acc:
 	spin_lock(&ubi->volumes_lock);
 	ubi->rsvd_pebs -= vol->reserved_pebs;
-- 
cgit v1.2.3-70-g09d2