author    Linus Torvalds <torvalds@linux-foundation.org>  2024-11-23 09:58:07 -0800
committer Linus Torvalds <torvalds@linux-foundation.org>  2024-11-23 09:58:07 -0800
commit    5c00ff742bf5caf85f60e1c73999f99376fb865d (patch)
tree      fa484e83c27af79f1c0511e7e0673507461c9379 /drivers/block
parent    228a1157fb9fec47eb135b51c0202b574e079ebf (diff)
parent    2532e6c74a67e65b95f310946e0c0e0a41b3a34b (diff)
Merge tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:

 - The series "zram: optimal post-processing target selection" from Sergey Senozhatsky improves zram's post-processing selection algorithm. This leads to improved memory savings.

 - Wei Yang has gone to town on the maple tree code, contributing several series which clean up the implementation:
	- "refine mas_mab_cp()"
	- "Reduce the space to be cleared for maple_big_node"
	- "maple_tree: simplify mas_push_node()"
	- "Following cleanup after introduce mas_wr_store_type()"
	- "refine storing null"

 - The series "selftests/mm: hugetlb_fault_after_madv improvements" from David Hildenbrand fixes this selftest for s390.

 - The series "introduce pte_offset_map_{ro|rw}_nolock()" from Qi Zheng implements some rationalizations and cleanups in the page mapping code.

 - The series "mm: optimize shadow entries removal" from Shakeel Butt optimizes the file truncation code by speeding up the handling of shadow entries.

 - The series "Remove PageKsm()" from Matthew Wilcox completes the migration of this flag over to being a folio-based flag.

 - The series "Unify hugetlb into arch_get_unmapped_area functions" from Oscar Salvador implements a bunch of consolidations and cleanups in the hugetlb code.

 - The series "Do not shatter hugezeropage on wp-fault" from Dev Jain takes away the wp-fault time practice of turning a huge zero page into small pages. Instead we replace the whole thing with a THP. More consistent, cleaner, and potentially saves a large number of pagefaults.

 - The series "percpu: Add a test case and fix for clang" from Andy Shevchenko enhances and fixes the kernel's built-in percpu test code.

 - The series "mm/mremap: Remove extra vma tree walk" from Liam Howlett optimizes mremap() by avoiding doing things which we didn't need to do.

 - The series "Improve the tmpfs large folio read performance" from Baolin Wang teaches tmpfs to copy data into userspace at the folio size rather than as individual pages. A 20% speedup was observed.

 - The series "mm/damon/vaddr: Fix issue in damon_va_evenly_split_region()" from Zheng Yejian fixes DAMON splitting.

 - The series "memcg-v1: fully deprecate charge moving" from Shakeel Butt removes the long-deprecated memcg-v1 charge moving feature.

 - The series "fix error handling in mmap_region() and refactor" from Lorenzo Stoakes cleans up some of the mmap() error handling and addresses some potential performance issues.

 - The series "x86/module: use large ROX pages for text allocations" from Mike Rapoport teaches x86 to use large pages for read-only-execute module text.

 - The series "page allocation tag compression" from Suren Baghdasaryan is follow-on maintenance work for the new page allocation profiling feature.

 - The series "page->index removals in mm" from Matthew Wilcox removes most references to page->index in mm/. A slow march towards shrinking struct page.

 - The series "damon/{self,kunit}tests: minor fixups for DAMON debugfs interface tests" from Andrew Paniakin performs maintenance work for DAMON's self-testing code.

 - The series "mm: zswap swap-out of large folios" from Kanchana Sridhar improves zswap's batching of compression and decompression. It is a step along the way towards using Intel IAA hardware acceleration for this zswap operation.

 - The series "kasan: migrate the last module test to kunit" from Sabyrzhan Tasbolatov completes the migration of the KASAN built-in tests over to the KUnit framework.
- The series "implement lightweight guard pages" from Lorenzo Stoakes permits userapace to place fault-generating guard pages within a single VMA, rather than requiring that multiple VMAs be created for this. Improved efficiencies for userspace memory allocators are expected. - The series "memcg: tracepoint for flushing stats" from JP Kobryn uses tracepoints to provide increased visibility into memcg stats flushing activity. - The series "zram: IDLE flag handling fixes" from Sergey Senozhatsky fixes a zram buglet which potentially affected performance. - The series "mm: add more kernel parameters to control mTHP" from MaĆ­ra Canal enhances our ability to control/configuremultisize THP from the kernel boot command line. - The series "kasan: few improvements on kunit tests" from Sabyrzhan Tasbolatov has a couple of fixups for the KASAN KUnit tests. - The series "mm/list_lru: Split list_lru lock into per-cgroup scope" from Kairui Song optimizes list_lru memory utilization when lockdep is enabled. * tag 'mm-stable-2024-11-18-19-27' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (215 commits) cma: enforce non-zero pageblock_order during cma_init_reserved_mem() mm/kfence: add a new kunit test test_use_after_free_read_nofault() zram: fix NULL pointer in comp_algorithm_show() memcg/hugetlb: add hugeTLB counters to memcg vmstat: call fold_vm_zone_numa_events() before show per zone NUMA event mm: mmap_lock: check trace_mmap_lock_$type_enabled() instead of regcount zram: ZRAM_DEF_COMP should depend on ZRAM MAINTAINERS/MEMORY MANAGEMENT: add document files for mm Docs/mm/damon: recommend academic papers to read and/or cite mm: define general function pXd_init() kmemleak: iommu/iova: fix transient kmemleak false positive mm/list_lru: simplify the list_lru walk callback function mm/list_lru: split the lock to per-cgroup scope mm/list_lru: simplify reparenting and initial allocation mm/list_lru: code clean up for reparenting mm/list_lru: don't export list_lru_add mm/list_lru: don't pass unnecessary key parameters kasan: add kunit tests for kmalloc_track_caller, kmalloc_node_track_caller kasan: change kasan_atomics kunit test as KUNIT_CASE_SLOW kasan: use EXPORT_SYMBOL_IF_KUNIT to export symbols ...
Diffstat (limited to 'drivers/block')
-rw-r--r--  drivers/block/zram/Kconfig    |   1
-rw-r--r--  drivers/block/zram/zram_drv.c | 384
-rw-r--r--  drivers/block/zram/zram_drv.h |   3
3 files changed, 290 insertions, 98 deletions
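The zram_drv.c changes below keep post-processing candidates in size-class buckets and always drain the largest class first, so writeback and recompression reclaim the most memory per processed slot. Here is a minimal userspace sketch of that selection policy, assuming 4 KiB pages; the singly linked lists and the main() driver are illustrative stand-ins for the kernel's list_head machinery, not kernel code:

/* Sketch of zram's bucket-based post-processing target selection. */
#include <stdio.h>
#include <stdlib.h>

#define PAGE_SIZE            4096	/* assumption: 4 KiB pages */
#define PP_BUCKET_SIZE_RANGE 64
#define NUM_PP_BUCKETS       ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)

struct pp_slot {
	unsigned long index;	/* slot number in the zram table */
	size_t obj_size;	/* compressed object size, in bytes */
	struct pp_slot *next;
};

static struct pp_slot *buckets[NUM_PP_BUCKETS];

/* Same math as place_pp_slot(): bucket id = obj_size / range. */
static void place(struct pp_slot *pps)
{
	size_t idx = pps->obj_size / PP_BUCKET_SIZE_RANGE;

	pps->next = buckets[idx];
	buckets[idx] = pps;
}

/* Same policy as select_pp_slot(): largest size class first. */
static struct pp_slot *select_slot(void)
{
	for (int idx = NUM_PP_BUCKETS - 1; idx >= 0; idx--) {
		if (buckets[idx]) {
			struct pp_slot *pps = buckets[idx];

			buckets[idx] = pps->next;
			return pps;
		}
	}
	return NULL;
}

int main(void)
{
	size_t sizes[] = { 100, 4096, 700, 63 };
	struct pp_slot *pps;

	for (unsigned long i = 0; i < 4; i++) {
		pps = malloc(sizeof(*pps));
		if (!pps)
			return 1;
		pps->index = i;
		pps->obj_size = sizes[i];
		place(pps);
	}
	/* Drains in descending size-class order: 4096, 700, 100, 63. */
	while ((pps = select_slot())) {
		printf("slot %lu (%zu bytes)\n", pps->index, pps->obj_size);
		free(pps);
	}
	return 0;
}

Compiled as-is, this prints the slots in descending size-class order (4096, 700, 100, 63 bytes), which is the order select_pp_slot() hands slots to the writeback and recompression loops in the driver.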
diff --git a/drivers/block/zram/Kconfig b/drivers/block/zram/Kconfig
index 6aea609b795c..402b7b175863 100644
--- a/drivers/block/zram/Kconfig
+++ b/drivers/block/zram/Kconfig
@@ -94,6 +94,7 @@ endchoice
config ZRAM_DEF_COMP
string
+ depends on ZRAM
default "lzo-rle" if ZRAM_DEF_COMP_LZORLE
default "lzo" if ZRAM_DEF_COMP_LZO
default "lz4" if ZRAM_DEF_COMP_LZ4
diff --git a/drivers/block/zram/zram_drv.c b/drivers/block/zram/zram_drv.c
index ad9c9bc3ccfc..3dee026988dc 100644
--- a/drivers/block/zram/zram_drv.c
+++ b/drivers/block/zram/zram_drv.c
@@ -178,11 +178,105 @@ static inline u32 zram_get_priority(struct zram *zram, u32 index)
static void zram_accessed(struct zram *zram, u32 index)
{
zram_clear_flag(zram, index, ZRAM_IDLE);
+ zram_clear_flag(zram, index, ZRAM_PP_SLOT);
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
zram->table[index].ac_time = ktime_get_boottime();
#endif
}
+#if defined CONFIG_ZRAM_WRITEBACK || defined CONFIG_ZRAM_MULTI_COMP
+struct zram_pp_slot {
+ unsigned long index;
+ struct list_head entry;
+};
+
+/*
+ * A post-processing bucket is, essentially, a size class; this defines
+ * the range (in bytes) of pp-slot sizes in a particular bucket.
+ */
+#define PP_BUCKET_SIZE_RANGE 64
+#define NUM_PP_BUCKETS ((PAGE_SIZE / PP_BUCKET_SIZE_RANGE) + 1)
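+/*
+ * E.g. with 4 KiB pages: objects of 0..63 bytes land in bucket 0,
+ * 64..127 in bucket 1, and a full PAGE_SIZE (huge) object in the
+ * last bucket, index PAGE_SIZE / PP_BUCKET_SIZE_RANGE.
+ */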
+
+struct zram_pp_ctl {
+ struct list_head pp_buckets[NUM_PP_BUCKETS];
+};
+
+static struct zram_pp_ctl *init_pp_ctl(void)
+{
+ struct zram_pp_ctl *ctl;
+ u32 idx;
+
+ ctl = kmalloc(sizeof(*ctl), GFP_KERNEL);
+ if (!ctl)
+ return NULL;
+
+ for (idx = 0; idx < NUM_PP_BUCKETS; idx++)
+ INIT_LIST_HEAD(&ctl->pp_buckets[idx]);
+ return ctl;
+}
+
+static void release_pp_slot(struct zram *zram, struct zram_pp_slot *pps)
+{
+ list_del_init(&pps->entry);
+
+ zram_slot_lock(zram, pps->index);
+ zram_clear_flag(zram, pps->index, ZRAM_PP_SLOT);
+ zram_slot_unlock(zram, pps->index);
+
+ kfree(pps);
+}
+
+static void release_pp_ctl(struct zram *zram, struct zram_pp_ctl *ctl)
+{
+ u32 idx;
+
+ if (!ctl)
+ return;
+
+ for (idx = 0; idx < NUM_PP_BUCKETS; idx++) {
+ while (!list_empty(&ctl->pp_buckets[idx])) {
+ struct zram_pp_slot *pps;
+
+ pps = list_first_entry(&ctl->pp_buckets[idx],
+ struct zram_pp_slot,
+ entry);
+ release_pp_slot(zram, pps);
+ }
+ }
+
+ kfree(ctl);
+}
+
+static void place_pp_slot(struct zram *zram, struct zram_pp_ctl *ctl,
+ struct zram_pp_slot *pps)
+{
+ u32 idx;
+
+ idx = zram_get_obj_size(zram, pps->index) / PP_BUCKET_SIZE_RANGE;
+ list_add(&pps->entry, &ctl->pp_buckets[idx]);
+
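+ /*
+ * ZRAM_PP_SLOT marks the slot as claimed for post-processing.
+ * zram_accessed() and zram_free_page() clear it, which is how the
+ * post-processing loops later detect slots that changed while the
+ * slot lock was dropped.
+ */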
+ zram_set_flag(zram, pps->index, ZRAM_PP_SLOT);
+}
+
+static struct zram_pp_slot *select_pp_slot(struct zram_pp_ctl *ctl)
+{
+ struct zram_pp_slot *pps = NULL;
+ s32 idx = NUM_PP_BUCKETS - 1;
+
+ /* The higher the bucket id, the better the slot is for post-processing */
+ while (idx >= 0) {
+ pps = list_first_entry_or_null(&ctl->pp_buckets[idx],
+ struct zram_pp_slot,
+ entry);
+ if (pps)
+ break;
+
+ idx--;
+ }
+ return pps;
+}
+#endif
+
static inline void update_used_max(struct zram *zram,
const unsigned long pages)
{
@@ -296,19 +390,28 @@ static void mark_idle(struct zram *zram, ktime_t cutoff)
for (index = 0; index < nr_pages; index++) {
/*
- * Do not mark ZRAM_UNDER_WB slot as ZRAM_IDLE to close race.
- * See the comment in writeback_store.
+ * Do not mark ZRAM_SAME slots as ZRAM_IDLE, because no
+ * post-processing (recompress, writeback) happens to the
+ * ZRAM_SAME slot.
+ *
+ * And ZRAM_WB slots simply cannot be ZRAM_IDLE.
*/
zram_slot_lock(zram, index);
- if (zram_allocated(zram, index) &&
- !zram_test_flag(zram, index, ZRAM_UNDER_WB)) {
+ if (!zram_allocated(zram, index) ||
+ zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME)) {
+ zram_slot_unlock(zram, index);
+ continue;
+ }
+
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
- is_idle = !cutoff || ktime_after(cutoff,
- zram->table[index].ac_time);
+ is_idle = !cutoff ||
+ ktime_after(cutoff, zram->table[index].ac_time);
#endif
- if (is_idle)
- zram_set_flag(zram, index, ZRAM_IDLE);
- }
+ if (is_idle)
+ zram_set_flag(zram, index, ZRAM_IDLE);
+ else
+ zram_clear_flag(zram, index, ZRAM_IDLE);
zram_slot_unlock(zram, index);
}
}
@@ -587,11 +690,57 @@ static void read_from_bdev_async(struct zram *zram, struct page *page,
#define IDLE_WRITEBACK (1<<1)
#define INCOMPRESSIBLE_WRITEBACK (1<<2)
+static int scan_slots_for_writeback(struct zram *zram, u32 mode,
+ unsigned long nr_pages,
+ unsigned long index,
+ struct zram_pp_ctl *ctl)
+{
+ struct zram_pp_slot *pps = NULL;
+
+ for (; nr_pages != 0; index++, nr_pages--) {
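+ /*
+ * Allocate the candidate descriptor before taking the slot lock:
+ * GFP_KERNEL may sleep. A descriptor left over from a skipped
+ * slot is reused on the next iteration.
+ */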
+ if (!pps)
+ pps = kmalloc(sizeof(*pps), GFP_KERNEL);
+ if (!pps)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&pps->entry);
+
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ if (zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME))
+ goto next;
+
+ if (mode & IDLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_IDLE))
+ goto next;
+ if (mode & HUGE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+ if (mode & INCOMPRESSIBLE_WRITEBACK &&
+ !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ goto next;
+
+ pps->index = index;
+ place_pp_slot(zram, ctl, pps);
+ pps = NULL;
+next:
+ zram_slot_unlock(zram, index);
+ }
+
+ kfree(pps);
+ return 0;
+}
+
static ssize_t writeback_store(struct device *dev,
struct device_attribute *attr, const char *buf, size_t len)
{
struct zram *zram = dev_to_zram(dev);
unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ struct zram_pp_ctl *ctl = NULL;
+ struct zram_pp_slot *pps;
unsigned long index = 0;
struct bio bio;
struct bio_vec bio_vec;
@@ -626,6 +775,12 @@ static ssize_t writeback_store(struct device *dev,
goto release_init_lock;
}
+ /* Do not permit concurrent post-processing actions. */
+ if (atomic_xchg(&zram->pp_in_progress, 1)) {
+ up_read(&zram->init_lock);
+ return -EAGAIN;
+ }
+
if (!zram->backing_dev) {
ret = -ENODEV;
goto release_init_lock;
@@ -637,7 +792,15 @@ static ssize_t writeback_store(struct device *dev,
goto release_init_lock;
}
- for (; nr_pages != 0; index++, nr_pages--) {
+ ctl = init_pp_ctl();
+ if (!ctl) {
+ ret = -ENOMEM;
+ goto release_init_lock;
+ }
+
+ scan_slots_for_writeback(zram, mode, nr_pages, index, ctl);
+
+ while ((pps = select_pp_slot(ctl))) {
spin_lock(&zram->wb_limit_lock);
if (zram->wb_limit_enable && !zram->bd_wb_limit) {
spin_unlock(&zram->wb_limit_lock);
@@ -654,38 +817,20 @@ static ssize_t writeback_store(struct device *dev,
}
}
+ index = pps->index;
zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index))
- goto next;
-
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_UNDER_WB))
- goto next;
-
- if (mode & IDLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
- goto next;
- if (mode & HUGE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
- goto next;
- if (mode & INCOMPRESSIBLE_WRITEBACK &&
- !zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
- goto next;
-
/*
- * Clearing ZRAM_UNDER_WB is duty of caller.
- * IOW, zram_free_page never clear it.
+ * scan_slots() sets ZRAM_PP_SLOT and releases the slot lock, so
+ * slots can change in the meantime. If slots are accessed or
+ * freed they lose ZRAM_PP_SLOT flag and hence we don't
+ * post-process them.
*/
- zram_set_flag(zram, index, ZRAM_UNDER_WB);
- /* Need for hugepage writeback racing */
- zram_set_flag(zram, index, ZRAM_IDLE);
+ if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
+ goto next;
zram_slot_unlock(zram, index);
+
if (zram_read_page(zram, page, index, NULL)) {
- zram_slot_lock(zram, index);
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
+ release_pp_slot(zram, pps);
continue;
}
@@ -700,10 +845,7 @@ static ssize_t writeback_store(struct device *dev,
*/
err = submit_bio_wait(&bio);
if (err) {
- zram_slot_lock(zram, index);
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_clear_flag(zram, index, ZRAM_IDLE);
- zram_slot_unlock(zram, index);
+ release_pp_slot(zram, pps);
/*
* BIO errors are not fatal, we continue and simply
* attempt to writeback the remaining objects (pages).
@@ -717,25 +859,19 @@ static ssize_t writeback_store(struct device *dev,
}
atomic64_inc(&zram->stats.bd_writes);
+ zram_slot_lock(zram, index);
/*
- * We released zram_slot_lock so need to check if the slot was
- * changed. If there is freeing for the slot, we can catch it
- * easily by zram_allocated.
- * A subtle case is the slot is freed/reallocated/marked as
- * ZRAM_IDLE again. To close the race, idle_store doesn't
- * mark ZRAM_IDLE once it found the slot was ZRAM_UNDER_WB.
- * Thus, we could close the race by checking ZRAM_IDLE bit.
+ * Same as above: we release the slot lock during writeback, so
+ * the slot can change under us, via slot_free() alone or via
+ * slot_free() followed by reallocation (zram_write_page()). In
+ * both cases the slot loses its ZRAM_PP_SLOT flag, and no
+ * concurrent post-processing can set ZRAM_PP_SLOT on such
+ * slots until the current post-processing finishes.
*/
- zram_slot_lock(zram, index);
- if (!zram_allocated(zram, index) ||
- !zram_test_flag(zram, index, ZRAM_IDLE)) {
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
- zram_clear_flag(zram, index, ZRAM_IDLE);
+ if (!zram_test_flag(zram, index, ZRAM_PP_SLOT))
goto next;
- }
zram_free_page(zram, index);
- zram_clear_flag(zram, index, ZRAM_UNDER_WB);
zram_set_flag(zram, index, ZRAM_WB);
zram_set_element(zram, index, blk_idx);
blk_idx = 0;
@@ -746,12 +882,15 @@ static ssize_t writeback_store(struct device *dev,
spin_unlock(&zram->wb_limit_lock);
next:
zram_slot_unlock(zram, index);
+ release_pp_slot(zram, pps);
}
if (blk_idx)
free_block_bdev(zram, blk_idx);
__free_page(page);
release_init_lock:
+ release_pp_ctl(zram, ctl);
+ atomic_set(&zram->pp_in_progress, 0);
up_read(&zram->init_lock);
return ret;
@@ -1342,19 +1481,17 @@ static void zram_free_page(struct zram *zram, size_t index)
#ifdef CONFIG_ZRAM_TRACK_ENTRY_ACTIME
zram->table[index].ac_time = 0;
#endif
- if (zram_test_flag(zram, index, ZRAM_IDLE))
- zram_clear_flag(zram, index, ZRAM_IDLE);
+
+ zram_clear_flag(zram, index, ZRAM_IDLE);
+ zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
+ zram_clear_flag(zram, index, ZRAM_PP_SLOT);
+ zram_set_priority(zram, index, 0);
if (zram_test_flag(zram, index, ZRAM_HUGE)) {
zram_clear_flag(zram, index, ZRAM_HUGE);
atomic64_dec(&zram->stats.huge_pages);
}
- if (zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
- zram_clear_flag(zram, index, ZRAM_INCOMPRESSIBLE);
-
- zram_set_priority(zram, index, 0);
-
if (zram_test_flag(zram, index, ZRAM_WB)) {
zram_clear_flag(zram, index, ZRAM_WB);
free_block_bdev(zram, zram_get_element(zram, index));
@@ -1378,13 +1515,11 @@ static void zram_free_page(struct zram *zram, size_t index)
zs_free(zram->mem_pool, handle);
atomic64_sub(zram_get_obj_size(zram, index),
- &zram->stats.compr_data_size);
+ &zram->stats.compr_data_size);
out:
atomic64_dec(&zram->stats.pages_stored);
zram_set_handle(zram, index, 0);
zram_set_obj_size(zram, index, 0);
- WARN_ON_ONCE(zram->table[index].flags &
- ~(1UL << ZRAM_UNDER_WB));
}
/*
@@ -1648,6 +1783,52 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
}
#ifdef CONFIG_ZRAM_MULTI_COMP
+#define RECOMPRESS_IDLE (1 << 0)
+#define RECOMPRESS_HUGE (1 << 1)
+
+static int scan_slots_for_recompress(struct zram *zram, u32 mode,
+ struct zram_pp_ctl *ctl)
+{
+ unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
+ struct zram_pp_slot *pps = NULL;
+ unsigned long index;
+
+ for (index = 0; index < nr_pages; index++) {
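+ /* As in scan_slots_for_writeback(), allocate before locking the slot. */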
+ if (!pps)
+ pps = kmalloc(sizeof(*pps), GFP_KERNEL);
+ if (!pps)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&pps->entry);
+
+ zram_slot_lock(zram, index);
+ if (!zram_allocated(zram, index))
+ goto next;
+
+ if (mode & RECOMPRESS_IDLE &&
+ !zram_test_flag(zram, index, ZRAM_IDLE))
+ goto next;
+
+ if (mode & RECOMPRESS_HUGE &&
+ !zram_test_flag(zram, index, ZRAM_HUGE))
+ goto next;
+
+ if (zram_test_flag(zram, index, ZRAM_WB) ||
+ zram_test_flag(zram, index, ZRAM_SAME) ||
+ zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ goto next;
+
+ pps->index = index;
+ place_pp_slot(zram, ctl, pps);
+ pps = NULL;
+next:
+ zram_slot_unlock(zram, index);
+ }
+
+ kfree(pps);
+ return 0;
+}
+
/*
* This function will decompress (unless it's ZRAM_HUGE) the page and then
* attempt to compress it using provided compression algorithm priority
@@ -1655,7 +1836,7 @@ static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
*
* Corresponding ZRAM slot should be locked.
*/
-static int zram_recompress(struct zram *zram, u32 index, struct page *page,
+static int recompress_slot(struct zram *zram, u32 index, struct page *page,
u64 *num_recomp_pages, u32 threshold, u32 prio,
u32 prio_max)
{
@@ -1685,6 +1866,13 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
if (ret)
return ret;
+ /*
+ * We touched this entry so mark it as non-IDLE. This makes sure that
+ * we don't preserve the IDLE flag and don't incorrectly pick this
+ * entry for a different post-processing type (e.g. writeback).
+ */
+ zram_clear_flag(zram, index, ZRAM_IDLE);
+
class_index_old = zs_lookup_class_index(zram->mem_pool, comp_len_old);
/*
* Iterate the secondary comp algorithms list (in order of priority)
@@ -1798,20 +1986,17 @@ static int zram_recompress(struct zram *zram, u32 index, struct page *page,
return 0;
}
-#define RECOMPRESS_IDLE (1 << 0)
-#define RECOMPRESS_HUGE (1 << 1)
-
static ssize_t recompress_store(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t len)
{
u32 prio = ZRAM_SECONDARY_COMP, prio_max = ZRAM_MAX_COMPS;
struct zram *zram = dev_to_zram(dev);
- unsigned long nr_pages = zram->disksize >> PAGE_SHIFT;
char *args, *param, *val, *algo = NULL;
u64 num_recomp_pages = ULLONG_MAX;
+ struct zram_pp_ctl *ctl = NULL;
+ struct zram_pp_slot *pps;
u32 mode = 0, threshold = 0;
- unsigned long index;
struct page *page;
ssize_t ret;
@@ -1881,6 +2066,12 @@ static ssize_t recompress_store(struct device *dev,
goto release_init_lock;
}
+ /* Do not permit concurrent post-processing actions. */
+ if (atomic_xchg(&zram->pp_in_progress, 1)) {
+ up_read(&zram->init_lock);
+ return -EAGAIN;
+ }
+
if (algo) {
bool found = false;
@@ -1907,36 +2098,32 @@ static ssize_t recompress_store(struct device *dev,
goto release_init_lock;
}
+ ctl = init_pp_ctl();
+ if (!ctl) {
+ ret = -ENOMEM;
+ goto release_init_lock;
+ }
+
+ scan_slots_for_recompress(zram, mode, ctl);
+
ret = len;
- for (index = 0; index < nr_pages; index++) {
+ while ((pps = select_pp_slot(ctl))) {
int err = 0;
if (!num_recomp_pages)
break;
- zram_slot_lock(zram, index);
-
- if (!zram_allocated(zram, index))
- goto next;
-
- if (mode & RECOMPRESS_IDLE &&
- !zram_test_flag(zram, index, ZRAM_IDLE))
- goto next;
-
- if (mode & RECOMPRESS_HUGE &&
- !zram_test_flag(zram, index, ZRAM_HUGE))
- goto next;
-
- if (zram_test_flag(zram, index, ZRAM_WB) ||
- zram_test_flag(zram, index, ZRAM_UNDER_WB) ||
- zram_test_flag(zram, index, ZRAM_SAME) ||
- zram_test_flag(zram, index, ZRAM_INCOMPRESSIBLE))
+ zram_slot_lock(zram, pps->index);
+ if (!zram_test_flag(zram, pps->index, ZRAM_PP_SLOT))
goto next;
- err = zram_recompress(zram, index, page, &num_recomp_pages,
- threshold, prio, prio_max);
+ err = recompress_slot(zram, pps->index, page,
+ &num_recomp_pages, threshold,
+ prio, prio_max);
next:
- zram_slot_unlock(zram, index);
+ zram_slot_unlock(zram, pps->index);
+ release_pp_slot(zram, pps);
+
if (err) {
ret = err;
break;
@@ -1948,6 +2135,8 @@ next:
__free_page(page);
release_init_lock:
+ release_pp_ctl(zram, ctl);
+ atomic_set(&zram->pp_in_progress, 0);
up_read(&zram->init_lock);
return ret;
}
@@ -2105,7 +2294,7 @@ static void zram_destroy_comps(struct zram *zram)
{
u32 prio;
- for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+ for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
struct zcomp *comp = zram->comps[prio];
zram->comps[prio] = NULL;
@@ -2144,6 +2333,7 @@ static void zram_reset_device(struct zram *zram)
zram->disksize = 0;
zram_destroy_comps(zram);
memset(&zram->stats, 0, sizeof(zram->stats));
+ atomic_set(&zram->pp_in_progress, 0);
reset_bdev(zram);
comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
@@ -2176,7 +2366,7 @@ static ssize_t disksize_store(struct device *dev,
goto out_unlock;
}
- for (prio = 0; prio < ZRAM_MAX_COMPS; prio++) {
+ for (prio = ZRAM_PRIMARY_COMP; prio < ZRAM_MAX_COMPS; prio++) {
if (!zram->comp_algs[prio])
continue;
@@ -2381,6 +2571,9 @@ static int zram_add(void)
zram->disk->fops = &zram_devops;
zram->disk->private_data = zram;
snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
+ atomic_set(&zram->pp_in_progress, 0);
+ zram_comp_params_reset(zram);
+ comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
set_capacity(zram->disk, 0);
@@ -2388,9 +2581,6 @@ static int zram_add(void)
if (ret)
goto out_cleanup_disk;
- zram_comp_params_reset(zram);
- comp_algorithm_set(zram, ZRAM_PRIMARY_COMP, default_compressor);
-
zram_debugfs_register(zram);
pr_info("Added device: %s\n", zram->disk->disk_name);
return device_id;
diff --git a/drivers/block/zram/zram_drv.h b/drivers/block/zram/zram_drv.h
index cfc8c059db63..134be414e210 100644
--- a/drivers/block/zram/zram_drv.h
+++ b/drivers/block/zram/zram_drv.h
@@ -47,7 +47,7 @@
enum zram_pageflags {
ZRAM_SAME = ZRAM_FLAG_SHIFT, /* Page consists of the same element */
ZRAM_WB, /* page is stored on backing_device */
- ZRAM_UNDER_WB, /* page is under writeback */
+ ZRAM_PP_SLOT, /* Selected for post-processing */
ZRAM_HUGE, /* Incompressible page */
ZRAM_IDLE, /* not accessed page since last idle marking */
ZRAM_INCOMPRESSIBLE, /* none of the algorithms could compress it */
@@ -139,5 +139,6 @@ struct zram {
#ifdef CONFIG_ZRAM_MEMORY_TRACKING
struct dentry *debugfs_dir;
#endif
+ atomic_t pp_in_progress;
};
#endif
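The new pp_in_progress field serializes writeback_store() and recompress_store(): the first caller to flip it from 0 to 1 with atomic_xchg() proceeds, any concurrent caller bails out with -EAGAIN, and the winner resets the flag when done. Below is a minimal sketch of the same xchg-based guard using C11 atomics; pp_begin()/pp_end() are illustrative names, not the kernel API:

/* Sketch of the atomic_xchg() single-owner guard used above. */
#include <errno.h>
#include <stdatomic.h>
#include <stdio.h>

static atomic_int pp_in_progress;

/* Only one post-processing action (writeback/recompress) may run. */
static int pp_begin(void)
{
	/* Old value 1 means somebody else already owns post-processing. */
	if (atomic_exchange(&pp_in_progress, 1))
		return -EAGAIN;
	return 0;
}

static void pp_end(void)
{
	atomic_store(&pp_in_progress, 0);
}

int main(void)
{
	if (pp_begin() == 0) {
		puts("first caller: post-processing");
		/* A second caller now fails instead of racing with us. */
		printf("second caller: %d (-EAGAIN)\n", pp_begin());
		pp_end();
	}
	return 0;
}

Testing old-then-set in one atomic step is what closes the race; a plain load-check-store pair would let two writers both observe 0 and proceed.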