diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/filemap.c | 91 | ||||
-rw-r--r-- | mm/percpu.c | 18 | ||||
-rw-r--r-- | mm/truncate.c | 69 | ||||
-rw-r--r-- | mm/vmscan.c | 9 | ||||
-rw-r--r-- | mm/workingset.c | 4 |
5 files changed, 141 insertions, 50 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 30ab120b33db..bc943867d68c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -11,6 +11,7 @@ */ #include <linux/export.h> #include <linux/compiler.h> +#include <linux/dax.h> #include <linux/fs.h> #include <linux/uaccess.h> #include <linux/capability.h> @@ -123,9 +124,9 @@ static void page_cache_tree_delete(struct address_space *mapping, __radix_tree_lookup(&mapping->page_tree, page->index, &node, &slot); if (shadow) { - mapping->nrshadows++; + mapping->nrexceptional++; /* - * Make sure the nrshadows update is committed before + * Make sure the nrexceptional update is committed before * the nrpages update so that final truncate racing * with reclaim does not see both counters 0 at the * same time and miss a shadow entry. @@ -481,6 +482,12 @@ int filemap_write_and_wait_range(struct address_space *mapping, { int err = 0; + if (dax_mapping(mapping) && mapping->nrexceptional) { + err = dax_writeback_mapping_range(mapping, lstart, lend); + if (err) + return err; + } + if (mapping->nrpages) { err = __filemap_fdatawrite_range(mapping, lstart, lend, WB_SYNC_ALL); @@ -579,9 +586,13 @@ static int page_cache_tree_insert(struct address_space *mapping, p = radix_tree_deref_slot_protected(slot, &mapping->tree_lock); if (!radix_tree_exceptional_entry(p)) return -EEXIST; + + if (WARN_ON(dax_mapping(mapping))) + return -EINVAL; + if (shadowp) *shadowp = p; - mapping->nrshadows--; + mapping->nrexceptional--; if (node) workingset_node_shadows_dec(node); } @@ -1245,9 +1256,9 @@ repeat: if (radix_tree_deref_retry(page)) goto restart; /* - * A shadow entry of a recently evicted page, - * or a swap entry from shmem/tmpfs. Return - * it without attempting to raise page count. + * A shadow entry of a recently evicted page, a swap + * entry from shmem/tmpfs or a DAX entry. Return it + * without attempting to raise page count. */ goto export; } @@ -1494,6 +1505,74 @@ repeat: } EXPORT_SYMBOL(find_get_pages_tag); +/** + * find_get_entries_tag - find and return entries that match @tag + * @mapping: the address_space to search + * @start: the starting page cache index + * @tag: the tag index + * @nr_entries: the maximum number of entries + * @entries: where the resulting entries are placed + * @indices: the cache indices corresponding to the entries in @entries + * + * Like find_get_entries, except we only return entries which are tagged with + * @tag. + */ +unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, + int tag, unsigned int nr_entries, + struct page **entries, pgoff_t *indices) +{ + void **slot; + unsigned int ret = 0; + struct radix_tree_iter iter; + + if (!nr_entries) + return 0; + + rcu_read_lock(); +restart: + radix_tree_for_each_tagged(slot, &mapping->page_tree, + &iter, start, tag) { + struct page *page; +repeat: + page = radix_tree_deref_slot(slot); + if (unlikely(!page)) + continue; + if (radix_tree_exception(page)) { + if (radix_tree_deref_retry(page)) { + /* + * Transient condition which can only trigger + * when entry at index 0 moves out of or back + * to root: none yet gotten, safe to restart. + */ + goto restart; + } + + /* + * A shadow entry of a recently evicted page, a swap + * entry from shmem/tmpfs or a DAX entry. Return it + * without attempting to raise page count. + */ + goto export; + } + if (!page_cache_get_speculative(page)) + goto repeat; + + /* Has the page moved? */ + if (unlikely(page != *slot)) { + page_cache_release(page); + goto repeat; + } +export: + indices[ret] = iter.index; + entries[ret] = page; + if (++ret == nr_entries) + break; + } + rcu_read_unlock(); + return ret; +} +EXPORT_SYMBOL(find_get_entries_tag); + /* * CD/DVDs are error prone. When a medium error occurs, the driver may fail * a _large_ part of the i/o request. Imagine the worst scenario: diff --git a/mm/percpu.c b/mm/percpu.c index 8a943b97a053..998607adf6eb 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -305,16 +305,12 @@ static void *pcpu_mem_zalloc(size_t size) /** * pcpu_mem_free - free memory * @ptr: memory to free - * @size: size of the area * * Free @ptr. @ptr should have been allocated using pcpu_mem_zalloc(). */ -static void pcpu_mem_free(void *ptr, size_t size) +static void pcpu_mem_free(void *ptr) { - if (size <= PAGE_SIZE) - kfree(ptr); - else - vfree(ptr); + kvfree(ptr); } /** @@ -463,8 +459,8 @@ out_unlock: * pcpu_mem_free() might end up calling vfree() which uses * IRQ-unsafe lock and thus can't be called under pcpu_lock. */ - pcpu_mem_free(old, old_size); - pcpu_mem_free(new, new_size); + pcpu_mem_free(old); + pcpu_mem_free(new); return 0; } @@ -732,7 +728,7 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) chunk->map = pcpu_mem_zalloc(PCPU_DFL_MAP_ALLOC * sizeof(chunk->map[0])); if (!chunk->map) { - pcpu_mem_free(chunk, pcpu_chunk_struct_size); + pcpu_mem_free(chunk); return NULL; } @@ -753,8 +749,8 @@ static void pcpu_free_chunk(struct pcpu_chunk *chunk) { if (!chunk) return; - pcpu_mem_free(chunk->map, chunk->map_alloc * sizeof(chunk->map[0])); - pcpu_mem_free(chunk, pcpu_chunk_struct_size); + pcpu_mem_free(chunk->map); + pcpu_mem_free(chunk); } /** diff --git a/mm/truncate.c b/mm/truncate.c index 76e35ad97102..e3ee0e27cd17 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -9,6 +9,7 @@ #include <linux/kernel.h> #include <linux/backing-dev.h> +#include <linux/dax.h> #include <linux/gfp.h> #include <linux/mm.h> #include <linux/swap.h> @@ -34,31 +35,39 @@ static void clear_exceptional_entry(struct address_space *mapping, return; spin_lock_irq(&mapping->tree_lock); - /* - * Regular page slots are stabilized by the page lock even - * without the tree itself locked. These unlocked entries - * need verification under the tree lock. - */ - if (!__radix_tree_lookup(&mapping->page_tree, index, &node, &slot)) - goto unlock; - if (*slot != entry) - goto unlock; - radix_tree_replace_slot(slot, NULL); - mapping->nrshadows--; - if (!node) - goto unlock; - workingset_node_shadows_dec(node); - /* - * Don't track node without shadow entries. - * - * Avoid acquiring the list_lru lock if already untracked. - * The list_empty() test is safe as node->private_list is - * protected by mapping->tree_lock. - */ - if (!workingset_node_shadows(node) && - !list_empty(&node->private_list)) - list_lru_del(&workingset_shadow_nodes, &node->private_list); - __radix_tree_delete_node(&mapping->page_tree, node); + + if (dax_mapping(mapping)) { + if (radix_tree_delete_item(&mapping->page_tree, index, entry)) + mapping->nrexceptional--; + } else { + /* + * Regular page slots are stabilized by the page lock even + * without the tree itself locked. These unlocked entries + * need verification under the tree lock. + */ + if (!__radix_tree_lookup(&mapping->page_tree, index, &node, + &slot)) + goto unlock; + if (*slot != entry) + goto unlock; + radix_tree_replace_slot(slot, NULL); + mapping->nrexceptional--; + if (!node) + goto unlock; + workingset_node_shadows_dec(node); + /* + * Don't track node without shadow entries. + * + * Avoid acquiring the list_lru lock if already untracked. + * The list_empty() test is safe as node->private_list is + * protected by mapping->tree_lock. + */ + if (!workingset_node_shadows(node) && + !list_empty(&node->private_list)) + list_lru_del(&workingset_shadow_nodes, + &node->private_list); + __radix_tree_delete_node(&mapping->page_tree, node); + } unlock: spin_unlock_irq(&mapping->tree_lock); } @@ -228,7 +237,7 @@ void truncate_inode_pages_range(struct address_space *mapping, int i; cleancache_invalidate_inode(mapping); - if (mapping->nrpages == 0 && mapping->nrshadows == 0) + if (mapping->nrpages == 0 && mapping->nrexceptional == 0) return; /* Offsets within partial pages */ @@ -402,7 +411,7 @@ EXPORT_SYMBOL(truncate_inode_pages); */ void truncate_inode_pages_final(struct address_space *mapping) { - unsigned long nrshadows; + unsigned long nrexceptional; unsigned long nrpages; /* @@ -416,14 +425,14 @@ void truncate_inode_pages_final(struct address_space *mapping) /* * When reclaim installs eviction entries, it increases - * nrshadows first, then decreases nrpages. Make sure we see + * nrexceptional first, then decreases nrpages. Make sure we see * this in the right order or we might miss an entry. */ nrpages = mapping->nrpages; smp_rmb(); - nrshadows = mapping->nrshadows; + nrexceptional = mapping->nrexceptional; - if (nrpages || nrshadows) { + if (nrpages || nrexceptional) { /* * As truncation uses a lockless tree lookup, cycle * the tree lock to make sure any ongoing tree diff --git a/mm/vmscan.c b/mm/vmscan.c index bd620b65db52..eb3dd37ccd7c 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -46,6 +46,7 @@ #include <linux/oom.h> #include <linux/prefetch.h> #include <linux/printk.h> +#include <linux/dax.h> #include <asm/tlbflush.h> #include <asm/div64.h> @@ -671,9 +672,15 @@ static int __remove_mapping(struct address_space *mapping, struct page *page, * inode reclaim needs to empty out the radix tree or * the nodes are lost. Don't plant shadows behind its * back. + * + * We also don't store shadows for DAX mappings because the + * only page cache pages found in these are zero pages + * covering holes, and because we don't want to mix DAX + * exceptional entries and shadow exceptional entries in the + * same page_tree. */ if (reclaimed && page_is_file_cache(page) && - !mapping_exiting(mapping)) + !mapping_exiting(mapping) && !dax_mapping(mapping)) shadow = workingset_eviction(mapping, page); __delete_from_page_cache(page, shadow, memcg); spin_unlock_irqrestore(&mapping->tree_lock, flags); diff --git a/mm/workingset.c b/mm/workingset.c index aa017133744b..61ead9e5549d 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -351,8 +351,8 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, node->slots[i] = NULL; BUG_ON(node->count < (1U << RADIX_TREE_COUNT_SHIFT)); node->count -= 1U << RADIX_TREE_COUNT_SHIFT; - BUG_ON(!mapping->nrshadows); - mapping->nrshadows--; + BUG_ON(!mapping->nrexceptional); + mapping->nrexceptional--; } } BUG_ON(node->count); |