diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-24 16:20:38 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-02-24 16:20:38 -0800 |
commit | 4c48faba5b7f18fb53e4aeeb768932f17c9da1ed (patch) | |
tree | 9d9d2ca881a670b5df55a663ee506f212c9514c3 /include | |
parent | 062c84fccc4444805738d76a2699c4d3c95184ec (diff) | |
parent | a553e3cd2053501b658feec2be9a3b662eb1b22b (diff) |
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"A few small subsystems and some of MM.
172 patches.
Subsystems affected by this patch series: hexagon, scripts, ntfs,
ocfs2, vfs, and mm (slab-generic, slab, slub, debug, pagecache, swap,
memcg, pagemap, mprotect, mremap, page-reporting, vmalloc, kasan,
pagealloc, memory-failure, hugetlb, vmscan, z3fold, compaction,
mempolicy, oom-kill, hugetlbfs, and migration)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (172 commits)
mm/migrate: remove unneeded semicolons
hugetlbfs: remove unneeded return value of hugetlb_vmtruncate()
hugetlbfs: fix some comment typos
hugetlbfs: correct some obsolete comments about inode i_mutex
hugetlbfs: make hugepage size conversion more readable
hugetlbfs: remove meaningless variable avoid_reserve
hugetlbfs: correct obsolete function name in hugetlbfs_read_iter()
hugetlbfs: use helper macro default_hstate in init_hugetlbfs_fs
hugetlbfs: remove useless BUG_ON(!inode) in hugetlbfs_setattr()
hugetlbfs: remove special hugetlbfs_set_page_dirty()
mm/hugetlb: change hugetlb_reserve_pages() to type bool
mm, oom: fix a comment in dump_task()
mm/mempolicy: use helper range_in_vma() in queue_pages_test_walk()
numa balancing: migrate on fault among multiple bound nodes
mm, compaction: make fast_isolate_freepages() stay within zone
mm/compaction: fix misbehaviors of fast_find_migrateblock()
mm/compaction: correct deferral logic for proactive compaction
mm/compaction: remove duplicated VM_BUG_ON_PAGE !PageLocked
mm/compaction: remove rcu_read_lock during page compaction
z3fold: simplify the zhdr initialization code in init_z3fold_page()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/fs.h | 4 | ||||
-rw-r--r-- | include/linux/gfp.h | 14 | ||||
-rw-r--r-- | include/linux/highmem-internal.h | 5 | ||||
-rw-r--r-- | include/linux/huge_mm.h | 15 | ||||
-rw-r--r-- | include/linux/hugetlb.h | 98 | ||||
-rw-r--r-- | include/linux/kasan-checks.h | 6 | ||||
-rw-r--r-- | include/linux/kasan.h | 37 | ||||
-rw-r--r-- | include/linux/memcontrol.h | 43 | ||||
-rw-r--r-- | include/linux/migrate.h | 2 | ||||
-rw-r--r-- | include/linux/mm.h | 22 | ||||
-rw-r--r-- | include/linux/mm_inline.h | 113 | ||||
-rw-r--r-- | include/linux/mmzone.h | 22 | ||||
-rw-r--r-- | include/linux/page-flags.h | 6 | ||||
-rw-r--r-- | include/linux/page_counter.h | 9 | ||||
-rw-r--r-- | include/linux/pagemap.h | 5 | ||||
-rw-r--r-- | include/linux/swap.h | 8 | ||||
-rw-r--r-- | include/trace/events/kmem.h | 24 | ||||
-rw-r--r-- | include/trace/events/pagemap.h | 11 | ||||
-rw-r--r-- | include/uapi/linux/mempolicy.h | 4 |
19 files changed, 276 insertions, 172 deletions
diff --git a/include/linux/fs.h b/include/linux/fs.h index 418b772d6eca..ec8f3ddf4a6a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3080,8 +3080,8 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *); extern int generic_write_check_limits(struct file *file, loff_t pos, loff_t *count); extern int generic_file_rw_checks(struct file *file_in, struct file *file_out); -extern ssize_t generic_file_buffered_read(struct kiocb *iocb, - struct iov_iter *to, ssize_t already_read); +ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to, + ssize_t already_read); extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *); extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *); extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *); diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 80544d5c08e7..220cd553a9e7 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -8,6 +8,20 @@ #include <linux/linkage.h> #include <linux/topology.h> +/* The typedef is in types.h but we want the documentation here */ +#if 0 +/** + * typedef gfp_t - Memory allocation flags. + * + * GFP flags are commonly used throughout Linux to indicate how memory + * should be allocated. The GFP acronym stands for get_free_pages(), + * the underlying memory allocation function. Not every GFP flag is + * supported by every function which may allocate memory. Most users + * will want to use a plain ``GFP_KERNEL``. + */ +typedef unsigned int __bitwise gfp_t; +#endif + struct vm_area_struct; /* diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h index 1bbe96dc8be6..7902c7d8b55f 100644 --- a/include/linux/highmem-internal.h +++ b/include/linux/highmem-internal.h @@ -127,11 +127,6 @@ static inline unsigned long totalhigh_pages(void) return (unsigned long)atomic_long_read(&_totalhigh_pages); } -static inline void totalhigh_pages_inc(void) -{ - atomic_long_inc(&_totalhigh_pages); -} - static inline void totalhigh_pages_add(long count) { atomic_long_add(count, &_totalhigh_pages); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 6a19f35f836b..ba973efcd369 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -78,6 +78,7 @@ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn, } enum transparent_hugepage_flag { + TRANSPARENT_HUGEPAGE_NEVER_DAX, TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, @@ -123,6 +124,13 @@ extern unsigned long transparent_hugepage_flags; */ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) { + + /* + * If the hardware/firmware marked hugepage support disabled. + */ + if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX)) + return false; + if (vma->vm_flags & VM_NOHUGEPAGE) return false; @@ -134,12 +142,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma) if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG)) return true; - /* - * For dax vmas, try to always use hugepage mappings. If the kernel does - * not support hugepages, fsdax mappings will fallback to PAGE_SIZE - * mappings, and device-dax namespaces, that try to guarantee a given - * mapping size, will fail to enable - */ + if (vma_is_dax(vma)) return true; diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index b5807f23caf8..cccd1aab69dd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -37,7 +37,7 @@ struct hugepage_subpool { struct hstate *hstate; long min_hpages; /* Minimum huge pages or -1 if no minimum. */ long rsv_hpages; /* Pages reserved against global pool to */ - /* sasitfy minimum size. */ + /* satisfy minimum size. */ }; struct resv_map { @@ -139,7 +139,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte, unsigned long dst_addr, unsigned long src_addr, struct page **pagep); -int hugetlb_reserve_pages(struct inode *inode, long from, long to, +bool hugetlb_reserve_pages(struct inode *inode, long from, long to, struct vm_area_struct *vma, vm_flags_t vm_flags); long hugetlb_unreserve_pages(struct inode *inode, long start, long end, @@ -472,6 +472,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr, unsigned long flags); #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */ +/* + * huegtlb page specific state flags. These flags are located in page.private + * of the hugetlb head page. Functions created via the below macros should be + * used to manipulate these flags. + * + * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at + * allocation time. Cleared when page is fully instantiated. Free + * routine checks flag to restore a reservation on error paths. + * Synchronization: Examined or modified by code that knows it has + * the only reference to page. i.e. After allocation but before use + * or when the page is being freed. + * HPG_migratable - Set after a newly allocated page is added to the page + * cache and/or page tables. Indicates the page is a candidate for + * migration. + * Synchronization: Initially set after new page allocation with no + * locking. When examined and modified during migration processing + * (isolate, migrate, putback) the hugetlb_lock is held. + * HPG_temporary - - Set on a page that is temporarily allocated from the buddy + * allocator. Typically used for migration target pages when no pages + * are available in the pool. The hugetlb free page path will + * immediately free pages with this flag set to the buddy allocator. + * Synchronization: Can be set after huge page allocation from buddy when + * code knows it has only reference. All other examinations and + * modifications require hugetlb_lock. + * HPG_freed - Set when page is on the free lists. + * Synchronization: hugetlb_lock held for examination and modification. + */ +enum hugetlb_page_flags { + HPG_restore_reserve = 0, + HPG_migratable, + HPG_temporary, + HPG_freed, + __NR_HPAGEFLAGS, +}; + +/* + * Macros to create test, set and clear function definitions for + * hugetlb specific page flags. + */ +#ifdef CONFIG_HUGETLB_PAGE +#define TESTHPAGEFLAG(uname, flname) \ +static inline int HPage##uname(struct page *page) \ + { return test_bit(HPG_##flname, &(page->private)); } + +#define SETHPAGEFLAG(uname, flname) \ +static inline void SetHPage##uname(struct page *page) \ + { set_bit(HPG_##flname, &(page->private)); } + +#define CLEARHPAGEFLAG(uname, flname) \ +static inline void ClearHPage##uname(struct page *page) \ + { clear_bit(HPG_##flname, &(page->private)); } +#else +#define TESTHPAGEFLAG(uname, flname) \ +static inline int HPage##uname(struct page *page) \ + { return 0; } + +#define SETHPAGEFLAG(uname, flname) \ +static inline void SetHPage##uname(struct page *page) \ + { } + +#define CLEARHPAGEFLAG(uname, flname) \ +static inline void ClearHPage##uname(struct page *page) \ + { } +#endif + +#define HPAGEFLAG(uname, flname) \ + TESTHPAGEFLAG(uname, flname) \ + SETHPAGEFLAG(uname, flname) \ + CLEARHPAGEFLAG(uname, flname) \ + +/* + * Create functions associated with hugetlb page flags + */ +HPAGEFLAG(RestoreReserve, restore_reserve) +HPAGEFLAG(Migratable, migratable) +HPAGEFLAG(Temporary, temporary) +HPAGEFLAG(Freed, freed) + #ifdef CONFIG_HUGETLB_PAGE #define HSTATE_NAME_LEN 32 @@ -531,6 +609,20 @@ extern unsigned int default_hstate_idx; #define default_hstate (hstates[default_hstate_idx]) +/* + * hugetlb page subpool pointer located in hpage[1].private + */ +static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage) +{ + return (struct hugepage_subpool *)(hpage+1)->private; +} + +static inline void hugetlb_set_page_subpool(struct page *hpage, + struct hugepage_subpool *subpool) +{ + set_page_private(hpage+1, (unsigned long)subpool); +} + static inline struct hstate *hstate_file(struct file *f) { return hstate_inode(file_inode(f)); @@ -770,8 +862,6 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma, } #endif -void set_page_huge_active(struct page *page); - #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; diff --git a/include/linux/kasan-checks.h b/include/linux/kasan-checks.h index ca5e89fb10d3..3d6d22a25bdc 100644 --- a/include/linux/kasan-checks.h +++ b/include/linux/kasan-checks.h @@ -5,6 +5,12 @@ #include <linux/types.h> /* + * The annotations present in this file are only relevant for the software + * KASAN modes that rely on compiler instrumentation, and will be optimized + * away for the hardware tag-based KASAN mode. Use kasan_check_byte() instead. + */ + +/* * __kasan_check_*: Always available when KASAN is enabled. This may be used * even in compilation units that selectively disable KASAN, but must use KASAN * to validate access to an address. Never use these in header files! diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 0aea9e2a2a01..7eaf2d9effb4 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -185,19 +185,18 @@ static __always_inline void * __must_check kasan_init_slab_obj( } bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip); -static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip) +static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object) { if (kasan_enabled()) - return __kasan_slab_free(s, object, ip); + return __kasan_slab_free(s, object, _RET_IP_); return false; } void __kasan_slab_free_mempool(void *ptr, unsigned long ip); -static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) +static __always_inline void kasan_slab_free_mempool(void *ptr) { if (kasan_enabled()) - __kasan_slab_free_mempool(ptr, ip); + __kasan_slab_free_mempool(ptr, _RET_IP_); } void * __must_check __kasan_slab_alloc(struct kmem_cache *s, @@ -241,12 +240,25 @@ static __always_inline void * __must_check kasan_krealloc(const void *object, } void __kasan_kfree_large(void *ptr, unsigned long ip); -static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip) +static __always_inline void kasan_kfree_large(void *ptr) { if (kasan_enabled()) - __kasan_kfree_large(ptr, ip); + __kasan_kfree_large(ptr, _RET_IP_); } +/* + * Unlike kasan_check_read/write(), kasan_check_byte() is performed even for + * the hardware tag-based mode that doesn't rely on compiler instrumentation. + */ +bool __kasan_check_byte(const void *addr, unsigned long ip); +static __always_inline bool kasan_check_byte(const void *addr) +{ + if (kasan_enabled()) + return __kasan_check_byte(addr, _RET_IP_); + return true; +} + + bool kasan_save_enable_multi_shot(void); void kasan_restore_multi_shot(bool enabled); @@ -277,12 +289,11 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache, { return (void *)object; } -static inline bool kasan_slab_free(struct kmem_cache *s, void *object, - unsigned long ip) +static inline bool kasan_slab_free(struct kmem_cache *s, void *object) { return false; } -static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {} +static inline void kasan_slab_free_mempool(void *ptr) {} static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object, gfp_t flags) { @@ -302,7 +313,11 @@ static inline void *kasan_krealloc(const void *object, size_t new_size, { return (void *)object; } -static inline void kasan_kfree_large(void *ptr, unsigned long ip) {} +static inline void kasan_kfree_large(void *ptr) {} +static inline bool kasan_check_byte(const void *address) +{ + return true; +} #endif /* CONFIG_KASAN */ diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eeb0b52203e9..e6dc793d587d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -92,6 +92,10 @@ struct lruvec_stat { long count[NR_VM_NODE_STAT_ITEMS]; }; +struct batched_lruvec_stat { + s32 count[NR_VM_NODE_STAT_ITEMS]; +}; + /* * Bitmap of shrinker::id corresponding to memcg-aware shrinkers, * which have elements charged to this memcg. @@ -107,11 +111,17 @@ struct memcg_shrinker_map { struct mem_cgroup_per_node { struct lruvec lruvec; - /* Legacy local VM stats */ + /* + * Legacy local VM stats. This should be struct lruvec_stat and + * cannot be optimized to struct batched_lruvec_stat. Because + * the threshold of the lruvec_stat_cpu can be as big as + * MEMCG_CHARGE_BATCH * PAGE_SIZE. It can fit into s32. But this + * filed has no upper limit. + */ struct lruvec_stat __percpu *lruvec_stat_local; /* Subtree VM stats (batched updates) */ - struct lruvec_stat __percpu *lruvec_stat_cpu; + struct batched_lruvec_stat __percpu *lruvec_stat_cpu; atomic_long_t lruvec_stat[NR_VM_NODE_STAT_ITEMS]; unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS]; @@ -475,19 +485,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK); } -/* - * set_page_objcgs - associate a page with a object cgroups vector - * @page: a pointer to the page struct - * @objcgs: a pointer to the object cgroups vector - * - * Atomically associates a page with a vector of object cgroups. - */ -static inline bool set_page_objcgs(struct page *page, - struct obj_cgroup **objcgs) -{ - return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs | - MEMCG_DATA_OBJCGS); -} #else static inline struct obj_cgroup **page_objcgs(struct page *page) { @@ -498,12 +495,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page) { return NULL; } - -static inline bool set_page_objcgs(struct page *page, - struct obj_cgroup **objcgs) -{ - return true; -} #endif static __always_inline bool memcg_stat_item_in_bytes(int idx) @@ -689,8 +680,6 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm); -struct mem_cgroup *get_mem_cgroup_from_page(struct page *page); - struct lruvec *lock_page_lruvec(struct page *page); struct lruvec *lock_page_lruvec_irq(struct page *page); struct lruvec *lock_page_lruvec_irqsave(struct page *page, @@ -1200,11 +1189,6 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) return NULL; } -static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page) -{ - return NULL; -} - static inline void mem_cgroup_put(struct mem_cgroup *memcg) { } @@ -1601,9 +1585,6 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg, #endif #ifdef CONFIG_MEMCG_KMEM -int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp, - unsigned int nr_pages); -void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages); int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge_page(struct page *page, int order); diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 4594838a0f7c..3a389633b68f 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -89,7 +89,7 @@ extern int PageMovable(struct page *page); extern void __SetPageMovable(struct page *page, struct address_space *mapping); extern void __ClearPageMovable(struct page *page); #else -static inline int PageMovable(struct page *page) { return 0; }; +static inline int PageMovable(struct page *page) { return 0; } static inline void __SetPageMovable(struct page *page, struct address_space *mapping) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 7deb7168104d..77e64e3eac80 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1187,6 +1187,9 @@ static inline void get_page(struct page *page) } bool __must_check try_grab_page(struct page *page, unsigned int flags); +__maybe_unused struct page *try_grab_compound_head(struct page *page, int refs, + unsigned int flags); + static inline __must_check bool try_get_page(struct page *page) { @@ -2310,32 +2313,20 @@ extern void free_initmem(void); extern unsigned long free_reserved_area(void *start, void *end, int poison, const char *s); -#ifdef CONFIG_HIGHMEM -/* - * Free a highmem page into the buddy system, adjusting totalhigh_pages - * and totalram_pages. - */ -extern void free_highmem_page(struct page *page); -#endif - extern void adjust_managed_page_count(struct page *page, long count); extern void mem_init_print_info(const char *str); extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end); /* Free the reserved page into the buddy system, so it gets managed. */ -static inline void __free_reserved_page(struct page *page) +static inline void free_reserved_page(struct page *page) { ClearPageReserved(page); init_page_count(page); __free_page(page); -} - -static inline void free_reserved_page(struct page *page) -{ - __free_reserved_page(page); adjust_managed_page_count(page, 1); } +#define free_highmem_page(page) free_reserved_page(page) static inline void mark_page_reserved(struct page *page) { @@ -2405,9 +2396,10 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn); #endif extern void set_dma_reserve(unsigned long new_dma_reserve); -extern void memmap_init_zone(unsigned long, int, unsigned long, +extern void memmap_init_range(unsigned long, int, unsigned long, unsigned long, unsigned long, enum meminit_context, struct vmem_altmap *, int migratetype); +extern void memmap_init_zone(struct zone *zone); extern void setup_per_zone_wmarks(void); extern int __meminit init_per_zone_wmark_min(void); extern void mem_init(void); diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 8fc71e9d7bb0..355ea1ee32bd 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -24,7 +24,7 @@ static inline int page_is_file_lru(struct page *page) return !PageSwapBacked(page); } -static __always_inline void __update_lru_size(struct lruvec *lruvec, +static __always_inline void update_lru_size(struct lruvec *lruvec, enum lru_list lru, enum zone_type zid, int nr_pages) { @@ -33,76 +33,27 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec, __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages); __mod_zone_page_state(&pgdat->node_zones[zid], NR_ZONE_LRU_BASE + lru, nr_pages); -} - -static __always_inline void update_lru_size(struct lruvec *lruvec, - enum lru_list lru, enum zone_type zid, - int nr_pages) -{ - __update_lru_size(lruvec, lru, zid, nr_pages); #ifdef CONFIG_MEMCG mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages); #endif } -static __always_inline void add_page_to_lru_list(struct page *page, - struct lruvec *lruvec, enum lru_list lru) -{ - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add(&page->lru, &lruvec->lists[lru]); -} - -static __always_inline void add_page_to_lru_list_tail(struct page *page, - struct lruvec *lruvec, enum lru_list lru) -{ - update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); - list_add_tail(&page->lru, &lruvec->lists[lru]); -} - -static __always_inline void del_page_from_lru_list(struct page *page, - struct lruvec *lruvec, enum lru_list lru) -{ - list_del(&page->lru); - update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page)); -} - /** - * page_lru_base_type - which LRU list type should a page be on? - * @page: the page to test - * - * Used for LRU list index arithmetic. - * - * Returns the base LRU type - file or anon - @page should be on. + * __clear_page_lru_flags - clear page lru flags before releasing a page + * @page: the page that was on lru and now has a zero reference */ -static inline enum lru_list page_lru_base_type(struct page *page) +static __always_inline void __clear_page_lru_flags(struct page *page) { - if (page_is_file_lru(page)) - return LRU_INACTIVE_FILE; - return LRU_INACTIVE_ANON; -} + VM_BUG_ON_PAGE(!PageLRU(page), page); -/** - * page_off_lru - which LRU list was page on? clearing its lru flags. - * @page: the page to test - * - * Returns the LRU list a page was on, as an index into the array of LRU - * lists; and clears its Unevictable or Active flags, ready for freeing. - */ -static __always_inline enum lru_list page_off_lru(struct page *page) -{ - enum lru_list lru; + __ClearPageLRU(page); - if (PageUnevictable(page)) { - __ClearPageUnevictable(page); - lru = LRU_UNEVICTABLE; - } else { - lru = page_lru_base_type(page); - if (PageActive(page)) { - __ClearPageActive(page); - lru += LRU_ACTIVE; - } - } - return lru; + /* this shouldn't happen, so leave the flags to bad_page() */ + if (PageActive(page) && PageUnevictable(page)) + return; + + __ClearPageActive(page); + __ClearPageUnevictable(page); } /** @@ -116,13 +67,41 @@ static __always_inline enum lru_list page_lru(struct page *page) { enum lru_list lru; + VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page); + if (PageUnevictable(page)) - lru = LRU_UNEVICTABLE; - else { - lru = page_lru_base_type(page); - if (PageActive(page)) - lru += LRU_ACTIVE; - } + return LRU_UNEVICTABLE; + + lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON; + if (PageActive(page)) + lru += LRU_ACTIVE; + return lru; } + +static __always_inline void add_page_to_lru_list(struct page *page, + struct lruvec *lruvec) +{ + enum lru_list lru = page_lru(page); + + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); + list_add(&page->lru, &lruvec->lists[lru]); +} + +static __always_inline void add_page_to_lru_list_tail(struct page *page, + struct lruvec *lruvec) +{ + enum lru_list lru = page_lru(page); + + update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page)); + list_add_tail(&page->lru, &lruvec->lists[lru]); +} + +static __always_inline void del_page_from_lru_list(struct page *page, + struct lruvec *lruvec) +{ + list_del(&page->lru); + update_lru_size(lruvec, page_lru(page), page_zonenum(page), + -thp_nr_pages(page)); +} #endif diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b593316bff3d..9198b7ade85f 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -206,10 +206,30 @@ enum node_stat_item { NR_KERNEL_SCS_KB, /* measured in KiB */ #endif NR_PAGETABLE, /* used for pagetables */ +#ifdef CONFIG_SWAP + NR_SWAPCACHE, +#endif NR_VM_NODE_STAT_ITEMS }; /* + * Returns true if the item should be printed in THPs (/proc/vmstat + * currently prints number of anon, file and shmem THPs. But the item + * is charged in pages). + */ +static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item) +{ + if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE)) + return false; + + return item == NR_ANON_THPS || + item == NR_FILE_THPS || + item == NR_SHMEM_THPS || + item == NR_SHMEM_PMDMAPPED || + item == NR_FILE_PMDMAPPED; +} + +/* * Returns true if the value is measured in bytes (most vmstat values are * measured in pages). This defines the API part, the internal representation * might be different. @@ -872,8 +892,6 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec) #endif } -extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx); - #ifdef CONFIG_HAVE_MEMORYLESS_NODES int local_memory_node(int node_id); #else diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index ec5d0290e0ee..db914477057b 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -592,15 +592,9 @@ static inline void ClearPageCompound(struct page *page) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); int PageHeadHuge(struct page *page); -bool page_huge_active(struct page *page); #else TESTPAGEFLAG_FALSE(Huge) TESTPAGEFLAG_FALSE(HeadHuge) - -static inline bool page_huge_active(struct page *page) -{ - return 0; -} #endif diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 85bd413e784e..679591301994 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -12,7 +12,6 @@ struct page_counter { unsigned long low; unsigned long high; unsigned long max; - struct page_counter *parent; /* effective memory.min and memory.min usage tracking */ unsigned long emin; @@ -27,6 +26,14 @@ struct page_counter { /* legacy */ unsigned long watermark; unsigned long failcnt; + + /* + * 'parent' is placed here to be far from 'usage' to reduce + * cache false sharing, as 'usage' is written mostly while + * parent is frequently read for cgroup's hierarchical + * counting nature. + */ + struct page_counter *parent; }; #if BITS_PER_LONG == 32 diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index d5570deff400..bd629d676a27 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -681,8 +681,7 @@ static inline int wait_on_page_locked_killable(struct page *page) return wait_on_page_bit_killable(compound_head(page), PG_locked); } -extern void put_and_wait_on_page_locked(struct page *page); - +int put_and_wait_on_page_locked(struct page *page, int state); void wait_on_page_writeback(struct page *page); extern void end_page_writeback(struct page *page); void wait_for_stable_page(struct page *page); @@ -757,7 +756,7 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, pgoff_t index, gfp_t gfp_mask); extern void delete_from_page_cache(struct page *page); extern void __delete_from_page_cache(struct page *page, void *shadow); -int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); +void replace_page_cache_page(struct page *old, struct page *new); void delete_from_page_cache_batch(struct address_space *mapping, struct pagevec *pvec); diff --git a/include/linux/swap.h b/include/linux/swap.h index 3f1f7ae0fbe9..32f665b1ee85 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -356,7 +356,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page, extern unsigned long zone_reclaimable_pages(struct zone *zone); extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order, gfp_t gfp_mask, nodemask_t *mask); -extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); +extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode); extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg, unsigned long nr_pages, gfp_t gfp_mask, @@ -408,7 +408,11 @@ extern struct address_space *swapper_spaces[]; #define swap_address_space(entry) \ (&swapper_spaces[swp_type(entry)][swp_offset(entry) \ >> SWAP_ADDRESS_SPACE_SHIFT]) -extern unsigned long total_swapcache_pages(void); +static inline unsigned long total_swapcache_pages(void) +{ + return global_node_page_state(NR_SWAPCACHE); +} + extern void show_swap_cache_info(void); extern int add_to_swap(struct page *page); extern void *get_shadow_from_swap_cache(swp_entry_t entry); diff --git a/include/trace/events/kmem.h b/include/trace/events/kmem.h index f65b1f6db22d..40845b0d5dad 100644 --- a/include/trace/events/kmem.h +++ b/include/trace/events/kmem.h @@ -115,7 +115,7 @@ DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node, TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node) ); -DECLARE_EVENT_CLASS(kmem_free, +TRACE_EVENT(kfree, TP_PROTO(unsigned long call_site, const void *ptr), @@ -135,18 +135,26 @@ DECLARE_EVENT_CLASS(kmem_free, (void *)__entry->call_site, __entry->ptr) ); -DEFINE_EVENT(kmem_free, kfree, +TRACE_EVENT(kmem_cache_free, - TP_PROTO(unsigned long call_site, const void *ptr), + TP_PROTO(unsigned long call_site, const void *ptr, const char *name), - TP_ARGS(call_site, ptr) -); + TP_ARGS(call_site, ptr, name), -DEFINE_EVENT(kmem_free, kmem_cache_free, + TP_STRUCT__entry( + __field( unsigned long, call_site ) + __field( const void *, ptr ) + __field( const char *, name ) + ), - TP_PROTO(unsigned long call_site, const void *ptr), + TP_fast_assign( + __entry->call_site = call_site; + __entry->ptr = ptr; + __entry->name = name; + ), - TP_ARGS(call_site, ptr) + TP_printk("call_site=%pS ptr=%p name=%s", + (void *)__entry->call_site, __entry->ptr, __entry->name) ); TRACE_EVENT(mm_page_free, diff --git a/include/trace/events/pagemap.h b/include/trace/events/pagemap.h index 8fd1babae761..e1735fe7c76a 100644 --- a/include/trace/events/pagemap.h +++ b/include/trace/events/pagemap.h @@ -27,24 +27,21 @@ TRACE_EVENT(mm_lru_insertion, - TP_PROTO( - struct page *page, - int lru - ), + TP_PROTO(struct page *page), - TP_ARGS(page, lru), + TP_ARGS(page), TP_STRUCT__entry( __field(struct page *, page ) __field(unsigned long, pfn ) - __field(int, lru ) + __field(enum lru_list, lru ) __field(unsigned long, flags ) ), TP_fast_assign( __entry->page = page; __entry->pfn = page_to_pfn(page); - __entry->lru = lru; + __entry->lru = page_lru(page); __entry->flags = trace_pagemap_flags(page); ), diff --git a/include/uapi/linux/mempolicy.h b/include/uapi/linux/mempolicy.h index 3354774af61e..8948467b3992 100644 --- a/include/uapi/linux/mempolicy.h +++ b/include/uapi/linux/mempolicy.h @@ -28,12 +28,14 @@ enum { /* Flags for set_mempolicy */ #define MPOL_F_STATIC_NODES (1 << 15) #define MPOL_F_RELATIVE_NODES (1 << 14) +#define MPOL_F_NUMA_BALANCING (1 << 13) /* Optimize with NUMA balancing if possible */ /* * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to * either set_mempolicy() or mbind(). */ -#define MPOL_MODE_FLAGS (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES) +#define MPOL_MODE_FLAGS \ + (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES | MPOL_F_NUMA_BALANCING) /* Flags for get_mempolicy */ #define MPOL_F_NODE (1<<0) /* return next IL mode instead of node mask */ |