| | | |
|---|---|---|
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 19:25:39 -0700 |
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-31 19:25:39 -0700 |
| commit | ac694dbdbc403c00e2c14d10bc7b8412cc378259 | |
| tree | e37328cfbeaf43716dd5914cad9179e57e84df76 /include | |
| parent | a40a1d3d0a2fd613fdec6d89d3c053268ced76ed | |
| parent | 437ea90cc3afdca5229b41c6b1d38c4842756cb9 | |
Merge branch 'akpm' (Andrew's patch-bomb)
Merge Andrew's second set of patches:
 - MM
 - a few random fixes
 - a couple of RTC leftovers
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (120 commits)
  rtc/rtc-88pm80x: remove unneed devm_kfree
  rtc/rtc-88pm80x: assign ret only when rtc_register_driver fails
  mm: hugetlbfs: close race during teardown of hugetlbfs shared page tables
  tmpfs: distribute interleave better across nodes
  mm: remove redundant initialization
  mm: warn if pg_data_t isn't initialized with zero
  mips: zero out pg_data_t when it's allocated
  memcg: fix memory accounting scalability in shrink_page_list
  mm/sparse: remove index_init_lock
  mm/sparse: more checks on mem_section number
  mm/sparse: optimize sparse_index_alloc
  memcg: add mem_cgroup_from_css() helper
  memcg: further prevent OOM with too many dirty pages
  memcg: prevent OOM with too many dirty pages
  mm: mmu_notifier: fix freed page still mapped in secondary MMU
  mm: memcg: only check anon swapin page charges for swap cache
  mm: memcg: only check swap cache pages for repeated charging
  mm: memcg: split swapin charge function into private and public part
  mm: memcg: remove needless !mm fixup to init_mm when charging
  mm: memcg: remove unneeded shmem charge type
  ...
Diffstat (limited to 'include')
30 files changed, 492 insertions, 74 deletions
| diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 489de625cd25..c97c6b9cd38e 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,6 +17,7 @@  #include <linux/timer.h>  #include <linux/writeback.h>  #include <linux/atomic.h> +#include <linux/sysctl.h>  struct page;  struct device; @@ -304,6 +305,8 @@ void clear_bdi_congested(struct backing_dev_info *bdi, int sync);  void set_bdi_congested(struct backing_dev_info *bdi, int sync);  long congestion_wait(int sync, long timeout);  long wait_iff_congested(struct zone *zone, int sync, long timeout); +int pdflush_proc_obsolete(struct ctl_table *table, int write, +		void __user *buffer, size_t *lenp, loff_t *ppos);  static inline bool bdi_cap_writeback_dirty(struct backing_dev_info *bdi)  { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 0edb65dd8edd..7b7ac9ccec7a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -160,6 +160,7 @@ enum rq_flag_bits {  	__REQ_FLUSH_SEQ,	/* request for flush sequence */  	__REQ_IO_STAT,		/* account I/O stat */  	__REQ_MIXED_MERGE,	/* merge of different types, fail separately */ +	__REQ_KERNEL, 		/* direct IO to kernel pages */  	__REQ_NR_BITS,		/* stops here */  }; @@ -201,5 +202,6 @@ enum rq_flag_bits {  #define REQ_IO_STAT		(1 << __REQ_IO_STAT)  #define REQ_MIXED_MERGE		(1 << __REQ_MIXED_MERGE)  #define REQ_SECURE		(1 << __REQ_SECURE) +#define REQ_KERNEL		(1 << __REQ_KERNEL)  #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 0bd390ce98b2..dfae957398c3 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -31,7 +31,7 @@ SUBSYS(cpuacct)  /* */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  SUBSYS(mem_cgroup)  #endif @@ -72,3 +72,9 @@ SUBSYS(net_prio)  #endif  /* */ + +#ifdef CONFIG_CGROUP_HUGETLB +SUBSYS(hugetlb) +#endif + +/* */ diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 51a90b7f2d60..133ddcf83397 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -58,7 +58,7 @@ static inline bool compaction_deferred(struct zone *zone, int order)  	if (++zone->compact_considered > defer_limit)  		zone->compact_considered = defer_limit; -	return zone->compact_considered < (1UL << zone->compact_defer_shift); +	return zone->compact_considered < defer_limit;  }  #else @@ -85,7 +85,7 @@ static inline void defer_compaction(struct zone *zone, int order)  static inline bool compaction_deferred(struct zone *zone, int order)  { -	return 1; +	return true;  }  #endif /* CONFIG_COMPACTION */ diff --git a/include/linux/fs.h b/include/linux/fs.h index b178f9e91e23..d7eed5b98ae2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -165,6 +165,8 @@ struct inodes_stat_t {  #define READ			0  #define WRITE			RW_MASK  #define READA			RWA_MASK +#define KERNEL_READ		(READ|REQ_KERNEL) +#define KERNEL_WRITE		(WRITE|REQ_KERNEL)  #define READ_SYNC		(READ | REQ_SYNC)  #define WRITE_SYNC		(WRITE | REQ_SYNC | REQ_NOIDLE) @@ -427,6 +429,7 @@ struct kstatfs;  struct vm_area_struct;  struct vfsmount;  struct cred; +struct swap_info_struct;  extern void __init inode_init(void);  extern void __init inode_init_early(void); @@ -636,6 +639,11 @@ struct address_space_operations {  	int (*is_partially_uptodate) (struct page *, read_descriptor_t *,  					unsigned long);  	int (*error_remove_page)(struct address_space *, struct page *); + +	/* swapfile support */ +	int (*swap_activate)(struct 
swap_info_struct *sis, struct file *file, +				sector_t *span); +	void (*swap_deactivate)(struct file *file);  };  extern const struct address_space_operations empty_aops; diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 1e49be49d324..4883f393f50a 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -23,6 +23,7 @@ struct vm_area_struct;  #define ___GFP_REPEAT		0x400u  #define ___GFP_NOFAIL		0x800u  #define ___GFP_NORETRY		0x1000u +#define ___GFP_MEMALLOC		0x2000u  #define ___GFP_COMP		0x4000u  #define ___GFP_ZERO		0x8000u  #define ___GFP_NOMEMALLOC	0x10000u @@ -76,9 +77,14 @@ struct vm_area_struct;  #define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)	/* See above */  #define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)	/* See above */  #define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY) /* See above */ +#define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */  #define __GFP_COMP	((__force gfp_t)___GFP_COMP)	/* Add compound page metadata */  #define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)	/* Return zeroed page on success */ -#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves */ +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves. +							 * This takes precedence over the +							 * __GFP_MEMALLOC flag if both are +							 * set +							 */  #define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */  #define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */  #define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ @@ -129,7 +135,7 @@ struct vm_area_struct;  /* Control page allocator reclaim behavior */  #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\  			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ -			__GFP_NORETRY|__GFP_NOMEMALLOC) +			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)  /* Control slab gfp mask during early boot */  #define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)) @@ -379,6 +385,9 @@ void drain_local_pages(void *dummy);   */  extern gfp_t gfp_allowed_mask; +/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */ +bool gfp_pfmemalloc_allowed(gfp_t gfp_mask); +  extern void pm_restrict_gfp_mask(void);  extern void pm_restore_gfp_mask(void); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 774fa47b3b5b..ef788b5b4a35 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -39,10 +39,17 @@ extern unsigned long totalhigh_pages;  void kmap_flush_unused(void); +struct page *kmap_to_page(void *addr); +  #else /* CONFIG_HIGHMEM */  static inline unsigned int nr_free_highpages(void) { return 0; } +static inline struct page *kmap_to_page(void *addr) +{ +	return virt_to_page(addr); +} +  #define totalhigh_pages 0UL  #ifndef ARCH_HAS_KMAP diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index d5d6bbe2259e..225164842ab6 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -4,9 +4,11 @@  #include <linux/mm_types.h>  #include <linux/fs.h>  #include <linux/hugetlb_inline.h> +#include <linux/cgroup.h>  struct ctl_table;  struct user_struct; +struct mmu_gather;  #ifdef CONFIG_HUGETLB_PAGE @@ -20,6 +22,11 @@ struct hugepage_subpool {  	long max_hpages, used_hpages;  }; +extern spinlock_t hugetlb_lock; +extern int hugetlb_max_hstate __read_mostly; +#define for_each_hstate(h) \ +	for ((h) = hstates; (h) < &hstates[hugetlb_max_hstate]; (h)++) + 
 struct hugepage_subpool *hugepage_new_subpool(long nr_blocks);  void hugepage_put_subpool(struct hugepage_subpool *spool); @@ -40,9 +47,14 @@ int follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,  			struct page **, struct vm_area_struct **,  			unsigned long *, int *, int, unsigned int flags);  void unmap_hugepage_range(struct vm_area_struct *, -			unsigned long, unsigned long, struct page *); -void __unmap_hugepage_range(struct vm_area_struct *, -			unsigned long, unsigned long, struct page *); +			  unsigned long, unsigned long, struct page *); +void __unmap_hugepage_range_final(struct mmu_gather *tlb, +			  struct vm_area_struct *vma, +			  unsigned long start, unsigned long end, +			  struct page *ref_page); +void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, +				unsigned long start, unsigned long end, +				struct page *ref_page);  int hugetlb_prefault(struct address_space *, struct vm_area_struct *);  void hugetlb_report_meminfo(struct seq_file *);  int hugetlb_report_node_meminfo(int, char *); @@ -98,7 +110,6 @@ static inline unsigned long hugetlb_total_pages(void)  #define follow_huge_addr(mm, addr, write)	ERR_PTR(-EINVAL)  #define copy_hugetlb_page_range(src, dst, vma)	({ BUG(); 0; })  #define hugetlb_prefault(mapping, vma)		({ BUG(); 0; }) -#define unmap_hugepage_range(vma, start, end, page)	BUG()  static inline void hugetlb_report_meminfo(struct seq_file *m)  {  } @@ -112,13 +123,31 @@ static inline void hugetlb_report_meminfo(struct seq_file *m)  #define hugetlb_free_pgd_range(tlb, addr, end, floor, ceiling) ({BUG(); 0; })  #define hugetlb_fault(mm, vma, addr, flags)	({ BUG(); 0; })  #define huge_pte_offset(mm, address)	0 -#define dequeue_hwpoisoned_huge_page(page)	0 +static inline int dequeue_hwpoisoned_huge_page(struct page *page) +{ +	return 0; +} +  static inline void copy_huge_page(struct page *dst, struct page *src)  {  }  #define hugetlb_change_protection(vma, address, end, newprot) +static inline void __unmap_hugepage_range_final(struct mmu_gather *tlb, +			struct vm_area_struct *vma, unsigned long start, +			unsigned long end, struct page *ref_page) +{ +	BUG(); +} + +static inline void __unmap_hugepage_range(struct mmu_gather *tlb, +			struct vm_area_struct *vma, unsigned long start, +			unsigned long end, struct page *ref_page) +{ +	BUG(); +} +  #endif /* !CONFIG_HUGETLB_PAGE */  #define HUGETLB_ANON_FILE "anon_hugepage" @@ -199,10 +228,15 @@ struct hstate {  	unsigned long resv_huge_pages;  	unsigned long surplus_huge_pages;  	unsigned long nr_overcommit_huge_pages; +	struct list_head hugepage_activelist;  	struct list_head hugepage_freelists[MAX_NUMNODES];  	unsigned int nr_huge_pages_node[MAX_NUMNODES];  	unsigned int free_huge_pages_node[MAX_NUMNODES];  	unsigned int surplus_huge_pages_node[MAX_NUMNODES]; +#ifdef CONFIG_CGROUP_HUGETLB +	/* cgroup control files */ +	struct cftype cgroup_files[5]; +#endif  	char name[HSTATE_NAME_LEN];  }; @@ -302,6 +336,11 @@ static inline unsigned hstate_index_to_shift(unsigned index)  	return hstates[index].order + PAGE_SHIFT;  } +static inline int hstate_index(struct hstate *h) +{ +	return h - hstates; +} +  #else  struct hstate {};  #define alloc_huge_page_node(h, nid) NULL @@ -320,6 +359,7 @@ static inline unsigned int pages_per_huge_page(struct hstate *h)  	return 1;  }  #define hstate_index_to_shift(index) 0 +#define hstate_index(h) 0  #endif  #endif /* _LINUX_HUGETLB_H */ diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h new file mode 100644 index 
000000000000..d73878c694b3 --- /dev/null +++ b/include/linux/hugetlb_cgroup.h @@ -0,0 +1,126 @@ +/* + * Copyright IBM Corporation, 2012 + * Author Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it would be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. + * + */ + +#ifndef _LINUX_HUGETLB_CGROUP_H +#define _LINUX_HUGETLB_CGROUP_H + +#include <linux/res_counter.h> + +struct hugetlb_cgroup; +/* + * Minimum page order trackable by hugetlb cgroup. + * At least 3 pages are necessary for all the tracking information. + */ +#define HUGETLB_CGROUP_MIN_ORDER	2 + +#ifdef CONFIG_CGROUP_HUGETLB + +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +{ +	VM_BUG_ON(!PageHuge(page)); + +	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) +		return NULL; +	return (struct hugetlb_cgroup *)page[2].lru.next; +} + +static inline +int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +{ +	VM_BUG_ON(!PageHuge(page)); + +	if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) +		return -1; +	page[2].lru.next = (void *)h_cg; +	return 0; +} + +static inline bool hugetlb_cgroup_disabled(void) +{ +	if (hugetlb_subsys.disabled) +		return true; +	return false; +} + +extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, +					struct hugetlb_cgroup **ptr); +extern void hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, +					 struct hugetlb_cgroup *h_cg, +					 struct page *page); +extern void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, +					 struct page *page); +extern void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, +					   struct hugetlb_cgroup *h_cg); +extern int hugetlb_cgroup_file_init(int idx) __init; +extern void hugetlb_cgroup_migrate(struct page *oldhpage, +				   struct page *newhpage); + +#else +static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) +{ +	return NULL; +} + +static inline +int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) +{ +	return 0; +} + +static inline bool hugetlb_cgroup_disabled(void) +{ +	return true; +} + +static inline int +hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, +			     struct hugetlb_cgroup **ptr) +{ +	return 0; +} + +static inline void +hugetlb_cgroup_commit_charge(int idx, unsigned long nr_pages, +			     struct hugetlb_cgroup *h_cg, +			     struct page *page) +{ +	return; +} + +static inline void +hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, struct page *page) +{ +	return; +} + +static inline void +hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, +			       struct hugetlb_cgroup *h_cg) +{ +	return; +} + +static inline int __init hugetlb_cgroup_file_init(int idx) +{ +	return 0; +} + +static inline void hugetlb_cgroup_migrate(struct page *oldhpage, +					  struct page *newhpage) +{ +	return; +} + +#endif  /* CONFIG_MEM_RES_CTLR_HUGETLB */ +#endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 83e7ba90d6e5..8d9489fdab2e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -38,7 +38,7 @@ struct mem_cgroup_reclaim_cookie {  	unsigned int generation;  }; -#ifdef 
CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  /*   * All "charge" functions with gfp_mask should use GFP_KERNEL or   * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't @@ -72,8 +72,6 @@ extern void mem_cgroup_uncharge_end(void);  extern void mem_cgroup_uncharge_page(struct page *page);  extern void mem_cgroup_uncharge_cache_page(struct page *page); -extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, -				     int order);  bool __mem_cgroup_same_or_subtree(const struct mem_cgroup *root_memcg,  				  struct mem_cgroup *memcg);  int task_in_mem_cgroup(struct task_struct *task, const struct mem_cgroup *memcg); @@ -100,9 +98,9 @@ int mm_match_cgroup(const struct mm_struct *mm, const struct mem_cgroup *cgroup)  extern struct cgroup_subsys_state *mem_cgroup_css(struct mem_cgroup *memcg); -extern int -mem_cgroup_prepare_migration(struct page *page, -	struct page *newpage, struct mem_cgroup **memcgp, gfp_t gfp_mask); +extern void +mem_cgroup_prepare_migration(struct page *page, struct page *newpage, +			     struct mem_cgroup **memcgp);  extern void mem_cgroup_end_migration(struct mem_cgroup *memcg,  	struct page *oldpage, struct page *newpage, bool migration_ok); @@ -124,7 +122,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg,  extern void mem_cgroup_replace_page_cache(struct page *oldpage,  					struct page *newpage); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  extern int do_swap_account;  #endif @@ -182,7 +180,6 @@ static inline void mem_cgroup_dec_page_stat(struct page *page,  unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,  						gfp_t gfp_mask,  						unsigned long *total_scanned); -u64 mem_cgroup_get_limit(struct mem_cgroup *memcg);  void mem_cgroup_count_vm_event(struct mm_struct *mm, enum vm_event_item idx);  #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -193,7 +190,7 @@ void mem_cgroup_split_huge_fixup(struct page *head);  bool mem_cgroup_bad_page_check(struct page *page);  void mem_cgroup_print_bad_page(struct page *page);  #endif -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */  struct mem_cgroup;  static inline int mem_cgroup_newpage_charge(struct page *page, @@ -279,11 +276,10 @@ static inline struct cgroup_subsys_state  	return NULL;  } -static inline int +static inline void  mem_cgroup_prepare_migration(struct page *page, struct page *newpage, -	struct mem_cgroup **memcgp, gfp_t gfp_mask) +			     struct mem_cgroup **memcgp)  { -	return 0;  }  static inline void mem_cgroup_end_migration(struct mem_cgroup *memcg, @@ -366,12 +362,6 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,  	return 0;  } -static inline -u64 mem_cgroup_get_limit(struct mem_cgroup *memcg) -{ -	return 0; -} -  static inline void mem_cgroup_split_huge_fixup(struct page *head)  {  } @@ -384,9 +374,9 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage,  				struct page *newpage)  {  } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ -#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) +#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM)  static inline bool  mem_cgroup_bad_page_check(struct page *page)  { @@ -406,7 +396,7 @@ enum {  };  struct sock; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM  void sock_update_memcg(struct sock *sk);  void sock_release_memcg(struct sock *sk);  #else @@ -416,6 +406,6 @@ static inline void sock_update_memcg(struct sock *sk)  static inline void 
sock_release_memcg(struct sock *sk)  {  } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */  #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 855c337b20c3..ce7e6671968b 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -15,7 +15,7 @@ extern int migrate_page(struct address_space *,  extern int migrate_pages(struct list_head *l, new_page_t x,  			unsigned long private, bool offlining,  			enum migrate_mode mode); -extern int migrate_huge_pages(struct list_head *l, new_page_t x, +extern int migrate_huge_page(struct page *, new_page_t x,  			unsigned long private, bool offlining,  			enum migrate_mode mode); @@ -36,7 +36,7 @@ static inline void putback_lru_pages(struct list_head *l) {}  static inline int migrate_pages(struct list_head *l, new_page_t x,  		unsigned long private, bool offlining,  		enum migrate_mode mode) { return -ENOSYS; } -static inline int migrate_huge_pages(struct list_head *l, new_page_t x, +static inline int migrate_huge_page(struct page *page, new_page_t x,  		unsigned long private, bool offlining,  		enum migrate_mode mode) { return -ENOSYS; } diff --git a/include/linux/mm.h b/include/linux/mm.h index f9f279cf5b1b..bd079a1b0fdc 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -805,6 +805,17 @@ static inline void *page_rmapping(struct page *page)  	return (void *)((unsigned long)page->mapping & ~PAGE_MAPPING_FLAGS);  } +extern struct address_space *__page_file_mapping(struct page *); + +static inline +struct address_space *page_file_mapping(struct page *page) +{ +	if (unlikely(PageSwapCache(page))) +		return __page_file_mapping(page); + +	return page->mapping; +} +  static inline int PageAnon(struct page *page)  {  	return ((unsigned long)page->mapping & PAGE_MAPPING_ANON) != 0; @@ -821,6 +832,20 @@ static inline pgoff_t page_index(struct page *page)  	return page->index;  } +extern pgoff_t __page_file_index(struct page *page); + +/* + * Return the file index of the page. Regular pagecache pages use ->index + * whereas swapcache pages use swp_offset(->private) + */ +static inline pgoff_t page_file_index(struct page *page) +{ +	if (unlikely(PageSwapCache(page))) +		return __page_file_index(page); + +	return page->index; +} +  /*   * Return true if this page is mapped into pagetables.   
*/ @@ -994,6 +1019,10 @@ int get_user_pages(struct task_struct *tsk, struct mm_struct *mm,  			struct page **pages, struct vm_area_struct **vmas);  int get_user_pages_fast(unsigned long start, int nr_pages, int write,  			struct page **pages); +struct kvec; +int get_kernel_pages(const struct kvec *iov, int nr_pages, int write, +			struct page **pages); +int get_kernel_page(unsigned long start, int write, struct page **pages);  struct page *get_dump_page(unsigned long addr);  extern int try_to_release_page(struct page * page, gfp_t gfp_mask); @@ -1331,6 +1360,7 @@ void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...);  extern void setup_per_cpu_pageset(void);  extern void zone_pcp_update(struct zone *zone); +extern void zone_pcp_reset(struct zone *zone);  /* nommu.c */  extern atomic_long_t mmap_pages_allocated; @@ -1528,6 +1558,7 @@ void vm_stat_account(struct mm_struct *, unsigned long, struct file *, long);  static inline void vm_stat_account(struct mm_struct *mm,  			unsigned long flags, struct file *file, long pages)  { +	mm->total_vm += pages;  }  #endif /* CONFIG_PROC_FS */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 074eb98fe15d..bf7867200b95 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -54,6 +54,15 @@ struct page {  		union {  			pgoff_t index;		/* Our offset within mapping. */  			void *freelist;		/* slub/slob first free object */ +			bool pfmemalloc;	/* If set by the page allocator, +						 * ALLOC_NO_WATERMARKS was set +						 * and the low watermark was not +						 * met implying that the system +						 * is under some pressure. The +						 * caller should try ensure +						 * this page is only used to +						 * free other pages. +						 */  		};  		union { diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 458988bd55a1..2daa54f55db7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -201,7 +201,7 @@ struct zone_reclaim_stat {  struct lruvec {  	struct list_head lists[NR_LRU_LISTS];  	struct zone_reclaim_stat reclaim_stat; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  	struct zone *zone;  #endif  }; @@ -209,7 +209,6 @@ struct lruvec {  /* Mask used at gathering information at once (see memcontrol.c) */  #define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))  #define LRU_ALL_ANON (BIT(LRU_INACTIVE_ANON) | BIT(LRU_ACTIVE_ANON)) -#define LRU_ALL_EVICTABLE (LRU_ALL_FILE | LRU_ALL_ANON)  #define LRU_ALL	     ((1 << NR_LRU_LISTS) - 1)  /* Isolate clean file */ @@ -369,6 +368,10 @@ struct zone {  	 */  	spinlock_t		lock;  	int                     all_unreclaimable; /* All pages pinned */ +#if defined CONFIG_COMPACTION || defined CONFIG_CMA +	/* pfn where the last incremental compaction isolated free pages */ +	unsigned long		compact_cached_free_pfn; +#endif  #ifdef CONFIG_MEMORY_HOTPLUG  	/* see spanned/present_pages for more description */  	seqlock_t		span_seqlock; @@ -475,6 +478,14 @@ struct zone {  	 * rarely used fields:  	 */  	const char		*name; +#ifdef CONFIG_MEMORY_ISOLATION +	/* +	 * the number of MIGRATE_ISOLATE *pageblock*. +	 * We need this for free page counting. Look at zone_watermark_ok_safe. 
+	 * It's protected by zone->lock +	 */ +	int		nr_pageblock_isolate; +#endif  } ____cacheline_internodealigned_in_smp;  typedef enum { @@ -671,7 +682,7 @@ typedef struct pglist_data {  	int nr_zones;  #ifdef CONFIG_FLAT_NODE_MEM_MAP	/* means !SPARSEMEM */  	struct page *node_mem_map; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  	struct page_cgroup *node_page_cgroup;  #endif  #endif @@ -694,6 +705,7 @@ typedef struct pglist_data {  					     range, including holes */  	int node_id;  	wait_queue_head_t kswapd_wait; +	wait_queue_head_t pfmemalloc_wait;  	struct task_struct *kswapd;	/* Protected by lock_memory_hotplug() */  	int kswapd_max_order;  	enum zone_type classzone_idx; @@ -718,7 +730,7 @@ typedef struct pglist_data {  #include <linux/memory_hotplug.h>  extern struct mutex zonelists_mutex; -void build_all_zonelists(void *data); +void build_all_zonelists(pg_data_t *pgdat, struct zone *zone);  void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx);  bool zone_watermark_ok(struct zone *z, int order, unsigned long mark,  		int classzone_idx, int alloc_flags); @@ -736,7 +748,7 @@ extern void lruvec_init(struct lruvec *lruvec, struct zone *zone);  static inline struct zone *lruvec_zone(struct lruvec *lruvec)  { -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  	return lruvec->zone;  #else  	return container_of(lruvec, struct zone, lruvec); @@ -773,7 +785,7 @@ extern int movable_zone;  static inline int zone_movable_is_highmem(void)  { -#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE) +#if defined(CONFIG_HIGHMEM) && defined(CONFIG_HAVE_MEMBLOCK_NODE_MAP)  	return movable_zone == ZONE_HIGHMEM;  #else  	return 0; @@ -1052,7 +1064,7 @@ struct mem_section {  	/* See declaration of similar field in struct zone */  	unsigned long *pageblock_flags; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  	/*  	 * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use  	 * section. (see memcontrol.h/page_cgroup.h about this.) 
diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 2889877318bc..1f8fc7f9bcd8 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -473,10 +473,10 @@ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t,  			unsigned long);  extern ssize_t nfs_file_direct_read(struct kiocb *iocb,  			const struct iovec *iov, unsigned long nr_segs, -			loff_t pos); +			loff_t pos, bool uio);  extern ssize_t nfs_file_direct_write(struct kiocb *iocb,  			const struct iovec *iov, unsigned long nr_segs, -			loff_t pos); +			loff_t pos, bool uio);  /*   * linux/fs/nfs/dir.c diff --git a/include/linux/oom.h b/include/linux/oom.h index e4c29bc72e70..49a3031fda50 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -40,15 +40,36 @@ enum oom_constraint {  	CONSTRAINT_MEMCG,  }; +enum oom_scan_t { +	OOM_SCAN_OK,		/* scan thread and find its badness */ +	OOM_SCAN_CONTINUE,	/* do not consider thread for oom kill */ +	OOM_SCAN_ABORT,		/* abort the iteration and return */ +	OOM_SCAN_SELECT,	/* always select this thread first */ +}; +  extern void compare_swap_oom_score_adj(int old_val, int new_val);  extern int test_set_oom_score_adj(int new_val);  extern unsigned long oom_badness(struct task_struct *p,  		struct mem_cgroup *memcg, const nodemask_t *nodemask,  		unsigned long totalpages); +extern void oom_kill_process(struct task_struct *p, gfp_t gfp_mask, int order, +			     unsigned int points, unsigned long totalpages, +			     struct mem_cgroup *memcg, nodemask_t *nodemask, +			     const char *message); +  extern int try_set_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags);  extern void clear_zonelist_oom(struct zonelist *zonelist, gfp_t gfp_flags); +extern void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, +			       int order, const nodemask_t *nodemask); + +extern enum oom_scan_t oom_scan_process_thread(struct task_struct *task, +		unsigned long totalpages, const nodemask_t *nodemask, +		bool force_kill); +extern void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, +				     int order); +  extern void out_of_memory(struct zonelist *zonelist, gfp_t gfp_mask,  		int order, nodemask_t *mask, bool force_kill);  extern int register_oom_notifier(struct notifier_block *nb); diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index c88d2a9451af..b5d13841604e 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -7,6 +7,7 @@  #include <linux/types.h>  #include <linux/bug.h> +#include <linux/mmdebug.h>  #ifndef __GENERATING_BOUNDS_H  #include <linux/mm_types.h>  #include <generated/bounds.h> @@ -453,6 +454,34 @@ static inline int PageTransTail(struct page *page)  }  #endif +/* + * If network-based swap is enabled, sl*b must keep track of whether pages + * were allocated from pfmemalloc reserves. 
+ */ +static inline int PageSlabPfmemalloc(struct page *page) +{ +	VM_BUG_ON(!PageSlab(page)); +	return PageActive(page); +} + +static inline void SetPageSlabPfmemalloc(struct page *page) +{ +	VM_BUG_ON(!PageSlab(page)); +	SetPageActive(page); +} + +static inline void __ClearPageSlabPfmemalloc(struct page *page) +{ +	VM_BUG_ON(!PageSlab(page)); +	__ClearPageActive(page); +} + +static inline void ClearPageSlabPfmemalloc(struct page *page) +{ +	VM_BUG_ON(!PageSlab(page)); +	ClearPageActive(page); +} +  #ifdef CONFIG_MMU  #define __PG_MLOCKED		(1 << PG_mlocked)  #else diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 3bdcab30ca41..105077aa7685 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -1,6 +1,11 @@  #ifndef __LINUX_PAGEISOLATION_H  #define __LINUX_PAGEISOLATION_H + +bool has_unmovable_pages(struct zone *zone, struct page *page, int count); +void set_pageblock_migratetype(struct page *page, int migratetype); +int move_freepages_block(struct zone *zone, struct page *page, +				int migratetype);  /*   * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE.   * If specified range includes migrate types other than MOVABLE or CMA, @@ -10,7 +15,7 @@   * free all pages in the range. test_page_isolated() can be used for   * test it.   */ -extern int +int  start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,  			 unsigned migratetype); @@ -18,7 +23,7 @@ start_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,   * Changes MIGRATE_ISOLATE to MIGRATE_MOVABLE.   * target range is [start_pfn, end_pfn)   */ -extern int +int  undo_isolate_page_range(unsigned long start_pfn, unsigned long end_pfn,  			unsigned migratetype); @@ -30,8 +35,8 @@ int test_pages_isolated(unsigned long start_pfn, unsigned long end_pfn);  /*   * Internal functions. Changes pageblock's migrate type.   
*/ -extern int set_migratetype_isolate(struct page *page); -extern void unset_migratetype_isolate(struct page *page, unsigned migratetype); +int set_migratetype_isolate(struct page *page); +void unset_migratetype_isolate(struct page *page, unsigned migratetype);  #endif diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index a88cdba27809..777a524716db 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -12,7 +12,7 @@ enum {  #ifndef __GENERATING_BOUNDS_H  #include <generated/bounds.h> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  #include <linux/bit_spinlock.h>  /* @@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc)  	bit_spin_unlock(PCG_LOCK, &pc->flags);  } -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */  struct page_cgroup;  static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) @@ -102,11 +102,11 @@ static inline void __init page_cgroup_init_flatmem(void)  {  } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */  #include <linux/swap.h> -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent,  					unsigned short old, unsigned short new);  extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); @@ -138,7 +138,7 @@ static inline void swap_cgroup_swapoff(int type)  	return;  } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ +#endif /* CONFIG_MEMCG_SWAP */  #endif /* !__GENERATING_BOUNDS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 7cfad3bbb0cc..e42c762f0dc7 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -286,6 +286,11 @@ static inline loff_t page_offset(struct page *page)  	return ((loff_t)page->index) << PAGE_CACHE_SHIFT;  } +static inline loff_t page_file_offset(struct page *page) +{ +	return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT; +} +  extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma,  				     unsigned long address); diff --git a/include/linux/sched.h b/include/linux/sched.h index 68dcffaa62a0..c147e7024f11 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1584,7 +1584,7 @@ struct task_struct {  	/* bitmask and counter of trace recursion */  	unsigned long trace_recursion;  #endif /* CONFIG_TRACING */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ +#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */  	struct memcg_batch_info {  		int do_batch;	/* incremented when batch uncharge started */  		struct mem_cgroup *memcg; /* target memcg of uncharge */ @@ -1894,6 +1894,13 @@ static inline void rcu_copy_process(struct task_struct *p)  #endif +static inline void tsk_restore_flags(struct task_struct *task, +				unsigned long orig_flags, unsigned long flags) +{ +	task->flags &= ~flags; +	task->flags |= orig_flags & flags; +} +  #ifdef CONFIG_SMP  extern void do_set_cpus_allowed(struct task_struct *p,  			       const struct cpumask *new_mask); diff --git a/include/linux/shrinker.h b/include/linux/shrinker.h index 07ceb97d53fa..ac6b8ee07825 100644 --- a/include/linux/shrinker.h +++ b/include/linux/shrinker.h @@ -20,7 +20,6 @@ struct shrink_control {   * 'nr_to_scan' entries and attempt to free them up.  It should return   * the number of objects which remain in the cache.  If it returns -1, it means   * it cannot do any scanning at this time (eg. there is a risk of deadlock). - * The callback must not return -1 if nr_to_scan is zero.   
*   * The 'gfpmask' refers to the allocation we are currently trying to   * fulfil. diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d205c4be7f5b..7632c87da2c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -462,6 +462,7 @@ struct sk_buff {  #ifdef CONFIG_IPV6_NDISC_NODETYPE  	__u8			ndisc_nodetype:2;  #endif +	__u8			pfmemalloc:1;  	__u8			ooo_okay:1;  	__u8			l4_rxhash:1;  	__u8			wifi_acked_valid:1; @@ -502,6 +503,15 @@ struct sk_buff {  #include <linux/slab.h> +#define SKB_ALLOC_FCLONE	0x01 +#define SKB_ALLOC_RX		0x02 + +/* Returns true if the skb was allocated from PFMEMALLOC reserves */ +static inline bool skb_pfmemalloc(const struct sk_buff *skb) +{ +	return unlikely(skb->pfmemalloc); +} +  /*   * skb might have a dst pointer attached, refcounted or not.   * _skb_refdst low order bit is set if refcount was _not_ taken @@ -565,7 +575,7 @@ extern bool skb_try_coalesce(struct sk_buff *to, struct sk_buff *from,  			     bool *fragstolen, int *delta_truesize);  extern struct sk_buff *__alloc_skb(unsigned int size, -				   gfp_t priority, int fclone, int node); +				   gfp_t priority, int flags, int node);  extern struct sk_buff *build_skb(void *data, unsigned int frag_size);  static inline struct sk_buff *alloc_skb(unsigned int size,  					gfp_t priority) @@ -576,7 +586,7 @@ static inline struct sk_buff *alloc_skb(unsigned int size,  static inline struct sk_buff *alloc_skb_fclone(unsigned int size,  					       gfp_t priority)  { -	return __alloc_skb(size, priority, 1, NUMA_NO_NODE); +	return __alloc_skb(size, priority, SKB_ALLOC_FCLONE, NUMA_NO_NODE);  }  extern void skb_recycle(struct sk_buff *skb); @@ -1237,6 +1247,17 @@ static inline void __skb_fill_page_desc(struct sk_buff *skb, int i,  {  	skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; +	/* +	 * Propagate page->pfmemalloc to the skb if we can. The problem is +	 * that not all callers have unique ownership of the page. If +	 * pfmemalloc is set, we check the mapping as a mapping implies +	 * page->index is set (index and pfmemalloc share space). +	 * If it's a valid mapping, we cannot use page->pfmemalloc but we +	 * do not lose pfmemalloc information as the pages would not be +	 * allocated using __GFP_MEMALLOC. +	 */ +	if (page->pfmemalloc && !page->mapping) +		skb->pfmemalloc	= true;  	frag->page.p		  = page;  	frag->page_offset	  = off;  	skb_frag_size_set(frag, size); @@ -1753,6 +1774,61 @@ static inline struct sk_buff *netdev_alloc_skb_ip_align(struct net_device *dev,  	return __netdev_alloc_skb_ip_align(dev, length, GFP_ATOMIC);  } +/* + *	__skb_alloc_page - allocate pages for ps-rx on a skb and preserve pfmemalloc data + *	@gfp_mask: alloc_pages_node mask. Set __GFP_NOMEMALLOC if not for network packet RX + *	@skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used + *	@order: size of the allocation + * + * 	Allocate a new page. + * + * 	%NULL is returned if there is no free memory. +*/ +static inline struct page *__skb_alloc_pages(gfp_t gfp_mask, +					      struct sk_buff *skb, +					      unsigned int order) +{ +	struct page *page; + +	gfp_mask |= __GFP_COLD; + +	if (!(gfp_mask & __GFP_NOMEMALLOC)) +		gfp_mask |= __GFP_MEMALLOC; + +	page = alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); +	if (skb && page && page->pfmemalloc) +		skb->pfmemalloc = true; + +	return page; +} + +/** + *	__skb_alloc_page - allocate a page for ps-rx for a given skb and preserve pfmemalloc data + *	@gfp_mask: alloc_pages_node mask. 
Set __GFP_NOMEMALLOC if not for network packet RX + *	@skb: skb to set pfmemalloc on if __GFP_MEMALLOC is used + * + * 	Allocate a new page. + * + * 	%NULL is returned if there is no free memory. + */ +static inline struct page *__skb_alloc_page(gfp_t gfp_mask, +					     struct sk_buff *skb) +{ +	return __skb_alloc_pages(gfp_mask, skb, 0); +} + +/** + *	skb_propagate_pfmemalloc - Propagate pfmemalloc if skb is allocated after RX page + *	@page: The page that was allocated from skb_alloc_page + *	@skb: The skb that may need pfmemalloc set + */ +static inline void skb_propagate_pfmemalloc(struct page *page, +					     struct sk_buff *skb) +{ +	if (page && page->pfmemalloc) +		skb->pfmemalloc = true; +} +  /**   * skb_frag_page - retrieve the page refered to by a paged fragment   * @frag: the paged fragment diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 77d278defa70..cff40aa7db62 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -174,6 +174,8 @@ struct rpc_xprt {  	unsigned long		state;		/* transport state */  	unsigned char		shutdown   : 1,	/* being shut down */  				resvport   : 1; /* use a reserved port */ +	unsigned int		swapper;	/* we're swapping over this +						   transport */  	unsigned int		bind_index;	/* bind function index */  	/* @@ -316,6 +318,7 @@ void			xprt_release_rqst_cong(struct rpc_task *task);  void			xprt_disconnect_done(struct rpc_xprt *xprt);  void			xprt_force_disconnect(struct rpc_xprt *xprt);  void			xprt_conditional_disconnect(struct rpc_xprt *xprt, unsigned int cookie); +int			xs_swapper(struct rpc_xprt *xprt, int enable);  /*   * Reserved bit positions in xprt->state diff --git a/include/linux/swap.h b/include/linux/swap.h index c84ec68eaec9..388e70601413 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -151,6 +151,7 @@ enum {  	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */  	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */  	SWP_BLKDEV	= (1 << 6),	/* its a block device */ +	SWP_FILE	= (1 << 7),	/* set after swap_activate success */  					/* add others here before... 
*/  	SWP_SCANNING	= (1 << 8),	/* refcount in scan_swap_map */  }; @@ -301,7 +302,7 @@ static inline void scan_unevictable_unregister_node(struct node *node)  extern int kswapd_run(int nid);  extern void kswapd_stop(int nid); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  extern int mem_cgroup_swappiness(struct mem_cgroup *mem);  #else  static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) @@ -309,7 +310,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)  	return vm_swappiness;  }  #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP  extern void mem_cgroup_uncharge_swap(swp_entry_t ent);  #else  static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) @@ -320,8 +321,14 @@ static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)  /* linux/mm/page_io.c */  extern int swap_readpage(struct page *);  extern int swap_writepage(struct page *page, struct writeback_control *wbc); +extern int swap_set_page_dirty(struct page *page);  extern void end_swap_bio_read(struct bio *bio, int err); +int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page, +		unsigned long nr_pages, sector_t start_block); +int generic_swapfile_activate(struct swap_info_struct *, struct file *, +		sector_t *); +  /* linux/mm/swap_state.c */  extern struct address_space swapper_space;  #define total_swapcache_pages  swapper_space.nrpages @@ -356,11 +363,12 @@ extern unsigned int count_swap_pages(int, int);  extern sector_t map_swap_page(struct page *, struct block_device **);  extern sector_t swapdev_block(int, pgoff_t);  extern int page_swapcount(struct page *); +extern struct swap_info_struct *page_swap_info(struct page *);  extern int reuse_swap_page(struct page *);  extern int try_to_free_swap(struct page *);  struct backing_dev_info; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG  extern void  mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);  #else diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 06f8e3858251..57f7b1091511 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -30,6 +30,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT,  		FOR_ALL_ZONES(PGSTEAL_DIRECT),  		FOR_ALL_ZONES(PGSCAN_KSWAPD),  		FOR_ALL_ZONES(PGSCAN_DIRECT), +		PGSCAN_DIRECT_THROTTLE,  #ifdef CONFIG_NUMA  		PGSCAN_ZONE_RECLAIM_FAILED,  #endif diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 65efb92da996..ad2cfd53dadc 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -179,11 +179,6 @@ extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp);  #define add_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, __d)  #define sub_zone_page_state(__z, __i, __d) mod_zone_page_state(__z, __i, -(__d)) -static inline void zap_zone_vm_stats(struct zone *zone) -{ -	memset(zone->vm_stat, 0, sizeof(zone->vm_stat)); -} -  extern void inc_zone_state(struct zone *, enum zone_stat_item);  #ifdef CONFIG_SMP diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 6d0a0fcd80e7..c66fe3332d83 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -189,9 +189,4 @@ void tag_pages_for_writeback(struct address_space *mapping,  void account_page_redirty(struct page *page); -/* pdflush.c */ -extern int nr_pdflush_threads;	/* Global so it can be exported to sysctl -				   read-only. 
*/ - -  #endif		/* WRITEBACK_H */ diff --git a/include/net/sock.h b/include/net/sock.h index e067f8c18f88..b3730239bf18 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -621,6 +621,7 @@ enum sock_flags {  	SOCK_RCVTSTAMPNS, /* %SO_TIMESTAMPNS setting */  	SOCK_LOCALROUTE, /* route locally only, %SO_DONTROUTE setting */  	SOCK_QUEUE_SHRUNK, /* write queue has been shrunk recently */ +	SOCK_MEMALLOC, /* VM depends on this socket for swapping */  	SOCK_TIMESTAMPING_TX_HARDWARE,  /* %SOF_TIMESTAMPING_TX_HARDWARE */  	SOCK_TIMESTAMPING_TX_SOFTWARE,  /* %SOF_TIMESTAMPING_TX_SOFTWARE */  	SOCK_TIMESTAMPING_RX_HARDWARE,  /* %SOF_TIMESTAMPING_RX_HARDWARE */ @@ -658,6 +659,26 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag)  	return test_bit(flag, &sk->sk_flags);  } +#ifdef CONFIG_NET +extern struct static_key memalloc_socks; +static inline int sk_memalloc_socks(void) +{ +	return static_key_false(&memalloc_socks); +} +#else + +static inline int sk_memalloc_socks(void) +{ +	return 0; +} + +#endif + +static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) +{ +	return GFP_ATOMIC | (sk->sk_allocation & __GFP_MEMALLOC); +} +  static inline void sk_acceptq_removed(struct sock *sk)  {  	sk->sk_ack_backlog--; @@ -733,8 +754,13 @@ static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *s  	return 0;  } +extern int __sk_backlog_rcv(struct sock *sk, struct sk_buff *skb); +  static inline int sk_backlog_rcv(struct sock *sk, struct sk_buff *skb)  { +	if (sk_memalloc_socks() && skb_pfmemalloc(skb)) +		return __sk_backlog_rcv(sk, skb); +  	return sk->sk_backlog_rcv(sk, skb);  } @@ -798,6 +824,8 @@ extern int sk_stream_wait_memory(struct sock *sk, long *timeo_p);  extern void sk_stream_wait_close(struct sock *sk, long timeo_p);  extern int sk_stream_error(struct sock *sk, int flags, int err);  extern void sk_stream_kill_queues(struct sock *sk); +extern void sk_set_memalloc(struct sock *sk); +extern void sk_clear_memalloc(struct sock *sk);  extern int sk_wait_data(struct sock *sk, long *timeo); @@ -913,7 +941,7 @@ struct proto {  #ifdef SOCK_REFCNT_DEBUG  	atomic_t		socks;  #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM  	/*  	 * cgroup specific init/deinit functions. Called once for all  	 * protocols that implement it, from cgroups populate function. 
@@ -994,7 +1022,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk)  #define sk_refcnt_debug_release(sk) do { } while (0)  #endif /* SOCK_REFCNT_DEBUG */ -#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET)  extern struct static_key memcg_socket_limit_enabled;  static inline struct cg_proto *parent_cg_proto(struct proto *proto,  					       struct cg_proto *cg_proto) @@ -1301,12 +1329,14 @@ static inline bool sk_wmem_schedule(struct sock *sk, int size)  		__sk_mem_schedule(sk, size, SK_MEM_SEND);  } -static inline bool sk_rmem_schedule(struct sock *sk, int size) +static inline bool +sk_rmem_schedule(struct sock *sk, struct sk_buff *skb, unsigned int size)  {  	if (!sk_has_account(sk))  		return true; -	return size <= sk->sk_forward_alloc || -		__sk_mem_schedule(sk, size, SK_MEM_RECV); +	return size<= sk->sk_forward_alloc || +		__sk_mem_schedule(sk, size, SK_MEM_RECV) || +		skb_pfmemalloc(skb);  }  static inline void sk_mem_reclaim(struct sock *sk) diff --git a/include/trace/events/gfpflags.h b/include/trace/events/gfpflags.h index 9fe3a36646e9..d6fd8e5b14b7 100644 --- a/include/trace/events/gfpflags.h +++ b/include/trace/events/gfpflags.h @@ -30,6 +30,7 @@  	{(unsigned long)__GFP_COMP,		"GFP_COMP"},		\  	{(unsigned long)__GFP_ZERO,		"GFP_ZERO"},		\  	{(unsigned long)__GFP_NOMEMALLOC,	"GFP_NOMEMALLOC"},	\ +	{(unsigned long)__GFP_MEMALLOC,		"GFP_MEMALLOC"},	\  	{(unsigned long)__GFP_HARDWALL,		"GFP_HARDWALL"},	\  	{(unsigned long)__GFP_THISNODE,		"GFP_THISNODE"},	\  	{(unsigned long)__GFP_RECLAIMABLE,	"GFP_RECLAIMABLE"},	\ | 
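A minimal sketch (not from this commit; "myfs" and its callbacks are hypothetical) of how a block-backed filesystem might wire up the new `swap_activate`/`swap_deactivate` address_space_operations added in include/linux/fs.h, using the `generic_swapfile_activate()` helper declared in include/linux/swap.h above:

```c
/*
 * Sketch only: "myfs" is a hypothetical filesystem used to illustrate the
 * new ->swap_activate()/->swap_deactivate() address_space_operations.
 */
#include <linux/fs.h>
#include <linux/swap.h>

static int myfs_swap_activate(struct swap_info_struct *sis,
			      struct file *file, sector_t *span)
{
	/*
	 * A block-backed filesystem can build the swap extent list with the
	 * generic helper; a network filesystem (e.g. NFS) would instead pin
	 * its transport here before swap-out over it is allowed.
	 */
	return generic_swapfile_activate(sis, file, span);
}

static void myfs_swap_deactivate(struct file *file)
{
	/* Undo whatever swap_activate() set up (nothing in this sketch). */
}

static const struct address_space_operations myfs_aops = {
	/* ...the usual readpage/writepage/etc. callbacks... */
	.swap_activate	 = myfs_swap_activate,
	.swap_deactivate = myfs_swap_deactivate,
};
```

This is also why the diff adds KERNEL_READ/KERNEL_WRITE and REQ_KERNEL: swap I/O issued this way is driven through the filesystem with kernel pages rather than userspace iovecs (see the new `uio` argument to the NFS direct-I/O helpers).

Likewise, a hedged sketch (hypothetical "mynic" helper) of the RX-page helpers added to include/linux/skbuff.h, covering the case the kernel-doc above describes where the page is allocated before the skb exists, so pfmemalloc has to be propagated by hand:

```c
/*
 * Sketch only: "mynic_rx_build_skb" is a hypothetical driver helper showing
 * __skb_alloc_page() and skb_propagate_pfmemalloc() from this diff.
 */
#include <linux/mm.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>

static struct sk_buff *mynic_rx_build_skb(struct net_device *dev,
					   unsigned int len)
{
	struct page *page;
	struct sk_buff *skb;

	/*
	 * No skb exists yet, so pass NULL; __GFP_MEMALLOC is ORed in for us
	 * (we did not ask for __GFP_NOMEMALLOC), so under pressure the page
	 * may come from the emergency reserves.
	 */
	page = __skb_alloc_page(GFP_ATOMIC, NULL);
	if (!page)
		return NULL;

	/* Packet data lives entirely in the page frag; no linear area needed. */
	skb = netdev_alloc_skb(dev, 0);
	if (!skb) {
		put_page(page);
		return NULL;
	}

	skb_fill_page_desc(skb, 0, page, 0, len);
	skb->len	+= len;
	skb->data_len	+= len;
	skb->truesize	+= PAGE_SIZE;

	/* The page was allocated before the skb, so propagate pfmemalloc by hand. */
	skb_propagate_pfmemalloc(page, skb);
	return skb;
}
```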
