Unverified commit 7602779e, authored by openeuler-ci-bot and committed by Gitee

!1228 Per-memcg swap control

Merge Pull Request from: @ci-robot 
 
PR sync from: Liu Shixin <liushixin2@huawei.com>
https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/message/LB3KEGYTHZN2JVMAZADIFG73PYZUD2RV/ 
This patch series adds swap control for memory cgroups. Patch [2] adds a page
type key to the memory.reclaim interface to support reclaiming anon pages.
Patch [4] adds the memory.force_swapin interface to proactively swap pages
back in. Patch [5] adds the memory.swap.max interface to limit a memory
cgroup's swap usage. Patches [6-7] add the memory.swapfile interface to
restrict which swap devices a memory cgroup may use.
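
A rough usage sketch of the new knobs (not part of the patch itself; it assumes
the cgroup v1 memory controller is mounted at /sys/fs/cgroup/memory, <cg> is a
placeholder cgroup, and /dev/zram0 stands in for an already-active swap device):

    # reclaim up to 1G from the cgroup, anonymous pages only (patch [2])
    echo "1G type=anon" > /sys/fs/cgroup/memory/<cg>/memory.reclaim
    # proactively swap the cgroup's anonymous pages back in (patch [4])
    echo 0 > /sys/fs/cgroup/memory/<cg>/memory.force_swapin
    # cap the cgroup's swap usage at 512M (patch [5])
    echo 512M > /sys/fs/cgroup/memory/<cg>/memory.swap.max
    # restrict the cgroup to one swap device; "all" and "none" also accepted (patches [6-7])
    echo /dev/zram0 > /sys/fs/cgroup/memory/<cg>/memory.swapfile

Note that memory.swap.max and memory.swapfile return -EACCES until the feature
is enabled at runtime through the vm.memcg_swap_qos_enable sysctl introduced
by this series.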

v2->v3: Enable memcg swap qos for x86_64 and arm64 by default.
v1->v2: Rebase on the latest version and fix merge conflicts.

Liu Shixin (7):
  memcg: add page type to memory.reclaim interface
  memcg: introduce memcg swap qos feature
  memcg: introduce per-memcg swapin interface
  memcg: add restrict to swap to cgroup1
  mm/swapfile: introduce per-memcg swapfile control
  mm: swap_slots: add per-type slot cache
  config: enable memcg swap qos for x86_64 and arm64 by default

Yosry Ahmed (1):
  mm: vmpressure: don't count proactive reclaim in vmpressure


-- 
2.25.1
 
 
Link:https://gitee.com/openeuler/kernel/pulls/1228 

Reviewed-by: Jialin Zhang <zhangjialin11@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
......@@ -78,6 +78,9 @@ Brief summary of control files.
memory.stat                  show various statistics
memory.use_hierarchy         set/show hierarchical account enabled
memory.force_empty           trigger forced page reclaim
memory.force_swapin          trigger forced swapin of anon pages
memory.swap.max              set/show limit for swap
memory.swapfile              set/show available swap file
memory.pressure_level        set memory pressure notifications
memory.swappiness            set/show swappiness parameter of vmscan
                             (See sysctl's vm.swappiness)
......
......@@ -1196,20 +1196,28 @@ PAGE_SIZE multiple when read back.
target cgroup.

This file accepts a single key, the number of bytes to reclaim.
No nested keys are currently supported.

Example::

    echo "1G" > memory.reclaim

The interface can be later extended with nested keys to
configure the reclaim behavior. For example, specify the
type of memory to reclaim from (anon, file, ..).

This file also accepts nested keys, the number of bytes to reclaim
with the type of memory to reclaim.

Example::

    echo "1G type=file" > memory.reclaim

Please note that the kernel can over or under reclaim from
the target cgroup. If less bytes are reclaimed than the
specified amount, -EAGAIN is returned.

Please note that the proactive reclaim (triggered by this
interface) is not meant to indicate memory pressure on the
memory cgroup. Therefore socket memory balancing triggered by
the memory reclaim normally is not exercised in this case.
This means that the networking layer will not adapt based on
reclaim induced by memory.reclaim.

memory.oom.group
    A read-write single value file which exists on non-root
    cgroups. The default value is "0".
......
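As a complement to the type=file example above (a sketch, not text from the
patch): the same nested-key syntax requests anon-only reclaim, and the
memory_reclaim() hunk further down returns -EAGAIN right away when the cgroup
has no available swap space of the permitted type::

    echo "1G type=anon" > memory.reclaim
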
......@@ -1092,6 +1092,7 @@ CONFIG_TRANSPARENT_HUGEPAGE_ALWAYS=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_MEMCG_QOS=y
CONFIG_MEMCG_SWAP_QOS=y
CONFIG_ETMEM_SCAN=m
CONFIG_ETMEM_SWAP=m
CONFIG_ETMEM=y
......
......@@ -1042,6 +1042,7 @@ CONFIG_THP_SWAP=y
CONFIG_CLEANCACHE=y
CONFIG_FRONTSWAP=y
CONFIG_MEMCG_QOS=y
CONFIG_MEMCG_SWAP_QOS=y
CONFIG_ETMEM_SCAN=m
CONFIG_ETMEM_SWAP=m
CONFIG_ETMEM=y
......
......@@ -50,6 +50,11 @@ enum memcg_memory_event {
MEMCG_NR_MEMORY_EVENTS,
};
enum {
SWAP_TYPE_ALL = -1, /* allowed to use all swap files */
SWAP_TYPE_NONE = -2, /* prohibited from using any swapfile */
};
struct mem_cgroup_reclaim_cookie {
pg_data_t *pgdat;
unsigned int generation;
......@@ -240,6 +245,11 @@ struct obj_cgroup {
};
};
struct swap_device {
unsigned long max;
int type;
};
/*
* The memory controller data structure. The memory controller controls both
* page cache and RSS per cgroup. We would eventually like to provide
......@@ -402,7 +412,12 @@ struct mem_cgroup {
#else
KABI_RESERVE(6)
#endif
#ifdef CONFIG_MEMCG_SWAP_QOS
/* per-memcg swap device control; protected by swap_lock */
KABI_USE(7, struct swap_device *swap_dev)
#else
KABI_RESERVE(7)
#endif
KABI_RESERVE(8)
struct mem_cgroup_per_node *nodeinfo[0];
......@@ -424,6 +439,10 @@ extern int sysctl_memcg_qos_handler(struct ctl_table *table,
void memcg_print_bad_task(struct oom_control *oc);
#endif
#ifdef CONFIG_MEMCG_SWAP_QOS
DECLARE_STATIC_KEY_FALSE(memcg_swap_qos_key);
#endif
/*
* size of first charge trial. "32" comes from vmscan.c's magic value.
* TODO: maybe necessary to use big numbers in big irons.
......@@ -1294,6 +1313,9 @@ static inline bool memcg_has_children(struct mem_cgroup *memcg)
int mem_cgroup_force_empty(struct mem_cgroup *memcg);
int memcg_get_swap_type(struct page *page);
void memcg_remove_swapfile(int type);
#else /* CONFIG_MEMCG */
#define MEM_CGROUP_ID_SHIFT 0
......@@ -1701,6 +1723,15 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
static inline void memcg_print_bad_task(struct oom_control *oc)
{
}
static inline int memcg_get_swap_type(struct page *page)
{
return SWAP_TYPE_ALL;
}
static inline void memcg_remove_swapfile(int type)
{
}
#endif /* CONFIG_MEMCG */
/* idx can be of type enum memcg_stat_item or node_stat_item */
......
......@@ -2650,6 +2650,7 @@ extern int __do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf, bool downgrade);
extern int do_munmap(struct mm_struct *, unsigned long, size_t,
struct list_head *uf);
extern void force_swapin_vma(struct vm_area_struct *vma);
extern int do_madvise(struct mm_struct *mm, unsigned long start, size_t len_in, int behavior);
extern unsigned long __do_mmap_mm(struct mm_struct *mm, struct file *file,
......
......@@ -376,10 +376,14 @@ extern unsigned long zone_reclaimable_pages(struct zone *zone);
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
gfp_t gfp_mask, nodemask_t *mask);
extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
#define MEMCG_RECLAIM_MAY_SWAP (1 << 1)
#define MEMCG_RECLAIM_PROACTIVE (1 << 2)
#define MEMCG_RECLAIM_NOT_FILE (1 << 3)
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
bool may_swap);
unsigned int reclaim_options);
extern unsigned long mem_cgroup_shrink_node(struct mem_cgroup *mem,
gfp_t gfp_mask, bool noswap,
pg_data_t *pgdat,
......@@ -507,11 +511,14 @@ static inline long get_nr_swap_pages(void)
return atomic_long_read(&nr_swap_pages);
}
extern long get_nr_swap_pages_type(int type);
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(struct page *page);
extern void put_swap_page(struct page *page, swp_entry_t entry);
extern swp_entry_t get_swap_page_of_type(int);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size);
extern int get_swap_pages(int n, swp_entry_t swp_entries[], int entry_size,
int type);
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
extern void swap_shmem_alloc(swp_entry_t);
extern int swap_duplicate(swp_entry_t);
......@@ -543,6 +550,12 @@ static inline void put_swap_device(struct swap_info_struct *si)
percpu_ref_put(&si->sei->users);
}
#ifdef CONFIG_MEMCG_SWAP_QOS
extern int write_swapfile_for_memcg(struct address_space *mapping,
int *swap_type);
extern void read_swapfile_for_memcg(struct seq_file *m, int type);
void enable_swap_slots_cache_max(void);
#endif
#else /* CONFIG_SWAP */
static inline int swap_readpage(struct page *page, bool do_poll)
......
......@@ -23,7 +23,7 @@ struct swap_slots_cache {
void disable_swap_slots_cache_lock(void);
void reenable_swap_slots_cache_unlock(void);
void enable_swap_slots_cache(void);
void enable_swap_slots_cache(int type);
int free_swap_slot(swp_entry_t entry);
extern bool swap_slot_cache_enabled;
......
......@@ -512,6 +512,15 @@ config MEMCG_QOS
If unsure, say "n".
config MEMCG_SWAP_QOS
bool "Enable Memory Cgroup Swap Control"
depends on MEMCG_SWAP
depends on X86 || ARM64
default n
help
Memcg swap control includes memory force swapin, swapfile control
and swap limit.
config ETMEM_SCAN
tristate "module: etmem page scan for etmem support"
depends on ETMEM
......
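The option defaults to "n" in Kconfig but is set to "y" in the openEuler
x86_64 and arm64 defconfigs changed earlier in this diff. Even when built in,
the feature stays off until the runtime sysctl registered later in the series
is flipped; a rough enabling sketch (sysctl path derived from the "vm" table
registration below):

    # build time (already =y in the updated openEuler configs)
    CONFIG_MEMCG_SWAP_QOS=y
    # runtime: resets every memcg's swap.max/swapfile to the defaults, then
    # enables the memcg_swap_qos_key static branch
    echo 1 > /proc/sys/vm/memcg_swap_qos_enable    # i.e. sysctl vm.memcg_swap_qos_enable=1
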
......@@ -259,6 +259,25 @@ static void force_shm_swapin_readahead(struct vm_area_struct *vma,
lru_add_drain(); /* Push any new pages onto the LRU now */
}
void force_swapin_vma(struct vm_area_struct *vma)
{
struct file *file = vma->vm_file;
if (!can_madv_lru_vma(vma))
return;
if (!file) {
walk_page_vma(vma, &swapin_walk_ops, vma);
lru_add_drain();
} else if (shmem_mapping(file->f_mapping))
force_shm_swapin_readahead(vma, vma->vm_start,
vma->vm_end, file->f_mapping);
}
#else
void force_swapin_vma(struct vm_area_struct *vma)
{
}
#endif /* CONFIG_SWAP */
/*
......
......@@ -2397,7 +2397,8 @@ static unsigned long reclaim_high(struct mem_cgroup *memcg,
psi_memstall_enter(&pflags);
nr_reclaimed += try_to_free_mem_cgroup_pages(memcg, nr_pages,
gfp_mask, true);
gfp_mask,
MEMCG_RECLAIM_MAY_SWAP);
psi_memstall_leave(&pflags);
} while ((memcg = parent_mem_cgroup(memcg)) &&
!mem_cgroup_is_root(memcg));
......@@ -2660,7 +2661,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
enum oom_status oom_status;
unsigned long nr_reclaimed;
bool passed_oom = false;
bool may_swap = true;
unsigned int reclaim_options = MEMCG_RECLAIM_MAY_SWAP;
bool drained = false;
unsigned long pflags;
......@@ -2679,7 +2680,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
mem_over_limit = mem_cgroup_from_counter(counter, memory);
} else {
mem_over_limit = mem_cgroup_from_counter(counter, memsw);
may_swap = false;
reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP;
}
if (batch > nr_pages) {
......@@ -2715,7 +2716,7 @@ static int try_charge(struct mem_cgroup *memcg, gfp_t gfp_mask,
psi_memstall_enter(&pflags);
nr_reclaimed = try_to_free_mem_cgroup_pages(mem_over_limit, nr_pages,
gfp_mask, may_swap);
gfp_mask, reclaim_options);
psi_memstall_leave(&pflags);
if (mem_cgroup_margin(mem_over_limit) >= nr_pages)
......@@ -3365,8 +3366,8 @@ static int mem_cgroup_resize_max(struct mem_cgroup *memcg,
continue;
}
if (!try_to_free_mem_cgroup_pages(memcg, 1,
GFP_KERNEL, !memsw)) {
if (!try_to_free_mem_cgroup_pages(memcg, 1, GFP_KERNEL,
memsw ? 0 : MEMCG_RECLAIM_MAY_SWAP)) {
ret = -EBUSY;
break;
}
......@@ -3483,7 +3484,7 @@ int mem_cgroup_force_empty(struct mem_cgroup *memcg)
return -EINTR;
progress = try_to_free_mem_cgroup_pages(memcg, 1,
GFP_KERNEL, true);
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
if (!progress) {
nr_retries--;
/* maybe some writeback is necessary */
......@@ -4054,6 +4055,344 @@ void memcg_print_bad_task(struct oom_control *oc)
#endif
#ifdef CONFIG_MEMCG_SWAP_QOS
DEFINE_STATIC_KEY_FALSE(memcg_swap_qos_key);
#ifdef CONFIG_SYSCTL
static int sysctl_memcg_swap_qos_stat;
static void memcg_swap_qos_reset(void)
{
struct mem_cgroup *memcg;
for_each_mem_cgroup(memcg) {
WRITE_ONCE(memcg->swap_dev->max, PAGE_COUNTER_MAX);
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
}
}
static int sysctl_memcg_swap_qos_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
int ret;
ret = proc_dointvec_minmax(table, write, buffer, length, ppos);
if (ret)
return ret;
if (write) {
if (sysctl_memcg_swap_qos_stat) {
memcg_swap_qos_reset();
static_branch_enable(&memcg_swap_qos_key);
enable_swap_slots_cache_max();
} else {
static_branch_disable(&memcg_swap_qos_key);
}
}
return 0;
}
static struct ctl_table memcg_swap_qos_sysctls[] = {
{
.procname = "memcg_swap_qos_enable",
.data = &sysctl_memcg_swap_qos_stat,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = sysctl_memcg_swap_qos_handler,
.extra1 = SYSCTL_ZERO,
.extra2 = SYSCTL_ONE,
},
{ }
};
static __init int memcg_swap_qos_sysctls_init(void)
{
if (mem_cgroup_disabled() || cgroup_memory_noswap)
return 0;
register_sysctl_init("vm", memcg_swap_qos_sysctls);
return 0;
}
late_initcall(memcg_swap_qos_sysctls_init);
#endif
static int mem_cgroup_task_swapin(struct task_struct *task, void *arg)
{
struct mm_struct *mm = task->mm;
struct vm_area_struct *vma;
struct blk_plug plug;
mmap_read_lock(mm);
blk_start_plug(&plug);
for (vma = mm->mmap; vma; vma = vma->vm_next)
force_swapin_vma(vma);
blk_finish_plug(&plug);
mmap_read_unlock(mm);
return 0;
}
static ssize_t memory_swapin(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
mem_cgroup_scan_tasks(memcg, mem_cgroup_task_swapin, NULL);
return nbytes;
}
static int memcg_alloc_swap_device(struct mem_cgroup *memcg)
{
memcg->swap_dev = kmalloc(sizeof(struct swap_device), GFP_KERNEL);
if (!memcg->swap_dev)
return -ENOMEM;
return 0;
}
static void memcg_free_swap_device(struct mem_cgroup *memcg)
{
if (!memcg->swap_dev)
return;
kfree(memcg->swap_dev);
memcg->swap_dev = NULL;
}
static void memcg_swap_device_init(struct mem_cgroup *memcg,
struct mem_cgroup *parent)
{
if (!static_branch_likely(&memcg_swap_qos_key) || !parent) {
WRITE_ONCE(memcg->swap_dev->max, PAGE_COUNTER_MAX);
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
} else {
WRITE_ONCE(memcg->swap_dev->max,
READ_ONCE(parent->swap_dev->max));
WRITE_ONCE(memcg->swap_dev->type,
READ_ONCE(parent->swap_dev->type));
}
}
u64 memcg_swapmax_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(css);
if (!static_branch_likely(&memcg_swap_qos_key))
return PAGE_COUNTER_MAX * PAGE_SIZE;
return READ_ONCE(memcg->swap_dev->max) * PAGE_SIZE;
}
static ssize_t memcg_swapmax_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned long max;
int err;
if (!static_branch_likely(&memcg_swap_qos_key))
return -EACCES;
buf = strstrip(buf);
err = page_counter_memparse(buf, "max", &max);
if (err)
return err;
WRITE_ONCE(memcg->swap_dev->max, max);
return nbytes;
}
static int mem_cgroup_check_swap_for_v1(struct page *page, swp_entry_t entry)
{
struct mem_cgroup *memcg, *target_memcg;
unsigned long swap_usage;
unsigned long swap_limit;
long nr_swap_pages = PAGE_COUNTER_MAX;
if (!static_branch_likely(&memcg_swap_qos_key))
return 0;
if (!entry.val)
return 0;
rcu_read_lock();
target_memcg = page_memcg(page);
if (!target_memcg || mem_cgroup_is_root(target_memcg)) {
rcu_read_unlock();
return 0;
}
if (!css_tryget_online(&target_memcg->css)) {
rcu_read_unlock();
return 0;
}
rcu_read_unlock();
for (memcg = target_memcg; memcg != root_mem_cgroup;
memcg = parent_mem_cgroup(memcg)) {
swap_limit = READ_ONCE(memcg->swap_dev->max);
swap_usage = page_counter_read(&memcg->memsw) -
page_counter_read(&memcg->memory);
nr_swap_pages = min_t(long, nr_swap_pages,
swap_limit - swap_usage);
}
css_put(&target_memcg->css);
if (thp_nr_pages(page) > nr_swap_pages)
return -ENOMEM;
return 0;
}
static int memcg_swapfile_read(struct seq_file *m, void *v)
{
struct mem_cgroup *memcg = mem_cgroup_from_seq(m);
int type;
if (!static_branch_likely(&memcg_swap_qos_key)) {
seq_printf(m, "all\n");
return 0;
}
type = READ_ONCE(memcg->swap_dev->type);
if (type == SWAP_TYPE_NONE)
seq_printf(m, "none\n");
else if (type == SWAP_TYPE_ALL)
seq_printf(m, "all\n");
else
read_swapfile_for_memcg(m, type);
return 0;
}
static ssize_t memcg_swapfile_write(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
struct filename *pathname;
struct file *swapfile;
int ret;
if (!static_branch_likely(&memcg_swap_qos_key))
return -EACCES;
buf = strstrip(buf);
if (!strcmp(buf, "none")) {
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_NONE);
return nbytes;
} else if (!strcmp(buf, "all")) {
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_ALL);
return nbytes;
}
pathname = getname_kernel(buf);
if (IS_ERR(pathname))
return PTR_ERR(pathname);
swapfile = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
if (IS_ERR(swapfile)) {
putname(pathname);
return PTR_ERR(swapfile);
}
ret = write_swapfile_for_memcg(swapfile->f_mapping,
&memcg->swap_dev->type);
filp_close(swapfile, NULL);
putname(pathname);
return ret < 0 ? ret : nbytes;
}
int memcg_get_swap_type(struct page *page)
{
struct mem_cgroup *memcg;
int type;
if (!static_branch_likely(&memcg_swap_qos_key))
return SWAP_TYPE_ALL;
if (!page)
return SWAP_TYPE_ALL;
rcu_read_lock();
memcg = page_memcg(page);
if (!memcg || mem_cgroup_is_root(memcg)) {
rcu_read_unlock();
return SWAP_TYPE_ALL;
}
if (!css_tryget_online(&memcg->css)) {
rcu_read_unlock();
return SWAP_TYPE_ALL;
}
rcu_read_unlock();
type = READ_ONCE(memcg->swap_dev->type);
css_put(&memcg->css);
return type;
}
void memcg_remove_swapfile(int type)
{
struct mem_cgroup *memcg;
if (!static_branch_likely(&memcg_swap_qos_key))
return;
for_each_mem_cgroup(memcg)
if (READ_ONCE(memcg->swap_dev->type) == type)
WRITE_ONCE(memcg->swap_dev->type, SWAP_TYPE_NONE);
}
static long mem_cgroup_get_nr_swap_pages_type(struct mem_cgroup *memcg)
{
int type;
if (!static_branch_likely(&memcg_swap_qos_key))
return mem_cgroup_get_nr_swap_pages(memcg);
type = READ_ONCE(memcg->swap_dev->type);
if (type == SWAP_TYPE_ALL)
return mem_cgroup_get_nr_swap_pages(memcg);
else if (type == SWAP_TYPE_NONE)
return 0;
else
return get_nr_swap_pages_type(type);
}
#else
static int memcg_alloc_swap_device(struct mem_cgroup *memcg)
{
return 0;
}
static void memcg_free_swap_device(struct mem_cgroup *memcg)
{
}
static void memcg_swap_device_init(struct mem_cgroup *memcg,
struct mem_cgroup *parent)
{
}
static int mem_cgroup_check_swap_for_v1(struct page *page, swp_entry_t entry)
{
return 0;
}
int memcg_get_swap_type(struct page *page)
{
return SWAP_TYPE_ALL;
}
void memcg_remove_swapfile(int type)
{
}
static long mem_cgroup_get_nr_swap_pages_type(struct mem_cgroup *memcg)
{
return mem_cgroup_get_nr_swap_pages(memcg);
}
#endif
#ifdef CONFIG_NUMA
#define LRU_ALL_FILE (BIT(LRU_INACTIVE_FILE) | BIT(LRU_ACTIVE_FILE))
......@@ -5230,7 +5569,7 @@ static ssize_t memory_high_write(struct kernfs_open_file *of,
}
reclaimed = try_to_free_mem_cgroup_pages(memcg, nr_pages - high,
GFP_KERNEL, true);
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP);
if (!reclaimed && !nr_retries--)
break;
......@@ -5265,16 +5604,47 @@ static int memcg_events_local_show(struct seq_file *m, void *v)
return 0;
}
static int reclaim_param_parse(char *buf, unsigned long *nr_pages,
unsigned int *reclaim_options)
{
char *endp;
u64 bytes;
if (!strcmp(buf, "")) {
*nr_pages = PAGE_COUNTER_MAX;
return 0;
}
bytes = memparse(buf, &endp);
if (*endp == ' ') {
buf = endp + 1;
buf = strim(buf);
if (!strcmp(buf, "type=anon"))
*reclaim_options |= MEMCG_RECLAIM_NOT_FILE;
else if (!strcmp(buf, "type=file"))
*reclaim_options &= ~MEMCG_RECLAIM_MAY_SWAP;
else
return -EINVAL;
} else if (*endp != '\0')
return -EINVAL;
*nr_pages = min(bytes / PAGE_SIZE, (u64)PAGE_COUNTER_MAX);
return 0;
}
static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
size_t nbytes, loff_t off)
{
struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of));
unsigned int nr_retries = MAX_RECLAIM_RETRIES;
unsigned long nr_to_reclaim, nr_reclaimed = 0;
unsigned int reclaim_options;
int err;
reclaim_options = MEMCG_RECLAIM_MAY_SWAP | MEMCG_RECLAIM_PROACTIVE;
buf = strstrip(buf);
err = page_counter_memparse(buf, "", &nr_to_reclaim);
err = reclaim_param_parse(buf, &nr_to_reclaim, &reclaim_options);
if (err)
return err;
......@@ -5288,6 +5658,11 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
if (signal_pending(current))
return -EINTR;
/* If only reclaim swap pages, check swap space at first. */
if ((reclaim_options & MEMCG_RECLAIM_NOT_FILE) &&
(mem_cgroup_get_nr_swap_pages_type(memcg) <= 0))
return -EAGAIN;
/* This is the final attempt, drain percpu lru caches in the
* hope of introducing more evictable pages for
* try_to_free_mem_cgroup_pages().
......@@ -5297,7 +5672,7 @@ static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf,
reclaimed = try_to_free_mem_cgroup_pages(memcg,
nr_to_reclaim - nr_reclaimed,
GFP_KERNEL, true);
GFP_KERNEL, reclaim_options);
if (!reclaimed && !nr_retries--)
return -EAGAIN;
......@@ -5710,6 +6085,25 @@ static struct cftype mem_cgroup_legacy_files[] = {
.name = "reclaim",
.write = memory_reclaim,
},
#ifdef CONFIG_MEMCG_SWAP_QOS
{
.name = "force_swapin",
.flags = CFTYPE_NOT_ON_ROOT,
.write = memory_swapin,
},
{
.name = "swap.max",
.flags = CFTYPE_NOT_ON_ROOT,
.write = memcg_swapmax_write,
.read_u64 = memcg_swapmax_read,
},
{
.name = "swapfile",
.flags = CFTYPE_NOT_ON_ROOT,
.write = memcg_swapfile_write,
.seq_show = memcg_swapfile_read,
},
#endif
{
.name = "high_async_ratio",
.flags = CFTYPE_NOT_ON_ROOT,
......@@ -5854,6 +6248,7 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
for_each_node(node)
free_mem_cgroup_per_node_info(memcg, node);
free_percpu(memcg->vmstats_percpu);
memcg_free_swap_device(memcg);
kfree(memcg);
}
......@@ -5878,6 +6273,9 @@ static struct mem_cgroup *mem_cgroup_alloc(void)
if (!memcg)
return ERR_PTR(error);
if (memcg_alloc_swap_device(memcg))
goto fail;
memcg->id.id = idr_alloc(&mem_cgroup_idr, NULL,
1, MEM_CGROUP_ID_MAX,
GFP_KERNEL);
......@@ -5955,17 +6353,20 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
page_counter_init(&memcg->swap, NULL);
page_counter_init(&memcg->kmem, NULL);
page_counter_init(&memcg->tcpmem, NULL);
memcg_swap_device_init(memcg, NULL);
} else if (parent->use_hierarchy) {
memcg->use_hierarchy = true;
page_counter_init(&memcg->memory, &parent->memory);
page_counter_init(&memcg->swap, &parent->swap);
page_counter_init(&memcg->kmem, &parent->kmem);
page_counter_init(&memcg->tcpmem, &parent->tcpmem);
memcg_swap_device_init(memcg, parent);
} else {
page_counter_init(&memcg->memory, &root_mem_cgroup->memory);
page_counter_init(&memcg->swap, &root_mem_cgroup->swap);
page_counter_init(&memcg->kmem, &root_mem_cgroup->kmem);
page_counter_init(&memcg->tcpmem, &root_mem_cgroup->tcpmem);
memcg_swap_device_init(memcg, root_mem_cgroup);
/*
* Deeper hierachy with use_hierarchy == false doesn't make
* much sense so let cgroup subsystem know about this
......@@ -6984,7 +7385,7 @@ static ssize_t memory_max_write(struct kernfs_open_file *of,
if (nr_reclaims) {
if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max,
GFP_KERNEL, true))
GFP_KERNEL, MEMCG_RECLAIM_MAY_SWAP))
nr_reclaims--;
continue;
}
......@@ -7899,7 +8300,7 @@ int mem_cgroup_try_charge_swap(struct page *page, swp_entry_t entry)
unsigned short oldid;
if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
return 0;
return mem_cgroup_check_swap_for_v1(page, entry);
memcg = page_memcg(page);
......
......@@ -35,6 +35,11 @@
#include <linux/mm.h>
static DEFINE_PER_CPU(struct swap_slots_cache, swp_slots);
#ifdef CONFIG_MEMCG_SWAP_QOS
static unsigned int nr_swap_slots;
static unsigned int max_swap_slots;
static DEFINE_PER_CPU(struct swap_slots_cache [MAX_SWAPFILES], swp_type_slots);
#endif
static bool swap_slot_cache_active;
bool swap_slot_cache_enabled;
static bool swap_slot_cache_initialized;
......@@ -111,7 +116,37 @@ static bool check_cache_active(void)
return swap_slot_cache_active;
}
static int alloc_swap_slot_cache(unsigned int cpu)
#ifdef CONFIG_MEMCG_SWAP_QOS
static inline struct swap_slots_cache *get_slots_cache(int swap_type)
{
if (swap_type == SWAP_TYPE_ALL)
return raw_cpu_ptr(&swp_slots);
else
return raw_cpu_ptr(&swp_type_slots)[swap_type];
}
static inline struct swap_slots_cache *get_slots_cache_cpu(unsigned int cpu,
int swap_type)
{
if (swap_type == SWAP_TYPE_ALL)
return &per_cpu(swp_slots, cpu);
else
return &per_cpu(swp_type_slots, cpu)[swap_type];
}
#else
static inline struct swap_slots_cache *get_slots_cache(int swap_type)
{
return raw_cpu_ptr(&swp_slots);
}
static inline struct swap_slots_cache *get_slots_cache_cpu(unsigned int cpu,
int swap_type)
{
return &per_cpu(swp_slots, cpu);
}
#endif
static int alloc_swap_slot_cache_cpu_type(unsigned int cpu, int swap_type)
{
struct swap_slots_cache *cache;
swp_entry_t *slots, *slots_ret;
......@@ -134,7 +169,7 @@ static int alloc_swap_slot_cache(unsigned int cpu)
}
mutex_lock(&swap_slots_cache_mutex);
cache = &per_cpu(swp_slots, cpu);
cache = get_slots_cache_cpu(cpu, swap_type);
if (cache->slots || cache->slots_ret) {
/* cache already allocated */
mutex_unlock(&swap_slots_cache_mutex);
......@@ -166,13 +201,74 @@ static int alloc_swap_slot_cache(unsigned int cpu)
return 0;
}
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
bool free_slots)
#ifdef CONFIG_MEMCG_SWAP_QOS
static int __alloc_swap_slot_cache_cpu(unsigned int cpu)
{
int i, ret;
ret = alloc_swap_slot_cache_cpu_type(cpu, SWAP_TYPE_ALL);
if (ret)
return ret;
for (i = 0; i < nr_swap_slots; i++) {
ret = alloc_swap_slot_cache_cpu_type(cpu, i);
if (ret)
return ret;
}
return ret;
}
static void alloc_swap_slot_cache_type(int type)
{
unsigned int cpu;
if (type >= max_swap_slots)
max_swap_slots = type + 1;
if (!static_branch_likely(&memcg_swap_qos_key))
return;
/* serialize with cpu hotplug operations */
get_online_cpus();
while (type >= nr_swap_slots) {
for_each_online_cpu(cpu)
alloc_swap_slot_cache_cpu_type(cpu, nr_swap_slots);
nr_swap_slots++;
}
put_online_cpus();
}
void enable_swap_slots_cache_max(void)
{
mutex_lock(&swap_slots_cache_enable_mutex);
if (max_swap_slots)
alloc_swap_slot_cache_type(max_swap_slots - 1);
mutex_unlock(&swap_slots_cache_enable_mutex);
}
#else
static inline int __alloc_swap_slot_cache_cpu(unsigned int cpu)
{
return alloc_swap_slot_cache_cpu_type(cpu, SWAP_TYPE_ALL);
}
static void alloc_swap_slot_cache_type(int type)
{
}
#endif
static int alloc_swap_slot_cache(unsigned int cpu)
{
return __alloc_swap_slot_cache_cpu(cpu);
}
static void drain_slots_cache_cpu_type(unsigned int cpu, unsigned int type,
bool free_slots, int swap_type)
{
struct swap_slots_cache *cache;
swp_entry_t *slots = NULL;
cache = &per_cpu(swp_slots, cpu);
cache = get_slots_cache_cpu(cpu, swap_type);
if ((type & SLOTS_CACHE) && cache->slots) {
mutex_lock(&cache->alloc_lock);
swapcache_free_entries(cache->slots + cache->cur, cache->nr);
......@@ -198,6 +294,30 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
}
}
#ifdef CONFIG_MEMCG_SWAP_QOS
static void __drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
bool free_slots)
{
int i;
drain_slots_cache_cpu_type(cpu, type, free_slots, SWAP_TYPE_ALL);
for (i = 0; i < nr_swap_slots; i++)
drain_slots_cache_cpu_type(cpu, type, free_slots, i);
}
#else
static inline void __drain_slots_cache_cpu(unsigned int cpu,
unsigned int type, bool free_slots)
{
drain_slots_cache_cpu_type(cpu, type, free_slots, SWAP_TYPE_ALL);
}
#endif
static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
bool free_slots)
{
__drain_slots_cache_cpu(cpu, type, free_slots);
}
static void __drain_swap_slots_cache(unsigned int type)
{
unsigned int cpu;
......@@ -237,7 +357,7 @@ static int free_slot_cache(unsigned int cpu)
return 0;
}
void enable_swap_slots_cache(void)
void enable_swap_slots_cache(int type)
{
mutex_lock(&swap_slots_cache_enable_mutex);
if (!swap_slot_cache_initialized) {
......@@ -251,14 +371,14 @@ void enable_swap_slots_cache(void)
swap_slot_cache_initialized = true;
}
alloc_swap_slot_cache_type(type);
__reenable_swap_slots_cache();
out_unlock:
mutex_unlock(&swap_slots_cache_enable_mutex);
}
/* called with swap slot cache's alloc lock held */
static int refill_swap_slots_cache(struct swap_slots_cache *cache)
static int refill_swap_slots_cache(struct swap_slots_cache *cache, int type)
{
if (!use_swap_slot_cache || cache->nr)
return 0;
......@@ -266,7 +386,7 @@ static int refill_swap_slots_cache(struct swap_slots_cache *cache)
cache->cur = 0;
if (swap_slot_cache_active)
cache->nr = get_swap_pages(SWAP_SLOTS_CACHE_SIZE,
cache->slots, 1);
cache->slots, 1, type);
return cache->nr;
}
......@@ -307,12 +427,17 @@ swp_entry_t get_swap_page(struct page *page)
{
swp_entry_t entry;
struct swap_slots_cache *cache;
int type;
entry.val = 0;
type = memcg_get_swap_type(page);
if (type == SWAP_TYPE_NONE)
goto out;
if (PageTransHuge(page)) {
if (IS_ENABLED(CONFIG_THP_SWAP))
get_swap_pages(1, &entry, HPAGE_PMD_NR);
get_swap_pages(1, &entry, HPAGE_PMD_NR, type);
goto out;
}
......@@ -325,7 +450,7 @@ swp_entry_t get_swap_page(struct page *page)
* The alloc path here does not touch cache->slots_ret
* so cache->free_lock is not taken.
*/
cache = raw_cpu_ptr(&swp_slots);
cache = get_slots_cache(type);
if (likely(check_cache_active() && cache->slots)) {
mutex_lock(&cache->alloc_lock);
......@@ -335,7 +460,7 @@ swp_entry_t get_swap_page(struct page *page)
entry = cache->slots[cache->cur];
cache->slots[cache->cur++].val = 0;
cache->nr--;
} else if (refill_swap_slots_cache(cache)) {
} else if (refill_swap_slots_cache(cache, type)) {
goto repeat;
}
}
......@@ -344,7 +469,7 @@ swp_entry_t get_swap_page(struct page *page)
goto out;
}
get_swap_pages(1, &entry, 1);
get_swap_pages(1, &entry, 1, type);
out:
if (mem_cgroup_try_charge_swap(page, entry)) {
put_swap_page(page, entry);
......
......@@ -1056,7 +1056,97 @@ static unsigned long scan_swap_map(struct swap_info_struct *si,
}
int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
#ifdef CONFIG_MEMCG_SWAP_QOS
int write_swapfile_for_memcg(struct address_space *mapping, int *swap_type)
{
struct swap_info_struct *si;
unsigned int type;
int ret = -EINVAL;
spin_lock(&swap_lock);
for (type = 0; type < nr_swapfiles; type++) {
si = swap_info[type];
if ((si->flags & SWP_WRITEOK) &&
(si->swap_file->f_mapping == mapping)) {
WRITE_ONCE(*swap_type, type);
ret = 0;
break;
}
}
spin_unlock(&swap_lock);
return ret;
}
void read_swapfile_for_memcg(struct seq_file *m, int type)
{
struct swap_info_struct *si;
spin_lock(&swap_lock);
if (type < nr_swapfiles) {
si = swap_info[type];
if (si->flags & SWP_WRITEOK) {
seq_file_path(m, si->swap_file, "\t\n\\");
seq_printf(m, "\n");
}
}
spin_unlock(&swap_lock);
}
long get_nr_swap_pages_type(int type)
{
struct swap_info_struct *si;
long nr_swap_pages = 0;
spin_lock(&swap_lock);
if (type < nr_swapfiles) {
si = swap_info[type];
if (si->flags & SWP_WRITEOK)
nr_swap_pages = si->pages - si->inuse_pages;
}
spin_unlock(&swap_lock);
return nr_swap_pages;
}
static long get_avail_pages(unsigned long size, int type)
{
long avail_pgs = 0;
if (type == SWAP_TYPE_ALL)
return atomic_long_read(&nr_swap_pages) / size;
spin_unlock(&swap_avail_lock);
avail_pgs = get_nr_swap_pages_type(type) / size;
spin_lock(&swap_avail_lock);
return avail_pgs;
}
static inline bool should_skip_swap_type(int swap_type, int type)
{
if (type == SWAP_TYPE_ALL)
return false;
return (type != swap_type);
}
#else
long get_nr_swap_pages_type(int type)
{
return 0;
}
static inline long get_avail_pages(unsigned long size, int type)
{
return atomic_long_read(&nr_swap_pages) / size;
}
static inline bool should_skip_swap_type(int swap_type, int type)
{
return false;
}
#endif
int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size,
int type)
{
unsigned long size = swap_entry_size(entry_size);
struct swap_info_struct *si, *next;
......@@ -1069,7 +1159,7 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
spin_lock(&swap_avail_lock);
avail_pgs = atomic_long_read(&nr_swap_pages) / size;
avail_pgs = get_avail_pages(size, type);
if (avail_pgs <= 0) {
spin_unlock(&swap_avail_lock);
goto noswap;
......@@ -1086,6 +1176,11 @@ int get_swap_pages(int n_goal, swp_entry_t swp_entries[], int entry_size)
plist_requeue(&si->avail_lists[node], &swap_avail_heads[node]);
spin_unlock(&swap_avail_lock);
spin_lock(&si->lock);
if (should_skip_swap_type(si->type, type)) {
spin_unlock(&si->lock);
spin_lock(&swap_avail_lock);
goto nextsi;
}
if (!si->highest_bit || !(si->flags & SWP_WRITEOK)) {
spin_lock(&swap_avail_lock);
if (plist_node_empty(&si->avail_lists[node])) {
......@@ -2703,6 +2798,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
cluster_info = p->cluster_info;
p->cluster_info = NULL;
frontswap_map = frontswap_map_get(p);
memcg_remove_swapfile(p->type);
spin_unlock(&p->lock);
spin_unlock(&swap_lock);
arch_swap_invalidate_area(p->type);
......@@ -3457,7 +3553,7 @@ SYSCALL_DEFINE2(swapon, const char __user *, specialfile, int, swap_flags)
if (inode)
inode_unlock(inode);
if (!error)
enable_swap_slots_cache();
enable_swap_slots_cache(p->type);
return error;
}
......
......@@ -103,6 +103,12 @@ struct scan_control {
/* Can pages be swapped as part of reclaim? */
unsigned int may_swap:1;
/* Should skip file pages? */
unsigned int not_file:1;
/* Proactive reclaim invoked by userspace through memory.reclaim */
unsigned int proactive:1;
/*
* Cgroup memory below memory.low is protected as long as we
* don't threaten to OOM. If any cgroup is reclaimed at
......@@ -2461,6 +2467,11 @@ static void get_scan_count(struct lruvec *lruvec, struct scan_control *sc,
unsigned long ap, fp;
enum lru_list lru;
if (sc->not_file) {
scan_balance = SCAN_ANON;
goto out;
}
/* If we have no swap space, do not bother scanning anon pages. */
if (!sc->may_swap || mem_cgroup_get_nr_swap_pages(memcg) <= 0) {
scan_balance = SCAN_FILE;
......@@ -2880,9 +2891,10 @@ static void shrink_node_memcgs(pg_data_t *pgdat, struct scan_control *sc)
sc->priority);
/* Record the group's reclaim efficiency */
vmpressure(sc->gfp_mask, memcg, false,
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
if (!sc->proactive)
vmpressure(sc->gfp_mask, memcg, false,
sc->nr_scanned - scanned,
sc->nr_reclaimed - reclaimed);
} while ((memcg = mem_cgroup_iter(target_memcg, memcg, NULL)));
}
......@@ -3005,9 +3017,10 @@ static void shrink_node(pg_data_t *pgdat, struct scan_control *sc)
}
/* Record the subtree's reclaim efficiency */
vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
if (!sc->proactive)
vmpressure(sc->gfp_mask, sc->target_mem_cgroup, true,
sc->nr_scanned - nr_scanned,
sc->nr_reclaimed - nr_reclaimed);
if (sc->nr_reclaimed - nr_reclaimed)
reclaimable = true;
......@@ -3252,8 +3265,9 @@ static unsigned long do_try_to_free_pages(struct zonelist *zonelist,
__count_zid_vm_events(ALLOCSTALL, sc->reclaim_idx, 1);
do {
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
if (!sc->proactive)
vmpressure_prio(sc->gfp_mask, sc->target_mem_cgroup,
sc->priority);
sc->nr_scanned = 0;
shrink_zones(zonelist, sc);
......@@ -3562,7 +3576,7 @@ unsigned long mem_cgroup_shrink_node(struct mem_cgroup *memcg,
unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
unsigned long nr_pages,
gfp_t gfp_mask,
bool may_swap)
unsigned int reclaim_options)
{
unsigned long nr_reclaimed;
unsigned int noreclaim_flag;
......@@ -3575,7 +3589,9 @@ unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
.priority = DEF_PRIORITY,
.may_writepage = !laptop_mode,
.may_unmap = 1,
.may_swap = may_swap,
.may_swap = !!(reclaim_options & MEMCG_RECLAIM_MAY_SWAP),
.proactive = !!(reclaim_options & MEMCG_RECLAIM_PROACTIVE),
.not_file = !!(reclaim_options & MEMCG_RECLAIM_NOT_FILE),
};
/*
* Traverse the ZONELIST_FALLBACK zonelist of the current node to put
......