diff --git a/Documentation/alibaba/interfaces.rst b/Documentation/alibaba/interfaces.rst
index 67ef70c65d7492f2c324b354abbf60d681406e53..b957e493065c3c31a72ece9b0b8aed2911758936 100644
--- a/Documentation/alibaba/interfaces.rst
+++ b/Documentation/alibaba/interfaces.rst
@@ -85,6 +85,10 @@ memory.exstat
 "wmark_min_throttled_ms" field is the total throttled time in milliseconds
 due to positive memory.wmark_min_adj under global memory pressure.
 
+"wmark_reclaim_work_ms" field is the total background async page reclaim
+(a.k.a. memcg kswap) work time in milliseconds, currently including
+sleep/resched time, due to memory usage exceeding wmark_high.
+
 zombie memcgs reaper
 ====================
 After memcg was deleted, page caches still reference to this memcg
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 9b99df80959050bf79558ac943c2d282326fd4d2..8d0950810f851c91b1902198c1a3dee17adaa4f4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -54,6 +54,7 @@ enum memcg_stat_item {
 
 enum memcg_exstat_item {
 	MEMCG_WMARK_MIN,
+	MEMCG_WMARK_RECLAIM,
 	MEMCG_NR_EXSTAT,
 };
 
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 66037ae2edc74db70c6fa8ed3d5185bab87415a8..e663acde03848fbb174d20b61a366c4e3c1d72e9 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -2313,6 +2313,8 @@ static int memcg_hotplug_cpu_dead(unsigned int cpu)
 static void reclaim_wmark(struct mem_cgroup *memcg)
 {
 	long nr_pages;
+	struct mem_cgroup *iter;
+	u64 start, duration;
 
 	if (is_wmark_ok(memcg, false))
 		return;
@@ -2324,7 +2326,21 @@ static void reclaim_wmark(struct mem_cgroup *memcg)
 
 	nr_pages = max(SWAP_CLUSTER_MAX, (unsigned long)nr_pages);
 
+	/*
+	 * Ideally we would record only the actual CPU time spent on
+	 * reclaim_wmark work, excluding any sleep/resched time. For now we
+	 * simply record the whole duration of the work as an
+	 * overhead-accuracy trade-off.
+	 */
+	start = ktime_get_ns();
 	try_to_free_mem_cgroup_pages(memcg, nr_pages, GFP_KERNEL, true);
+	duration = ktime_get_ns() - start;
+
+	css_get(&memcg->css);
+	for (iter = memcg; iter; iter = parent_mem_cgroup(iter))
+		this_cpu_add(iter->exstat_cpu->item[MEMCG_WMARK_RECLAIM],
+			     duration);
+	css_put(&memcg->css);
 }
 
 static void wmark_work_func(struct work_struct *work)
@@ -4167,17 +4183,26 @@ static int memcg_stat_show(struct seq_file *m, void *v)
 	return 0;
 }
 
+static u64 memcg_exstat_gather(struct mem_cgroup *memcg,
+			       enum memcg_exstat_item idx)
+{
+	u64 sum = 0;
+	int cpu;
+
+	for_each_online_cpu(cpu)
+		sum += per_cpu_ptr(memcg->exstat_cpu, cpu)->item[idx];
+
+	return sum;
+}
+
 static int memcg_exstat_show(struct seq_file *m, void *v)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(seq_css(m));
-	u64 wmark_min = 0;
-	int cpu;
 
-	for_each_possible_cpu(cpu) {
-		wmark_min +=
-		per_cpu_ptr(memcg->exstat_cpu, cpu)->item[MEMCG_WMARK_MIN];
-	}
-	seq_printf(m, "wmark_min_throttled_ms %llu\n", wmark_min);
+	seq_printf(m, "wmark_min_throttled_ms %llu\n",
+		   memcg_exstat_gather(memcg, MEMCG_WMARK_MIN));
+	seq_printf(m, "wmark_reclaim_work_ms %llu\n",
+		   memcg_exstat_gather(memcg, MEMCG_WMARK_RECLAIM) / 1000000);
 	return 0;
 }
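
The patch charges each reclaim duration with a per-CPU add on the memcg and
every ancestor, and only folds the per-CPU slots together when memory.exstat
is read. The toy userspace model below is a sketch of that scheme, not kernel
code: struct toy_memcg, NCPUS and the helper names are made up for
illustration, with a plain array standing in for the exstat_cpu percpu data.

#include <stdint.h>
#include <stdio.h>

#define NCPUS 4

struct toy_memcg {
	struct toy_memcg *parent;
	uint64_t reclaim_ns[NCPUS];	/* stands in for exstat_cpu->item[] */
};

/* write path: one cheap add per level, mirroring the parent walk with
 * this_cpu_add() in reclaim_wmark() */
static void account_reclaim(struct toy_memcg *memcg, int cpu, uint64_t ns)
{
	struct toy_memcg *iter;

	for (iter = memcg; iter; iter = iter->parent)
		iter->reclaim_ns[cpu] += ns;
}

/* read path: fold the per-CPU slots only at read time, like
 * memcg_exstat_gather(), then convert ns to ms as memcg_exstat_show() does */
static uint64_t gather_reclaim_ms(struct toy_memcg *memcg)
{
	uint64_t sum = 0;
	int cpu;

	for (cpu = 0; cpu < NCPUS; cpu++)
		sum += memcg->reclaim_ns[cpu];
	return sum / 1000000;
}

int main(void)
{
	struct toy_memcg root = { 0 };
	struct toy_memcg child = { .parent = &root };

	account_reclaim(&child, 0, 3000000);	/* 3 ms of reclaim on "CPU" 0 */
	account_reclaim(&child, 1, 2000000);	/* 2 ms on "CPU" 1 */

	printf("child: %llu ms, root: %llu ms\n",
	       (unsigned long long)gather_reclaim_ms(&child),
	       (unsigned long long)gather_reclaim_ms(&root));
	return 0;
}

The point of the split is that the hot reclaim path never takes a shared
counter lock; all cross-CPU work is pushed to the comparatively rare read of
memory.exstat.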
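
A minimal sketch of how userspace might consume the new field: it samples
"wmark_reclaim_work_ms" from memory.exstat twice and prints the delta, i.e.
how much time background reclaim spent during the interval. The field names
come from the patch; the /sys/fs/cgroup/memory mount point, the default file
path and the 10-second interval are assumptions, not part of the patch.

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

/* parse one "name value" line from memory.exstat */
static uint64_t read_exstat_field(const char *path, const char *field)
{
	char line[256];
	uint64_t val = 0;
	FILE *fp = fopen(path, "r");

	if (!fp) {
		perror(path);
		exit(1);
	}
	while (fgets(line, sizeof(line), fp)) {
		if (!strncmp(line, field, strlen(field))) {
			val = strtoull(line + strlen(field), NULL, 10);
			break;
		}
	}
	fclose(fp);
	return val;
}

int main(int argc, char **argv)
{
	/* e.g. ./a.out /sys/fs/cgroup/memory/mygroup/memory.exstat */
	const char *path = argc > 1 ? argv[1] :
			"/sys/fs/cgroup/memory/memory.exstat";
	uint64_t before, after;

	before = read_exstat_field(path, "wmark_reclaim_work_ms");
	sleep(10);
	after = read_exstat_field(path, "wmark_reclaim_work_ms");

	printf("background reclaim ran for %" PRIu64 " ms in the last 10s\n",
	       after - before);
	return 0;
}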