diff --git a/Documentation/cgroup-v2.txt b/Documentation/cgroup-v2.txt index e2f4e7948a666128d048abc3ca7c67212228380b..8f1329a5f7006e46734928cf18b7a6fa28501790 100644 --- a/Documentation/cgroup-v2.txt +++ b/Documentation/cgroup-v2.txt @@ -1387,6 +1387,12 @@ system than killing the group. Otherwise, memory.max is there to limit this type of spillover and ultimately contain buggy or even malicious applications. +Setting the original memory.limit_in_bytes below the current usage was +subject to a race condition, where concurrent charges could cause the +limit setting to fail. memory.max on the other hand will first set the +limit to prevent new charges, and then reclaim and OOM kill until the +new limit is met - or the task writing to memory.max is killed. + The combined memory+swap accounting and limiting is replaced by real control over swap space. diff --git a/mm/memcontrol.c b/mm/memcontrol.c index f7c9b4cbdf01fed191ca11323e7844887daa8544..8614e0d750e54d8ac884f05871f486e1fad786ee 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -1236,7 +1236,7 @@ static unsigned long mem_cgroup_get_limit(struct mem_cgroup *memcg) return limit; } -static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, +static bool mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { struct oom_control oc = { @@ -1314,6 +1314,7 @@ static void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, } unlock: mutex_unlock(&oom_lock); + return chosen; } #if MAX_NUMNODES > 1 @@ -5029,6 +5030,8 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + unsigned int nr_reclaims = MEM_CGROUP_RECLAIM_RETRIES; + bool drained = false; unsigned long max; int err; @@ -5037,9 +5040,36 @@ static ssize_t memory_max_write(struct kernfs_open_file *of, if (err) return err; - err = mem_cgroup_resize_limit(memcg, max); - if (err) - return err; + xchg(&memcg->memory.limit, max); + + for (;;) { + unsigned long nr_pages = page_counter_read(&memcg->memory); + + if (nr_pages <= max) + break; + + if (signal_pending(current)) { + err = -EINTR; + break; + } + + if (!drained) { + drain_all_stock(memcg); + drained = true; + continue; + } + + if (nr_reclaims) { + if (!try_to_free_mem_cgroup_pages(memcg, nr_pages - max, + GFP_KERNEL, true)) + nr_reclaims--; + continue; + } + + mem_cgroup_events(memcg, MEMCG_OOM, 1); + if (!mem_cgroup_out_of_memory(memcg, GFP_KERNEL, 0)) + break; + } memcg_wb_domain_size_changed(memcg); return nbytes;