diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index a4bb857d902c76c0da5521bc0304d32e5c6b8c50..a18e228f140bf8ade69625a00686f58a57c6b043 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -138,6 +138,7 @@ static const char * const mem_cgroup_lru_names[] = {
  */
 enum mem_cgroup_events_target {
 	MEM_CGROUP_TARGET_THRESH,
+	MEM_CGROUP_TARGET_SOFTLIMIT,
 	MEM_CGROUP_TARGET_NUMAINFO,
 	MEM_CGROUP_NTARGETS,
 };
@@ -315,6 +316,22 @@ struct mem_cgroup {
 	atomic_t	numainfo_events;
 	atomic_t	numainfo_updating;
 #endif
+	/*
+	 * Protects soft_contributed transitions.
+	 * See mem_cgroup_update_soft_limit
+	 */
+	spinlock_t soft_lock;
+
+	/*
+	 * If true then this group has increased parents' children_in_excess
+	 * when it got over the soft limit.
+	 * When a group falls below the soft limit, parents' children_in_excess
+	 * is decreased and soft_contributed changed to false.
+	 */
+	bool soft_contributed;
+
+	/* Number of children that are in soft limit excess */
+	atomic_t children_in_excess;
 
 	struct mem_cgroup_per_node *nodeinfo[0];
 	/* WARNING: nodeinfo must be the last member here */
@@ -802,6 +819,9 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	case MEM_CGROUP_TARGET_THRESH:
 		next = val + THRESHOLDS_EVENTS_TARGET;
 		break;
+	case MEM_CGROUP_TARGET_SOFTLIMIT:
+		next = val + SOFTLIMIT_EVENTS_TARGET;
+		break;
 	case MEM_CGROUP_TARGET_NUMAINFO:
 		next = val + NUMAINFO_EVENTS_TARGET;
 		break;
@@ -814,6 +834,42 @@ static bool mem_cgroup_event_ratelimit(struct mem_cgroup *memcg,
 	return false;
 }
 
+/*
+ * Called from the rate-limited memcg_check_events when enough
+ * MEM_CGROUP_TARGET_SOFTLIMIT events are accumulated. It makes sure
+ * that all the parents up the hierarchy are notified that this group
+ * is in excess or that it is not in excess anymore. memcg->soft_contributed
+ * makes the transition a single action whenever the state flips from one to
+ * the other.
+ */
+static void mem_cgroup_update_soft_limit(struct mem_cgroup *memcg)
+{
+	unsigned long long excess = res_counter_soft_limit_excess(&memcg->res);
+	struct mem_cgroup *parent = memcg;
+	int delta = 0;
+
+	spin_lock(&memcg->soft_lock);
+	if (excess) {
+		if (!memcg->soft_contributed) {
+			delta = 1;
+			memcg->soft_contributed = true;
+		}
+	} else {
+		if (memcg->soft_contributed) {
+			delta = -1;
+			memcg->soft_contributed = false;
+		}
+	}
+
+	/*
+	 * Necessary to update all ancestors when hierarchy is used
+	 * because their event counter is not touched.
+	 */
+	while (delta && (parent = parent_mem_cgroup(parent)))
+		atomic_add(delta, &parent->children_in_excess);
+	spin_unlock(&memcg->soft_lock);
+}
+
 /*
  * Check events in order.
  *
@@ -824,8 +880,11 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 	/* threshold event is triggered in finer grain than soft limit */
 	if (unlikely(mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_THRESH))) {
+		bool do_softlimit;
 		bool do_numainfo __maybe_unused;
 
+		do_softlimit = mem_cgroup_event_ratelimit(memcg,
+						MEM_CGROUP_TARGET_SOFTLIMIT);
 #if MAX_NUMNODES > 1
 		do_numainfo = mem_cgroup_event_ratelimit(memcg,
 						MEM_CGROUP_TARGET_NUMAINFO);
@@ -833,6 +892,8 @@ static void memcg_check_events(struct mem_cgroup *memcg, struct page *page)
 		preempt_enable();
 
 		mem_cgroup_threshold(memcg);
+		if (unlikely(do_softlimit))
+			mem_cgroup_update_soft_limit(memcg);
 #if MAX_NUMNODES > 1
 		if (unlikely(do_numainfo))
 			atomic_inc(&memcg->numainfo_events);
@@ -1816,6 +1877,9 @@ int mem_cgroup_select_victim_node(struct mem_cgroup *memcg)
  * hierarchy if
  *	a) it is over its soft limit
  *	b) any parent up the hierarchy is over its soft limit
+ *
+ * If the given group doesn't have any children over the limit then it
+ * doesn't make any sense to iterate its subtree.
  */
 enum mem_cgroup_filter_t
 mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
@@ -1837,6 +1901,8 @@ mem_cgroup_soft_reclaim_eligible(struct mem_cgroup *memcg,
 			break;
 	}
 
+	if (!atomic_read(&memcg->children_in_excess))
+		return SKIP_TREE;
 	return SKIP;
 }
 
@@ -5892,6 +5958,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
 	mutex_init(&memcg->thresholds_lock);
 	spin_lock_init(&memcg->move_lock);
 	vmpressure_init(&memcg->vmpressure);
+	spin_lock_init(&memcg->soft_lock);
 
 	return &memcg->css;
 
@@ -5969,6 +6036,10 @@ static void mem_cgroup_css_offline(struct cgroup_subsys_state *css)
 
 	mem_cgroup_invalidate_reclaim_iterators(memcg);
 	mem_cgroup_reparent_charges(memcg);
+	if (memcg->soft_contributed) {
+		while ((memcg = parent_mem_cgroup(memcg)))
+			atomic_dec(&memcg->children_in_excess);
+	}
 	mem_cgroup_destroy_all_caches(memcg);
 	vmpressure_cleanup(&memcg->vmpressure);
 }
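
For illustration only, below is a minimal user-space sketch of the bookkeeping the patch introduces: a group flips soft_contributed exactly once per transition across its soft limit and propagates a +1/-1 to every ancestor's children_in_excess, which lets an uninteresting subtree be skipped during soft reclaim. The names (struct group, update_soft_limit, worth_visiting) and the plain usage/soft_limit comparison are stand-ins invented for the sketch, not the memcg API, and the locking and atomics of the real code are omitted.

#include <stdbool.h>
#include <stdio.h>

struct group {
	struct group *parent;
	unsigned long usage;
	unsigned long soft_limit;
	bool soft_contributed;		/* already counted in the ancestors? */
	int children_in_excess;		/* descendants currently over their limit */
};

/* Sketch of the mem_cgroup_update_soft_limit idea: act only on a state flip. */
static void update_soft_limit(struct group *g)
{
	bool over = g->usage > g->soft_limit;
	int delta = 0;

	if (over && !g->soft_contributed) {
		delta = 1;
		g->soft_contributed = true;
	} else if (!over && g->soft_contributed) {
		delta = -1;
		g->soft_contributed = false;
	}

	/* Propagate the single transition to every ancestor. */
	for (struct group *p = g->parent; delta && p; p = p->parent)
		p->children_in_excess += delta;
}

/*
 * Sketch of the SKIP_TREE shortcut: a group that is not in excess itself and
 * has no children in excess is not worth iterating.
 */
static bool worth_visiting(struct group *g)
{
	return g->usage > g->soft_limit || g->children_in_excess > 0;
}

int main(void)
{
	struct group root = { .soft_limit = 100 };
	struct group child = { .parent = &root, .soft_limit = 10 };

	child.usage = 50;		/* child goes over its soft limit */
	update_soft_limit(&child);
	printf("root.children_in_excess=%d visit=%d\n",
	       root.children_in_excess, worth_visiting(&root));

	child.usage = 5;		/* child drops back below the limit */
	update_soft_limit(&child);
	printf("root.children_in_excess=%d visit=%d\n",
	       root.children_in_excess, worth_visiting(&root));
	return 0;
}

Repeated checks while the group stays on the same side of the limit leave the ancestors untouched, which is the point of the soft_contributed flag: the counters record transitions, not samples.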