diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6452ff4c463fd8715edc9a5043bc812521a18d6a..3e3318ddfc0e3e09a0e15825f78eb6052d628d78 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -676,8 +676,9 @@ enum { struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); -void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, - unsigned long *pdirty, unsigned long *pwriteback); +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, + unsigned long *pwriteback); #else /* CONFIG_CGROUP_WRITEBACK */ @@ -687,7 +688,8 @@ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) } static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, - unsigned long *pavail, + unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback) { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 1fedbde68f595c2b83d5aa84962a667c1ada120a..882c10cfd0ba4037faab3e071169ed8f08d669d7 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -3740,44 +3740,43 @@ struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) /** * mem_cgroup_wb_stats - retrieve writeback related stats from its memcg * @wb: bdi_writeback in question - * @pavail: out parameter for number of available pages + * @pfilepages: out parameter for number of file pages + * @pheadroom: out parameter for number of allocatable pages according to memcg * @pdirty: out parameter for number of dirty pages * @pwriteback: out parameter for number of pages under writeback * - * Determine the numbers of available, dirty, and writeback pages in @wb's - * memcg. Dirty and writeback are self-explanatory. Available is a bit - * more involved. + * Determine the numbers of file, headroom, dirty, and writeback pages in + * @wb's memcg. File, dirty and writeback are self-explanatory. Headroom + * is a bit more involved. * - * A memcg's headroom is "min(max, high) - used". The available memory is - * calculated as the lowest headroom of itself and the ancestors plus the - * number of pages already being used for file pages. Note that this - * doesn't consider the actual amount of available memory in the system. - * The caller should further cap *@pavail accordingly. + * A memcg's headroom is "min(max, high) - used". In the hierarchy, the + * headroom is calculated as the lowest headroom of itself and the + * ancestors. Note that this doesn't consider the actual amount of + * available memory in the system. The caller should further cap + * *@pheadroom accordingly. */ -void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, - unsigned long *pdirty, unsigned long *pwriteback) +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, + unsigned long *pwriteback) { struct mem_cgroup *memcg = mem_cgroup_from_css(wb->memcg_css); struct mem_cgroup *parent; - unsigned long head_room = PAGE_COUNTER_MAX; - unsigned long file_pages; *pdirty = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_DIRTY); /* this should eventually include NR_UNSTABLE_NFS */ *pwriteback = mem_cgroup_read_stat(memcg, MEM_CGROUP_STAT_WRITEBACK); + *pfilepages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | + (1 << LRU_ACTIVE_FILE)); + *pheadroom = PAGE_COUNTER_MAX; - file_pages = mem_cgroup_nr_lru_pages(memcg, (1 << LRU_INACTIVE_FILE) | - (1 << LRU_ACTIVE_FILE)); while ((parent = parent_mem_cgroup(memcg))) { unsigned long ceiling = min(memcg->memory.limit, memcg->high); unsigned long used = page_counter_read(&memcg->memory); - head_room = min(head_room, ceiling - min(ceiling, used)); + *pheadroom = min(*pheadroom, ceiling - min(ceiling, used)); memcg = parent; } - - *pavail = file_pages + head_room; } #else /* CONFIG_CGROUP_WRITEBACK */ diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 56c0bffa9f49f6e1c7a84537bc781ce71697d42b..2c90357c34ea4c4d81521b7bf14d669d6565cc07 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -684,13 +684,19 @@ static unsigned long hard_dirty_limit(struct wb_domain *dom, return max(thresh, dom->dirty_limit); } -/* memory available to a memcg domain is capped by system-wide clean memory */ -static void mdtc_cap_avail(struct dirty_throttle_control *mdtc) +/* + * Memory which can be further allocated to a memcg domain is capped by + * system-wide clean memory excluding the amount being used in the domain. + */ +static void mdtc_calc_avail(struct dirty_throttle_control *mdtc, + unsigned long filepages, unsigned long headroom) { struct dirty_throttle_control *gdtc = mdtc_gdtc(mdtc); - unsigned long clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); + unsigned long clean = filepages - min(filepages, mdtc->dirty); + unsigned long global_clean = gdtc->avail - min(gdtc->avail, gdtc->dirty); + unsigned long other_clean = global_clean - min(global_clean, clean); - mdtc->avail = min(mdtc->avail, clean); + mdtc->avail = filepages + min(headroom, other_clean); } /** @@ -1564,16 +1570,16 @@ static void balance_dirty_pages(struct address_space *mapping, } if (mdtc) { - unsigned long writeback; + unsigned long filepages, headroom, writeback; /* * If @wb belongs to !root memcg, repeat the same * basic calculations for the memcg domain. */ - mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, - &writeback); - mdtc_cap_avail(mdtc); + mem_cgroup_wb_stats(wb, &filepages, &headroom, + &mdtc->dirty, &writeback); mdtc->dirty += writeback; + mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); @@ -1895,10 +1901,11 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) return true; if (mdtc) { - unsigned long writeback; + unsigned long filepages, headroom, writeback; - mem_cgroup_wb_stats(wb, &mdtc->avail, &mdtc->dirty, &writeback); - mdtc_cap_avail(mdtc); + mem_cgroup_wb_stats(wb, &filepages, &headroom, &mdtc->dirty, + &writeback); + mdtc_calc_avail(mdtc, filepages, headroom); domain_dirty_limits(mdtc); /* ditto, ignore writeback */ if (mdtc->dirty > mdtc->bg_thresh)