提交 71f87bee 编写于 作者: J Johannes Weiner 提交者: Linus Torvalds

mm: hugetlb_cgroup: convert to lockless page counters

Abandon the spinlock-protected byte counters in favor of the unlocked
page counters in the hugetlb controller as well.
Signed-off-by: Johannes Weiner <hannes@cmpxchg.org>
Reviewed-by: Vladimir Davydov <vdavydov@parallels.com>
Acked-by: Michal Hocko <mhocko@suse.cz>
Cc: Tejun Heo <tj@kernel.org>
Cc: David Rientjes <rientjes@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
上级 3e32cb2e
...@@ -29,7 +29,7 @@ Brief summary of control files ...@@ -29,7 +29,7 @@ Brief summary of control files
hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage hugetlb.<hugepagesize>.limit_in_bytes # set/show limit of "hugepagesize" hugetlb usage
hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded hugetlb.<hugepagesize>.max_usage_in_bytes # show max "hugepagesize" hugetlb usage recorded
hugetlb.<hugepagesize>.usage_in_bytes # show current res_counter usage for "hugepagesize" hugetlb hugetlb.<hugepagesize>.usage_in_bytes # show current usage for "hugepagesize" hugetlb
hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit hugetlb.<hugepagesize>.failcnt # show the number of allocation failure due to HugeTLB limit
For a system supporting two hugepage size (16M and 16G) the control For a system supporting two hugepage size (16M and 16G) the control
......
...@@ -16,7 +16,6 @@ ...@@ -16,7 +16,6 @@
#define _LINUX_HUGETLB_CGROUP_H #define _LINUX_HUGETLB_CGROUP_H
#include <linux/mmdebug.h> #include <linux/mmdebug.h>
#include <linux/res_counter.h>
struct hugetlb_cgroup; struct hugetlb_cgroup;
/* /*
......
...@@ -1051,7 +1051,8 @@ config MEMCG_KMEM ...@@ -1051,7 +1051,8 @@ config MEMCG_KMEM
config CGROUP_HUGETLB config CGROUP_HUGETLB
bool "HugeTLB Resource Controller for Control Groups" bool "HugeTLB Resource Controller for Control Groups"
depends on RESOURCE_COUNTERS && HUGETLB_PAGE depends on HUGETLB_PAGE
select PAGE_COUNTER
default n default n
help help
Provides a cgroup Resource Controller for HugeTLB pages. Provides a cgroup Resource Controller for HugeTLB pages.
......
...@@ -14,6 +14,7 @@ ...@@ -14,6 +14,7 @@
*/ */
#include <linux/cgroup.h> #include <linux/cgroup.h>
#include <linux/page_counter.h>
#include <linux/slab.h> #include <linux/slab.h>
#include <linux/hugetlb.h> #include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h> #include <linux/hugetlb_cgroup.h>
...@@ -23,7 +24,7 @@ struct hugetlb_cgroup { ...@@ -23,7 +24,7 @@ struct hugetlb_cgroup {
/* /*
* the counter to account for hugepages from hugetlb. * the counter to account for hugepages from hugetlb.
*/ */
struct res_counter hugepage[HUGE_MAX_HSTATE]; struct page_counter hugepage[HUGE_MAX_HSTATE];
}; };
#define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val)) #define MEMFILE_PRIVATE(x, val) (((x) << 16) | (val))
...@@ -60,7 +61,7 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg) ...@@ -60,7 +61,7 @@ static inline bool hugetlb_cgroup_have_usage(struct hugetlb_cgroup *h_cg)
int idx; int idx;
for (idx = 0; idx < hugetlb_max_hstate; idx++) { for (idx = 0; idx < hugetlb_max_hstate; idx++) {
if ((res_counter_read_u64(&h_cg->hugepage[idx], RES_USAGE)) > 0) if (page_counter_read(&h_cg->hugepage[idx]))
return true; return true;
} }
return false; return false;
...@@ -79,12 +80,12 @@ hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) ...@@ -79,12 +80,12 @@ hugetlb_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
if (parent_h_cgroup) { if (parent_h_cgroup) {
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
res_counter_init(&h_cgroup->hugepage[idx], page_counter_init(&h_cgroup->hugepage[idx],
&parent_h_cgroup->hugepage[idx]); &parent_h_cgroup->hugepage[idx]);
} else { } else {
root_h_cgroup = h_cgroup; root_h_cgroup = h_cgroup;
for (idx = 0; idx < HUGE_MAX_HSTATE; idx++) for (idx = 0; idx < HUGE_MAX_HSTATE; idx++)
res_counter_init(&h_cgroup->hugepage[idx], NULL); page_counter_init(&h_cgroup->hugepage[idx], NULL);
} }
return &h_cgroup->css; return &h_cgroup->css;
} }
...@@ -108,9 +109,8 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css) ...@@ -108,9 +109,8 @@ static void hugetlb_cgroup_css_free(struct cgroup_subsys_state *css)
static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
struct page *page) struct page *page)
{ {
int csize; unsigned int nr_pages;
struct res_counter *counter; struct page_counter *counter;
struct res_counter *fail_res;
struct hugetlb_cgroup *page_hcg; struct hugetlb_cgroup *page_hcg;
struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg); struct hugetlb_cgroup *parent = parent_hugetlb_cgroup(h_cg);
...@@ -123,15 +123,15 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg, ...@@ -123,15 +123,15 @@ static void hugetlb_cgroup_move_parent(int idx, struct hugetlb_cgroup *h_cg,
if (!page_hcg || page_hcg != h_cg) if (!page_hcg || page_hcg != h_cg)
goto out; goto out;
csize = PAGE_SIZE << compound_order(page); nr_pages = 1 << compound_order(page);
if (!parent) { if (!parent) {
parent = root_h_cgroup; parent = root_h_cgroup;
/* root has no limit */ /* root has no limit */
res_counter_charge_nofail(&parent->hugepage[idx], page_counter_charge(&parent->hugepage[idx], nr_pages);
csize, &fail_res);
} }
counter = &h_cg->hugepage[idx]; counter = &h_cg->hugepage[idx];
res_counter_uncharge_until(counter, counter->parent, csize); /* Take the pages off the local counter */
page_counter_cancel(counter, nr_pages);
set_hugetlb_cgroup(page, parent); set_hugetlb_cgroup(page, parent);
out: out:
...@@ -166,9 +166,8 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, ...@@ -166,9 +166,8 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup **ptr) struct hugetlb_cgroup **ptr)
{ {
int ret = 0; int ret = 0;
struct res_counter *fail_res; struct page_counter *counter;
struct hugetlb_cgroup *h_cg = NULL; struct hugetlb_cgroup *h_cg = NULL;
unsigned long csize = nr_pages * PAGE_SIZE;
if (hugetlb_cgroup_disabled()) if (hugetlb_cgroup_disabled())
goto done; goto done;
...@@ -187,7 +186,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, ...@@ -187,7 +186,7 @@ int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
} }
rcu_read_unlock(); rcu_read_unlock();
ret = res_counter_charge(&h_cg->hugepage[idx], csize, &fail_res); ret = page_counter_try_charge(&h_cg->hugepage[idx], nr_pages, &counter);
css_put(&h_cg->css); css_put(&h_cg->css);
done: done:
*ptr = h_cg; *ptr = h_cg;
...@@ -213,7 +212,6 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, ...@@ -213,7 +212,6 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
struct page *page) struct page *page)
{ {
struct hugetlb_cgroup *h_cg; struct hugetlb_cgroup *h_cg;
unsigned long csize = nr_pages * PAGE_SIZE;
if (hugetlb_cgroup_disabled()) if (hugetlb_cgroup_disabled())
return; return;
...@@ -222,61 +220,76 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages, ...@@ -222,61 +220,76 @@ void hugetlb_cgroup_uncharge_page(int idx, unsigned long nr_pages,
if (unlikely(!h_cg)) if (unlikely(!h_cg))
return; return;
set_hugetlb_cgroup(page, NULL); set_hugetlb_cgroup(page, NULL);
res_counter_uncharge(&h_cg->hugepage[idx], csize); page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
return; return;
} }
void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages, void hugetlb_cgroup_uncharge_cgroup(int idx, unsigned long nr_pages,
struct hugetlb_cgroup *h_cg) struct hugetlb_cgroup *h_cg)
{ {
unsigned long csize = nr_pages * PAGE_SIZE;
if (hugetlb_cgroup_disabled() || !h_cg) if (hugetlb_cgroup_disabled() || !h_cg)
return; return;
if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER) if (huge_page_order(&hstates[idx]) < HUGETLB_CGROUP_MIN_ORDER)
return; return;
res_counter_uncharge(&h_cg->hugepage[idx], csize); page_counter_uncharge(&h_cg->hugepage[idx], nr_pages);
return; return;
} }
enum {
RES_USAGE,
RES_LIMIT,
RES_MAX_USAGE,
RES_FAILCNT,
};
static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css, static u64 hugetlb_cgroup_read_u64(struct cgroup_subsys_state *css,
struct cftype *cft) struct cftype *cft)
{ {
int idx, name; struct page_counter *counter;
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css); struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(css);
idx = MEMFILE_IDX(cft->private); counter = &h_cg->hugepage[MEMFILE_IDX(cft->private)];
name = MEMFILE_ATTR(cft->private);
return res_counter_read_u64(&h_cg->hugepage[idx], name); switch (MEMFILE_ATTR(cft->private)) {
case RES_USAGE:
return (u64)page_counter_read(counter) * PAGE_SIZE;
case RES_LIMIT:
return (u64)counter->limit * PAGE_SIZE;
case RES_MAX_USAGE:
return (u64)counter->watermark * PAGE_SIZE;
case RES_FAILCNT:
return counter->failcnt;
default:
BUG();
}
} }
static DEFINE_MUTEX(hugetlb_limit_mutex);
static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off) char *buf, size_t nbytes, loff_t off)
{ {
int idx, name, ret; int ret, idx;
unsigned long long val; unsigned long nr_pages;
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
if (hugetlb_cgroup_is_root(h_cg)) /* Can't set limit on root */
return -EINVAL;
buf = strstrip(buf); buf = strstrip(buf);
ret = page_counter_memparse(buf, &nr_pages);
if (ret)
return ret;
idx = MEMFILE_IDX(of_cft(of)->private); idx = MEMFILE_IDX(of_cft(of)->private);
name = MEMFILE_ATTR(of_cft(of)->private);
switch (name) { switch (MEMFILE_ATTR(of_cft(of)->private)) {
case RES_LIMIT: case RES_LIMIT:
if (hugetlb_cgroup_is_root(h_cg)) { mutex_lock(&hugetlb_limit_mutex);
/* Can't set limit on root */ ret = page_counter_limit(&h_cg->hugepage[idx], nr_pages);
ret = -EINVAL; mutex_unlock(&hugetlb_limit_mutex);
break;
}
/* This function does all necessary parse...reuse it */
ret = res_counter_memparse_write_strategy(buf, &val);
if (ret)
break;
val = ALIGN(val, 1ULL << huge_page_shift(&hstates[idx]));
ret = res_counter_set_limit(&h_cg->hugepage[idx], val);
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
...@@ -288,18 +301,18 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of, ...@@ -288,18 +301,18 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of, static ssize_t hugetlb_cgroup_reset(struct kernfs_open_file *of,
char *buf, size_t nbytes, loff_t off) char *buf, size_t nbytes, loff_t off)
{ {
int idx, name, ret = 0; int ret = 0;
struct page_counter *counter;
struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of)); struct hugetlb_cgroup *h_cg = hugetlb_cgroup_from_css(of_css(of));
idx = MEMFILE_IDX(of_cft(of)->private); counter = &h_cg->hugepage[MEMFILE_IDX(of_cft(of)->private)];
name = MEMFILE_ATTR(of_cft(of)->private);
switch (name) { switch (MEMFILE_ATTR(of_cft(of)->private)) {
case RES_MAX_USAGE: case RES_MAX_USAGE:
res_counter_reset_max(&h_cg->hugepage[idx]); page_counter_reset_watermark(counter);
break; break;
case RES_FAILCNT: case RES_FAILCNT:
res_counter_reset_failcnt(&h_cg->hugepage[idx]); counter->failcnt = 0;
break; break;
default: default:
ret = -EINVAL; ret = -EINVAL;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册