diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 271c1c2c9f6f741016f0917c360469ae5ee872a5..dede0a2cfc45980a067e916ea1fabfeee273c7fb 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -43,6 +43,10 @@ struct res_counter { * the routines below consider this to be IRQ-safe */ spinlock_t lock; + /* + * Parent counter, used for hierarchial resource accounting + */ + struct res_counter *parent; }; /** @@ -87,7 +91,7 @@ enum { * helpers for accounting */ -void res_counter_init(struct res_counter *counter); +void res_counter_init(struct res_counter *counter, struct res_counter *parent); /* * charge - try to consume more resource. @@ -103,7 +107,7 @@ void res_counter_init(struct res_counter *counter); int __must_check res_counter_charge_locked(struct res_counter *counter, unsigned long val); int __must_check res_counter_charge(struct res_counter *counter, - unsigned long val); + unsigned long val, struct res_counter **limit_fail_at); /* * uncharge - tell that some portion of the resource is released diff --git a/kernel/res_counter.c b/kernel/res_counter.c index f275c8eca772c4dfe6bcdda6b1253a65fefe3e83..bf8e7534c803d4e7708ed6929ddd54f00d12064b 100644 --- a/kernel/res_counter.c +++ b/kernel/res_counter.c @@ -15,10 +15,11 @@ #include #include -void res_counter_init(struct res_counter *counter) +void res_counter_init(struct res_counter *counter, struct res_counter *parent) { spin_lock_init(&counter->lock); counter->limit = (unsigned long long)LLONG_MAX; + counter->parent = parent; } int res_counter_charge_locked(struct res_counter *counter, unsigned long val) @@ -34,14 +35,34 @@ int res_counter_charge_locked(struct res_counter *counter, unsigned long val) return 0; } -int res_counter_charge(struct res_counter *counter, unsigned long val) +int res_counter_charge(struct res_counter *counter, unsigned long val, + struct res_counter **limit_fail_at) { int ret; unsigned long flags; - - spin_lock_irqsave(&counter->lock, flags); - ret = res_counter_charge_locked(counter, val); - spin_unlock_irqrestore(&counter->lock, flags); + struct res_counter *c, *u; + + *limit_fail_at = NULL; + local_irq_save(flags); + for (c = counter; c != NULL; c = c->parent) { + spin_lock(&c->lock); + ret = res_counter_charge_locked(c, val); + spin_unlock(&c->lock); + if (ret < 0) { + *limit_fail_at = c; + goto undo; + } + } + ret = 0; + goto done; +undo: + for (u = counter; u != c; u = u->parent) { + spin_lock(&u->lock); + res_counter_uncharge_locked(u, val); + spin_unlock(&u->lock); + } +done: + local_irq_restore(flags); return ret; } @@ -56,10 +77,15 @@ void res_counter_uncharge_locked(struct res_counter *counter, unsigned long val) void res_counter_uncharge(struct res_counter *counter, unsigned long val) { unsigned long flags; + struct res_counter *c; - spin_lock_irqsave(&counter->lock, flags); - res_counter_uncharge_locked(counter, val); - spin_unlock_irqrestore(&counter->lock, flags); + local_irq_save(flags); + for (c = counter; c != NULL; c = c->parent) { + spin_lock(&c->lock); + res_counter_uncharge_locked(c, val); + spin_unlock(&c->lock); + } + local_irq_restore(flags); } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 9846f617115d0ad688aa5ddaf953069dcd31ade6..e72fb2b4a7d899420476f6b8329062fe819d7efb 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -471,6 +471,7 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, { struct mem_cgroup *mem; int nr_retries = MEM_CGROUP_RECLAIM_RETRIES; + struct res_counter *fail_res; /* * We always charge the cgroup the mm_struct belongs to. * The mm_struct's mem_cgroup changes on task migration if the @@ -499,11 +500,12 @@ static int __mem_cgroup_try_charge(struct mm_struct *mm, int ret; bool noswap = false; - ret = res_counter_charge(&mem->res, PAGE_SIZE); + ret = res_counter_charge(&mem->res, PAGE_SIZE, &fail_res); if (likely(!ret)) { if (!do_swap_account) break; - ret = res_counter_charge(&mem->memsw, PAGE_SIZE); + ret = res_counter_charge(&mem->memsw, PAGE_SIZE, + &fail_res); if (likely(!ret)) break; /* mem+swap counter fails */ @@ -1709,22 +1711,26 @@ static void __init enable_swap_cgroup(void) static struct cgroup_subsys_state * mem_cgroup_create(struct cgroup_subsys *ss, struct cgroup *cont) { - struct mem_cgroup *mem; + struct mem_cgroup *mem, *parent; int node; mem = mem_cgroup_alloc(); if (!mem) return ERR_PTR(-ENOMEM); - res_counter_init(&mem->res); - res_counter_init(&mem->memsw); - for_each_node_state(node, N_POSSIBLE) if (alloc_mem_cgroup_per_zone_info(mem, node)) goto free_out; /* root ? */ - if (cont->parent == NULL) + if (cont->parent == NULL) { enable_swap_cgroup(); + parent = NULL; + } else + parent = mem_cgroup_from_cont(cont->parent); + + res_counter_init(&mem->res, parent ? &parent->res : NULL); + res_counter_init(&mem->memsw, parent ? &parent->memsw : NULL); + return &mem->css; free_out: