/* tcp_memcontrol.c */
#include <net/tcp.h>
#include <net/tcp_memcontrol.h>
#include <net/sock.h>
#include <net/ip.h>
#include <linux/nsproxy.h>
#include <linux/memcontrol.h>
#include <linux/module.h>

static inline struct tcp_memcontrol *tcp_from_cgproto(struct cg_proto *cg_proto)
{
	return container_of(cg_proto, struct tcp_memcontrol, cg_proto);
}

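/*
 * cg_proto enter_memory_pressure callback (wired up in tcp_init_cgroup()
 * below): mark this memcg's TCP stack as under memory pressure.
 */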
static void memcg_tcp_enter_memory_pressure(struct sock *sk)
{
	if (sk->sk_cgrp->memory_pressure)
		*sk->sk_cgrp->memory_pressure = 1;
}
EXPORT_SYMBOL(memcg_tcp_enter_memory_pressure);

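/*
 * Initialize per-memcg TCP accounting: seed the memcg's tcp_mem[]
 * thresholds from the netns sysctl values and hook up the cg_proto
 * counters and callbacks consumed by the core socket code.
 */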
int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss)
{
	/*
	 * The root cgroup does not use res_counters, but rather
	 * relies on the data already collected by the network
	 * subsystem.
	 */
	struct res_counter *res_parent = NULL;
	struct cg_proto *cg_proto, *parent_cg;
	struct tcp_memcontrol *tcp;
	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
	struct net *net = current->nsproxy->net_ns;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return 0;

	tcp = tcp_from_cgproto(cg_proto);

	tcp->tcp_prot_mem[0] = net->ipv4.sysctl_tcp_mem[0];
	tcp->tcp_prot_mem[1] = net->ipv4.sysctl_tcp_mem[1];
	tcp->tcp_prot_mem[2] = net->ipv4.sysctl_tcp_mem[2];
	tcp->tcp_memory_pressure = 0;

	parent_cg = tcp_prot.proto_cgroup(parent);
	if (parent_cg)
		res_parent = parent_cg->memory_allocated;

	res_counter_init(&tcp->tcp_memory_allocated, res_parent);
	percpu_counter_init(&tcp->tcp_sockets_allocated, 0);

	cg_proto->enter_memory_pressure = memcg_tcp_enter_memory_pressure;
	cg_proto->memory_pressure = &tcp->tcp_memory_pressure;
	cg_proto->sysctl_mem = tcp->tcp_prot_mem;
	cg_proto->memory_allocated = &tcp->tcp_memory_allocated;
	cg_proto->sockets_allocated = &tcp->tcp_sockets_allocated;
	cg_proto->memcg = memcg;

	return 0;
}
EXPORT_SYMBOL(tcp_init_cgroup);

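/*
 * Tear down per-memcg TCP accounting; only the percpu counter needs
 * explicit destruction here.
 */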
void tcp_destroy_cgroup(struct mem_cgroup *memcg)
{
	struct cg_proto *cg_proto;
	struct tcp_memcontrol *tcp;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return;

	tcp = tcp_from_cgproto(cg_proto);
	percpu_counter_destroy(&tcp->tcp_sockets_allocated);
}
EXPORT_SYMBOL(tcp_destroy_cgroup);

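/*
 * Apply a new byte limit: update the res_counter, rescale the memcg's
 * tcp_mem[] thresholds (clamped by the netns sysctl), and flip the
 * static key on or off as the limit moves away from or back to
 * "unlimited" (RES_COUNTER_MAX).
 */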
static int tcp_update_limit(struct mem_cgroup *memcg, u64 val)
{
	struct net *net = current->nsproxy->net_ns;
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;
	u64 old_lim;
	int i;
	int ret;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return -EINVAL;

	if (val > RES_COUNTER_MAX)
		val = RES_COUNTER_MAX;

	tcp = tcp_from_cgproto(cg_proto);

	old_lim = res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
	ret = res_counter_set_limit(&tcp->tcp_memory_allocated, val);
	if (ret)
		return ret;

	for (i = 0; i < 3; i++)
		tcp->tcp_prot_mem[i] = min_t(long, val >> PAGE_SHIFT,
					     net->ipv4.sysctl_tcp_mem[i]);

	if (val == RES_COUNTER_MAX)
		clear_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
	else {
		/*
		 * The active bit needs to be written after the static_key
		 * update. This is what guarantees that the socket activation
		 * function is the last one to run. See sock_update_memcg() for
		 * details, and note that we don't mark any socket as belonging
		 * to this memcg until that flag is up.
		 *
		 * We need to do this, because static_keys will span multiple
		 * sites, but we can't control their order. If we mark a socket
		 * as accounted, but the accounting functions are not patched in
		 * yet, we'll lose accounting.
		 *
		 * We never race with the readers in sock_update_memcg(),
		 * because when this value changes, the code to process it is
		 * not patched in yet.
		 *
		 * The activated bit is used to guarantee that no two writers
		 * will do the update in the same memcg. Without that, we can't
		 * properly shut down the static key.
		 */
		if (!test_and_set_bit(MEMCG_SOCK_ACTIVATED, &cg_proto->flags))
			static_key_slow_inc(&memcg_socket_limit_enabled);
		set_bit(MEMCG_SOCK_ACTIVE, &cg_proto->flags);
	}

	return 0;
}

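/*
 * Write handler for the kmem.tcp.* files; only RES_LIMIT is writable.
 */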
static int tcp_cgroup_write(struct cgroup_subsys_state *css, struct cftype *cft,
			    const char *buffer)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	unsigned long long val;
	int ret = 0;

	switch (cft->private) {
	case RES_LIMIT:
		/* see memcontrol.c */
		ret = res_counter_memparse_write_strategy(buffer, &val);
		if (ret)
			break;
		ret = tcp_update_limit(memcg, val);
		break;
	default:
		ret = -EINVAL;
		break;
	}
	return ret;
}

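/*
 * Read one res_counter member (RES_LIMIT, RES_FAILCNT, ...) from this
 * memcg's TCP counter, falling back to @default_val when the memcg has
 * no cg_proto of its own.
 */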
static u64 tcp_read_stat(struct mem_cgroup *memcg, int type, u64 default_val)
{
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return default_val;

	tcp = tcp_from_cgproto(cg_proto);
	return res_counter_read_u64(&tcp->tcp_memory_allocated, type);
}

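/*
 * Current TCP buffer usage in bytes.  Cgroups without their own
 * cg_proto report the global tcp_memory_allocated count (in pages)
 * converted to bytes.
 */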
static u64 tcp_read_usage(struct mem_cgroup *memcg)
{
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return atomic_long_read(&tcp_memory_allocated) << PAGE_SHIFT;

	tcp = tcp_from_cgproto(cg_proto);
	return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_USAGE);
}

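/* Read handler backing the kmem.tcp.* u64 control files. */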
static u64 tcp_cgroup_read(struct cgroup_subsys_state *css, struct cftype *cft)
{
	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
	u64 val;

	switch (cft->private) {
	case RES_LIMIT:
		val = tcp_read_stat(memcg, RES_LIMIT, RES_COUNTER_MAX);
		break;
	case RES_USAGE:
		val = tcp_read_usage(memcg);
		break;
	case RES_FAILCNT:
	case RES_MAX_USAGE:
		val = tcp_read_stat(memcg, cft->private, 0);
		break;
	default:
		BUG();
	}
	return val;
}

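/* Trigger handler: reset the max_usage watermark or the failcnt. */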
static int tcp_cgroup_reset(struct cgroup_subsys_state *css, unsigned int event)
{
	struct mem_cgroup *memcg;
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;

	memcg = mem_cgroup_from_css(css);
	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return 0;
	tcp = tcp_from_cgproto(cg_proto);

	switch (event) {
	case RES_MAX_USAGE:
		res_counter_reset_max(&tcp->tcp_memory_allocated);
		break;
	case RES_FAILCNT:
		res_counter_reset_failcnt(&tcp->tcp_memory_allocated);
		break;
	}

	return 0;
}

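/*
 * Return this memcg's TCP byte limit, or 0 when it has no cg_proto.
 */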
unsigned long long tcp_max_memory(const struct mem_cgroup *memcg)
{
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;

	cg_proto = tcp_prot.proto_cgroup((struct mem_cgroup *)memcg);
	if (!cg_proto)
		return 0;

	tcp = tcp_from_cgproto(cg_proto);
	return res_counter_read_u64(&tcp->tcp_memory_allocated, RES_LIMIT);
}

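/* Update one of the memcg's tcp_mem[] thresholds when the sysctl changes. */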
void tcp_prot_mem(struct mem_cgroup *memcg, long val, int idx)
{
	struct tcp_memcontrol *tcp;
	struct cg_proto *cg_proto;

	cg_proto = tcp_prot.proto_cgroup(memcg);
	if (!cg_proto)
		return;

	tcp = tcp_from_cgproto(cg_proto);

	tcp->tcp_prot_mem[idx] = val;
}

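/* Control files exposed by the memory controller as kmem.tcp.*. */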
static struct cftype tcp_files[] = {
	{
		.name = "kmem.tcp.limit_in_bytes",
		.write_string = tcp_cgroup_write,
		.read_u64 = tcp_cgroup_read,
		.private = RES_LIMIT,
	},
	{
		.name = "kmem.tcp.usage_in_bytes",
		.read_u64 = tcp_cgroup_read,
		.private = RES_USAGE,
	},
	{
		.name = "kmem.tcp.failcnt",
		.private = RES_FAILCNT,
		.trigger = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{
		.name = "kmem.tcp.max_usage_in_bytes",
		.private = RES_MAX_USAGE,
		.trigger = tcp_cgroup_reset,
		.read_u64 = tcp_cgroup_read,
	},
	{ }	/* terminate */
};

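/* Register the kmem.tcp.* files with the memory cgroup subsystem. */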
static int __init tcp_memcontrol_init(void)
{
	WARN_ON(cgroup_add_cftypes(&mem_cgroup_subsys, tcp_files));
	return 0;
}
__initcall(tcp_memcontrol_init);