提交 648845ab 编写于 作者: T Tonghao Zhang 提交者: David S. Miller

sock: Move the socket inuse to namespace.

In some cases, we want to know how many sockets are in use in
different _net_ namespaces. It's a key resource metric.

This patch adds a member to struct netns_core: a counter of the
sockets in use in the _net_ namespace. The counter is incremented
in sk_alloc and sk_clone_lock, and decremented in __sk_free.

This patch does not count sockets created by the kernel.
It's not very useful for userspace to know how many kernel
sockets were created.

The main reasons for doing this are that:

1. When Linux calls 'do_exit' for an exiting process, the functions
'exit_task_namespaces' and 'exit_task_work' are called sequentially.
'exit_task_namespaces' may have destroyed the _net_ namespace, but
'sock_release', called in 'exit_task_work', could still use the _net_
namespace if we counted socket-inuse in sock_release.

2. socket and sock come in pairs. More importantly, sock holds the _net_
namespace. We count socket-inuse in sock to avoid holding the _net_
namespace again in socket. It's an easy way to maintain the code.
Signed-off-by: Martin Zhang <zhangjunweimartin@didichuxing.com>
Signed-off-by: Tonghao Zhang <zhangtonghao@didichuxing.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 08fc7f81
...@@ -11,6 +11,9 @@ struct netns_core { ...@@ -11,6 +11,9 @@ struct netns_core {
int sysctl_somaxconn; int sysctl_somaxconn;
#ifdef CONFIG_PROC_FS
int __percpu *sock_inuse;
#endif
struct prot_inuse __percpu *prot_inuse; struct prot_inuse __percpu *prot_inuse;
}; };
......
...@@ -1262,6 +1262,7 @@ proto_memory_pressure(struct proto *prot) ...@@ -1262,6 +1262,7 @@ proto_memory_pressure(struct proto *prot)
/* Called with local bh disabled */ /* Called with local bh disabled */
void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc); void sock_prot_inuse_add(struct net *net, struct proto *prot, int inc);
int sock_prot_inuse_get(struct net *net, struct proto *proto); int sock_prot_inuse_get(struct net *net, struct proto *proto);
int sock_inuse_get(struct net *net);
#else #else
static inline void sock_prot_inuse_add(struct net *net, struct proto *prot, static inline void sock_prot_inuse_add(struct net *net, struct proto *prot,
int inc) int inc)
......
...@@ -145,6 +145,8 @@ ...@@ -145,6 +145,8 @@
static DEFINE_MUTEX(proto_list_mutex); static DEFINE_MUTEX(proto_list_mutex);
static LIST_HEAD(proto_list); static LIST_HEAD(proto_list);
static void sock_inuse_add(struct net *net, int val);
/** /**
* sk_ns_capable - General socket capability test * sk_ns_capable - General socket capability test
* @sk: Socket to use a capability on or through * @sk: Socket to use a capability on or through
...@@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority, ...@@ -1531,8 +1533,11 @@ struct sock *sk_alloc(struct net *net, int family, gfp_t priority,
sk->sk_kern_sock = kern; sk->sk_kern_sock = kern;
sock_lock_init(sk); sock_lock_init(sk);
sk->sk_net_refcnt = kern ? 0 : 1; sk->sk_net_refcnt = kern ? 0 : 1;
if (likely(sk->sk_net_refcnt)) if (likely(sk->sk_net_refcnt)) {
get_net(net); get_net(net);
sock_inuse_add(net, 1);
}
sock_net_set(sk, net); sock_net_set(sk, net);
refcount_set(&sk->sk_wmem_alloc, 1); refcount_set(&sk->sk_wmem_alloc, 1);
...@@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk) ...@@ -1595,6 +1600,9 @@ void sk_destruct(struct sock *sk)
static void __sk_free(struct sock *sk) static void __sk_free(struct sock *sk)
{ {
if (likely(sk->sk_net_refcnt))
sock_inuse_add(sock_net(sk), -1);
if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt)) if (unlikely(sock_diag_has_destroy_listeners(sk) && sk->sk_net_refcnt))
sock_diag_broadcast_destroy(sk); sock_diag_broadcast_destroy(sk);
else else
...@@ -1716,6 +1724,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority) ...@@ -1716,6 +1724,8 @@ struct sock *sk_clone_lock(const struct sock *sk, const gfp_t priority)
newsk->sk_priority = 0; newsk->sk_priority = 0;
newsk->sk_incoming_cpu = raw_smp_processor_id(); newsk->sk_incoming_cpu = raw_smp_processor_id();
atomic64_set(&newsk->sk_cookie, 0); atomic64_set(&newsk->sk_cookie, 0);
if (likely(newsk->sk_net_refcnt))
sock_inuse_add(sock_net(newsk), 1);
/* /*
* Before updating sk_refcnt, we must commit prior changes to memory * Before updating sk_refcnt, we must commit prior changes to memory
...@@ -3061,15 +3071,44 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot) ...@@ -3061,15 +3071,44 @@ int sock_prot_inuse_get(struct net *net, struct proto *prot)
} }
EXPORT_SYMBOL_GPL(sock_prot_inuse_get); EXPORT_SYMBOL_GPL(sock_prot_inuse_get);
/*
 * Adjust the socket-in-use counter of @net by @val.
 *
 * Only the current CPU's slot of the per-CPU counter net->core.sock_inuse
 * is touched; the namespace-wide total is obtained by summing every CPU's
 * slot (see sock_inuse_get()). Callers pass +1 when a sock is set up and
 * -1 when it is freed.
 */
static void sock_inuse_add(struct net *net, int val)
{
this_cpu_add(*net->core.sock_inuse, val);
}
int sock_inuse_get(struct net *net)
{
int cpu, res = 0;
for_each_possible_cpu(cpu)
res += *per_cpu_ptr(net->core.sock_inuse, cpu);
return res;
}
EXPORT_SYMBOL_GPL(sock_inuse_get);
static int __net_init sock_inuse_init_net(struct net *net) static int __net_init sock_inuse_init_net(struct net *net)
{ {
net->core.prot_inuse = alloc_percpu(struct prot_inuse); net->core.prot_inuse = alloc_percpu(struct prot_inuse);
return net->core.prot_inuse ? 0 : -ENOMEM; if (net->core.prot_inuse == NULL)
return -ENOMEM;
net->core.sock_inuse = alloc_percpu(int);
if (net->core.sock_inuse == NULL)
goto out;
return 0;
out:
free_percpu(net->core.prot_inuse);
return -ENOMEM;
} }
static void __net_exit sock_inuse_exit_net(struct net *net) static void __net_exit sock_inuse_exit_net(struct net *net)
{ {
free_percpu(net->core.prot_inuse); free_percpu(net->core.prot_inuse);
free_percpu(net->core.sock_inuse);
} }
static struct pernet_operations net_inuse_ops = { static struct pernet_operations net_inuse_ops = {
...@@ -3112,6 +3151,10 @@ static inline void assign_proto_idx(struct proto *prot) ...@@ -3112,6 +3151,10 @@ static inline void assign_proto_idx(struct proto *prot)
static inline void release_proto_idx(struct proto *prot) static inline void release_proto_idx(struct proto *prot)
{ {
} }
/*
 * No-op variant of sock_inuse_add(): nothing is counted in this build
 * configuration. NOTE(review): presumably this is the !CONFIG_PROC_FS
 * branch, where net->core.sock_inuse does not exist - confirm against
 * the enclosing #ifdef. Kept so callers need no conditional compilation.
 */
static void sock_inuse_add(struct net *net, int val)
{
}
#endif #endif
static void req_prot_cleanup(struct request_sock_ops *rsk_prot) static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
......
...@@ -162,12 +162,6 @@ static const struct file_operations socket_file_ops = { ...@@ -162,12 +162,6 @@ static const struct file_operations socket_file_ops = {
static DEFINE_SPINLOCK(net_family_lock); static DEFINE_SPINLOCK(net_family_lock);
static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly;
/*
* Statistics counters of the socket lists
*/
static DEFINE_PER_CPU(int, sockets_in_use);
/* /*
* Support routines. * Support routines.
* Move socket addresses back and forth across the kernel/user * Move socket addresses back and forth across the kernel/user
...@@ -578,7 +572,6 @@ struct socket *sock_alloc(void) ...@@ -578,7 +572,6 @@ struct socket *sock_alloc(void)
inode->i_gid = current_fsgid(); inode->i_gid = current_fsgid();
inode->i_op = &sockfs_inode_ops; inode->i_op = &sockfs_inode_ops;
this_cpu_add(sockets_in_use, 1);
return sock; return sock;
} }
EXPORT_SYMBOL(sock_alloc); EXPORT_SYMBOL(sock_alloc);
...@@ -605,7 +598,6 @@ void sock_release(struct socket *sock) ...@@ -605,7 +598,6 @@ void sock_release(struct socket *sock)
if (rcu_dereference_protected(sock->wq, 1)->fasync_list) if (rcu_dereference_protected(sock->wq, 1)->fasync_list)
pr_err("%s: fasync list not empty!\n", __func__); pr_err("%s: fasync list not empty!\n", __func__);
this_cpu_sub(sockets_in_use, 1);
if (!sock->file) { if (!sock->file) {
iput(SOCK_INODE(sock)); iput(SOCK_INODE(sock));
return; return;
...@@ -2622,17 +2614,8 @@ core_initcall(sock_init); /* early initcall */ ...@@ -2622,17 +2614,8 @@ core_initcall(sock_init); /* early initcall */
#ifdef CONFIG_PROC_FS #ifdef CONFIG_PROC_FS
void socket_seq_show(struct seq_file *seq) void socket_seq_show(struct seq_file *seq)
{ {
int cpu; seq_printf(seq, "sockets: used %d\n",
int counter = 0; sock_inuse_get(seq->private));
for_each_possible_cpu(cpu)
counter += per_cpu(sockets_in_use, cpu);
/* It can be negative, by the way. 8) */
if (counter < 0)
counter = 0;
seq_printf(seq, "sockets: used %d\n", counter);
} }
#endif /* CONFIG_PROC_FS */ #endif /* CONFIG_PROC_FS */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册