Commit 9db66bdc authored by Eric Dumazet, committed by David S. Miller

net: convert TCP/DCCP ehash rwlocks to spinlocks

Now that TCP & DCCP use RCU lookups, we can convert the ehash rwlocks to spinlocks.

/proc/net/tcp and other seq_file 'readers' can safely be converted to 'writers'.

This should speed up writers, since spin_lock()/spin_unlock() uses only one atomic operation where write_lock()/write_unlock() needs two.
Signed-off-by: Eric Dumazet <dada1@cosmosbay.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Parent b8c26a33
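Background for the diff below, as a minimal sketch (the struct and function names here are hypothetical, for illustration only, not the kernel's actual API): once lookups traverse the hash chains under rcu_read_lock(), the per-bucket ehash lock has no read-side contention left to arbitrate, so a plain spinlock serializing writers suffices where a rwlock was needed before.

/*
 * Hypothetical bucket type and writer, sketching the pattern this
 * commit adopts; readers never take 'lock', they traverse 'chain'
 * under rcu_read_lock() with hlist_nulls_for_each_entry_rcu().
 */
struct bucket {
	struct hlist_nulls_head	chain;
	spinlock_t		lock;	/* was rwlock_t before this commit */
};

static void bucket_insert(struct bucket *b, struct hlist_nulls_node *n)
{
	spin_lock(&b->lock);			/* was write_lock() */
	hlist_nulls_add_head_rcu(n, &b->chain);	/* publish for RCU readers */
	spin_unlock(&b->lock);			/* was write_unlock() */
}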
@@ -116,7 +116,7 @@ struct inet_hashinfo {
 	 * TIME_WAIT sockets use a separate chain (twchain).
 	 */
 	struct inet_ehash_bucket	*ehash;
-	rwlock_t			*ehash_locks;
+	spinlock_t			*ehash_locks;
 	unsigned int			ehash_size;
 	unsigned int			ehash_locks_mask;
@@ -152,7 +152,7 @@ static inline struct inet_ehash_bucket *inet_ehash_bucket(
 	return &hashinfo->ehash[hash & (hashinfo->ehash_size - 1)];
 }
 
-static inline rwlock_t *inet_ehash_lockp(
+static inline spinlock_t *inet_ehash_lockp(
 	struct inet_hashinfo *hashinfo,
 	unsigned int hash)
 {
@@ -177,16 +177,16 @@ static inline int inet_ehash_locks_alloc(struct inet_hashinfo *hashinfo)
 		size = 4096;
 	if (sizeof(rwlock_t) != 0) {
 #ifdef CONFIG_NUMA
-		if (size * sizeof(rwlock_t) > PAGE_SIZE)
-			hashinfo->ehash_locks = vmalloc(size * sizeof(rwlock_t));
+		if (size * sizeof(spinlock_t) > PAGE_SIZE)
+			hashinfo->ehash_locks = vmalloc(size * sizeof(spinlock_t));
 		else
 #endif
-			hashinfo->ehash_locks = kmalloc(size * sizeof(rwlock_t),
+			hashinfo->ehash_locks = kmalloc(size * sizeof(spinlock_t),
 							GFP_KERNEL);
 		if (!hashinfo->ehash_locks)
 			return ENOMEM;
 		for (i = 0; i < size; i++)
-			rwlock_init(&hashinfo->ehash_locks[i]);
+			spin_lock_init(&hashinfo->ehash_locks[i]);
 	}
 	hashinfo->ehash_locks_mask = size - 1;
 	return 0;
@@ -197,7 +197,7 @@ static inline void inet_ehash_locks_free(struct inet_hashinfo *hashinfo)
 	if (hashinfo->ehash_locks) {
 #ifdef CONFIG_NUMA
 		unsigned int size = (hashinfo->ehash_locks_mask + 1) *
-							sizeof(rwlock_t);
+							sizeof(spinlock_t);
 		if (size > PAGE_SIZE)
 			vfree(hashinfo->ehash_locks);
 		else
...
@@ -271,13 +271,12 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	struct net *net = sock_net(sk);
 	unsigned int hash = inet_ehashfn(net, daddr, lport, saddr, inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
-	prefetch(head->chain.first);
-	write_lock(lock);
+	spin_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
 	sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -308,8 +307,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	sk->sk_hash = hash;
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 
 	if (twp) {
 		*twp = tw;
@@ -325,7 +324,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row,
 	return 0;
 
 not_unique:
-	write_unlock(lock);
+	spin_unlock(lock);
 	return -EADDRNOTAVAIL;
 }
@@ -340,7 +339,7 @@ void __inet_hash_nolisten(struct sock *sk)
 {
 	struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
 	struct hlist_nulls_head *list;
-	rwlock_t *lock;
+	spinlock_t *lock;
 	struct inet_ehash_bucket *head;
 
 	WARN_ON(!sk_unhashed(sk));
@@ -350,10 +349,10 @@ void __inet_hash_nolisten(struct sock *sk)
 	list = &head->chain;
 	lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-	write_lock(lock);
+	spin_lock(lock);
 	__sk_nulls_add_node_rcu(sk, list);
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_hash_nolisten);
@@ -402,12 +401,12 @@ void inet_unhash(struct sock *sk)
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 		spin_unlock_bh(&ilb->lock);
 	} else {
-		rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 
-		write_lock_bh(lock);
+		spin_lock_bh(lock);
 		if (__sk_nulls_del_node_init_rcu(sk))
 			sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
-		write_unlock_bh(lock);
+		spin_unlock_bh(lock);
 	}
 }
 EXPORT_SYMBOL_GPL(inet_unhash);
...
@@ -20,16 +20,16 @@ static void __inet_twsk_kill(struct inet_timewait_sock *tw,
 	struct inet_bind_hashbucket *bhead;
 	struct inet_bind_bucket *tb;
 	/* Unlink from established hashes. */
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
+	spinlock_t *lock = inet_ehash_lockp(hashinfo, tw->tw_hash);
 
-	write_lock(lock);
+	spin_lock(lock);
 	if (hlist_nulls_unhashed(&tw->tw_node)) {
-		write_unlock(lock);
+		spin_unlock(lock);
 		return;
 	}
 	hlist_nulls_del_rcu(&tw->tw_node);
 	sk_nulls_node_init(&tw->tw_node);
-	write_unlock(lock);
+	spin_unlock(lock);
 
 	/* Disassociate with bind bucket. */
 	bhead = &hashinfo->bhash[inet_bhashfn(twsk_net(tw), tw->tw_num,
@@ -76,7 +76,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	const struct inet_sock *inet = inet_sk(sk);
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct inet_ehash_bucket *ehead = inet_ehash_bucket(hashinfo, sk->sk_hash);
-	rwlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
+	spinlock_t *lock = inet_ehash_lockp(hashinfo, sk->sk_hash);
 	struct inet_bind_hashbucket *bhead;
 	/* Step 1: Put TW into bind hash. Original socket stays there too.
 	   Note, that any socket with inet->num != 0 MUST be bound in
@@ -90,7 +90,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	inet_twsk_add_bind_node(tw, &tw->tw_tb->owners);
 	spin_unlock(&bhead->lock);
 
-	write_lock(lock);
+	spin_lock(lock);
 
 	/*
 	 * Step 2: Hash TW into TIMEWAIT chain.
@@ -104,7 +104,7 @@ void __inet_twsk_hashdance(struct inet_timewait_sock *tw, struct sock *sk,
 	if (__sk_nulls_del_node_init_rcu(sk))
 		sock_prot_inuse_add(sock_net(sk), sk->sk_prot, -1);
 
-	write_unlock(lock);
+	spin_unlock(lock);
 }
 EXPORT_SYMBOL_GPL(__inet_twsk_hashdance);
@@ -427,9 +427,9 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
 	for (h = 0; h < (hashinfo->ehash_size); h++) {
 		struct inet_ehash_bucket *head =
 			inet_ehash_bucket(hashinfo, h);
-		rwlock_t *lock = inet_ehash_lockp(hashinfo, h);
+		spinlock_t *lock = inet_ehash_lockp(hashinfo, h);
 restart:
-		write_lock(lock);
+		spin_lock(lock);
 		sk_nulls_for_each(sk, node, &head->twchain) {
 			tw = inet_twsk(sk);
@@ -438,13 +438,13 @@ void inet_twsk_purge(struct net *net, struct inet_hashinfo *hashinfo,
 				continue;
 
 			atomic_inc(&tw->tw_refcnt);
-			write_unlock(lock);
+			spin_unlock(lock);
 			inet_twsk_deschedule(tw, twdr);
 			inet_twsk_put(tw);
 
 			goto restart;
 		}
-		write_unlock(lock);
+		spin_unlock(lock);
 	}
 	local_bh_enable();
 }
...
@@ -1970,13 +1970,13 @@ static void *established_get_first(struct seq_file *seq)
 		struct sock *sk;
 		struct hlist_nulls_node *node;
 		struct inet_timewait_sock *tw;
-		rwlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
+		spinlock_t *lock = inet_ehash_lockp(&tcp_hashinfo, st->bucket);
 
 		/* Lockless fast path for the common case of empty buckets */
 		if (empty_bucket(st))
 			continue;
 
-		read_lock_bh(lock);
+		spin_lock_bh(lock);
 		sk_nulls_for_each(sk, node, &tcp_hashinfo.ehash[st->bucket].chain) {
 			if (sk->sk_family != st->family ||
 			    !net_eq(sock_net(sk), net)) {
@@ -1995,7 +1995,7 @@ static void *established_get_first(struct seq_file *seq)
 			rc = tw;
 			goto out;
 		}
-		read_unlock_bh(lock);
+		spin_unlock_bh(lock);
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 	}
 out:
@@ -2023,7 +2023,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 			cur = tw;
 			goto out;
 		}
-		read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		st->state = TCP_SEQ_STATE_ESTABLISHED;
 
 		/* Look for next non empty bucket */
@@ -2033,7 +2033,7 @@ static void *established_get_next(struct seq_file *seq, void *cur)
 		if (st->bucket >= tcp_hashinfo.ehash_size)
 			return NULL;
 
-		read_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+		spin_lock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		sk = sk_nulls_head(&tcp_hashinfo.ehash[st->bucket].chain);
 	} else
 		sk = sk_nulls_next(sk);
@@ -2134,7 +2134,7 @@ static void tcp_seq_stop(struct seq_file *seq, void *v)
 	case TCP_SEQ_STATE_TIME_WAIT:
 	case TCP_SEQ_STATE_ESTABLISHED:
 		if (v)
-			read_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
+			spin_unlock_bh(inet_ehash_lockp(&tcp_hashinfo, st->bucket));
 		break;
 	}
...
@@ -38,14 +38,14 @@ void __inet6_hash(struct sock *sk)
 	} else {
 		unsigned int hash;
 		struct hlist_nulls_head *list;
-		rwlock_t *lock;
+		spinlock_t *lock;
 
 		sk->sk_hash = hash = inet6_sk_ehashfn(sk);
 		list = &inet_ehash_bucket(hashinfo, hash)->chain;
 		lock = inet_ehash_lockp(hashinfo, hash);
-		write_lock(lock);
+		spin_lock(lock);
 		__sk_nulls_add_node_rcu(sk, list);
-		write_unlock(lock);
+		spin_unlock(lock);
 	}
 
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -195,13 +195,12 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	const unsigned int hash = inet6_ehashfn(net, daddr, lport, saddr,
 						inet->dport);
 	struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash);
-	rwlock_t *lock = inet_ehash_lockp(hinfo, hash);
+	spinlock_t *lock = inet_ehash_lockp(hinfo, hash);
 	struct sock *sk2;
 	const struct hlist_nulls_node *node;
 	struct inet_timewait_sock *tw;
 
-	prefetch(head->chain.first);
-	write_lock(lock);
+	spin_lock(lock);
 
 	/* Check TIME-WAIT sockets first. */
 	sk_nulls_for_each(sk2, node, &head->twchain) {
@@ -230,8 +229,8 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	WARN_ON(!sk_unhashed(sk));
 	__sk_nulls_add_node_rcu(sk, &head->chain);
 	sk->sk_hash = hash;
+	spin_unlock(lock);
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
-	write_unlock(lock);
 
 	if (twp != NULL) {
 		*twp = tw;
@@ -246,7 +245,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row,
 	return 0;
 
 not_unique:
-	write_unlock(lock);
+	spin_unlock(lock);
 	return -EADDRNOTAVAIL;
 }
...