Commit c377411f authored by Eric Dumazet, committed by David S. Miller

net: sk_add_backlog() take rmem_alloc into account

The current socket backlog limit is not enough to really stop DDoS attacks,
because the user thread spends a lot of time processing a full backlog each
round, and may end up spinning madly on the socket lock.

We should count both the backlog size and the receive_queue size (aka
rmem_alloc) to pace writers, and let the user thread run without being slowed
down too much.

Introduce a sk_rcvqueues_full() helper to avoid taking the socket lock in
stress situations.
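
The helper reduces admission control to one comparison: the bytes already charged to the receive queue (sk_rmem_alloc), plus the bytes sitting in the backlog, plus the new skb's truesize, checked against sk_rcvbuf. The following is a minimal user-space model of that comparison, not kernel code; the struct, field names and sample numbers are invented purely for illustration.

/* User-space model of the combined receive-queue + backlog check. */
#include <stdbool.h>
#include <stdio.h>

struct sock_model {
	unsigned int backlog_len;  /* bytes queued in the backlog */
	unsigned int rmem_alloc;   /* bytes charged to the receive queue */
	unsigned int rcvbuf;       /* sk_rcvbuf limit */
};

/* Mirrors the sk_rcvqueues_full() idea: both queues count against rcvbuf. */
static bool rcvqueues_full(const struct sock_model *sk, unsigned int truesize)
{
	unsigned int qsize = sk->backlog_len + sk->rmem_alloc;

	return qsize + truesize > sk->rcvbuf;
}

int main(void)
{
	struct sock_model sk = { .backlog_len = 100000,
				 .rmem_alloc  = 80000,
				 .rcvbuf      = 212992 };

	printf("4KB skb refused?  %d\n", rcvqueues_full(&sk, 4096));   /* 0: still fits */
	printf("64KB skb refused? %d\n", rcvqueues_full(&sk, 65536));  /* 1: over the limit */
	return 0;
}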

Under huge stress from a multiqueue/RPS-enabled NIC, a single-flow UDP
receiver can now process ~200.000 pps (instead of ~100 pps before the
patch) on an 8-core machine.
Signed-off-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
Parent 6e7676c1
@@ -256,7 +256,6 @@ struct sock {
 		struct sk_buff *head;
 		struct sk_buff *tail;
 		int len;
-		int limit;
 	} sk_backlog;
 	wait_queue_head_t *sk_sleep;
 	struct dst_entry *sk_dst_cache;
@@ -608,10 +607,20 @@ static inline void __sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 	skb->next = NULL;
 }
 
+/*
+ * Take into account size of receive queue and backlog queue
+ */
+static inline bool sk_rcvqueues_full(const struct sock *sk, const struct sk_buff *skb)
+{
+	unsigned int qsize = sk->sk_backlog.len + atomic_read(&sk->sk_rmem_alloc);
+
+	return qsize + skb->truesize > sk->sk_rcvbuf;
+}
+
 /* The per-socket spinlock must be held here. */
 static inline __must_check int sk_add_backlog(struct sock *sk, struct sk_buff *skb)
 {
-	if (sk->sk_backlog.len >= max(sk->sk_backlog.limit, sk->sk_rcvbuf << 1))
+	if (sk_rcvqueues_full(sk, skb))
 		return -ENOBUFS;
 
 	__sk_add_backlog(sk, skb);
...
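
The callers changed in the hunks below all follow the same pattern: test sk_rcvqueues_full() before bh_lock_sock(), so a flooded socket is skipped without contending for the lock, then still honor sk_add_backlog()'s return value once the lock is held. The sketch below is a hedged illustration of that caller pattern, not code from this patch; my_proto_rcv() and my_proto_do_rcv() are hypothetical names, while sk_rcvqueues_full(), bh_lock_sock(), sock_owned_by_user() and sk_add_backlog() are the real helpers shown above.

/* Sketch of a protocol receive path using the new helper. my_proto_do_rcv()
 * is a hypothetical protocol handler; the sk_* helpers are the real ones. */
#include <net/sock.h>

static int my_proto_do_rcv(struct sock *sk, struct sk_buff *skb); /* hypothetical */

static int my_proto_rcv(struct sock *sk, struct sk_buff *skb)
{
	/* Cheap early drop: receive queue + backlog already exceed sk_rcvbuf,
	 * so do not even contend for the socket lock. */
	if (sk_rcvqueues_full(sk, skb)) {
		atomic_inc(&sk->sk_drops);
		kfree_skb(skb);
		return -ENOBUFS;
	}

	bh_lock_sock(sk);
	if (!sock_owned_by_user(sk)) {
		my_proto_do_rcv(sk, skb);	/* socket lock held, process now */
	} else if (sk_add_backlog(sk, skb)) {
		/* Queues filled up while we waited for the lock: drop. */
		bh_unlock_sock(sk);
		atomic_inc(&sk->sk_drops);
		kfree_skb(skb);
		return -ENOBUFS;
	}
	bh_unlock_sock(sk);
	return 0;
}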
@@ -327,6 +327,10 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb, const int nested)
 
 	skb->dev = NULL;
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		atomic_inc(&sk->sk_drops);
+		goto discard_and_relse;
+	}
 	if (nested)
 		bh_lock_sock_nested(sk);
 	else
@@ -1885,7 +1889,6 @@ void sock_init_data(struct socket *sock, struct sock *sk)
 	sk->sk_allocation = GFP_KERNEL;
 	sk->sk_rcvbuf = sysctl_rmem_default;
 	sk->sk_sndbuf = sysctl_wmem_default;
-	sk->sk_backlog.limit = sk->sk_rcvbuf << 1;
 	sk->sk_state = TCP_CLOSE;
 	sk_set_socket(sk, sock);
...
@@ -1372,6 +1372,10 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb)
 			goto drop;
 	}
 
+
+	if (sk_rcvqueues_full(sk, skb))
+		goto drop;
+
 	rc = 0;
 
 	bh_lock_sock(sk);
...
@@ -584,6 +584,10 @@ static void flush_stack(struct sock **stack, unsigned int count,
 
 		sk = stack[i];
 		if (skb1) {
+			if (sk_rcvqueues_full(sk, skb)) {
+				kfree_skb(skb1);
+				goto drop;
+			}
 			bh_lock_sock(sk);
 			if (!sock_owned_by_user(sk))
 				udpv6_queue_rcv_skb(sk, skb1);
@@ -759,6 +763,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable,
 
 	/* deliver */
 
+	if (sk_rcvqueues_full(sk, skb)) {
+		sock_put(sk);
+		goto discard;
+	}
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk))
 		udpv6_queue_rcv_skb(sk, skb);
...
@@ -3721,9 +3721,6 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk)
 	SCTP_DBG_OBJCNT_INC(sock);
 	percpu_counter_inc(&sctp_sockets_allocated);
 
-	/* Set socket backlog limit. */
-	sk->sk_backlog.limit = sysctl_sctp_rmem[1];
-
 	local_bh_disable();
 	sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
 	local_bh_enable();
...