提交 28055c97 编写于 作者: D David S. Miller

Merge branch 'inet_csk_get_port-and-soreusport-fixes'

Tom Herbert says:

====================
inet: Fixes for inet_csk_get_port and soreuseport

This patch set fixes a couple of issues I noticed while debugging our
softlockup issue in inet_csk_get_port.

- Don't allow jump into port scan in inet_csk_get_port if function
  was called with non-zero port number (looking up explicit port
  number).
- When inet_csk_get_port is called with zero port number (i.e. perform
  scan) and reuseport is set on the socket, don't match sockets that
  also have reuseport set. The intent from the user should be
  to get a new port number and then explicitly bind other
  sockets to that number using soreuseport.

Tested:

Ran first patch on production workload with no ill effect.

For second patch, ran a little listener application and first
demonstrated that unbound sockets with soreuseport can indeed
be bound to unrelated soreuseport sockets.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -22,7 +22,8 @@ struct sock; ...@@ -22,7 +22,8 @@ struct sock;
struct sockaddr; struct sockaddr;
int inet6_csk_bind_conflict(const struct sock *sk, int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax); const struct inet_bind_bucket *tb, bool relax,
bool soreuseport_ok);
struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6, struct dst_entry *inet6_csk_route_req(const struct sock *sk, struct flowi6 *fl6,
const struct request_sock *req, u8 proto); const struct request_sock *req, u8 proto);
......
...@@ -63,7 +63,8 @@ struct inet_connection_sock_af_ops { ...@@ -63,7 +63,8 @@ struct inet_connection_sock_af_ops {
#endif #endif
void (*addr2sockaddr)(struct sock *sk, struct sockaddr *); void (*addr2sockaddr)(struct sock *sk, struct sockaddr *);
int (*bind_conflict)(const struct sock *sk, int (*bind_conflict)(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax); const struct inet_bind_bucket *tb,
bool relax, bool soreuseport_ok);
void (*mtu_reduced)(struct sock *sk); void (*mtu_reduced)(struct sock *sk);
}; };
...@@ -261,7 +262,8 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk, ...@@ -261,7 +262,8 @@ inet_csk_rto_backoff(const struct inet_connection_sock *icsk,
struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); struct sock *inet_csk_accept(struct sock *sk, int flags, int *err);
int inet_csk_bind_conflict(const struct sock *sk, int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax); const struct inet_bind_bucket *tb, bool relax,
bool soreuseport_ok);
int inet_csk_get_port(struct sock *sk, unsigned short snum); int inet_csk_get_port(struct sock *sk, unsigned short snum);
struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4, struct dst_entry *inet_csk_route_req(const struct sock *sk, struct flowi4 *fl4,
......
...@@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high) ...@@ -45,11 +45,12 @@ void inet_get_local_port_range(struct net *net, int *low, int *high)
EXPORT_SYMBOL(inet_get_local_port_range); EXPORT_SYMBOL(inet_get_local_port_range);
int inet_csk_bind_conflict(const struct sock *sk, int inet_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax) const struct inet_bind_bucket *tb, bool relax,
bool reuseport_ok)
{ {
struct sock *sk2; struct sock *sk2;
int reuse = sk->sk_reuse; bool reuse = sk->sk_reuse;
int reuseport = sk->sk_reuseport; bool reuseport = !!sk->sk_reuseport && reuseport_ok;
kuid_t uid = sock_i_uid((struct sock *)sk); kuid_t uid = sock_i_uid((struct sock *)sk);
/* /*
...@@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -105,6 +106,7 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
struct inet_bind_bucket *tb; struct inet_bind_bucket *tb;
kuid_t uid = sock_i_uid(sk); kuid_t uid = sock_i_uid(sk);
u32 remaining, offset; u32 remaining, offset;
bool reuseport_ok = !!snum;
if (port) { if (port) {
have_port: have_port:
...@@ -165,7 +167,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -165,7 +167,8 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
smallest_size = tb->num_owners; smallest_size = tb->num_owners;
smallest_port = port; smallest_port = port;
} }
if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false)) if (!inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, false,
reuseport_ok))
goto tb_found; goto tb_found;
goto next_port; goto next_port;
} }
...@@ -206,13 +209,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum) ...@@ -206,13 +209,14 @@ int inet_csk_get_port(struct sock *sk, unsigned short snum)
sk->sk_reuseport && uid_eq(tb->fastuid, uid))) && sk->sk_reuseport && uid_eq(tb->fastuid, uid))) &&
smallest_size == -1) smallest_size == -1)
goto success; goto success;
if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true)) { if (inet_csk(sk)->icsk_af_ops->bind_conflict(sk, tb, true,
reuseport_ok)) {
if ((reuse || if ((reuse ||
(tb->fastreuseport > 0 && (tb->fastreuseport > 0 &&
sk->sk_reuseport && sk->sk_reuseport &&
!rcu_access_pointer(sk->sk_reuseport_cb) && !rcu_access_pointer(sk->sk_reuseport_cb) &&
uid_eq(tb->fastuid, uid))) && uid_eq(tb->fastuid, uid))) &&
smallest_size != -1 && --attempts >= 0) { !snum && smallest_size != -1 && --attempts >= 0) {
spin_unlock_bh(&head->lock); spin_unlock_bh(&head->lock);
goto again; goto again;
} }
......
...@@ -29,11 +29,12 @@ ...@@ -29,11 +29,12 @@
#include <net/sock_reuseport.h> #include <net/sock_reuseport.h>
int inet6_csk_bind_conflict(const struct sock *sk, int inet6_csk_bind_conflict(const struct sock *sk,
const struct inet_bind_bucket *tb, bool relax) const struct inet_bind_bucket *tb, bool relax,
bool reuseport_ok)
{ {
const struct sock *sk2; const struct sock *sk2;
int reuse = sk->sk_reuse; bool reuse = !!sk->sk_reuse;
int reuseport = sk->sk_reuseport; bool reuseport = !!sk->sk_reuseport && reuseport_ok;
kuid_t uid = sock_i_uid((struct sock *)sk); kuid_t uid = sock_i_uid((struct sock *)sk);
/* We must walk the whole port owner list in this case. -DaveM */ /* We must walk the whole port owner list in this case. -DaveM */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册