提交 91b5b21c 编写于 作者: L Lawrence Brakmo 提交者: David S. Miller

bpf: Add support for changing congestion control

Added support for changing congestion control for SOCK_OPS bpf
programs through the setsockopt bpf helper function. It also adds
a new SOCK_OPS op, BPF_SOCK_OPS_NEEDS_ECN, that is needed for
congestion controls, like dctcp, that need to enable ECN in the
SYN packets.
Signed-off-by: NLawrence Brakmo <brakmo@fb.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 d9925368
......@@ -1004,7 +1004,9 @@ void tcp_get_default_congestion_control(char *name);
void tcp_get_available_congestion_control(char *buf, size_t len);
void tcp_get_allowed_congestion_control(char *buf, size_t len);
int tcp_set_allowed_congestion_control(char *allowed);
int tcp_set_congestion_control(struct sock *sk, const char *name);
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load);
void tcp_reinit_congestion_control(struct sock *sk,
const struct tcp_congestion_ops *ca);
u32 tcp_slow_start(struct tcp_sock *tp, u32 acked);
void tcp_cong_avoid_ai(struct tcp_sock *tp, u32 w, u32 acked);
......@@ -2078,4 +2080,9 @@ static inline u32 tcp_rwnd_init_bpf(struct sock *sk)
rwnd = 0;
return rwnd;
}
static inline bool tcp_bpf_ca_needs_ecn(struct sock *sk)
{
return (tcp_call_bpf(sk, BPF_SOCK_OPS_NEEDS_ECN) == 1);
}
#endif /* _TCP_H */
......@@ -778,6 +778,9 @@ enum {
* passive connection is
* established
*/
BPF_SOCK_OPS_NEEDS_ECN, /* If connection's congestion control
* needs ECN
*/
};
#endif /* _UAPI__LINUX_BPF_H__ */
......@@ -2719,8 +2719,24 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
}
} else if (level == SOL_TCP &&
sk->sk_prot->setsockopt == tcp_setsockopt) {
/* Place holder */
#ifdef CONFIG_INET
if (optname == TCP_CONGESTION) {
char name[TCP_CA_NAME_MAX];
strncpy(name, optval, min_t(long, optlen,
TCP_CA_NAME_MAX-1));
name[TCP_CA_NAME_MAX-1] = 0;
ret = tcp_set_congestion_control(sk, name, false);
if (!ret && bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
/* replacing an existing ca */
tcp_reinit_congestion_control(sk,
inet_csk(sk)->icsk_ca_ops);
} else {
ret = -EINVAL;
}
#else
ret = -EINVAL;
#endif
} else {
ret = -EINVAL;
}
......
......@@ -2481,7 +2481,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
name[val] = 0;
lock_sock(sk);
err = tcp_set_congestion_control(sk, name);
err = tcp_set_congestion_control(sk, name, true);
release_sock(sk);
return err;
}
......
......@@ -189,8 +189,8 @@ void tcp_init_congestion_control(struct sock *sk)
INET_ECN_dontxmit(sk);
}
static void tcp_reinit_congestion_control(struct sock *sk,
const struct tcp_congestion_ops *ca)
void tcp_reinit_congestion_control(struct sock *sk,
const struct tcp_congestion_ops *ca)
{
struct inet_connection_sock *icsk = inet_csk(sk);
......@@ -333,8 +333,12 @@ int tcp_set_allowed_congestion_control(char *val)
return ret;
}
/* Change congestion control for socket */
int tcp_set_congestion_control(struct sock *sk, const char *name)
/* Change congestion control for socket. If load is false, then it is the
* responsibility of the caller to call tcp_init_congestion_control or
* tcp_reinit_congestion_control (if the current congestion control was
* already initialized.
*/
int tcp_set_congestion_control(struct sock *sk, const char *name, bool load)
{
struct inet_connection_sock *icsk = inet_csk(sk);
const struct tcp_congestion_ops *ca;
......@@ -344,21 +348,29 @@ int tcp_set_congestion_control(struct sock *sk, const char *name)
return -EPERM;
rcu_read_lock();
ca = __tcp_ca_find_autoload(name);
if (!load)
ca = tcp_ca_find(name);
else
ca = __tcp_ca_find_autoload(name);
/* No change asking for existing value */
if (ca == icsk->icsk_ca_ops) {
icsk->icsk_ca_setsockopt = 1;
goto out;
}
if (!ca)
if (!ca) {
err = -ENOENT;
else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)))
} else if (!load) {
icsk->icsk_ca_ops = ca;
if (!try_module_get(ca->owner))
err = -EBUSY;
} else if (!((ca->flags & TCP_CONG_NON_RESTRICTED) ||
ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN))) {
err = -EPERM;
else if (!try_module_get(ca->owner))
} else if (!try_module_get(ca->owner)) {
err = -EBUSY;
else
} else {
tcp_reinit_congestion_control(sk, ca);
}
out:
rcu_read_unlock();
return err;
......
......@@ -6191,7 +6191,8 @@ static void tcp_ecn_create_request(struct request_sock *req,
ecn_ok = net->ipv4.sysctl_tcp_ecn || ecn_ok_dst;
if ((!ect && ecn_ok) || tcp_ca_needs_ecn(listen_sk) ||
(ecn_ok_dst & DST_FEATURE_ECN_CA))
(ecn_ok_dst & DST_FEATURE_ECN_CA) ||
tcp_bpf_ca_needs_ecn((struct sock *)req))
inet_rsk(req)->ecn_ok = 1;
}
......
......@@ -316,7 +316,8 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_CWR;
if (!(tp->ecn_flags & TCP_ECN_OK))
TCP_SKB_CB(skb)->tcp_flags &= ~TCPHDR_ECE;
else if (tcp_ca_needs_ecn(sk))
else if (tcp_ca_needs_ecn(sk) ||
tcp_bpf_ca_needs_ecn(sk))
INET_ECN_xmit(sk);
}
......@@ -324,8 +325,9 @@ static void tcp_ecn_send_synack(struct sock *sk, struct sk_buff *skb)
static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
{
struct tcp_sock *tp = tcp_sk(sk);
bool bpf_needs_ecn = tcp_bpf_ca_needs_ecn(sk);
bool use_ecn = sock_net(sk)->ipv4.sysctl_tcp_ecn == 1 ||
tcp_ca_needs_ecn(sk);
tcp_ca_needs_ecn(sk) || bpf_needs_ecn;
if (!use_ecn) {
const struct dst_entry *dst = __sk_dst_get(sk);
......@@ -339,7 +341,7 @@ static void tcp_ecn_send_syn(struct sock *sk, struct sk_buff *skb)
if (use_ecn) {
TCP_SKB_CB(skb)->tcp_flags |= TCPHDR_ECE | TCPHDR_CWR;
tp->ecn_flags = TCP_ECN_OK;
if (tcp_ca_needs_ecn(sk))
if (tcp_ca_needs_ecn(sk) || bpf_needs_ecn)
INET_ECN_xmit(sk);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册