diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 31c93a0bdbc296a3461e792ebc66752cfac76631..db6bdc3751268351da3126f57566639fce355b12 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1006,7 +1006,8 @@ struct bpf_sock_ops { /* Definitions for bpf_sock_ops_cb_flags */ #define BPF_SOCK_OPS_RTO_CB_FLAG (1<<0) #define BPF_SOCK_OPS_RETRANS_CB_FLAG (1<<1) -#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x3 /* Mask of all currently +#define BPF_SOCK_OPS_STATE_CB_FLAG (1<<2) +#define BPF_SOCK_OPS_ALL_CB_FLAGS 0x7 /* Mask of all currently * supported cb flags */ @@ -1054,6 +1055,32 @@ enum { * Arg3: return value of * tcp_transmit_skb (0 => success) */ + BPF_SOCK_OPS_STATE_CB, /* Called when TCP changes state. + * Arg1: old_state + * Arg2: new_state + */ +}; + +/* List of TCP states. There is a build check in net/ipv4/tcp.c to detect + * changes between the TCP and BPF versions. Ideally this should never happen. + * If it does, we need to add code to convert them before calling + * the BPF sock_ops function. + */ +enum { + BPF_TCP_ESTABLISHED = 1, + BPF_TCP_SYN_SENT, + BPF_TCP_SYN_RECV, + BPF_TCP_FIN_WAIT1, + BPF_TCP_FIN_WAIT2, + BPF_TCP_TIME_WAIT, + BPF_TCP_CLOSE, + BPF_TCP_CLOSE_WAIT, + BPF_TCP_LAST_ACK, + BPF_TCP_LISTEN, + BPF_TCP_CLOSING, /* Now a valid state */ + BPF_TCP_NEW_SYN_RECV, + + BPF_TCP_MAX_STATES /* Leave at the end! */ }; #define TCP_BPF_IW 1001 /* Set TCP initial congestion window */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 88b62441e7e96e8cfafc1cc28ef1c096c181560a..f013ddc191e0c8491b562c917e5a0a1cb68e19bd 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2042,6 +2042,30 @@ void tcp_set_state(struct sock *sk, int state) { int oldstate = sk->sk_state; + /* We defined a new enum for TCP states that are exported in BPF + * so as not force the internal TCP states to be frozen. The + * following checks will detect if an internal state value ever + * differs from the BPF value. If this ever happens, then we will + * need to remap the internal value to the BPF value before calling + * tcp_call_bpf_2arg. + */ + BUILD_BUG_ON((int)BPF_TCP_ESTABLISHED != (int)TCP_ESTABLISHED); + BUILD_BUG_ON((int)BPF_TCP_SYN_SENT != (int)TCP_SYN_SENT); + BUILD_BUG_ON((int)BPF_TCP_SYN_RECV != (int)TCP_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT1 != (int)TCP_FIN_WAIT1); + BUILD_BUG_ON((int)BPF_TCP_FIN_WAIT2 != (int)TCP_FIN_WAIT2); + BUILD_BUG_ON((int)BPF_TCP_TIME_WAIT != (int)TCP_TIME_WAIT); + BUILD_BUG_ON((int)BPF_TCP_CLOSE != (int)TCP_CLOSE); + BUILD_BUG_ON((int)BPF_TCP_CLOSE_WAIT != (int)TCP_CLOSE_WAIT); + BUILD_BUG_ON((int)BPF_TCP_LAST_ACK != (int)TCP_LAST_ACK); + BUILD_BUG_ON((int)BPF_TCP_LISTEN != (int)TCP_LISTEN); + BUILD_BUG_ON((int)BPF_TCP_CLOSING != (int)TCP_CLOSING); + BUILD_BUG_ON((int)BPF_TCP_NEW_SYN_RECV != (int)TCP_NEW_SYN_RECV); + BUILD_BUG_ON((int)BPF_TCP_MAX_STATES != (int)TCP_MAX_STATES); + + if (BPF_SOCK_OPS_TEST_FLAG(tcp_sk(sk), BPF_SOCK_OPS_STATE_CB_FLAG)) + tcp_call_bpf_2arg(sk, BPF_SOCK_OPS_STATE_CB, oldstate, state); + switch (state) { case TCP_ESTABLISHED: if (oldstate != TCP_ESTABLISHED)