diff --git a/include/net/tcp.h b/include/net/tcp.h index 58278669cc5572ec397f028b5888f574764e298a..e3fc667f9ac2601d8f9cb50261a7948c41709664 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1983,6 +1983,11 @@ enum hrtimer_restart tcp_pace_kick(struct hrtimer *timer); #define TCP_ULP_MAX 128 #define TCP_ULP_BUF_MAX (TCP_ULP_NAME_MAX*TCP_ULP_MAX) +enum { + TCP_ULP_TLS, + TCP_ULP_BPF, +}; + struct tcp_ulp_ops { struct list_head list; @@ -1991,12 +1996,15 @@ struct tcp_ulp_ops { /* cleanup ulp */ void (*release)(struct sock *sk); + int uid; char name[TCP_ULP_NAME_MAX]; + bool user_visible; struct module *owner; }; int tcp_register_ulp(struct tcp_ulp_ops *type); void tcp_unregister_ulp(struct tcp_ulp_ops *type); int tcp_set_ulp(struct sock *sk, const char *name); +int tcp_set_ulp_id(struct sock *sk, const int ulp); void tcp_get_available_ulp(char *buf, size_t len); void tcp_cleanup_ulp(struct sock *sk); diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 0314d1783d77adf823beedd0b0934f8e6de0680a..48c33417d13c0ad40154f25aeade0c9b4cafd96a 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -86,9 +86,10 @@ struct smap_psock { struct work_struct tx_work; struct work_struct gc_work; + struct proto *sk_proto; + void (*save_close)(struct sock *sk, long timeout); void (*save_data_ready)(struct sock *sk); void (*save_write_space)(struct sock *sk); - void (*save_state_change)(struct sock *sk); }; static inline struct smap_psock *smap_psock_sk(const struct sock *sk) @@ -96,12 +97,102 @@ static inline struct smap_psock *smap_psock_sk(const struct sock *sk) return rcu_dereference_sk_user_data(sk); } +static struct proto tcp_bpf_proto; +static int bpf_tcp_init(struct sock *sk) +{ + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + return -EINVAL; + } + + if (unlikely(psock->sk_proto)) { + rcu_read_unlock(); + return -EBUSY; + } + + psock->save_close = sk->sk_prot->close; + psock->sk_proto = sk->sk_prot; + sk->sk_prot = &tcp_bpf_proto; + rcu_read_unlock(); + return 0; +} + +static void bpf_tcp_release(struct sock *sk) +{ + struct smap_psock *psock; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + + if (likely(psock)) { + sk->sk_prot = psock->sk_proto; + psock->sk_proto = NULL; + } + rcu_read_unlock(); +} + +static void smap_release_sock(struct smap_psock *psock, struct sock *sock); + +static void bpf_tcp_close(struct sock *sk, long timeout) +{ + void (*close_fun)(struct sock *sk, long timeout); + struct smap_psock_map_entry *e, *tmp; + struct smap_psock *psock; + struct sock *osk; + + rcu_read_lock(); + psock = smap_psock_sk(sk); + if (unlikely(!psock)) { + rcu_read_unlock(); + return sk->sk_prot->close(sk, timeout); + } + + /* The psock may be destroyed anytime after exiting the RCU critial + * section so by the time we use close_fun the psock may no longer + * be valid. However, bpf_tcp_close is called with the sock lock + * held so the close hook and sk are still valid. + */ + close_fun = psock->save_close; + + write_lock_bh(&sk->sk_callback_lock); + list_for_each_entry_safe(e, tmp, &psock->maps, list) { + osk = cmpxchg(e->entry, sk, NULL); + if (osk == sk) { + list_del(&e->list); + smap_release_sock(psock, sk); + } + } + write_unlock_bh(&sk->sk_callback_lock); + rcu_read_unlock(); + close_fun(sk, timeout); +} + enum __sk_action { __SK_DROP = 0, __SK_PASS, __SK_REDIRECT, }; +static struct tcp_ulp_ops bpf_tcp_ulp_ops __read_mostly = { + .name = "bpf_tcp", + .uid = TCP_ULP_BPF, + .user_visible = false, + .owner = NULL, + .init = bpf_tcp_init, + .release = bpf_tcp_release, +}; + +static int bpf_tcp_ulp_register(void) +{ + tcp_bpf_proto = tcp_prot; + tcp_bpf_proto.close = bpf_tcp_close; + return tcp_register_ulp(&bpf_tcp_ulp_ops); +} + static int smap_verdict_func(struct smap_psock *psock, struct sk_buff *skb) { struct bpf_prog *prog = READ_ONCE(psock->bpf_verdict); @@ -166,68 +257,6 @@ static void smap_report_sk_error(struct smap_psock *psock, int err) sk->sk_error_report(sk); } -static void smap_release_sock(struct smap_psock *psock, struct sock *sock); - -/* Called with lock_sock(sk) held */ -static void smap_state_change(struct sock *sk) -{ - struct smap_psock_map_entry *e, *tmp; - struct smap_psock *psock; - struct socket_wq *wq; - struct sock *osk; - - rcu_read_lock(); - - /* Allowing transitions into an established syn_recv states allows - * for early binding sockets to a smap object before the connection - * is established. - */ - switch (sk->sk_state) { - case TCP_SYN_SENT: - case TCP_SYN_RECV: - case TCP_ESTABLISHED: - break; - case TCP_CLOSE_WAIT: - case TCP_CLOSING: - case TCP_LAST_ACK: - case TCP_FIN_WAIT1: - case TCP_FIN_WAIT2: - case TCP_LISTEN: - break; - case TCP_CLOSE: - /* Only release if the map entry is in fact the sock in - * question. There is a case where the operator deletes - * the sock from the map, but the TCP sock is closed before - * the psock is detached. Use cmpxchg to verify correct - * sock is removed. - */ - psock = smap_psock_sk(sk); - if (unlikely(!psock)) - break; - write_lock_bh(&sk->sk_callback_lock); - list_for_each_entry_safe(e, tmp, &psock->maps, list) { - osk = cmpxchg(e->entry, sk, NULL); - if (osk == sk) { - list_del(&e->list); - smap_release_sock(psock, sk); - } - } - write_unlock_bh(&sk->sk_callback_lock); - break; - default: - psock = smap_psock_sk(sk); - if (unlikely(!psock)) - break; - smap_report_sk_error(psock, EPIPE); - break; - } - - wq = rcu_dereference(sk->sk_wq); - if (skwq_has_sleeper(wq)) - wake_up_interruptible_all(&wq->wait); - rcu_read_unlock(); -} - static void smap_read_sock_strparser(struct strparser *strp, struct sk_buff *skb) { @@ -322,10 +351,8 @@ static void smap_stop_sock(struct smap_psock *psock, struct sock *sk) return; sk->sk_data_ready = psock->save_data_ready; sk->sk_write_space = psock->save_write_space; - sk->sk_state_change = psock->save_state_change; psock->save_data_ready = NULL; psock->save_write_space = NULL; - psock->save_state_change = NULL; strp_stop(&psock->strp); psock->strp_enabled = false; } @@ -350,6 +377,7 @@ static void smap_release_sock(struct smap_psock *psock, struct sock *sock) if (psock->refcnt) return; + tcp_cleanup_ulp(sock); smap_stop_sock(psock, sock); clear_bit(SMAP_TX_RUNNING, &psock->state); rcu_assign_sk_user_data(sock, NULL); @@ -427,10 +455,8 @@ static void smap_start_sock(struct smap_psock *psock, struct sock *sk) return; psock->save_data_ready = sk->sk_data_ready; psock->save_write_space = sk->sk_write_space; - psock->save_state_change = sk->sk_state_change; sk->sk_data_ready = smap_data_ready; sk->sk_write_space = smap_write_space; - sk->sk_state_change = smap_state_change; psock->strp_enabled = true; } @@ -509,6 +535,10 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) if (attr->value_size > KMALLOC_MAX_SIZE) return ERR_PTR(-E2BIG); + err = bpf_tcp_ulp_register(); + if (err && err != -EEXIST) + return ERR_PTR(err); + stab = kzalloc(sizeof(*stab), GFP_USER); if (!stab) return ERR_PTR(-ENOMEM); @@ -590,11 +620,6 @@ static void sock_map_free(struct bpf_map *map) } rcu_read_unlock(); - if (stab->bpf_verdict) - bpf_prog_put(stab->bpf_verdict); - if (stab->bpf_parse) - bpf_prog_put(stab->bpf_parse); - sock_map_remove_complete(stab); } @@ -754,6 +779,10 @@ static int sock_map_ctx_update_elem(struct bpf_sock_ops_kern *skops, goto out_progs; } + err = tcp_set_ulp_id(sock, TCP_ULP_BPF); + if (err) + goto out_progs; + set_bit(SMAP_TX_RUNNING, &psock->state); } @@ -866,6 +895,19 @@ static int sock_map_update_elem(struct bpf_map *map, return err; } +static void sock_map_release(struct bpf_map *map, struct file *map_file) +{ + struct bpf_stab *stab = container_of(map, struct bpf_stab, map); + struct bpf_prog *orig; + + orig = xchg(&stab->bpf_parse, NULL); + if (orig) + bpf_prog_put(orig); + orig = xchg(&stab->bpf_verdict, NULL); + if (orig) + bpf_prog_put(orig); +} + const struct bpf_map_ops sock_map_ops = { .map_alloc = sock_map_alloc, .map_free = sock_map_free, @@ -873,6 +915,7 @@ const struct bpf_map_ops sock_map_ops = { .map_get_next_key = sock_map_get_next_key, .map_update_elem = sock_map_update_elem, .map_delete_elem = sock_map_delete_elem, + .map_release = sock_map_release, }; BPF_CALL_4(bpf_sock_map_update, struct bpf_sock_ops_kern *, bpf_sock, diff --git a/net/ipv4/tcp_ulp.c b/net/ipv4/tcp_ulp.c index 6bb9e14c710a7e2bfa58ee63ff6e02461a22cbec..622caa4039e025d70d1b0592fab554ab747dc666 100644 --- a/net/ipv4/tcp_ulp.c +++ b/net/ipv4/tcp_ulp.c @@ -29,6 +29,18 @@ static struct tcp_ulp_ops *tcp_ulp_find(const char *name) return NULL; } +static struct tcp_ulp_ops *tcp_ulp_find_id(const int ulp) +{ + struct tcp_ulp_ops *e; + + list_for_each_entry_rcu(e, &tcp_ulp_list, list) { + if (e->uid == ulp) + return e; + } + + return NULL; +} + static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) { const struct tcp_ulp_ops *ulp = NULL; @@ -51,6 +63,18 @@ static const struct tcp_ulp_ops *__tcp_ulp_find_autoload(const char *name) return ulp; } +static const struct tcp_ulp_ops *__tcp_ulp_lookup(const int uid) +{ + const struct tcp_ulp_ops *ulp; + + rcu_read_lock(); + ulp = tcp_ulp_find_id(uid); + if (!ulp || !try_module_get(ulp->owner)) + ulp = NULL; + rcu_read_unlock(); + return ulp; +} + /* Attach new upper layer protocol to the list * of available protocols. */ @@ -59,13 +83,10 @@ int tcp_register_ulp(struct tcp_ulp_ops *ulp) int ret = 0; spin_lock(&tcp_ulp_list_lock); - if (tcp_ulp_find(ulp->name)) { - pr_notice("%s already registered or non-unique name\n", - ulp->name); + if (tcp_ulp_find(ulp->name)) ret = -EEXIST; - } else { + else list_add_tail_rcu(&ulp->list, &tcp_ulp_list); - } spin_unlock(&tcp_ulp_list_lock); return ret; @@ -124,6 +145,34 @@ int tcp_set_ulp(struct sock *sk, const char *name) if (!ulp_ops) return -ENOENT; + if (!ulp_ops->user_visible) { + module_put(ulp_ops->owner); + return -ENOENT; + } + + err = ulp_ops->init(sk); + if (err) { + module_put(ulp_ops->owner); + return err; + } + + icsk->icsk_ulp_ops = ulp_ops; + return 0; +} + +int tcp_set_ulp_id(struct sock *sk, int ulp) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + const struct tcp_ulp_ops *ulp_ops; + int err; + + if (icsk->icsk_ulp_ops) + return -EEXIST; + + ulp_ops = __tcp_ulp_lookup(ulp); + if (!ulp_ops) + return -ENOENT; + err = ulp_ops->init(sk); if (err) { module_put(ulp_ops->owner); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 736719c8314e83e0c0fca2dabed30a1535fbfdcb..b0d5fcea47e73488b355b479022f77cb6b97f1c8 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -484,6 +484,8 @@ static int tls_init(struct sock *sk) static struct tcp_ulp_ops tcp_tls_ulp_ops __read_mostly = { .name = "tls", + .uid = TCP_ULP_TLS, + .user_visible = true, .owner = THIS_MODULE, .init = tls_init, };