Unverified · Commit 6dc08b44 authored by openeuler-ci-bot, committed by Gitee

!1131 [sync] PR-1081: some backport bugfix for sockmap

Merge Pull Request from: @openeuler-sync-bot 
 

Origin pull request: 
https://gitee.com/openeuler/kernel/pulls/1081 
 
PR sync from:  Liu Jian <liujian56@huawei.com>
 https://mailweb.openeuler.org/hyperkitty/list/kernel@openeuler.org/thread/24ZK7TI2Q55BY7U53AB2LYQUFTABZR4L/ 
some backport bugfix for sockmap

Cong Wang (1):
  bpf, sock_map: Move cancel_work_sync() out of sock lock

Eric Dumazet (1):
  net: deal with most data-races in sk_wait_event()

Jakub Sitnicki (2):
  bpf, sockmap: Check for any of tcp_bpf_prots when cloning a listener
  bpf, sockmap: Don't let sock_map_{close,destroy,unhash} call itself

Pengcheng Yang (3):
  bpf, sockmap: Fix repeated calls to sock_put() when msg has more_data
  bpf, sockmap: Fix missing BPF_F_INGRESS flag when using apply_bytes
  bpf, sockmap: Fix data loss caused by using apply_bytes on ingress
    redirect

Wang Yufen (1):
  bpf, sockmap: Fix the sk->sk_forward_alloc warning of
    sk_stream_kill_queues

zhangmingyi (1):
  bpf: fix bpf_tcp_ingress addr use after free


-- 
2.34.1
 
 
Link: https://gitee.com/openeuler/kernel/pulls/1131 

Reviewed-by: Yue Haibing <yuehaibing@huawei.com> 
Signed-off-by: Jialin Zhang <zhangjialin11@huawei.com> 
@@ -94,6 +94,7 @@ struct sk_psock {
         u32                             apply_bytes;
         u32                             cork_bytes;
         u32                             eval;
+        bool                            redir_ingress; /* undefined if sk_redir is null */
         struct sk_msg                   *cork;
         struct sk_psock_progs           progs;
         struct sk_psock_parser          parser;
@@ -393,7 +394,7 @@ static inline void sk_psock_report_error(struct sk_psock *psock, int err)
 }

 struct sk_psock *sk_psock_init(struct sock *sk, int node);
-void sk_psock_stop(struct sk_psock *psock, bool wait);
+void sk_psock_stop(struct sk_psock *psock);

 int sk_psock_init_strp(struct sock *sk, struct sk_psock *psock);
 void sk_psock_start_strp(struct sock *sk, struct sk_psock *psock);
......
@@ -38,4 +38,16 @@
  */
 #define find_closest_descending(x, a, as) __find_closest(x, a, as, >=)

+/**
+ * is_insidevar - check if the @ptr points inside the @var memory range.
+ * @ptr: the pointer to a memory address.
+ * @var: the variable which address and size identify the memory range.
+ *
+ * Evaluates to true if the address in @ptr lies within the memory
+ * range allocated to @var.
+ */
+#define is_insidevar(ptr, var) \
+        ((uintptr_t)(ptr) >= (uintptr_t)(var) && \
+         (uintptr_t)(ptr) < (uintptr_t)(var) + sizeof(var))
+
 #endif
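
Note on the macro above: it is used with array variables, where the array
name decays to the base address while sizeof() still covers the whole
array. A minimal userspace sketch of the behaviour (all names below are
illustrative, not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-alone copy of the macro, for demonstration only. */
    #define is_insidevar(ptr, var) \
            ((uintptr_t)(ptr) >= (uintptr_t)(var) && \
             (uintptr_t)(ptr) < (uintptr_t)(var) + sizeof(var))

    struct proto { int placeholder; };

    /* Shaped like the kernel's tcp_bpf_prots[family][config] table. */
    static struct proto example_prots[2][4];

    int main(void)
    {
            struct proto *member = &example_prots[1][3];
            struct proto unrelated;

            printf("%d\n", is_insidevar(member, example_prots));     /* 1 */
            printf("%d\n", is_insidevar(&unrelated, example_prots)); /* 0 */
            return 0;
    }

This is what the tcp_bpf_clone() hunk further down relies on: any pointer
into the two-dimensional tcp_bpf_prots table is recognized, not just one
fixed element.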
@@ -2238,8 +2238,8 @@ static inline void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
 #endif /* CONFIG_BPF_STREAM_PARSER */

 #ifdef CONFIG_NET_SOCK_MSG
-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg, u32 bytes,
-                          int flags);
+int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+                          struct sk_msg *msg, u32 bytes, int flags);
 int __tcp_bpf_recvmsg(struct sock *sk, struct sk_psock *psock,
                       struct msghdr *msg, int len, int flags);
 #endif /* CONFIG_NET_SOCK_MSG */
......
@@ -695,16 +695,13 @@ static void sk_psock_link_destroy(struct sk_psock *psock)
         }
 }

-void sk_psock_stop(struct sk_psock *psock, bool wait)
+void sk_psock_stop(struct sk_psock *psock)
 {
         spin_lock_bh(&psock->ingress_lock);
         sk_psock_clear_state(psock, SK_PSOCK_TX_ENABLED);
         sk_psock_cork_free(psock);
         __sk_psock_zap_ingress(psock);
         spin_unlock_bh(&psock->ingress_lock);
-
-        if (wait)
-                cancel_work_sync(&psock->work);
 }

 static void sk_psock_destroy_deferred(struct work_struct *gc)
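
Context for the removed wait path: sk_psock_stop() could be reached with
the sock lock held, while psock->work (sk_psock_backlog()) can itself end
up needing that lock, so cancel_work_sync() under the lock risks a
deadlock. An illustrative interleaving (a sketch, not an actual trace):

    /*
     *  CPU0 (holds lock_sock(sk))           CPU1 (runs psock->work)
     *  ---------------------------          ------------------------------
     *  sk_psock_stop(psock, true)           sk_psock_backlog()
     *    cancel_work_sync(&psock->work);      needs the sock lock, blocks
     *    // waits for CPU1 forever            // waits for CPU0 forever
     */

The replacement scheme appears in the sock_map_close() hunk below: stop
the psock under the lock, then call cancel_work_sync() only after
release_sock().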
@@ -750,7 +747,7 @@ void sk_psock_drop(struct sock *sk, struct sk_psock *psock)
         sk_psock_stop_verdict(sk, psock);
         write_unlock_bh(&sk->sk_callback_lock);
-        sk_psock_stop(psock, false);
+        sk_psock_stop(psock);

         call_rcu(&psock->rcu, sk_psock_destroy);
 }
 EXPORT_SYMBOL_GPL(sk_psock_drop);
@@ -787,13 +784,16 @@ int sk_psock_msg_verdict(struct sock *sk, struct sk_psock *psock,
         ret = sk_psock_map_verd(ret, msg->sk_redir);
         psock->apply_bytes = msg->apply_bytes;
         if (ret == __SK_REDIRECT) {
-                if (psock->sk_redir)
+                if (psock->sk_redir) {
                         sock_put(psock->sk_redir);
-                psock->sk_redir = msg->sk_redir;
-                if (!psock->sk_redir) {
+                        psock->sk_redir = NULL;
+                }
+                if (!msg->sk_redir) {
                         ret = __SK_DROP;
                         goto out;
                 }
+                psock->redir_ingress = sk_msg_to_ingress(msg);
+                psock->sk_redir = msg->sk_redir;
                 sock_hold(psock->sk_redir);
         }
 out:
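
Two changes meet in this hunk. Clearing psock->sk_redir right after
sock_put() leaves no stale pointer behind, and the drop decision now tests
msg->sk_redir (the socket picked by this verdict) instead of the
just-cleared field; together with re-initializing eval on every pass of
tcp_bpf_send_verdict() (see the tcp_bpf.c hunks below), this stops a
message with more_data from putting the same redirect socket twice. The
new redir_ingress field records the BPF_F_INGRESS decision at verdict
time, so when apply_bytes spreads one verdict across several sends, the
later chunks no longer lose the ingress direction.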
......
@@ -1623,15 +1623,16 @@ void sock_map_unhash(struct sock *sk)
         psock = sk_psock(sk);
         if (unlikely(!psock)) {
                 rcu_read_unlock();
-                if (sk->sk_prot->unhash)
-                        sk->sk_prot->unhash(sk);
-                return;
+                saved_unhash = READ_ONCE(sk->sk_prot)->unhash;
+        } else {
+                saved_unhash = psock->saved_unhash;
+                sock_map_remove_links(sk, psock);
+                rcu_read_unlock();
         }
-        saved_unhash = psock->saved_unhash;
-        sock_map_remove_links(sk, psock);
-        rcu_read_unlock();
-        saved_unhash(sk);
+        if (WARN_ON_ONCE(saved_unhash == sock_map_unhash))
+                return;
+        if (saved_unhash)
+                saved_unhash(sk);
 }

 void sock_map_destroy(struct sock *sk)
@@ -1643,17 +1644,18 @@ void sock_map_destroy(struct sock *sk)
         psock = sk_psock_get(sk);
         if (unlikely(!psock)) {
                 rcu_read_unlock();
-                if (sk->sk_prot->destroy)
-                        sk->sk_prot->destroy(sk);
-                return;
+                saved_destroy = READ_ONCE(sk->sk_prot)->destroy;
+        } else {
+                saved_destroy = psock->saved_destroy;
+                sock_map_remove_links(sk, psock);
+                rcu_read_unlock();
+                sk_psock_stop(psock);
+                sk_psock_put(sk, psock);
         }
-        saved_destroy = psock->saved_destroy;
-        sock_map_remove_links(sk, psock);
-        rcu_read_unlock();
-        sk_psock_stop(psock, false);
-        sk_psock_put(sk, psock);
-        saved_destroy(sk);
+        if (WARN_ON_ONCE(saved_destroy == sock_map_destroy))
+                return;
+        if (saved_destroy)
+                saved_destroy(sk);
 }
 EXPORT_SYMBOL_GPL(sock_map_destroy);
@@ -1668,15 +1670,21 @@ void sock_map_close(struct sock *sk, long timeout)
         if (unlikely(!psock)) {
                 rcu_read_unlock();
                 release_sock(sk);
-                return sk->sk_prot->close(sk, timeout);
+                saved_close = READ_ONCE(sk->sk_prot)->close;
+        } else {
+                saved_close = psock->saved_close;
+                sock_map_remove_links(sk, psock);
+                rcu_read_unlock();
+                sk_psock_stop(psock);
+                release_sock(sk);
+                cancel_work_sync(&psock->work);
+                sk_psock_put(sk, psock);
         }
-        saved_close = psock->saved_close;
-        sock_map_remove_links(sk, psock);
-        rcu_read_unlock();
-        sk_psock_stop(psock, true);
-        sk_psock_put(sk, psock);
-        release_sock(sk);
+
+        /* Make sure we do not recurse. This is a bug.
+         * Leak the socket instead of crashing on a stack overflow.
+         */
+        if (WARN_ON_ONCE(saved_close == sock_map_close))
+                return;
         saved_close(sk, timeout);
 }
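
The WARN_ON_ONCE() guards added to unhash/destroy/close are defensive: if
the saved callback still points back at the sockmap wrapper, calling it
would recurse without bound. A sketch of the failure mode the guard
converts into a one-time warning:

    /*
     * sock_map_close(sk, timeout)
     *   -> saved_close == sock_map_close   // self-referencing sk_prot
     *     -> sock_map_close(sk, timeout)
     *       -> ... until the kernel stack overflows
     *
     * With the guard, the socket is deliberately leaked instead.
     */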
......
@@ -73,8 +73,8 @@ int sk_stream_wait_connect(struct sock *sk, long *timeo_p)
                 add_wait_queue(sk_sleep(sk), &wait);
                 sk->sk_write_pending++;
                 done = sk_wait_event(sk, timeo_p,
-                                     !sk->sk_err &&
-                                     !((1 << sk->sk_state) &
+                                     !READ_ONCE(sk->sk_err) &&
+                                     !((1 << READ_ONCE(sk->sk_state)) &
                                        ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)), &wait);
                 remove_wait_queue(sk_sleep(sk), &wait);
                 sk->sk_write_pending--;
@@ -87,9 +87,9 @@ EXPORT_SYMBOL(sk_stream_wait_connect);
  * sk_stream_closing - Return 1 if we still have things to send in our buffers.
  * @sk: socket to verify
  */
-static inline int sk_stream_closing(struct sock *sk)
+static int sk_stream_closing(const struct sock *sk)
 {
-        return (1 << sk->sk_state) &
+        return (1 << READ_ONCE(sk->sk_state)) &
                (TCPF_FIN_WAIT1 | TCPF_CLOSING | TCPF_LAST_ACK);
 }
@@ -142,8 +142,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p)
                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                 sk->sk_write_pending++;
-                sk_wait_event(sk, &current_timeo, sk->sk_err ||
-                              (sk->sk_shutdown & SEND_SHUTDOWN) ||
+                sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) ||
+                              (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
                               (sk_stream_memory_free(sk) &&
                               !vm_wait), &wait);
                 sk->sk_write_pending--;
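
The sk_wait_event() changes in this and the following files share one
pattern: the wait condition is evaluated without the sock lock, so fields
that other contexts write concurrently (sk_err, sk_state, sk_shutdown,
sk_write_pending) get wrapped in READ_ONCE() to force a fresh, tear-free
load on every evaluation. A minimal userspace analogue (the READ_ONCE()
below is a simplified stand-in for the kernel's):

    #include <stdio.h>

    /* Simplified stand-in for the kernel's READ_ONCE(). */
    #define READ_ONCE(x) (*(const volatile __typeof__(x) *)&(x))

    static int sk_err; /* imagine another thread storing to this */

    static int connect_failed(void)
    {
            /* Without the annotation the compiler may cache sk_err in a
             * register across the wait loop; with it, each call reloads
             * the value from memory.
             */
            return READ_ONCE(sk_err) != 0;
    }

    int main(void)
    {
            printf("%d\n", connect_failed());
            return 0;
    }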
......
@@ -6,6 +6,7 @@
 #include <linux/bpf.h>
 #include <linux/init.h>
 #include <linux/wait.h>
+#include <linux/util_macros.h>

 #include <net/inet_common.h>
 #include <net/tls.h>
@@ -126,15 +127,19 @@ static int bpf_tcp_ingress(struct sock *sk, struct sk_psock *psock,
                 tmp->sg.end = i;
                 if (apply) {
                         apply_bytes -= size;
-                        if (!apply_bytes)
+                        if (!apply_bytes) {
+                                if (sge->length)
+                                        sk_msg_iter_var_prev(i);
                                 break;
+                        }
                 }
         } while (i != msg->sg.end);

         if (!ret) {
                 msg->sg.start = i;
                 sk_psock_queue_msg(psock, tmp);
-                atomic_add(tmp->sg.size, &sk->sk_rmem_alloc);
+                if (sk_psock_test_state(psock, SK_PSOCK_TX_ENABLED))
+                        atomic_add(tmp->sg.size, &sk->sk_rmem_alloc);
                 sk_psock_data_ready(sk, psock);
         } else {
                 sk_msg_free(sk, tmp);
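
Two separate fixes land in this hunk. The sk_msg_iter_var_prev() call
handles apply_bytes running out on a partially-consumed scatterlist
element: the iterator has already advanced past it, so without stepping
back, msg->sg.start = i would abandon an element whose remaining bytes
are still live (the "bpf_tcp_ingress addr use after free" fix). The
SK_PSOCK_TX_ENABLED test mirrors sk_psock_queue_msg(), which only queues
the message while that flag is set; charging sk_rmem_alloc for a message
that is instead freed leaves the accounting inflated and trips the
sk_stream_kill_queues() warning this series addresses. A worked example
with hypothetical sizes:

    /*
     * msg holds one 4096-byte element, apply_bytes = 1024. The loop
     * transfers 1024 bytes, advances i past the element, then sees
     * apply_bytes == 0. Since sge->length is still 3072, the fixed code
     * steps i back, so msg->sg.start keeps pointing at the remaining
     * 3072 bytes instead of dropping them.
     */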
@@ -213,10 +218,9 @@ static int tcp_bpf_push_locked(struct sock *sk, struct sk_msg *msg,
         return ret;
 }

-int tcp_bpf_sendmsg_redir(struct sock *sk, struct sk_msg *msg,
-                          u32 bytes, int flags)
+int tcp_bpf_sendmsg_redir(struct sock *sk, bool ingress,
+                          struct sk_msg *msg, u32 bytes, int flags)
 {
-        bool ingress = sk_msg_to_ingress(msg);
         struct sk_psock *psock = sk_psock_get(sk);
         int ret;
@@ -260,7 +264,7 @@ static int tcp_bpf_wait_data(struct sock *sk, struct sk_psock *psock,
         sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
         ret = sk_wait_event(sk, &timeo,
                             !list_empty(&psock->ingress_msg) ||
-                            !skb_queue_empty(&sk->sk_receive_queue), &wait);
+                            !skb_queue_empty_lockless(&sk->sk_receive_queue), &wait);
         sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk);
         remove_wait_queue(sk_sleep(sk), &wait);
         return ret;
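
skb_queue_empty_lockless() differs from skb_queue_empty() only in loading
the queue's next pointer with READ_ONCE(); upstream it is essentially:

    static inline bool skb_queue_empty_lockless(const struct sk_buff_head *list)
    {
            return READ_ONCE(list->next) == (const struct sk_buff *)list;
    }

which is the right variant here, where the wait loop checks the receive
queue without holding its lock.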
@@ -383,10 +387,10 @@ static int tcp_bpf_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
 static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
                                 struct sk_msg *msg, int *copied, int flags)
 {
-        bool cork = false, enospc = sk_msg_full(msg);
+        bool cork = false, enospc = sk_msg_full(msg), redir_ingress;
         struct sock *sk_redir;
-        u32 tosend, delta = 0;
-        u32 eval = __SK_NONE;
+        u32 tosend, origsize, sent, delta = 0;
+        u32 eval;
         int ret;

more_data:
@@ -417,6 +421,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
         tosend = msg->sg.size;
         if (psock->apply_bytes && psock->apply_bytes < tosend)
                 tosend = psock->apply_bytes;
+        eval = __SK_NONE;

         switch (psock->eval) {
         case __SK_PASS:
@@ -428,6 +433,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
                 sk_msg_apply_bytes(psock, tosend);
                 break;
         case __SK_REDIRECT:
+                redir_ingress = psock->redir_ingress;
                 sk_redir = psock->sk_redir;
                 sk_msg_apply_bytes(psock, tosend);
                 if (!psock->apply_bytes) {
@@ -440,10 +446,13 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
                         cork = true;
                         psock->cork = NULL;
                 }
-                sk_msg_return(sk, msg, msg->sg.size);
+                sk_msg_return(sk, msg, tosend);
                 release_sock(sk);

-                ret = tcp_bpf_sendmsg_redir(sk_redir, msg, tosend, flags);
+                origsize = msg->sg.size;
+                ret = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
+                                            msg, tosend, flags);
+                sent = origsize - msg->sg.size;

                 if (eval == __SK_REDIRECT)
                         sock_put(sk_redir);
@@ -482,7 +491,7 @@ static int tcp_bpf_send_verdict(struct sock *sk, struct sk_psock *psock,
                     msg->sg.data[msg->sg.start].page_link &&
                     msg->sg.data[msg->sg.start].length) {
                         if (eval == __SK_REDIRECT)
-                                sk_mem_charge(sk, msg->sg.size);
+                                sk_mem_charge(sk, tosend - sent);
                         goto more_data;
                 }
         }
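
The tosend/origsize/sent bookkeeping is easiest to follow with numbers
(hypothetical, for illustration only):

    /*
     * msg->sg.size = 150, apply_bytes = 100  =>  tosend = 100
     *
     * sk_msg_return(sk, msg, tosend);     // uncharge the 100 bytes being
     *                                     // handed off (old code wrongly
     *                                     // uncharged all 150)
     * origsize = msg->sg.size;            // 150, size before the redirect
     * tcp_bpf_sendmsg_redir(...);         // consumes, say, 70 bytes
     * sent = origsize - msg->sg.size;     // 150 - 80 = 70
     *
     * On the more_data retry, sk_mem_charge(sk, tosend - sent) re-charges
     * the 30 bytes that were uncharged but never left this socket, keeping
     * sk_forward_alloc balanced.
     */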
@@ -719,10 +728,9 @@ struct proto *tcp_bpf_get_proto(struct sock *sk, struct sk_psock *psock)
  */
 void tcp_bpf_clone(const struct sock *sk, struct sock *newsk)
 {
-        int family = sk->sk_family == AF_INET6 ? TCP_BPF_IPV6 : TCP_BPF_IPV4;
         struct proto *prot = newsk->sk_prot;

-        if (prot == &tcp_bpf_prots[family][TCP_BPF_BASE])
+        if (is_insidevar(prot, tcp_bpf_prots))
                 newsk->sk_prot = sk->sk_prot_creator;
 }
 #endif /* CONFIG_BPF_STREAM_PARSER */
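
On clone, a child socket inherits sk_prot from its listener. The old
check restored sk_prot_creator only when the listener used the
TCP_BPF_BASE slot for its own address family, so a listener running any
other tcp_bpf_prots variant would hand the child a BPF-flavoured proto it
never asked for. With is_insidevar() (added above), a pointer anywhere
inside the two-dimensional tcp_bpf_prots array is caught.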
@@ -582,7 +582,8 @@ static int llc_ui_wait_for_disc(struct sock *sk, long timeout)
         add_wait_queue(sk_sleep(sk), &wait);
         while (1) {
-                if (sk_wait_event(sk, &timeout, sk->sk_state == TCP_CLOSE, &wait))
+                if (sk_wait_event(sk, &timeout,
+                                  READ_ONCE(sk->sk_state) == TCP_CLOSE, &wait))
                         break;
                 rc = -ERESTARTSYS;
                 if (signal_pending(current))
@@ -602,7 +603,8 @@ static bool llc_ui_wait_for_conn(struct sock *sk, long timeout)
         add_wait_queue(sk_sleep(sk), &wait);
         while (1) {
-                if (sk_wait_event(sk, &timeout, sk->sk_state != TCP_SYN_SENT, &wait))
+                if (sk_wait_event(sk, &timeout,
+                                  READ_ONCE(sk->sk_state) != TCP_SYN_SENT, &wait))
                         break;
                 if (signal_pending(current) || !timeout)
                         break;
@@ -621,7 +623,7 @@ static int llc_ui_wait_for_busy_core(struct sock *sk, long timeout)
         while (1) {
                 rc = 0;
                 if (sk_wait_event(sk, &timeout,
-                                  (sk->sk_shutdown & RCV_SHUTDOWN) ||
+                                  (READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN) ||
                                   (!llc_data_accept_state(llc->state) &&
                                    !llc->remote_busy_flag &&
                                    !llc->p_flag), &wait))
......
@@ -64,8 +64,8 @@ static void smc_close_stream_wait(struct smc_sock *smc, long timeout)
                 rc = sk_wait_event(sk, &timeout,
                                    !smc_tx_prepared_sends(&smc->conn) ||
-                                   sk->sk_err == ECONNABORTED ||
-                                   sk->sk_err == ECONNRESET ||
+                                   READ_ONCE(sk->sk_err) == ECONNABORTED ||
+                                   READ_ONCE(sk->sk_err) == ECONNRESET ||
                                    smc->conn.killed,
                                    &wait);
                 if (rc)
......
@@ -203,9 +203,9 @@ int smc_rx_wait(struct smc_sock *smc, long *timeo,
         sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk);
         add_wait_queue(sk_sleep(sk), &wait);
         rc = sk_wait_event(sk, timeo,
-                           sk->sk_err ||
+                           READ_ONCE(sk->sk_err) ||
                            cflags->peer_conn_abort ||
-                           sk->sk_shutdown & RCV_SHUTDOWN ||
+                           READ_ONCE(sk->sk_shutdown) & RCV_SHUTDOWN ||
                            conn->killed ||
                            fcrit(conn),
                            &wait);
......
@@ -110,8 +110,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags)
                         break; /* at least 1 byte of free & no urgent data */
                 set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
                 sk_wait_event(sk, &timeo,
-                              sk->sk_err ||
-                              (sk->sk_shutdown & SEND_SHUTDOWN) ||
+                              READ_ONCE(sk->sk_err) ||
+                              (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) ||
                               smc_cdc_rxed_any_close(conn) ||
                               (atomic_read(&conn->sndbuf_space) &&
                                !conn->urg_tx_pend),
......
@@ -300,9 +300,9 @@ static void tsk_rej_rx_queue(struct sock *sk, int error)
                 tipc_sk_respond(sk, skb, error);
 }

-static bool tipc_sk_connected(struct sock *sk)
+static bool tipc_sk_connected(const struct sock *sk)
 {
-        return sk->sk_state == TIPC_ESTABLISHED;
+        return READ_ONCE(sk->sk_state) == TIPC_ESTABLISHED;
 }

 /* tipc_sk_type_connectionless - check if the socket is datagram socket
......
@@ -92,7 +92,8 @@ int wait_on_pending_writer(struct sock *sk, long *timeo)
                         break;
                 }

-                if (sk_wait_event(sk, timeo, !sk->sk_write_pending, &wait))
+                if (sk_wait_event(sk, timeo,
+                                  !READ_ONCE(sk->sk_write_pending), &wait))
                         break;
         }
         remove_wait_queue(sk_sleep(sk), &wait);
......
@@ -807,7 +807,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
         struct sk_psock *psock;
         struct sock *sk_redir;
         struct tls_rec *rec;
-        bool enospc, policy;
+        bool enospc, policy, redir_ingress;
         int err = 0, send;
         u32 delta = 0;
@@ -852,6 +852,7 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
                 }
                 break;
         case __SK_REDIRECT:
+                redir_ingress = psock->redir_ingress;
                 sk_redir = psock->sk_redir;
                 memcpy(&msg_redir, msg, sizeof(*msg));
                 if (msg->apply_bytes < send)
@@ -861,7 +862,8 @@ static int bpf_exec_tx_verdict(struct sk_msg *msg, struct sock *sk,
                 sk_msg_return_zero(sk, msg, send);
                 msg->sg.size -= send;
                 release_sock(sk);
-                err = tcp_bpf_sendmsg_redir(sk_redir, &msg_redir, send, flags);
+                err = tcp_bpf_sendmsg_redir(sk_redir, redir_ingress,
+                                            &msg_redir, send, flags);
                 lock_sock(sk);
                 if (err < 0) {
                         *copied -= sk_msg_free_nocharge(sk, &msg_redir);
......