提交 90b2be27 编写于 作者: E Eric Dumazet 提交者: David S. Miller

net/sched: annotate lockless accesses to qdisc->empty

KCSAN reported the following race [1]

BUG: KCSAN: data-race in __dev_queue_xmit / net_tx_action

read to 0xffff8880ba403508 of 1 bytes by task 21814 on cpu 1:
 __dev_xmit_skb net/core/dev.c:3389 [inline]
 __dev_queue_xmit+0x9db/0x1b40 net/core/dev.c:3761
 dev_queue_xmit+0x21/0x30 net/core/dev.c:3825
 neigh_hh_output include/net/neighbour.h:500 [inline]
 neigh_output include/net/neighbour.h:509 [inline]
 ip6_finish_output2+0x873/0xec0 net/ipv6/ip6_output.c:116
 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
 __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
 NF_HOOK_COND include/linux/netfilter.h:294 [inline]
 ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
 dst_output include/net/dst.h:436 [inline]
 ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179
 ip6_send_skb+0x53/0x110 net/ipv6/ip6_output.c:1795
 udp_v6_send_skb.isra.0+0x3ec/0xa70 net/ipv6/udp.c:1173
 udpv6_sendmsg+0x1906/0x1c20 net/ipv6/udp.c:1471
 inet6_sendmsg+0x6d/0x90 net/ipv6/af_inet6.c:576
 sock_sendmsg_nosec net/socket.c:637 [inline]
 sock_sendmsg+0x9f/0xc0 net/socket.c:657
 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311
 __sys_sendmmsg+0x123/0x350 net/socket.c:2413
 __do_sys_sendmmsg net/socket.c:2442 [inline]
 __se_sys_sendmmsg net/socket.c:2439 [inline]
 __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439
 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

write to 0xffff8880ba403508 of 1 bytes by interrupt on cpu 0:
 qdisc_run_begin include/net/sch_generic.h:160 [inline]
 qdisc_run include/net/pkt_sched.h:120 [inline]
 net_tx_action+0x2b1/0x6c0 net/core/dev.c:4551
 __do_softirq+0x115/0x33f kernel/softirq.c:292
 do_softirq_own_stack+0x2a/0x40 arch/x86/entry/entry_64.S:1082
 do_softirq.part.0+0x6b/0x80 kernel/softirq.c:337
 do_softirq kernel/softirq.c:329 [inline]
 __local_bh_enable_ip+0x76/0x80 kernel/softirq.c:189
 local_bh_enable include/linux/bottom_half.h:32 [inline]
 rcu_read_unlock_bh include/linux/rcupdate.h:688 [inline]
 ip6_finish_output2+0x7bb/0xec0 net/ipv6/ip6_output.c:117
 __ip6_finish_output net/ipv6/ip6_output.c:142 [inline]
 __ip6_finish_output+0x2d7/0x330 net/ipv6/ip6_output.c:127
 ip6_finish_output+0x41/0x160 net/ipv6/ip6_output.c:152
 NF_HOOK_COND include/linux/netfilter.h:294 [inline]
 ip6_output+0xf2/0x280 net/ipv6/ip6_output.c:175
 dst_output include/net/dst.h:436 [inline]
 ip6_local_out+0x74/0x90 net/ipv6/output_core.c:179
 ip6_send_skb+0x53/0x110 net/ipv6/ip6_output.c:1795
 udp_v6_send_skb.isra.0+0x3ec/0xa70 net/ipv6/udp.c:1173
 udpv6_sendmsg+0x1906/0x1c20 net/ipv6/udp.c:1471
 inet6_sendmsg+0x6d/0x90 net/ipv6/af_inet6.c:576
 sock_sendmsg_nosec net/socket.c:637 [inline]
 sock_sendmsg+0x9f/0xc0 net/socket.c:657
 ___sys_sendmsg+0x2b7/0x5d0 net/socket.c:2311
 __sys_sendmmsg+0x123/0x350 net/socket.c:2413
 __do_sys_sendmmsg net/socket.c:2442 [inline]
 __se_sys_sendmmsg net/socket.c:2439 [inline]
 __x64_sys_sendmmsg+0x64/0x80 net/socket.c:2439
 do_syscall_64+0xcc/0x370 arch/x86/entry/common.c:290
 entry_SYSCALL_64_after_hwframe+0x44/0xa9

Reported by Kernel Concurrency Sanitizer on:
CPU: 0 PID: 21817 Comm: syz-executor.2 Not tainted 5.4.0-rc6+ #0
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011

Fixes: d518d2ed ("net/sched: fix race between deactivation and dequeue for NOLOCK qdisc")
Signed-off-by: NEric Dumazet <edumazet@google.com>
Reported-by: Nsyzbot <syzkaller@googlegroups.com>
Cc: Paolo Abeni <pabeni@redhat.com>
Cc: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 97c20ea8
...@@ -148,8 +148,8 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q) ...@@ -148,8 +148,8 @@ static inline bool qdisc_is_percpu_stats(const struct Qdisc *q)
static inline bool qdisc_is_empty(const struct Qdisc *qdisc) static inline bool qdisc_is_empty(const struct Qdisc *qdisc)
{ {
if (qdisc_is_percpu_stats(qdisc)) if (qdisc_is_percpu_stats(qdisc))
return qdisc->empty; return READ_ONCE(qdisc->empty);
return !qdisc->q.qlen; return !READ_ONCE(qdisc->q.qlen);
} }
static inline bool qdisc_run_begin(struct Qdisc *qdisc) static inline bool qdisc_run_begin(struct Qdisc *qdisc)
...@@ -157,7 +157,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) ...@@ -157,7 +157,7 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc)
if (qdisc->flags & TCQ_F_NOLOCK) { if (qdisc->flags & TCQ_F_NOLOCK) {
if (!spin_trylock(&qdisc->seqlock)) if (!spin_trylock(&qdisc->seqlock))
return false; return false;
qdisc->empty = false; WRITE_ONCE(qdisc->empty, false);
} else if (qdisc_is_running(qdisc)) { } else if (qdisc_is_running(qdisc)) {
return false; return false;
} }
......
...@@ -3607,7 +3607,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q, ...@@ -3607,7 +3607,7 @@ static inline int __dev_xmit_skb(struct sk_buff *skb, struct Qdisc *q,
qdisc_calculate_pkt_len(skb, q); qdisc_calculate_pkt_len(skb, q);
if (q->flags & TCQ_F_NOLOCK) { if (q->flags & TCQ_F_NOLOCK) {
if ((q->flags & TCQ_F_CAN_BYPASS) && q->empty && if ((q->flags & TCQ_F_CAN_BYPASS) && READ_ONCE(q->empty) &&
qdisc_run_begin(q)) { qdisc_run_begin(q)) {
if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED,
&q->state))) { &q->state))) {
......
...@@ -652,7 +652,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) ...@@ -652,7 +652,7 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
if (likely(skb)) { if (likely(skb)) {
qdisc_update_stats_at_dequeue(qdisc, skb); qdisc_update_stats_at_dequeue(qdisc, skb);
} else { } else {
qdisc->empty = true; WRITE_ONCE(qdisc->empty, true);
} }
return skb; return skb;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册