From 84882cf72cd774cf16fd338bdbf00f69ac9f9194 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 1 Nov 2021 22:26:08 -0700 Subject: [PATCH] Revert "net: avoid double accounting for pure zerocopy skbs" This reverts commit f1a456f8f3fc5828d8abcad941860380ae147b1d. WARNING: CPU: 1 PID: 6819 at net/core/skbuff.c:5429 skb_try_coalesce+0x78b/0x7e0 CPU: 1 PID: 6819 Comm: xxxxxxx Kdump: loaded Tainted: G S 5.15.0-04194-gd852503f7711 #16 RIP: 0010:skb_try_coalesce+0x78b/0x7e0 Code: e8 2a bf 41 ff 44 8b b3 bc 00 00 00 48 8b 7c 24 30 e8 19 c0 41 ff 44 89 f0 48 03 83 c0 00 00 00 48 89 44 24 40 e9 47 fb ff ff <0f> 0b e9 ca fc ff ff 4c 8d 70 ff 48 83 c0 07 48 89 44 24 38 e9 61 RSP: 0018:ffff88881f449688 EFLAGS: 00010282 RAX: 00000000fffffe96 RBX: ffff8881566e4460 RCX: ffffffff82079f7e RDX: 0000000000000003 RSI: dffffc0000000000 RDI: ffff8881566e47b0 RBP: ffff8881566e46e0 R08: ffffed102619235d R09: ffffed102619235d R10: ffff888130c91ae3 R11: ffffed102619235c R12: ffff88881f4498a0 R13: 0000000000000056 R14: 0000000000000009 R15: ffff888130c91ac0 FS: 00007fec2cbb9700(0000) GS:ffff88881f440000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007fec1b060d80 CR3: 00000003acf94005 CR4: 00000000003706e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: tcp_try_coalesce+0xeb/0x290 ? tcp_parse_options+0x610/0x610 ? mark_held_locks+0x79/0xa0 tcp_queue_rcv+0x69/0x2f0 tcp_rcv_established+0xa49/0xd40 ? tcp_data_queue+0x18a0/0x18a0 tcp_v6_do_rcv+0x1c9/0x880 ? rt6_mtu_change_route+0x100/0x100 tcp_v6_rcv+0x1624/0x1830 Signed-off-by: Jakub Kicinski --- include/linux/skbuff.h | 19 +------------------ include/net/tcp.h | 8 ++------ net/core/datagram.c | 3 +-- net/core/skbuff.c | 3 +-- net/ipv4/tcp.c | 22 ++-------------------- net/ipv4/tcp_output.c | 7 ++----- 6 files changed, 9 insertions(+), 53 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 10869906cc57..0bd6520329f6 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -454,15 +454,9 @@ enum { * all frags to avoid possible bad checksum */ SKBFL_SHARED_FRAG = BIT(1), - - /* segment contains only zerocopy data and should not be - * charged to the kernel memory. - */ - SKBFL_PURE_ZEROCOPY = BIT(2), }; #define SKBFL_ZEROCOPY_FRAG (SKBFL_ZEROCOPY_ENABLE | SKBFL_SHARED_FRAG) -#define SKBFL_ALL_ZEROCOPY (SKBFL_ZEROCOPY_FRAG | SKBFL_PURE_ZEROCOPY) /* * The callback notifies userspace to release buffers when skb DMA is done in @@ -1470,17 +1464,6 @@ static inline struct ubuf_info *skb_zcopy(struct sk_buff *skb) return is_zcopy ? skb_uarg(skb) : NULL; } -static inline bool skb_zcopy_pure(const struct sk_buff *skb) -{ - return skb_shinfo(skb)->flags & SKBFL_PURE_ZEROCOPY; -} - -static inline bool skb_pure_zcopy_same(const struct sk_buff *skb1, - const struct sk_buff *skb2) -{ - return skb_zcopy_pure(skb1) == skb_zcopy_pure(skb2); -} - static inline void net_zcopy_get(struct ubuf_info *uarg) { refcount_inc(&uarg->refcnt); @@ -1545,7 +1528,7 @@ static inline void skb_zcopy_clear(struct sk_buff *skb, bool zerocopy_success) if (!skb_zcopy_is_nouarg(skb)) uarg->callback(skb, uarg, zerocopy_success); - skb_shinfo(skb)->flags &= ~SKBFL_ALL_ZEROCOPY; + skb_shinfo(skb)->flags &= ~SKBFL_ZEROCOPY_FRAG; } } diff --git a/include/net/tcp.h b/include/net/tcp.h index af91f370432e..70972f3ac8fa 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -293,10 +293,7 @@ static inline bool tcp_out_of_memory(struct sock *sk) static inline void tcp_wmem_free_skb(struct sock *sk, struct sk_buff *skb) { sk_wmem_queued_add(sk, -skb->truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_uncharge(sk, skb->truesize); - else - sk_mem_uncharge(sk, SKB_TRUESIZE(MAX_TCP_HEADER)); + sk_mem_uncharge(sk, skb->truesize); __kfree_skb(skb); } @@ -977,8 +974,7 @@ static inline bool tcp_skb_can_collapse(const struct sk_buff *to, const struct sk_buff *from) { return likely(tcp_skb_can_collapse_to(to) && - mptcp_skb_can_collapse(to, from) && - skb_pure_zcopy_same(to, from)); + mptcp_skb_can_collapse(to, from)); } /* Events passed to congestion control interface */ diff --git a/net/core/datagram.c b/net/core/datagram.c index ee290776c661..15ab9ffb27fe 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -646,8 +646,7 @@ int __zerocopy_sg_from_iter(struct sock *sk, struct sk_buff *skb, skb->truesize += truesize; if (sk && sk->sk_type == SOCK_STREAM) { sk_wmem_queued_add(sk, truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_charge(sk, truesize); + sk_mem_charge(sk, truesize); } else { refcount_add(truesize, &skb->sk->sk_wmem_alloc); } diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 29e617d8d7fb..67a9188d8a49 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3433,9 +3433,8 @@ static inline void skb_split_no_header(struct sk_buff *skb, void skb_split(struct sk_buff *skb, struct sk_buff *skb1, const u32 len) { int pos = skb_headlen(skb); - const int zc_flags = SKBFL_SHARED_FRAG | SKBFL_PURE_ZEROCOPY; - skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & zc_flags; + skb_shinfo(skb1)->flags |= skb_shinfo(skb)->flags & SKBFL_SHARED_FRAG; skb_zerocopy_clone(skb1, skb, 0); if (len < pos) /* Split line is inside header. */ skb_split_inside_header(skb, skb1, len, pos); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2561c14a6e63..bc7f419184aa 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -863,7 +863,6 @@ struct sk_buff *tcp_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp, if (likely(skb)) { bool mem_scheduled; - skb->truesize = SKB_TRUESIZE(size + MAX_TCP_HEADER); if (force_schedule) { mem_scheduled = true; sk_forced_mem_schedule(sk, skb->truesize); @@ -1320,15 +1319,6 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) copy = min_t(int, copy, pfrag->size - pfrag->offset); - /* skb changing from pure zc to mixed, must charge zc */ - if (unlikely(skb_zcopy_pure(skb))) { - if (!sk_wmem_schedule(sk, skb->data_len)) - goto wait_for_space; - - sk_mem_charge(sk, skb->data_len); - skb_shinfo(skb)->flags &= ~SKBFL_PURE_ZEROCOPY; - } - if (!sk_wmem_schedule(sk, copy)) goto wait_for_space; @@ -1349,16 +1339,8 @@ int tcp_sendmsg_locked(struct sock *sk, struct msghdr *msg, size_t size) } pfrag->offset += copy; } else { - /* First append to a fragless skb builds initial - * pure zerocopy skb - */ - if (!skb->len) - skb_shinfo(skb)->flags |= SKBFL_PURE_ZEROCOPY; - - if (!skb_zcopy_pure(skb)) { - if (!sk_wmem_schedule(sk, copy)) - goto wait_for_space; - } + if (!sk_wmem_schedule(sk, copy)) + goto wait_for_space; err = skb_zerocopy_iter_stream(sk, skb, msg, copy, uarg); if (err == -EMSGSIZE || err == -EEXIST) { diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 287b57aadc37..6fbbf1558033 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1677,8 +1677,7 @@ int tcp_trim_head(struct sock *sk, struct sk_buff *skb, u32 len) if (delta_truesize) { skb->truesize -= delta_truesize; sk_wmem_queued_add(sk, -delta_truesize); - if (!skb_zcopy_pure(skb)) - sk_mem_uncharge(sk, delta_truesize); + sk_mem_uncharge(sk, delta_truesize); } /* Any change of skb->len requires recalculation of tso factor. */ @@ -2296,9 +2295,7 @@ static bool tcp_can_coalesce_send_queue_head(struct sock *sk, int len) if (len <= skb->len) break; - if (unlikely(TCP_SKB_CB(skb)->eor) || - tcp_has_tx_tstamp(skb) || - !skb_pure_zcopy_same(skb, next)) + if (unlikely(TCP_SKB_CB(skb)->eor) || tcp_has_tx_tstamp(skb)) return false; len -= skb->len; -- GitLab