diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a17ae7b8521805847aa1c81920cafd863bc4a275..32a7c7e35b715debc9c8df324c9393381f2ae900 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -176,8 +176,6 @@ struct tcp_sock { * sum(delta(snd_una)), or how many bytes * were acked. */ - struct u64_stats_sync syncp; /* protects 64bit vars (cf tcp_get_info()) */ - u32 snd_una; /* First byte we want an ack for */ u32 snd_sml; /* Last byte of the most recently transmitted small packet */ u32 rcv_tstamp; /* timestamp of last received ACK (for keepalives) */ diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 3b34024202d8144d823abe1bce5cb2d34b24e9de..4dea33e5f29572e09c29621ee8eadc4e60a9a9a2 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -861,10 +861,11 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, struct netlink_callback *cb, const struct inet_diag_req_v2 *r, struct nlattr *bc) { + bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); struct net *net = sock_net(skb->sk); - int i, num, s_i, s_num; u32 idiag_states = r->idiag_states; - bool net_admin = netlink_net_capable(cb->skb, CAP_NET_ADMIN); + int i, num, s_i, s_num; + struct sock *sk; if (idiag_states & TCPF_SYN_RECV) idiag_states |= TCPF_NEW_SYN_RECV; @@ -877,7 +878,6 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, for (i = s_i; i < INET_LHTABLE_SIZE; i++) { struct inet_listen_hashbucket *ilb; - struct sock *sk; num = 0; ilb = &hashinfo->listening_hash[i]; @@ -922,13 +922,14 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, if (!(idiag_states & ~TCPF_LISTEN)) goto out; +#define SKARR_SZ 16 for (i = s_i; i <= hashinfo->ehash_mask; i++) { struct inet_ehash_bucket *head = &hashinfo->ehash[i]; spinlock_t *lock = inet_ehash_lockp(hashinfo, i); struct hlist_nulls_node *node; - struct sock *sk; - - num = 0; + struct sock *sk_arr[SKARR_SZ]; + int num_arr[SKARR_SZ]; + int idx, accum, res; if (hlist_nulls_empty(&head->chain)) continue; @@ -936,9 +937,12 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, if (i > s_i) s_num = 0; +next_chunk: + num = 0; + accum = 0; spin_lock_bh(lock); sk_nulls_for_each(sk, node, &head->chain) { - int state, res; + int state; if (!net_eq(sock_net(sk), net)) continue; @@ -962,21 +966,35 @@ void inet_diag_dump_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *skb, if (!inet_diag_bc_sk(bc, sk)) goto next_normal; - res = sk_diag_fill(sk, skb, r, + sock_hold(sk); + num_arr[accum] = num; + sk_arr[accum] = sk; + if (++accum == SKARR_SZ) + break; +next_normal: + ++num; + } + spin_unlock_bh(lock); + res = 0; + for (idx = 0; idx < accum; idx++) { + if (res >= 0) { + res = sk_diag_fill(sk_arr[idx], skb, r, sk_user_ns(NETLINK_CB(cb->skb).sk), NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh, net_admin); - if (res < 0) { - spin_unlock_bh(lock); - goto done; + if (res < 0) + num = num_arr[idx]; } -next_normal: - ++num; + sock_gen_put(sk_arr[idx]); } - - spin_unlock_bh(lock); + if (res < 0) + break; cond_resched(); + if (accum == SKARR_SZ) { + s_num = num + 1; + goto next_chunk; + } } done: diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3251fe71f39f2395befb0e662ca19423e6b9ea90..a7d54cbcdabbbc0838c00e05074ef15560665f2d 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -405,7 +405,6 @@ void tcp_init_sock(struct sock *sk) tp->snd_ssthresh = TCP_INFINITE_SSTHRESH; tp->snd_cwnd_clamp = ~0; tp->mss_cache = TCP_MSS_DEFAULT; - u64_stats_init(&tp->syncp); tp->reordering = sock_net(sk)->ipv4.sysctl_tcp_reordering; tcp_enable_early_retrans(tp); @@ -2710,9 +2709,8 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) const struct tcp_sock *tp = tcp_sk(sk); /* iff sk_type == SOCK_STREAM */ const struct inet_connection_sock *icsk = inet_csk(sk); u32 now = tcp_time_stamp, intv; - unsigned int start; - int notsent_bytes; u64 rate64; + bool slow; u32 rate; memset(info, 0, sizeof(*info)); @@ -2721,6 +2719,27 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_state = sk_state_load(sk); + /* Report meaningful fields for all TCP states, including listeners */ + rate = READ_ONCE(sk->sk_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_pacing_rate); + + rate = READ_ONCE(sk->sk_max_pacing_rate); + rate64 = rate != ~0U ? rate : ~0ULL; + put_unaligned(rate64, &info->tcpi_max_pacing_rate); + + info->tcpi_reordering = tp->reordering; + info->tcpi_snd_cwnd = tp->snd_cwnd; + + if (info->tcpi_state == TCP_LISTEN) { + /* listeners aliased fields : + * tcpi_unacked -> Number of children ready for accept() + * tcpi_sacked -> max backlog + */ + info->tcpi_unacked = sk->sk_ack_backlog; + info->tcpi_sacked = sk->sk_max_ack_backlog; + return; + } info->tcpi_ca_state = icsk->icsk_ca_state; info->tcpi_retransmits = icsk->icsk_retransmits; info->tcpi_probes = icsk->icsk_probes_out; @@ -2748,13 +2767,9 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_snd_mss = tp->mss_cache; info->tcpi_rcv_mss = icsk->icsk_ack.rcv_mss; - if (info->tcpi_state == TCP_LISTEN) { - info->tcpi_unacked = sk->sk_ack_backlog; - info->tcpi_sacked = sk->sk_max_ack_backlog; - } else { - info->tcpi_unacked = tp->packets_out; - info->tcpi_sacked = tp->sacked_out; - } + info->tcpi_unacked = tp->packets_out; + info->tcpi_sacked = tp->sacked_out; + info->tcpi_lost = tp->lost_out; info->tcpi_retrans = tp->retrans_out; info->tcpi_fackets = tp->fackets_out; @@ -2768,34 +2783,24 @@ void tcp_get_info(struct sock *sk, struct tcp_info *info) info->tcpi_rtt = tp->srtt_us >> 3; info->tcpi_rttvar = tp->mdev_us >> 2; info->tcpi_snd_ssthresh = tp->snd_ssthresh; - info->tcpi_snd_cwnd = tp->snd_cwnd; info->tcpi_advmss = tp->advmss; - info->tcpi_reordering = tp->reordering; info->tcpi_rcv_rtt = jiffies_to_usecs(tp->rcv_rtt_est.rtt)>>3; info->tcpi_rcv_space = tp->rcvq_space.space; info->tcpi_total_retrans = tp->total_retrans; - rate = READ_ONCE(sk->sk_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_pacing_rate); + slow = lock_sock_fast(sk); - rate = READ_ONCE(sk->sk_max_pacing_rate); - rate64 = rate != ~0U ? rate : ~0ULL; - put_unaligned(rate64, &info->tcpi_max_pacing_rate); + put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); + put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); + info->tcpi_notsent_bytes = max_t(int, 0, tp->write_seq - tp->snd_nxt); + + unlock_sock_fast(sk, slow); - do { - start = u64_stats_fetch_begin_irq(&tp->syncp); - put_unaligned(tp->bytes_acked, &info->tcpi_bytes_acked); - put_unaligned(tp->bytes_received, &info->tcpi_bytes_received); - } while (u64_stats_fetch_retry_irq(&tp->syncp, start)); info->tcpi_segs_out = tp->segs_out; info->tcpi_segs_in = tp->segs_in; - notsent_bytes = READ_ONCE(tp->write_seq) - READ_ONCE(tp->snd_nxt); - info->tcpi_notsent_bytes = max(0, notsent_bytes); - info->tcpi_min_rtt = tcp_min_rtt(tp); info->tcpi_data_segs_in = tp->data_segs_in; info->tcpi_data_segs_out = tp->data_segs_out; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index f2c59c8e57ff195c803ac05e6897242a6b097d00..a70046fea0e89a7aff274b2702bd3b0b4d3853a9 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -3351,9 +3351,7 @@ static void tcp_snd_una_update(struct tcp_sock *tp, u32 ack) u32 delta = ack - tp->snd_una; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_acked += delta; - u64_stats_update_end_raw(&tp->syncp); tp->snd_una = ack; } @@ -3363,9 +3361,7 @@ static void tcp_rcv_nxt_update(struct tcp_sock *tp, u32 seq) u32 delta = seq - tp->rcv_nxt; sock_owned_by_me((struct sock *)tp); - u64_stats_update_begin_raw(&tp->syncp); tp->bytes_received += delta; - u64_stats_update_end_raw(&tp->syncp); tp->rcv_nxt = seq; }