diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 12aac0fd6ee7cd9d5963d69a606ebbc5b9981b26..909972aa3acd7e4e3d865800f45056235efc7bef 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -13,6 +13,7 @@ struct netns_frags { int timeout; int high_thresh; int low_thresh; + int max_dist; }; /** diff --git a/include/net/ip.h b/include/net/ip.h index 1a98f1ca16383d2b9cd0867091e323ab7c81f045..cbb134b2f0e4ff2ae81aa6e45a351a2246605786 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -245,12 +245,6 @@ extern int inet_peer_threshold; extern int inet_peer_minttl; extern int inet_peer_maxttl; -/* From ip_input.c */ -extern int sysctl_ip_early_demux; - -/* From ip_output.c */ -extern int sysctl_ip_dynaddr; - void ipfrag_init(void); void ip_static_sysctl_init(void); diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 848fe80565343dfd864363afd910683e66595f92..a69cde3ce4608879ee3e4056d22ce512eb0c3ba5 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -80,9 +80,13 @@ struct netns_ipv4 { int sysctl_tcp_ecn; int sysctl_tcp_ecn_fallback; + int sysctl_ip_default_ttl; int sysctl_ip_no_pmtu_disc; int sysctl_ip_fwd_use_pmtu; int sysctl_ip_nonlocal_bind; + /* Shall we try to damage output packets if routing dev changes? */ + int sysctl_ip_dynaddr; + int sysctl_ip_early_demux; int sysctl_fwmark_reflect; int sysctl_tcp_fwmark_accept; diff --git a/include/net/route.h b/include/net/route.h index a3b9ef74a3895dbce95b0eba9b4bb298f7be9d71..9b0a523bb4280023e4429fcc5379cb5d2d2d419a 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -329,14 +329,13 @@ static inline int inet_iif(const struct sk_buff *skb) return skb->skb_iif; } -extern int sysctl_ip_default_ttl; - static inline int ip4_dst_hoplimit(const struct dst_entry *dst) { int hoplimit = dst_metric_raw(dst, RTAX_HOPLIMIT); + struct net *net = dev_net(dst->dev); if (hoplimit == 0) - hoplimit = sysctl_ip_default_ttl; + hoplimit = net->ipv4.sysctl_ip_default_ttl; return hoplimit; } diff --git a/net/bridge/netfilter/nft_reject_bridge.c b/net/bridge/netfilter/nft_reject_bridge.c index fdba3d9fbff3bac00b8acc68d1eaeda2e492e820..adc8d7221dbbc135fa3506a713c70c1130c51ad6 100644 --- a/net/bridge/netfilter/nft_reject_bridge.c +++ b/net/bridge/netfilter/nft_reject_bridge.c @@ -48,6 +48,7 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, struct iphdr *niph; const struct tcphdr *oth; struct tcphdr _oth; + struct net *net = sock_net(oldskb->sk); if (!nft_bridge_iphdr_validate(oldskb)) return; @@ -63,9 +64,9 @@ static void nft_reject_br_send_v4_tcp_reset(struct sk_buff *oldskb, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_TCP, - sysctl_ip_default_ttl); + net->ipv4.sysctl_ip_default_ttl); nf_reject_ip_tcphdr_put(nskb, oldskb, oth); - niph->ttl = sysctl_ip_default_ttl; + niph->ttl = net->ipv4.sysctl_ip_default_ttl; niph->tot_len = htons(nskb->len); ip_send_check(niph); @@ -85,6 +86,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, void *payload; __wsum csum; u8 proto; + struct net *net = sock_net(oldskb->sk); if (oldskb->csum_bad || !nft_bridge_iphdr_validate(oldskb)) return; @@ -119,7 +121,7 @@ static void nft_reject_br_send_v4_unreach(struct sk_buff *oldskb, skb_reserve(nskb, LL_MAX_HEADER); niph = nf_reject_iphdr_put(nskb, oldskb, IPPROTO_ICMP, - sysctl_ip_default_ttl); + net->ipv4.sysctl_ip_default_ttl); skb_reset_transport_header(nskb); icmph = (struct icmphdr *)skb_put(nskb, sizeof(struct icmphdr)); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index eade66db214e99b42a732c5c8aa8c3744984a06c..209d1ed2895444bfd90d2eb12f6942d984be40d5 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1095,12 +1095,6 @@ void inet_unregister_protosw(struct inet_protosw *p) } EXPORT_SYMBOL(inet_unregister_protosw); -/* - * Shall we try to damage output packets if routing dev changes? - */ - -int sysctl_ip_dynaddr __read_mostly; - static int inet_sk_reselect_saddr(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); @@ -1131,7 +1125,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) if (new_saddr == old_saddr) return 0; - if (sysctl_ip_dynaddr > 1) { + if (sock_net(sk)->ipv4.sysctl_ip_dynaddr > 1) { pr_info("%s(): shifting inet->saddr from %pI4 to %pI4\n", __func__, &old_saddr, &new_saddr); } @@ -1186,7 +1180,7 @@ int inet_sk_rebuild_header(struct sock *sk) * Other protocols have to map its equivalent state to TCP_SYN_SENT. * DCCP maps its DCCP_REQUESTING state to TCP_SYN_SENT. -acme */ - if (!sysctl_ip_dynaddr || + if (!sock_net(sk)->ipv4.sysctl_ip_dynaddr || sk->sk_state != TCP_SYN_SENT || (sk->sk_userlocks & SOCK_BINDADDR_LOCK) || (err = inet_sk_reselect_saddr(sk)) != 0) diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 7c95335bf85eee105dfe65bfcf9a8448dc83305f..2aea9f1a2a31bfe624d5bb1c56effe70e97fa88a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1224,7 +1224,9 @@ static void igmp_group_dropped(struct ip_mc_list *im) static void igmp_group_added(struct ip_mc_list *im) { struct in_device *in_dev = im->interface; +#ifdef CONFIG_IP_MULTICAST struct net *net = dev_net(in_dev->dev); +#endif if (im->loaded == 0) { im->loaded = 1; @@ -1316,7 +1318,9 @@ static void ip_mc_hash_remove(struct in_device *in_dev, void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { struct ip_mc_list *im; +#ifdef CONFIG_IP_MULTICAST struct net *net = dev_net(in_dev->dev); +#endif ASSERT_RTNL(); @@ -1643,7 +1647,9 @@ void ip_mc_down(struct in_device *in_dev) void ip_mc_init_dev(struct in_device *in_dev) { +#ifdef CONFIG_IP_MULTICAST struct net *net = dev_net(in_dev->dev); +#endif ASSERT_RTNL(); #ifdef CONFIG_IP_MULTICAST @@ -1662,7 +1668,9 @@ void ip_mc_init_dev(struct in_device *in_dev) void ip_mc_up(struct in_device *in_dev) { struct ip_mc_list *pmc; +#ifdef CONFIG_IP_MULTICAST struct net *net = dev_net(in_dev->dev); +#endif ASSERT_RTNL(); @@ -2923,6 +2931,12 @@ static int __net_init igmp_net_init(struct net *net) goto out_sock; } + /* Sysctl initialization */ + net->ipv4.sysctl_igmp_max_memberships = 20; + net->ipv4.sysctl_igmp_max_msf = 10; + /* IGMP reports for link-local multicast groups are enabled by default */ + net->ipv4.sysctl_igmp_llm_reports = 1; + net->ipv4.sysctl_igmp_qrv = 2; return 0; out_sock: diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 187c6fcc3027779ba31d0721eb4a7389addaaf8a..efbd47d1a53155496e236eb7d4d0c893ef533d1c 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -54,8 +54,6 @@ * code now. If you change something here, _PLEASE_ update ipv6/reassembly.c * as well. Or notify me, at least. --ANK */ - -static int sysctl_ipfrag_max_dist __read_mostly = 64; static const char ip_frag_cache_name[] = "ip4-frags"; struct ipfrag_skb_cb @@ -150,7 +148,7 @@ static void ip4_frag_init(struct inet_frag_queue *q, const void *a) qp->daddr = arg->iph->daddr; qp->vif = arg->vif; qp->user = arg->user; - qp->peer = sysctl_ipfrag_max_dist ? + qp->peer = q->net->max_dist ? inet_getpeer_v4(net->ipv4.peers, arg->iph->saddr, arg->vif, 1) : NULL; } @@ -275,7 +273,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph, static int ip_frag_too_far(struct ipq *qp) { struct inet_peer *peer = qp->peer; - unsigned int max = sysctl_ipfrag_max_dist; + unsigned int max = qp->q.net->max_dist; unsigned int start, end; int rc; @@ -749,6 +747,14 @@ static struct ctl_table ip4_frags_ns_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, + { + .procname = "ipfrag_max_dist", + .data = &init_net.ipv4.frags.max_dist, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero + }, { } }; @@ -762,14 +768,6 @@ static struct ctl_table ip4_frags_ctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - { - .procname = "ipfrag_max_dist", - .data = &sysctl_ipfrag_max_dist, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &zero - }, { } }; @@ -790,10 +788,7 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net) table[1].data = &net->ipv4.frags.low_thresh; table[1].extra2 = &net->ipv4.frags.high_thresh; table[2].data = &net->ipv4.frags.timeout; - - /* Don't export sysctls to unprivileged users */ - if (net->user_ns != &init_user_ns) - table[0].procname = NULL; + table[3].data = &net->ipv4.frags.max_dist; } hdr = register_net_sysctl(net, "net/ipv4", table); @@ -865,6 +860,8 @@ static int __net_init ipv4_frags_init_net(struct net *net) */ net->ipv4.frags.timeout = IP_FRAG_TIME; + net->ipv4.frags.max_dist = 64; + res = inet_frags_init_net(&net->ipv4.frags); if (res) return res; diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 852002f64c68455474123f5bb66abb0bd5583e41..e3d782746d9db79b0ad03d1059316bebb73207a4 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -308,15 +308,12 @@ static inline bool ip_rcv_options(struct sk_buff *skb) return true; } -int sysctl_ip_early_demux __read_mostly = 1; -EXPORT_SYMBOL(sysctl_ip_early_demux); - static int ip_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { const struct iphdr *iph = ip_hdr(skb); struct rtable *rt; - if (sysctl_ip_early_demux && + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && !skb->sk && !ip_is_fragment(iph)) { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 64878efa045c132d1ce511c326c2da04fc77a895..f734c42acdaf960e7bfdbae050fb64f4108b66d6 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -79,9 +79,6 @@ #include #include -int sysctl_ip_default_ttl __read_mostly = IPDEFTTL; -EXPORT_SYMBOL(sysctl_ip_default_ttl); - static int ip_fragment(struct net *net, struct sock *sk, struct sk_buff *skb, unsigned int mtu, diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 92808f147ef5bafa61ce538ec5cb99df8945e45d..3f1befc4e17b244e4f356b0ccf7fb863e80850e3 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1341,10 +1341,13 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname, val = inet->tos; break; case IP_TTL: + { + struct net *net = sock_net(sk); val = (inet->uc_ttl == -1 ? - sysctl_ip_default_ttl : + net->ipv4.sysctl_ip_default_ttl : inet->uc_ttl); break; + } case IP_HDRINCL: val = inet->hdrincl; break; diff --git a/net/ipv4/netfilter/ipt_SYNPROXY.c b/net/ipv4/netfilter/ipt_SYNPROXY.c index 5fdc556514bac3335f0c4f78b2c01c54f1c8b68f..7b8fbb35287785ff4cf3168c47a2e251969e8254 100644 --- a/net/ipv4/netfilter/ipt_SYNPROXY.c +++ b/net/ipv4/netfilter/ipt_SYNPROXY.c @@ -21,6 +21,7 @@ static struct iphdr * synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) { struct iphdr *iph; + struct net *net = sock_net(skb->sk); skb_reset_network_header(skb); iph = (struct iphdr *)skb_put(skb, sizeof(*iph)); @@ -29,7 +30,7 @@ synproxy_build_ip(struct sk_buff *skb, __be32 saddr, __be32 daddr) iph->tos = 0; iph->id = 0; iph->frag_off = htons(IP_DF); - iph->ttl = sysctl_ip_default_ttl; + iph->ttl = net->ipv4.sysctl_ip_default_ttl; iph->protocol = IPPROTO_TCP; iph->check = 0; iph->saddr = saddr; diff --git a/net/ipv4/proc.c b/net/ipv4/proc.c index 3abd9d7a3adf323bd688b1ab6dabda1248c67be1..9f665b63a927202b9aaf2b6b3d42205058a2ae59 100644 --- a/net/ipv4/proc.c +++ b/net/ipv4/proc.c @@ -390,7 +390,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "\nIp: %d %d", IPV4_DEVCONF_ALL(net, FORWARDING) ? 1 : 2, - sysctl_ip_default_ttl); + net->ipv4.sysctl_ip_default_ttl); BUILD_BUG_ON(offsetof(struct ipstats_mib, mibs) != 0); for (i = 0; snmp4_ipstats_list[i].name != NULL; i++) diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index b537338f5c97d0d09c80fa2f8f19f26f0c270be7..1e1fe6086dd912e5071524053581afa79578061b 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -282,15 +282,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "ip_default_ttl", - .data = &sysctl_ip_default_ttl, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec_minmax, - .extra1 = &ip_ttl_min, - .extra2 = &ip_ttl_max, - }, { .procname = "tcp_max_orphans", .data = &sysctl_tcp_max_orphans, @@ -305,20 +296,6 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, - { - .procname = "ip_early_demux", - .data = &sysctl_ip_early_demux, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, - { - .procname = "ip_dynaddr", - .data = &sysctl_ip_dynaddr, - .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = proc_dointvec - }, { .procname = "tcp_fastopen", .data = &sysctl_tcp_fastopen, @@ -752,6 +729,29 @@ static struct ctl_table ipv4_net_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "ip_dynaddr", + .data = &init_net.ipv4.sysctl_ip_dynaddr, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip_early_demux", + .data = &init_net.ipv4.sysctl_ip_early_demux, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, + { + .procname = "ip_default_ttl", + .data = &init_net.ipv4.sysctl_ip_default_ttl, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &ip_ttl_min, + .extra2 = &ip_ttl_max, + }, { .procname = "ip_local_port_range", .maxlen = sizeof(init_net.ipv4.ip_local_ports.range), @@ -988,6 +988,10 @@ static __net_init int ipv4_sysctl_init_net(struct net *net) if (!net->ipv4.sysctl_local_reserved_ports) goto err_ports; + net->ipv4.sysctl_ip_default_ttl = IPDEFTTL; + net->ipv4.sysctl_ip_dynaddr = 0; + net->ipv4.sysctl_ip_early_demux = 1; + return 0; err_ports: diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index ba5d0146e3f0b52b68ec3abb0eff7e5a46a41bbb..3f872a6bc27449f181089d53b11bed84dd96f036 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2399,12 +2399,6 @@ static int __net_init tcp_sk_init(struct net *net) net->ipv4.sysctl_tcp_fin_timeout = TCP_FIN_TIMEOUT; net->ipv4.sysctl_tcp_notsent_lowat = UINT_MAX; - net->ipv4.sysctl_igmp_max_memberships = 20; - net->ipv4.sysctl_igmp_max_msf = 10; - /* IGMP reports for link-local multicast groups are enabled by default */ - net->ipv4.sysctl_igmp_llm_reports = 1; - net->ipv4.sysctl_igmp_qrv = 2; - return 0; fail: tcp_sk_exit(net); diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 31ac3c56da4ba16f806d4ef234de7599f25a265c..c05c425c23890b4b5ce077d5ca835a7e644ab881 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -49,7 +49,7 @@ int ip6_rcv_finish(struct net *net, struct sock *sk, struct sk_buff *skb) { - if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { + if (net->ipv4.sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]);