diff --git a/include/net/ip.h b/include/net/ip.h index fe63ba95d12b15828f767841d27746e7ef8895ca..d53b5a9eae34f65730a22ac0e2d8511be042e2de 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -91,6 +91,17 @@ static inline int inet_sdif(struct sk_buff *skb) return 0; } +/* Special input handler for packets caught by router alert option. + They are selected only by protocol field, and then processed likely + local ones; but only if someone wants them! Otherwise, router + not running rsvpd will kill RSVP. + + It is user level problem, what it will make with them. + I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), + but receiver should be enough clever f.e. to forward mtrace requests, + sent to multicast group to reach destination designated router. + */ + struct ip_ra_chain { struct ip_ra_chain __rcu *next; struct sock *sk; @@ -101,8 +112,6 @@ struct ip_ra_chain { struct rcu_head rcu; }; -extern struct ip_ra_chain __rcu *ip_ra_chain; - /* IP flags. */ #define IP_CE 0x8000 /* Flag: "Congestion" */ #define IP_DF 0x4000 /* Flag: "Don't Fragment" */ diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h index 382bfd7583cfc0008d164ca31e11e6ca4f04acaf..8491bc9c86b1553ab603e4363e8e38ca7ff547e0 100644 --- a/include/net/netns/ipv4.h +++ b/include/net/netns/ipv4.h @@ -49,6 +49,8 @@ struct netns_ipv4 { #endif struct ipv4_devconf *devconf_all; struct ipv4_devconf *devconf_dflt; + struct ip_ra_chain __rcu *ra_chain; + struct mutex ra_mutex; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_rules_ops *rules_ops; bool fib_has_custom_rules; diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index c340d5cfbdecd4dab11a863c988eb24eebaa88d5..95ba2c53bd9a280433959d22d60fa36b784cbe97 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -301,6 +301,7 @@ static __net_init int setup_net(struct net *net, struct user_namespace *user_ns) net->user_ns = user_ns; idr_init(&net->netns_ids); spin_lock_init(&net->nsid_lock); + mutex_init(&net->ipv4.ra_mutex); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 57fc13c6ab2b7843a4fdb11680c82fc342f465c7..7582713dd18f37b5c27cdc85ff62626a8ad4f435 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -159,7 +159,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) struct net_device *dev = skb->dev; struct net *net = dev_net(dev); - for (ra = rcu_dereference(ip_ra_chain); ra; ra = rcu_dereference(ra->next)) { + for (ra = rcu_dereference(net->ipv4.ra_chain); ra; ra = rcu_dereference(ra->next)) { struct sock *sk = ra->sk; /* If socket is bound to an interface, only report @@ -167,8 +167,7 @@ bool ip_call_ra_chain(struct sk_buff *skb) */ if (sk && inet_sk(sk)->inet_num == protocol && (!sk->sk_bound_dev_if || - sk->sk_bound_dev_if == dev->ifindex) && - net_eq(sock_net(sk), net)) { + sk->sk_bound_dev_if == dev->ifindex)) { if (ip_is_fragment(ip_hdr(skb))) { if (ip_defrag(net, skb, IP_DEFRAG_CALL_RA_CHAIN)) return true; diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 74c962b9b09c3c234388686fad4fb217c4e4a36e..5ad2d8ed3a3fe2aa51d814af442df7ff5e074d3e 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -322,20 +322,6 @@ int ip_cmsg_send(struct sock *sk, struct msghdr *msg, struct ipcm_cookie *ipc, return 0; } - -/* Special input handler for packets caught by router alert option. - They are selected only by protocol field, and then processed likely - local ones; but only if someone wants them! Otherwise, router - not running rsvpd will kill RSVP. - - It is user level problem, what it will make with them. - I have no idea, how it will masquearde or NAT them (it is joke, joke :-)), - but receiver should be enough clever f.e. to forward mtrace requests, - sent to multicast group to reach destination designated router. - */ -struct ip_ra_chain __rcu *ip_ra_chain; - - static void ip_ra_destroy_rcu(struct rcu_head *head) { struct ip_ra_chain *ra = container_of(head, struct ip_ra_chain, rcu); @@ -349,23 +335,28 @@ int ip_ra_control(struct sock *sk, unsigned char on, { struct ip_ra_chain *ra, *new_ra; struct ip_ra_chain __rcu **rap; + struct net *net = sock_net(sk); if (sk->sk_type != SOCK_RAW || inet_sk(sk)->inet_num == IPPROTO_RAW) return -EINVAL; new_ra = on ? kmalloc(sizeof(*new_ra), GFP_KERNEL) : NULL; - for (rap = &ip_ra_chain; - (ra = rtnl_dereference(*rap)) != NULL; + mutex_lock(&net->ipv4.ra_mutex); + for (rap = &net->ipv4.ra_chain; + (ra = rcu_dereference_protected(*rap, + lockdep_is_held(&net->ipv4.ra_mutex))) != NULL; rap = &ra->next) { if (ra->sk == sk) { if (on) { + mutex_unlock(&net->ipv4.ra_mutex); kfree(new_ra); return -EADDRINUSE; } /* dont let ip_call_ra_chain() use sk again */ ra->sk = NULL; RCU_INIT_POINTER(*rap, ra->next); + mutex_unlock(&net->ipv4.ra_mutex); if (ra->destructor) ra->destructor(sk); @@ -379,14 +370,17 @@ int ip_ra_control(struct sock *sk, unsigned char on, return 0; } } - if (!new_ra) + if (!new_ra) { + mutex_unlock(&net->ipv4.ra_mutex); return -ENOBUFS; + } new_ra->sk = sk; new_ra->destructor = destructor; RCU_INIT_POINTER(new_ra->next, ra); rcu_assign_pointer(*rap, new_ra); sock_hold(sk); + mutex_unlock(&net->ipv4.ra_mutex); return 0; } @@ -586,7 +580,6 @@ static bool setsockopt_needs_rtnl(int optname) case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: case MCAST_UNBLOCK_SOURCE: - case IP_ROUTER_ALERT: return true; } return false; @@ -639,6 +632,8 @@ static int do_ip_setsockopt(struct sock *sk, int level, /* If optlen==0, it is equivalent to val == 0 */ + if (optname == IP_ROUTER_ALERT) + return ip_ra_control(sk, val ? 1 : 0, NULL); if (ip_mroute_opt(optname)) return ip_mroute_setsockopt(sk, optname, optval, optlen); @@ -1149,9 +1144,6 @@ static int do_ip_setsockopt(struct sock *sk, int level, goto e_inval; inet->mc_all = val; break; - case IP_ROUTER_ALERT: - err = ip_ra_control(sk, val ? 1 : 0, NULL); - break; case IP_FREEBIND: if (optlen < 1) diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index d752a70855d8b6957d201db9772da3c4992b895b..f6be5db16da2640a44032770ca3ef8b90bc14b29 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1399,7 +1399,7 @@ static void mrtsock_destruct(struct sock *sk) struct net *net = sock_net(sk); struct mr_table *mrt; - ASSERT_RTNL(); + rtnl_lock(); ipmr_for_each_table(mrt, net) { if (sk == rtnl_dereference(mrt->mroute_sk)) { IPV4_DEVCONF_ALL(net, MC_FORWARDING)--; @@ -1411,6 +1411,7 @@ static void mrtsock_destruct(struct sock *sk) mroute_clean_tables(mrt, false); } } + rtnl_unlock(); } /* Socket options and virtual interface manipulation. The whole @@ -1475,8 +1476,13 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, if (sk != rcu_access_pointer(mrt->mroute_sk)) { ret = -EACCES; } else { + /* We need to unlock here because mrtsock_destruct takes + * care of rtnl itself and we can't change that due to + * the IP_ROUTER_ALERT setsockopt which runs without it. + */ + rtnl_unlock(); ret = ip_ra_control(sk, 0, NULL); - goto out_unlock; + goto out; } break; case MRT_ADD_VIF: @@ -1588,6 +1594,7 @@ int ip_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, } out_unlock: rtnl_unlock(); +out: return ret; } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 54648d20bf0f87e1b9a2cceb7bb18a31c8df319a..720bef7da2f6d13e9b09269c38d14a7945ae4c5e 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -711,9 +711,7 @@ static void raw_close(struct sock *sk, long timeout) /* * Raw sockets may have direct kernel references. Kill them. */ - rtnl_lock(); ip_ra_control(sk, 0, NULL); - rtnl_unlock(); sk_common_release(sk); }