提交 dba36b38 编写于 作者: D David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains a rather large batch for your net tree that
includes accumulated bugfixes. They are:

1) Run conntrack cleanup from workqueue process context to avoid hitting
   soft lockup via watchdog for large tables. This is required by the
   IPv6 masquerading extension. From Florian Westphal.

2) Use original skbuff from nfnetlink batch when calling netlink_ack()
   on error since this needs to access the skb->sk pointer.

3) Incremental fix on top of recent Sasha Levin's lock fix for conntrack
   resizing.

4) Fix several problems in nfnetlink batch message header sanitization
   and error handling, from Phil Turnbull.

5) Select NF_DUP_IPV6 based on CONFIG_IPV6, from Arnd Bergmann.

6) Fix wrong signedness in return values on nf_tables counter expression,
   from Anton Protopopov.

Due to the NetDev 1.1 organization burden, I had no chance to pass
this up to you any sooner in this release cycle, sorry about that.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
...@@ -21,6 +21,10 @@ ...@@ -21,6 +21,10 @@
#include <net/ipv6.h> #include <net/ipv6.h>
#include <net/netfilter/ipv6/nf_nat_masquerade.h> #include <net/netfilter/ipv6/nf_nat_masquerade.h>
#define MAX_WORK_COUNT 16
static atomic_t v6_worker_count;
unsigned int unsigned int
nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range, nf_nat_masquerade_ipv6(struct sk_buff *skb, const struct nf_nat_range *range,
const struct net_device *out) const struct net_device *out)
...@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = { ...@@ -78,14 +82,78 @@ static struct notifier_block masq_dev_notifier = {
.notifier_call = masq_device_event, .notifier_call = masq_device_event,
}; };
struct masq_dev_work {
struct work_struct work;
struct net *net;
int ifindex;
};
static void iterate_cleanup_work(struct work_struct *work)
{
struct masq_dev_work *w;
long index;
w = container_of(work, struct masq_dev_work, work);
index = w->ifindex;
nf_ct_iterate_cleanup(w->net, device_cmp, (void *)index, 0, 0);
put_net(w->net);
kfree(w);
atomic_dec(&v6_worker_count);
module_put(THIS_MODULE);
}
/* ipv6 inet notifier is an atomic notifier, i.e. we cannot
* schedule.
*
* Unfortunately, nf_ct_iterate_cleanup can run for a long
* time if there are lots of conntracks and the system
* handles high softirq load, so it frequently calls cond_resched
* while iterating the conntrack table.
*
* So we defer nf_ct_iterate_cleanup walk to the system workqueue.
*
* As we can have 'a lot' of inet_events (depending on amount
* of ipv6 addresses being deleted), we also need to add an upper
* limit to the number of queued work items.
*/
static int masq_inet_event(struct notifier_block *this, static int masq_inet_event(struct notifier_block *this,
unsigned long event, void *ptr) unsigned long event, void *ptr)
{ {
struct inet6_ifaddr *ifa = ptr; struct inet6_ifaddr *ifa = ptr;
struct netdev_notifier_info info; const struct net_device *dev;
struct masq_dev_work *w;
struct net *net;
if (event != NETDEV_DOWN ||
atomic_read(&v6_worker_count) >= MAX_WORK_COUNT)
return NOTIFY_DONE;
dev = ifa->idev->dev;
net = maybe_get_net(dev_net(dev));
if (!net)
return NOTIFY_DONE;
netdev_notifier_info_init(&info, ifa->idev->dev); if (!try_module_get(THIS_MODULE))
return masq_device_event(this, event, &info); goto err_module;
w = kmalloc(sizeof(*w), GFP_ATOMIC);
if (w) {
atomic_inc(&v6_worker_count);
INIT_WORK(&w->work, iterate_cleanup_work);
w->ifindex = dev->ifindex;
w->net = net;
schedule_work(&w->work);
return NOTIFY_DONE;
}
module_put(THIS_MODULE);
err_module:
put_net(net);
return NOTIFY_DONE;
} }
static struct notifier_block masq_inet_notifier = { static struct notifier_block masq_inet_notifier = {
......
...@@ -891,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE ...@@ -891,7 +891,7 @@ config NETFILTER_XT_TARGET_TEE
depends on IPV6 || IPV6=n depends on IPV6 || IPV6=n
depends on !NF_CONNTRACK || NF_CONNTRACK depends on !NF_CONNTRACK || NF_CONNTRACK
select NF_DUP_IPV4 select NF_DUP_IPV4
select NF_DUP_IPV6 if IP6_NF_IPTABLES != n select NF_DUP_IPV6 if IPV6
---help--- ---help---
This option adds a "TEE" target with which a packet can be cloned and This option adds a "TEE" target with which a packet can be cloned and
this clone be rerouted to another nexthop. this clone be rerouted to another nexthop.
......
...@@ -1412,6 +1412,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), ...@@ -1412,6 +1412,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
} }
spin_unlock(lockp); spin_unlock(lockp);
local_bh_enable(); local_bh_enable();
cond_resched();
} }
for_each_possible_cpu(cpu) { for_each_possible_cpu(cpu) {
...@@ -1424,6 +1425,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data), ...@@ -1424,6 +1425,7 @@ get_next_corpse(struct net *net, int (*iter)(struct nf_conn *i, void *data),
set_bit(IPS_DYING_BIT, &ct->status); set_bit(IPS_DYING_BIT, &ct->status);
} }
spin_unlock_bh(&pcpu->lock); spin_unlock_bh(&pcpu->lock);
cond_resched();
} }
return NULL; return NULL;
found: found:
...@@ -1440,6 +1442,8 @@ void nf_ct_iterate_cleanup(struct net *net, ...@@ -1440,6 +1442,8 @@ void nf_ct_iterate_cleanup(struct net *net,
struct nf_conn *ct; struct nf_conn *ct;
unsigned int bucket = 0; unsigned int bucket = 0;
might_sleep();
while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) { while ((ct = get_next_corpse(net, iter, data, &bucket)) != NULL) {
/* Time to push up daises... */ /* Time to push up daises... */
if (del_timer(&ct->timeout)) if (del_timer(&ct->timeout))
...@@ -1448,6 +1452,7 @@ void nf_ct_iterate_cleanup(struct net *net, ...@@ -1448,6 +1452,7 @@ void nf_ct_iterate_cleanup(struct net *net,
/* ... else the timer will get him soon. */ /* ... else the timer will get him soon. */
nf_ct_put(ct); nf_ct_put(ct);
cond_resched();
} }
} }
EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup); EXPORT_SYMBOL_GPL(nf_ct_iterate_cleanup);
......
...@@ -311,14 +311,14 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -311,14 +311,14 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
#endif #endif
{ {
nfnl_unlock(subsys_id); nfnl_unlock(subsys_id);
netlink_ack(skb, nlh, -EOPNOTSUPP); netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb); return kfree_skb(skb);
} }
} }
if (!ss->commit || !ss->abort) { if (!ss->commit || !ss->abort) {
nfnl_unlock(subsys_id); nfnl_unlock(subsys_id);
netlink_ack(skb, nlh, -EOPNOTSUPP); netlink_ack(oskb, nlh, -EOPNOTSUPP);
return kfree_skb(skb); return kfree_skb(skb);
} }
...@@ -328,10 +328,12 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -328,10 +328,12 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
nlh = nlmsg_hdr(skb); nlh = nlmsg_hdr(skb);
err = 0; err = 0;
if (nlmsg_len(nlh) < sizeof(struct nfgenmsg) || if (nlh->nlmsg_len < NLMSG_HDRLEN ||
skb->len < nlh->nlmsg_len) { skb->len < nlh->nlmsg_len ||
err = -EINVAL; nlmsg_len(nlh) < sizeof(struct nfgenmsg)) {
goto ack; nfnl_err_reset(&err_list);
status |= NFNL_BATCH_FAILURE;
goto done;
} }
/* Only requests are handled by the kernel */ /* Only requests are handled by the kernel */
...@@ -406,7 +408,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh, ...@@ -406,7 +408,7 @@ static void nfnetlink_rcv_batch(struct sk_buff *skb, struct nlmsghdr *nlh,
* pointing to the batch header. * pointing to the batch header.
*/ */
nfnl_err_reset(&err_list); nfnl_err_reset(&err_list);
netlink_ack(skb, nlmsg_hdr(oskb), -ENOMEM); netlink_ack(oskb, nlmsg_hdr(oskb), -ENOMEM);
status |= NFNL_BATCH_FAILURE; status |= NFNL_BATCH_FAILURE;
goto done; goto done;
} }
......
...@@ -312,7 +312,7 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout) ...@@ -312,7 +312,7 @@ static void ctnl_untimeout(struct net *net, struct ctnl_timeout *timeout)
hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode) hlist_nulls_for_each_entry(h, nn, &net->ct.hash[i], hnnode)
untimeout(h, timeout); untimeout(h, timeout);
} }
nf_conntrack_lock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]); spin_unlock(&nf_conntrack_locks[i % CONNTRACK_LOCKS]);
} }
local_bh_enable(); local_bh_enable();
} }
......
...@@ -100,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx, ...@@ -100,7 +100,7 @@ static int nft_counter_init(const struct nft_ctx *ctx,
cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu); cpu_stats = netdev_alloc_pcpu_stats(struct nft_counter_percpu);
if (cpu_stats == NULL) if (cpu_stats == NULL)
return ENOMEM; return -ENOMEM;
preempt_disable(); preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats); this_cpu = this_cpu_ptr(cpu_stats);
...@@ -138,7 +138,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src) ...@@ -138,7 +138,7 @@ static int nft_counter_clone(struct nft_expr *dst, const struct nft_expr *src)
cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu, cpu_stats = __netdev_alloc_pcpu_stats(struct nft_counter_percpu,
GFP_ATOMIC); GFP_ATOMIC);
if (cpu_stats == NULL) if (cpu_stats == NULL)
return ENOMEM; return -ENOMEM;
preempt_disable(); preempt_disable();
this_cpu = this_cpu_ptr(cpu_stats); this_cpu = this_cpu_ptr(cpu_stats);
......
...@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par) ...@@ -38,7 +38,7 @@ tee_tg4(struct sk_buff *skb, const struct xt_action_param *par)
return XT_CONTINUE; return XT_CONTINUE;
} }
#if IS_ENABLED(CONFIG_NF_DUP_IPV6) #if IS_ENABLED(CONFIG_IPV6)
static unsigned int static unsigned int
tee_tg6(struct sk_buff *skb, const struct xt_action_param *par) tee_tg6(struct sk_buff *skb, const struct xt_action_param *par)
{ {
...@@ -131,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = { ...@@ -131,7 +131,7 @@ static struct xt_target tee_tg_reg[] __read_mostly = {
.destroy = tee_tg_destroy, .destroy = tee_tg_destroy,
.me = THIS_MODULE, .me = THIS_MODULE,
}, },
#if IS_ENABLED(CONFIG_NF_DUP_IPV6) #if IS_ENABLED(CONFIG_IPV6)
{ {
.name = "TEE", .name = "TEE",
.revision = 1, .revision = 1,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册