提交 ac196f8c 编写于 作者: D David S. Miller

Merge branch 'master' of git://1984.lsi.us.es/nf

Pablo Neira Ayuso says:

====================
The following batch contains Netfilter fixes for 3.8-rc1. They are
a mixture of old bugs that have passed unnoticed (I'll pass these to
stable) and more fresh ones from the previous merge window, they are:

* Fix for MAC address in 6in4 tunnels via NFLOG that results in ulogd
  showing up wrong address, from Bob Hockney.

* Fix a comment in nf_conntrack_ipv6, from Florent Fourcot.

* Fix a leak an error path in ctnetlink while creating an expectation,
  from Jesper Juhl.

* Fix missing ICMP time exceeded in the IPv6 defragmentation code, from
  Haibo Xi.

* Fix inconsistent handling of routing changes in MASQUERADE for the
  new connections case, from Andrew Collins.

* Fix a missing skb_reset_transport in ip[6]t_REJECT that leads to
  crashes in the ixgbe driver (since it seems to access the transport
  header with TSO enabled), from Mukund Jampala.

* Recover obsoleted NOTRACK target by including it into the CT and spot
  a warning via printk about being obsoleted. Many people don't check the
  scheduled to be removal file under Documentation, so we follow some
  less agressive approach to kill this in a year or so. Spotted by Florian
  Westphal, patch from myself.

* Fix race condition in xt_hashlimit that allows to create two or more
  entries, from myself.

* Fix crash if the CT is used due to the recently added facilities to
  consult the dying and unconfirmed conntrack lists, from myself.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -71,6 +71,7 @@ struct netns_ct {
struct hlist_head *expect_hash;
struct hlist_nulls_head unconfirmed;
struct hlist_nulls_head dying;
struct hlist_nulls_head tmpl;
struct ip_conntrack_stat __percpu *stat;
struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb;
struct nf_exp_event_notifier __rcu *nf_expect_event_cb;
......
......@@ -8,6 +8,7 @@ struct ebt_table;
struct netns_xt {
struct list_head tables[NFPROTO_NUMPROTO];
bool notrack_deprecated_warning;
#if defined(CONFIG_BRIDGE_NF_EBTABLES) || \
defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE)
struct ebt_table *broute_table;
......
......@@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook)
niph->saddr = oiph->daddr;
niph->daddr = oiph->saddr;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
memset(tcph, 0, sizeof(*tcph));
tcph->source = oth->dest;
......
......@@ -124,23 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum,
ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
default:
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
goto oif_changed;
}
return nf_nat_packet(ct, ctinfo, hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
static unsigned int
......
......@@ -132,6 +132,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb)
ip6h->saddr = oip6h->daddr;
ip6h->daddr = oip6h->saddr;
skb_reset_transport_header(nskb);
tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr));
/* Truncate to length (no data) */
tcph->doff = sizeof(struct tcphdr)/4;
......
......@@ -127,23 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum,
ret = nf_nat_rule_find(skb, hooknum, in, out, ct);
if (ret != NF_ACCEPT)
return ret;
} else
} else {
pr_debug("Already setup manip %s for ct %p\n",
maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST",
ct);
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
goto oif_changed;
}
break;
default:
/* ESTABLISHED */
NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED ||
ctinfo == IP_CT_ESTABLISHED_REPLY);
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) {
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
if (nf_nat_oif_changed(hooknum, ctinfo, nat, out))
goto oif_changed;
}
return nf_nat_packet(ct, ctinfo, hooknum, skb);
oif_changed:
nf_ct_kill_acct(ct, ctinfo, skb);
return NF_DROP;
}
static unsigned int
......
......@@ -81,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff,
}
protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off);
/*
* (protoff == skb->len) mean that the packet doesn't have no data
* except of IPv6 & ext headers. but it's tracked anyway. - YK
* (protoff == skb->len) means the packet has not data, just
* IPv6 and possibly extensions headers, but it is tracked anyway
*/
if (protoff < 0 || (frag_off & htons(~0x7)) != 0) {
pr_debug("ip6_conntrack_core: can't find proto in pkt\n");
......
......@@ -311,7 +311,10 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
else
fq->q.fragments = skb;
skb->dev = NULL;
if (skb->dev) {
fq->iif = skb->dev->ifindex;
skb->dev = NULL;
}
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
if (payload_len > fq->q.max_size)
......
......@@ -680,6 +680,10 @@ config NETFILTER_XT_TARGET_NFQUEUE
To compile it as a module, choose M here. If unsure, say N.
config NETFILTER_XT_TARGET_NOTRACK
tristate '"NOTRACK" target support (DEPRECATED)'
select NETFILTER_XT_TARGET_CT
config NETFILTER_XT_TARGET_RATEEST
tristate '"RATEEST" target support'
depends on NETFILTER_ADVANCED
......
......@@ -1526,6 +1526,7 @@ static int nf_conntrack_init_init_net(void)
*/
#define UNCONFIRMED_NULLS_VAL ((1<<30)+0)
#define DYING_NULLS_VAL ((1<<30)+1)
#define TEMPLATE_NULLS_VAL ((1<<30)+2)
static int nf_conntrack_init_net(struct net *net)
{
......@@ -1534,6 +1535,7 @@ static int nf_conntrack_init_net(struct net *net)
atomic_set(&net->ct.count, 0);
INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL);
INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL);
INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL);
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat) {
ret = -ENOMEM;
......
......@@ -2624,7 +2624,7 @@ ctnetlink_create_expect(struct net *net, u16 zone,
if (!help) {
if (!cda[CTA_EXPECT_TIMEOUT]) {
err = -EINVAL;
goto out;
goto err_out;
}
exp->timeout.expires =
jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ;
......
......@@ -13,6 +13,7 @@
*/
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/if_arp.h>
#include <linux/init.h>
#include <linux/ip.h>
#include <linux/ipv6.h>
......@@ -384,6 +385,7 @@ __build_packet_message(struct nfulnl_instance *inst,
struct nfgenmsg *nfmsg;
sk_buff_data_t old_tail = inst->skb->tail;
struct sock *sk;
const unsigned char *hwhdrp;
nlh = nlmsg_put(inst->skb, 0, 0,
NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET,
......@@ -485,9 +487,17 @@ __build_packet_message(struct nfulnl_instance *inst,
if (indev && skb_mac_header_was_set(skb)) {
if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) ||
nla_put_be16(inst->skb, NFULA_HWLEN,
htons(skb->dev->hard_header_len)) ||
nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len,
skb_mac_header(skb)))
htons(skb->dev->hard_header_len)))
goto nla_put_failure;
hwhdrp = skb_mac_header(skb);
if (skb->dev->type == ARPHRD_SIT)
hwhdrp -= ETH_HLEN;
if (hwhdrp >= skb->head &&
nla_put(inst->skb, NFULA_HWHEADER,
skb->dev->hard_header_len, hwhdrp))
goto nla_put_failure;
}
......
......@@ -149,6 +149,10 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par)
__set_bit(IPS_TEMPLATE_BIT, &ct->status);
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
/* Overload tuple linked list to put us in template list. */
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&par->net->ct.tmpl);
out:
info->ct = ct;
return 0;
......@@ -289,6 +293,10 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par)
__set_bit(IPS_TEMPLATE_BIT, &ct->status);
__set_bit(IPS_CONFIRMED_BIT, &ct->status);
/* Overload tuple linked list to put us in template list. */
hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
&par->net->ct.tmpl);
out:
info->ct = ct;
return 0;
......@@ -377,14 +385,60 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = {
},
};
static unsigned int
notrack_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
/* Previously seen (loopback)? Ignore. */
if (skb->nfct != NULL)
return XT_CONTINUE;
skb->nfct = &nf_ct_untracked_get()->ct_general;
skb->nfctinfo = IP_CT_NEW;
nf_conntrack_get(skb->nfct);
return XT_CONTINUE;
}
static int notrack_chk(const struct xt_tgchk_param *par)
{
if (!par->net->xt.notrack_deprecated_warning) {
pr_info("netfilter: NOTRACK target is deprecated, "
"use CT instead or upgrade iptables\n");
par->net->xt.notrack_deprecated_warning = true;
}
return 0;
}
static struct xt_target notrack_tg_reg __read_mostly = {
.name = "NOTRACK",
.revision = 0,
.family = NFPROTO_UNSPEC,
.checkentry = notrack_chk,
.target = notrack_tg,
.table = "raw",
.me = THIS_MODULE,
};
static int __init xt_ct_tg_init(void)
{
return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
int ret;
ret = xt_register_target(&notrack_tg_reg);
if (ret < 0)
return ret;
ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
if (ret < 0) {
xt_unregister_target(&notrack_tg_reg);
return ret;
}
return 0;
}
static void __exit xt_ct_tg_exit(void)
{
xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg));
xt_unregister_target(&notrack_tg_reg);
}
module_init(xt_ct_tg_init);
......@@ -394,3 +448,5 @@ MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("Xtables: connection tracking target");
MODULE_ALIAS("ipt_CT");
MODULE_ALIAS("ip6t_CT");
MODULE_ALIAS("ipt_NOTRACK");
MODULE_ALIAS("ip6t_NOTRACK");
......@@ -157,11 +157,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht,
/* allocate dsthash_ent, initialize dst, put in htable and lock it */
static struct dsthash_ent *
dsthash_alloc_init(struct xt_hashlimit_htable *ht,
const struct dsthash_dst *dst)
const struct dsthash_dst *dst, bool *race)
{
struct dsthash_ent *ent;
spin_lock(&ht->lock);
/* Two or more packets may race to create the same entry in the
* hashtable, double check if this packet lost race.
*/
ent = dsthash_find(ht, dst);
if (ent != NULL) {
spin_unlock(&ht->lock);
*race = true;
return ent;
}
/* initialize hash with random val at the time we allocate
* the first hashtable entry */
if (unlikely(!ht->rnd_initialized)) {
......@@ -318,7 +329,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo)
parent = hashlimit_net->ipt_hashlimit;
else
parent = hashlimit_net->ip6t_hashlimit;
remove_proc_entry(hinfo->pde->name, parent);
if(parent != NULL)
remove_proc_entry(hinfo->pde->name, parent);
htable_selective_cleanup(hinfo, select_all);
vfree(hinfo);
}
......@@ -585,6 +599,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
unsigned long now = jiffies;
struct dsthash_ent *dh;
struct dsthash_dst dst;
bool race = false;
u32 cost;
if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0)
......@@ -593,13 +608,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
rcu_read_lock_bh();
dh = dsthash_find(hinfo, &dst);
if (dh == NULL) {
dh = dsthash_alloc_init(hinfo, &dst);
dh = dsthash_alloc_init(hinfo, &dst, &race);
if (dh == NULL) {
rcu_read_unlock_bh();
goto hotdrop;
} else if (race) {
/* Already got an entry, update expiration timeout */
dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
rateinfo_recalc(dh, now, hinfo->cfg.mode);
} else {
dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
rateinfo_init(dh, hinfo);
}
dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire);
rateinfo_init(dh, hinfo);
} else {
/* update expiration timeout */
dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire);
......@@ -856,6 +876,27 @@ static int __net_init hashlimit_proc_net_init(struct net *net)
static void __net_exit hashlimit_proc_net_exit(struct net *net)
{
struct xt_hashlimit_htable *hinfo;
struct hlist_node *pos;
struct proc_dir_entry *pde;
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
/* recent_net_exit() is called before recent_mt_destroy(). Make sure
* that the parent xt_recent proc entry is is empty before trying to
* remove it.
*/
mutex_lock(&hashlimit_mutex);
pde = hashlimit_net->ipt_hashlimit;
if (pde == NULL)
pde = hashlimit_net->ip6t_hashlimit;
hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node)
remove_proc_entry(hinfo->pde->name, pde);
hashlimit_net->ipt_hashlimit = NULL;
hashlimit_net->ip6t_hashlimit = NULL;
mutex_unlock(&hashlimit_mutex);
proc_net_remove(net, "ipt_hashlimit");
#if IS_ENABLED(CONFIG_IP6_NF_IPTABLES)
proc_net_remove(net, "ip6t_hashlimit");
......@@ -872,9 +913,6 @@ static int __net_init hashlimit_net_init(struct net *net)
static void __net_exit hashlimit_net_exit(struct net *net)
{
struct hashlimit_net *hashlimit_net = hashlimit_pernet(net);
BUG_ON(!hlist_empty(&hashlimit_net->htables));
hashlimit_proc_net_exit(net);
}
......
......@@ -431,7 +431,8 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par)
list_del(&t->list);
spin_unlock_bh(&recent_lock);
#ifdef CONFIG_PROC_FS
remove_proc_entry(t->name, recent_net->xt_recent);
if (recent_net->xt_recent != NULL)
remove_proc_entry(t->name, recent_net->xt_recent);
#endif
recent_table_flush(t);
kfree(t);
......@@ -615,6 +616,20 @@ static int __net_init recent_proc_net_init(struct net *net)
static void __net_exit recent_proc_net_exit(struct net *net)
{
struct recent_net *recent_net = recent_pernet(net);
struct recent_table *t;
/* recent_net_exit() is called before recent_mt_destroy(). Make sure
* that the parent xt_recent proc entry is is empty before trying to
* remove it.
*/
spin_lock_bh(&recent_lock);
list_for_each_entry(t, &recent_net->tables, list)
remove_proc_entry(t->name, recent_net->xt_recent);
recent_net->xt_recent = NULL;
spin_unlock_bh(&recent_lock);
proc_net_remove(net, "xt_recent");
}
#else
......@@ -638,9 +653,6 @@ static int __net_init recent_net_init(struct net *net)
static void __net_exit recent_net_exit(struct net *net)
{
struct recent_net *recent_net = recent_pernet(net);
BUG_ON(!list_empty(&recent_net->tables));
recent_proc_net_exit(net);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册