diff --git a/include/net/netns/conntrack.h b/include/net/netns/conntrack.h index a1d83cc8bf859d356b9d4b437c4d03590fa25453..923cb20051edfbb93a3fb84c8c78b7dae0993c82 100644 --- a/include/net/netns/conntrack.h +++ b/include/net/netns/conntrack.h @@ -71,6 +71,7 @@ struct netns_ct { struct hlist_head *expect_hash; struct hlist_nulls_head unconfirmed; struct hlist_nulls_head dying; + struct hlist_nulls_head tmpl; struct ip_conntrack_stat __percpu *stat; struct nf_ct_event_notifier __rcu *nf_conntrack_event_cb; struct nf_exp_event_notifier __rcu *nf_expect_event_cb; diff --git a/include/net/netns/x_tables.h b/include/net/netns/x_tables.h index 591db7d657a37d42107b38582cd8a45ad5838552..c24060ee411e9e15a91a0fc5bd189eef8425869b 100644 --- a/include/net/netns/x_tables.h +++ b/include/net/netns/x_tables.h @@ -8,6 +8,7 @@ struct ebt_table; struct netns_xt { struct list_head tables[NFPROTO_NUMPROTO]; + bool notrack_deprecated_warning; #if defined(CONFIG_BRIDGE_NF_EBTABLES) || \ defined(CONFIG_BRIDGE_NF_EBTABLES_MODULE) struct ebt_table *broute_table; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 51f13f8ec724f8c5dad543073e8a0d82639b555a..04b18c1ac3458503a2b5d79bd7fd1547c13e76d7 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -81,6 +81,7 @@ static void send_reset(struct sk_buff *oldskb, int hook) niph->saddr = oiph->daddr; niph->daddr = oiph->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); memset(tcph, 0, sizeof(*tcph)); tcph->source = oth->dest; diff --git a/net/ipv4/netfilter/iptable_nat.c b/net/ipv4/netfilter/iptable_nat.c index da2c8a368f68d9c6b72c605593aa5b83e0622344..eeaff7e4acb5c8da1c042769ac4c805579260bef 100644 --- a/net/ipv4/netfilter/iptable_nat.c +++ b/net/ipv4/netfilter/iptable_nat.c @@ -124,23 +124,28 @@ nf_nat_ipv4_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fd4fb34c51c7f5b9bd62294de2abb6da943ee494..029623dbd4118d18667a83d209131dc64b5ec7c4 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -132,6 +132,7 @@ static void send_reset(struct net *net, struct sk_buff *oldskb) ip6h->saddr = oip6h->daddr; ip6h->daddr = oip6h->saddr; + skb_reset_transport_header(nskb); tcph = (struct tcphdr *)skb_put(nskb, sizeof(struct tcphdr)); /* Truncate to length (no data) */ tcph->doff = sizeof(struct tcphdr)/4; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index 6c8ae24b85eb15f525e2e43a1b0e7f93068e8d85..e0e788d25b1431624ae9470f0af904803fe5d92a 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -127,23 +127,28 @@ nf_nat_ipv6_fn(unsigned int hooknum, ret = nf_nat_rule_find(skb, hooknum, in, out, ct); if (ret != NF_ACCEPT) return ret; - } else + } else { pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; + } break; default: /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) { - nf_ct_kill_acct(ct, ctinfo, skb); - return NF_DROP; - } + if (nf_nat_oif_changed(hooknum, ctinfo, nat, out)) + goto oif_changed; } return nf_nat_packet(ct, ctinfo, hooknum, skb); + +oif_changed: + nf_ct_kill_acct(ct, ctinfo, skb); + return NF_DROP; } static unsigned int diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 00ee17c3e8939b1076ae2c2026ed6dfa1b3f68de..137e245860ab41a9ec923a8655f538c8cc8d4e52 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -81,8 +81,8 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, } protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* - * (protoff == skb->len) mean that the packet doesn't have no data - * except of IPv6 & ext headers. but it's tracked anyway. - YK + * (protoff == skb->len) means the packet has not data, just + * IPv6 and possibly extensions headers, but it is tracked anyway */ if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 22c8ea9511852e963b982ad68ed7066bda262ad2..3dacecc9906597e3f44fa9475dda0afb8f4981c2 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -311,7 +311,10 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, else fq->q.fragments = skb; - skb->dev = NULL; + if (skb->dev) { + fq->iif = skb->dev->ifindex; + skb->dev = NULL; + } fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; if (payload_len > fq->q.max_size) diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index fefa514b99170aae55620e4007565b0ad3e814b4..390f96cc8ed4443fc9b53b3bce9712549105dee5 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -680,6 +680,10 @@ config NETFILTER_XT_TARGET_NFQUEUE To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_NOTRACK + tristate '"NOTRACK" target support (DEPRECATED)' + select NETFILTER_XT_TARGET_CT + config NETFILTER_XT_TARGET_RATEEST tristate '"RATEEST" target support' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c index 08cdc71d8e8714b75782379929fa6b050dafa7f4..016d95ead930cba8e6d2e9335cd2b64f473c09d1 100644 --- a/net/netfilter/nf_conntrack_core.c +++ b/net/netfilter/nf_conntrack_core.c @@ -1526,6 +1526,7 @@ static int nf_conntrack_init_init_net(void) */ #define UNCONFIRMED_NULLS_VAL ((1<<30)+0) #define DYING_NULLS_VAL ((1<<30)+1) +#define TEMPLATE_NULLS_VAL ((1<<30)+2) static int nf_conntrack_init_net(struct net *net) { @@ -1534,6 +1535,7 @@ static int nf_conntrack_init_net(struct net *net) atomic_set(&net->ct.count, 0); INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, UNCONFIRMED_NULLS_VAL); INIT_HLIST_NULLS_HEAD(&net->ct.dying, DYING_NULLS_VAL); + INIT_HLIST_NULLS_HEAD(&net->ct.tmpl, TEMPLATE_NULLS_VAL); net->ct.stat = alloc_percpu(struct ip_conntrack_stat); if (!net->ct.stat) { ret = -ENOMEM; diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 4e078cd84d83c2c15c70ab4270b87a05e590f4ad..627b0e50b2389120e86ed107a3af01d690e07a29 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -2624,7 +2624,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (!help) { if (!cda[CTA_EXPECT_TIMEOUT]) { err = -EINVAL; - goto out; + goto err_out; } exp->timeout.expires = jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index 9f199f2e31fae16ff667e7a1f230e6826d015794..92fd8eca0d315ff241b066e3fab3c6f9fcdaa1bb 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include @@ -384,6 +385,7 @@ __build_packet_message(struct nfulnl_instance *inst, struct nfgenmsg *nfmsg; sk_buff_data_t old_tail = inst->skb->tail; struct sock *sk; + const unsigned char *hwhdrp; nlh = nlmsg_put(inst->skb, 0, 0, NFNL_SUBSYS_ULOG << 8 | NFULNL_MSG_PACKET, @@ -485,9 +487,17 @@ __build_packet_message(struct nfulnl_instance *inst, if (indev && skb_mac_header_was_set(skb)) { if (nla_put_be16(inst->skb, NFULA_HWTYPE, htons(skb->dev->type)) || nla_put_be16(inst->skb, NFULA_HWLEN, - htons(skb->dev->hard_header_len)) || - nla_put(inst->skb, NFULA_HWHEADER, skb->dev->hard_header_len, - skb_mac_header(skb))) + htons(skb->dev->hard_header_len))) + goto nla_put_failure; + + hwhdrp = skb_mac_header(skb); + + if (skb->dev->type == ARPHRD_SIT) + hwhdrp -= ETH_HLEN; + + if (hwhdrp >= skb->head && + nla_put(inst->skb, NFULA_HWHEADER, + skb->dev->hard_header_len, hwhdrp)) goto nla_put_failure; } diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index ae7f5daeee4362bff5aee9a30a85347de70a2ccc..2a0843081840c0cd937a3fb0e749965bf9e28d0c 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -149,6 +149,10 @@ static int xt_ct_tg_check_v0(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; @@ -289,6 +293,10 @@ static int xt_ct_tg_check_v1(const struct xt_tgchk_param *par) __set_bit(IPS_TEMPLATE_BIT, &ct->status); __set_bit(IPS_CONFIRMED_BIT, &ct->status); + + /* Overload tuple linked list to put us in template list. */ + hlist_nulls_add_head_rcu(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode, + &par->net->ct.tmpl); out: info->ct = ct; return 0; @@ -377,14 +385,60 @@ static struct xt_target xt_ct_tg_reg[] __read_mostly = { }, }; +static unsigned int +notrack_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + /* Previously seen (loopback)? Ignore. */ + if (skb->nfct != NULL) + return XT_CONTINUE; + + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + + return XT_CONTINUE; +} + +static int notrack_chk(const struct xt_tgchk_param *par) +{ + if (!par->net->xt.notrack_deprecated_warning) { + pr_info("netfilter: NOTRACK target is deprecated, " + "use CT instead or upgrade iptables\n"); + par->net->xt.notrack_deprecated_warning = true; + } + return 0; +} + +static struct xt_target notrack_tg_reg __read_mostly = { + .name = "NOTRACK", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = notrack_chk, + .target = notrack_tg, + .table = "raw", + .me = THIS_MODULE, +}; + static int __init xt_ct_tg_init(void) { - return xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + int ret; + + ret = xt_register_target(¬rack_tg_reg); + if (ret < 0) + return ret; + + ret = xt_register_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + if (ret < 0) { + xt_unregister_target(¬rack_tg_reg); + return ret; + } + return 0; } static void __exit xt_ct_tg_exit(void) { xt_unregister_targets(xt_ct_tg_reg, ARRAY_SIZE(xt_ct_tg_reg)); + xt_unregister_target(¬rack_tg_reg); } module_init(xt_ct_tg_init); @@ -394,3 +448,5 @@ MODULE_LICENSE("GPL"); MODULE_DESCRIPTION("Xtables: connection tracking target"); MODULE_ALIAS("ipt_CT"); MODULE_ALIAS("ip6t_CT"); +MODULE_ALIAS("ipt_NOTRACK"); +MODULE_ALIAS("ip6t_NOTRACK"); diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index 26a668a84aa288a82dc3eb243df2728a3169b2a2..a9d7af953ceb8bcd1d961eef21d0f3cb3ee22919 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -157,11 +157,22 @@ dsthash_find(const struct xt_hashlimit_htable *ht, /* allocate dsthash_ent, initialize dst, put in htable and lock it */ static struct dsthash_ent * dsthash_alloc_init(struct xt_hashlimit_htable *ht, - const struct dsthash_dst *dst) + const struct dsthash_dst *dst, bool *race) { struct dsthash_ent *ent; spin_lock(&ht->lock); + + /* Two or more packets may race to create the same entry in the + * hashtable, double check if this packet lost race. + */ + ent = dsthash_find(ht, dst); + if (ent != NULL) { + spin_unlock(&ht->lock); + *race = true; + return ent; + } + /* initialize hash with random val at the time we allocate * the first hashtable entry */ if (unlikely(!ht->rnd_initialized)) { @@ -318,7 +329,10 @@ static void htable_destroy(struct xt_hashlimit_htable *hinfo) parent = hashlimit_net->ipt_hashlimit; else parent = hashlimit_net->ip6t_hashlimit; - remove_proc_entry(hinfo->pde->name, parent); + + if(parent != NULL) + remove_proc_entry(hinfo->pde->name, parent); + htable_selective_cleanup(hinfo, select_all); vfree(hinfo); } @@ -585,6 +599,7 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) unsigned long now = jiffies; struct dsthash_ent *dh; struct dsthash_dst dst; + bool race = false; u32 cost; if (hashlimit_init_dst(hinfo, &dst, skb, par->thoff) < 0) @@ -593,13 +608,18 @@ hashlimit_mt(const struct sk_buff *skb, struct xt_action_param *par) rcu_read_lock_bh(); dh = dsthash_find(hinfo, &dst); if (dh == NULL) { - dh = dsthash_alloc_init(hinfo, &dst); + dh = dsthash_alloc_init(hinfo, &dst, &race); if (dh == NULL) { rcu_read_unlock_bh(); goto hotdrop; + } else if (race) { + /* Already got an entry, update expiration timeout */ + dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_recalc(dh, now, hinfo->cfg.mode); + } else { + dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); + rateinfo_init(dh, hinfo); } - dh->expires = jiffies + msecs_to_jiffies(hinfo->cfg.expire); - rateinfo_init(dh, hinfo); } else { /* update expiration timeout */ dh->expires = now + msecs_to_jiffies(hinfo->cfg.expire); @@ -856,6 +876,27 @@ static int __net_init hashlimit_proc_net_init(struct net *net) static void __net_exit hashlimit_proc_net_exit(struct net *net) { + struct xt_hashlimit_htable *hinfo; + struct hlist_node *pos; + struct proc_dir_entry *pde; + struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); + + /* recent_net_exit() is called before recent_mt_destroy(). Make sure + * that the parent xt_recent proc entry is is empty before trying to + * remove it. + */ + mutex_lock(&hashlimit_mutex); + pde = hashlimit_net->ipt_hashlimit; + if (pde == NULL) + pde = hashlimit_net->ip6t_hashlimit; + + hlist_for_each_entry(hinfo, pos, &hashlimit_net->htables, node) + remove_proc_entry(hinfo->pde->name, pde); + + hashlimit_net->ipt_hashlimit = NULL; + hashlimit_net->ip6t_hashlimit = NULL; + mutex_unlock(&hashlimit_mutex); + proc_net_remove(net, "ipt_hashlimit"); #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) proc_net_remove(net, "ip6t_hashlimit"); @@ -872,9 +913,6 @@ static int __net_init hashlimit_net_init(struct net *net) static void __net_exit hashlimit_net_exit(struct net *net) { - struct hashlimit_net *hashlimit_net = hashlimit_pernet(net); - - BUG_ON(!hlist_empty(&hashlimit_net->htables)); hashlimit_proc_net_exit(net); } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 4635c9b0045981d862cb3b31a34ac9ab735eb85e..dab053e2a1a2acaeeba340de232cc85434d9949a 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -431,7 +431,8 @@ static void recent_mt_destroy(const struct xt_mtdtor_param *par) list_del(&t->list); spin_unlock_bh(&recent_lock); #ifdef CONFIG_PROC_FS - remove_proc_entry(t->name, recent_net->xt_recent); + if (recent_net->xt_recent != NULL) + remove_proc_entry(t->name, recent_net->xt_recent); #endif recent_table_flush(t); kfree(t); @@ -615,6 +616,20 @@ static int __net_init recent_proc_net_init(struct net *net) static void __net_exit recent_proc_net_exit(struct net *net) { + struct recent_net *recent_net = recent_pernet(net); + struct recent_table *t; + + /* recent_net_exit() is called before recent_mt_destroy(). Make sure + * that the parent xt_recent proc entry is is empty before trying to + * remove it. + */ + spin_lock_bh(&recent_lock); + list_for_each_entry(t, &recent_net->tables, list) + remove_proc_entry(t->name, recent_net->xt_recent); + + recent_net->xt_recent = NULL; + spin_unlock_bh(&recent_lock); + proc_net_remove(net, "xt_recent"); } #else @@ -638,9 +653,6 @@ static int __net_init recent_net_init(struct net *net) static void __net_exit recent_net_exit(struct net *net) { - struct recent_net *recent_net = recent_pernet(net); - - BUG_ON(!list_empty(&recent_net->tables)); recent_proc_net_exit(net); }