Commit 9fa684ec authored by David S. Miller

Merge git://git.kernel.org/pub/scm/linux/kernel/git/pablo/nf

Pablo Neira Ayuso says:

====================
Netfilter fixes for net

The following patchset contains a larger than usual batch of Netfilter
fixes for your net tree. This series contains a mixture of old and
recently introduced bugs:

1) Fix a crash when using nft_dynset with nft_set_rbtree, which doesn't
   support set element updates from the packet path. From Liping
   Zhang.

2) Fix leak when nft_expr_clone() fails, from Liping Zhang.

3) Fix a race when inserting new elements to the set hash from the
   packet path, also from Liping.

4) Handle segmented TCP SIP packets properly: avoid letting an INVITE
   in the Allow header create bogus expectations by performing stricter
   SIP request parsing, from Ulrich Weber.

5) nft_parse_u32_check() should return a signed integer so that errors
   can be propagated, from John Linville.

6) Fix wrongly sized allocation of the connlabels extension: allocate
   16 bytes instead of 32, from Florian Westphal.

7) Fix compilation breakage when building the ip_vs_sync code with
   CONFIG_OPTIMIZE_INLINING on x86, from Arnd Bergmann.

8) Destroy the new set if the transaction object cannot be allocated,
   also from Liping Zhang.

9) Route duplicated packets via the device in nft_dup only when the
   user actually sets one, otherwise packets may not follow the right
   route, again from Liping.

10) Fix wrong maximum genetlink attribute definition in IPVS, from
    WANG Cong.

11) Ignore untracked conntrack objects from xt_connmark, from Florian
    Westphal.

12) Allow the CT target to use conntrack helpers registered for
    NFPROTO_UNSPEC, otherwise we cannot use the H.245 helper, from
    Florian.

13) Revisit garbage collection heuristic in the new workqueue-based
    timer approach for conntrack to evict objects earlier, again from
    Florian.

14) Fix crash in nf_tables when inserting an element into a verdict map,
    from Liping Zhang.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
@@ -30,8 +30,7 @@ static inline struct nf_conn_labels *nf_ct_labels_ext_add(struct nf_conn *ct)
     if (net->ct.labels_used == 0)
         return NULL;

-    return nf_ct_ext_add_length(ct, NF_CT_EXT_LABELS,
-                                sizeof(struct nf_conn_labels), GFP_ATOMIC);
+    return nf_ct_ext_add(ct, NF_CT_EXT_LABELS, GFP_ATOMIC);
 #else
     return NULL;
 #endif
......
@@ -145,7 +145,7 @@ static inline enum nft_registers nft_type_to_reg(enum nft_data_types type)
     return type == NFT_DATA_VERDICT ? NFT_REG_VERDICT : NFT_REG_1 * NFT_REG_SIZE / NFT_REG32_SIZE;
 }

-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest);
 unsigned int nft_parse_register(const struct nlattr *attr);
 int nft_dump_register(struct sk_buff *skb, unsigned int attr, unsigned int reg);
@@ -542,7 +542,8 @@ void *nft_set_elem_init(const struct nft_set *set,
                         const struct nft_set_ext_tmpl *tmpl,
                         const u32 *key, const u32 *data,
                         u64 timeout, gfp_t gfp);
-void nft_set_elem_destroy(const struct nft_set *set, void *elem);
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+                          bool destroy_expr);

 /**
  * struct nft_set_gc_batch_head - nf_tables set garbage collection batch
@@ -693,7 +694,6 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
 {
     int err;

-    __module_get(src->ops->type->owner);
     if (src->ops->clone) {
         dst->ops = src->ops;
         err = src->ops->clone(dst, src);
@@ -702,6 +702,8 @@ static inline int nft_expr_clone(struct nft_expr *dst, struct nft_expr *src)
     } else {
         memcpy(dst, src, src->ops->size);
     }
+    __module_get(src->ops->type->owner);
     return 0;
 }
......
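The nft_expr_clone() hunk above fixes a module reference leak: the old code took __module_get() before the fallible clone step, so a failed clone returned an error while the reference was never dropped. The general pattern is to acquire the reference only after every failure point has passed. A minimal userspace sketch of that pattern (all names hypothetical, not kernel API):

    #include <stdio.h>
    #include <string.h>

    static int owner_refcount;              /* stands in for the module refcount */

    struct expr {
        char data[16];
        int (*clone)(struct expr *dst, const struct expr *src);
    };

    static int clone_expr(struct expr *dst, const struct expr *src)
    {
        if (src->clone) {
            if (src->clone(dst, src) < 0)
                return -1;                  /* fail BEFORE taking the reference */
        } else {
            memcpy(dst, src, sizeof(*dst));
        }
        owner_refcount++;                   /* reference taken only on success */
        return 0;
    }

    int main(void)
    {
        struct expr a = { "x", NULL }, b;
        clone_expr(&b, &a);
        printf("refcount: %d\n", owner_refcount);   /* 1 */
        return 0;
    }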
@@ -28,7 +28,7 @@ static void nft_dup_ipv4_eval(const struct nft_expr *expr,
     struct in_addr gw = {
         .s_addr = (__force __be32)regs->data[priv->sreg_addr],
     };
-    int oif = regs->data[priv->sreg_dev];
+    int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;

     nf_dup_ipv4(pkt->net, pkt->skb, pkt->hook, &gw, oif);
 }
@@ -59,7 +59,9 @@ static int nft_dup_ipv4_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
     struct nft_dup_ipv4 *priv = nft_expr_priv(expr);

-    if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
-        nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+    if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+        goto nla_put_failure;
+    if (priv->sreg_dev &&
+        nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
         goto nla_put_failure;
......
@@ -26,7 +26,7 @@ static void nft_dup_ipv6_eval(const struct nft_expr *expr,
 {
     struct nft_dup_ipv6 *priv = nft_expr_priv(expr);
     struct in6_addr *gw = (struct in6_addr *)&regs->data[priv->sreg_addr];
-    int oif = regs->data[priv->sreg_dev];
+    int oif = priv->sreg_dev ? regs->data[priv->sreg_dev] : -1;

     nf_dup_ipv6(pkt->net, pkt->skb, pkt->hook, gw, oif);
 }
@@ -57,7 +57,9 @@ static int nft_dup_ipv6_dump(struct sk_buff *skb, const struct nft_expr *expr)
 {
     struct nft_dup_ipv6 *priv = nft_expr_priv(expr);

-    if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr) ||
-        nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
+    if (nft_dump_register(skb, NFTA_DUP_SREG_ADDR, priv->sreg_addr))
+        goto nla_put_failure;
+    if (priv->sreg_dev &&
+        nft_dump_register(skb, NFTA_DUP_SREG_DEV, priv->sreg_dev))
         goto nla_put_failure;
......
@@ -2845,7 +2845,7 @@ static struct genl_family ip_vs_genl_family = {
     .hdrsize = 0,
     .name    = IPVS_GENL_NAME,
     .version = IPVS_GENL_VERSION,
-    .maxattr = IPVS_CMD_MAX,
+    .maxattr = IPVS_CMD_ATTR_MAX,
     .netnsok = true, /* Make ipvsadm to work on netns */
 };
......
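The maxattr fix matters because genetlink uses .maxattr to size and bound the attribute parse table; declaring the command maximum instead of the attribute maximum makes the kernel validate against the wrong bound. A minimal sketch of why the bound matters, with hypothetical attribute IDs rather than the real IPVS ones:

    #include <stdio.h>

    /* Hypothetical attribute IDs and a policy table indexed by attribute type. */
    enum { ATTR_UNSPEC, ATTR_NAME, ATTR_TIMEOUT, __ATTR_MAX };
    #define ATTR_MAX (__ATTR_MAX - 1)

    static const int policy[ATTR_MAX + 1] = { 0, 1, 2 };

    static int parse_attr(int type)
    {
        if (type < 0 || type > ATTR_MAX)    /* maxattr is the validation bound */
            return -1;                      /* wrong bound accepts/rejects the wrong set */
        return policy[type];
    }

    int main(void)
    {
        printf("%d\n", parse_attr(ATTR_TIMEOUT));  /* ok: 2 */
        printf("%d\n", parse_attr(ATTR_MAX + 1));  /* rejected: -1 */
        return 0;
    }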
@@ -283,6 +283,7 @@ struct ip_vs_sync_buff {
  */
 static void ntoh_seq(struct ip_vs_seq *no, struct ip_vs_seq *ho)
 {
+    memset(ho, 0, sizeof(*ho));
     ho->init_seq       = get_unaligned_be32(&no->init_seq);
     ho->delta          = get_unaligned_be32(&no->delta);
     ho->previous_delta = get_unaligned_be32(&no->previous_delta);
@@ -917,8 +918,10 @@ static void ip_vs_proc_conn(struct netns_ipvs *ipvs, struct ip_vs_conn_param *pa
         kfree(param->pe_data);
     }

-    if (opt)
-        memcpy(&cp->in_seq, opt, sizeof(*opt));
+    if (opt) {
+        cp->in_seq = opt->in_seq;
+        cp->out_seq = opt->out_seq;
+    }
     atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
     cp->state = state;
     cp->old_state = cp->state;
......
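Note how the memcpy() that wrote sizeof(*opt) bytes starting at &cp->in_seq is replaced by per-member assignments, and ntoh_seq() now zeroes its destination first. Member-wise copying does not depend on the two sequence structs being adjacent in the destination. A small illustration with a hypothetical layout, not the real ip_vs structs:

    #include <string.h>

    struct seq  { unsigned int init_seq, delta, previous_delta; };
    struct opts { struct seq in_seq, out_seq; };
    struct conn { struct seq in_seq, out_seq; };  /* real struct has more members */

    static void copy_opts(struct conn *cp, const struct opts *opt)
    {
        /* Fragile alternative: memcpy(&cp->in_seq, opt, sizeof(*opt)) writes
         * past in_seq and assumes out_seq follows it immediately. */
        cp->in_seq  = opt->in_seq;   /* well-defined regardless of layout */
        cp->out_seq = opt->out_seq;
    }

    int main(void)
    {
        struct opts o;
        struct conn c;

        memset(&o, 0, sizeof(o));    /* mirrors the memset() added to ntoh_seq() */
        copy_opts(&c, &o);
        return 0;
    }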
@@ -76,6 +76,7 @@ struct conntrack_gc_work {
     struct delayed_work dwork;
     u32                 last_bucket;
     bool                exiting;
+    long                next_gc_run;
 };

 static __read_mostly struct kmem_cache *nf_conntrack_cachep;
@@ -83,9 +84,11 @@ static __read_mostly spinlock_t nf_conntrack_locks_all_lock;
 static __read_mostly DEFINE_SPINLOCK(nf_conntrack_locks_all_lock);
 static __read_mostly bool nf_conntrack_locks_all;

+/* every gc cycle scans at most 1/GC_MAX_BUCKETS_DIV part of table */
 #define GC_MAX_BUCKETS_DIV 64u
-#define GC_MAX_BUCKETS     8192u
-#define GC_INTERVAL        (5 * HZ)
+/* upper bound of scan intervals */
+#define GC_INTERVAL_MAX    (2 * HZ)
+/* maximum conntracks to evict per gc run */
 #define GC_MAX_EVICTS      256u

 static struct conntrack_gc_work conntrack_gc_work;
@@ -936,13 +939,13 @@ static noinline int early_drop(struct net *net, unsigned int _hash)
 static void gc_worker(struct work_struct *work)
 {
     unsigned int i, goal, buckets = 0, expired_count = 0;
-    unsigned long next_run = GC_INTERVAL;
-    unsigned int ratio, scanned = 0;
     struct conntrack_gc_work *gc_work;
+    unsigned int ratio, scanned = 0;
+    unsigned long next_run;

     gc_work = container_of(work, struct conntrack_gc_work, dwork.work);

-    goal = min(nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV, GC_MAX_BUCKETS);
+    goal = nf_conntrack_htable_size / GC_MAX_BUCKETS_DIV;
     i = gc_work->last_bucket;

     do {
@@ -982,17 +985,47 @@ static void gc_worker(struct work_struct *work)
     if (gc_work->exiting)
         return;

+    /*
+     * Eviction will normally happen from the packet path, and not
+     * from this gc worker.
+     *
+     * This worker is only here to reap expired entries when the system
+     * went idle after a busy period.
+     *
+     * The heuristics below are supposed to balance conflicting goals:
+     *
+     * 1. Minimize time until we notice a stale entry
+     * 2. Maximize scan intervals to not waste cycles
+     *
+     * Normally, expired_count will be 0, this increases the next_run time
+     * to prioritize 2) above.
+     *
+     * As soon as a timed-out entry is found, move towards 1) and increase
+     * the scan frequency.
+     * In case we have lots of evictions the next scan is done immediately.
+     */
     ratio = scanned ? expired_count * 100 / scanned : 0;
-    if (ratio >= 90 || expired_count == GC_MAX_EVICTS)
+    if (ratio >= 90 || expired_count == GC_MAX_EVICTS) {
+        gc_work->next_gc_run = 0;
         next_run = 0;
+    } else if (expired_count) {
+        gc_work->next_gc_run /= 2U;
+        next_run = msecs_to_jiffies(1);
+    } else {
+        if (gc_work->next_gc_run < GC_INTERVAL_MAX)
+            gc_work->next_gc_run += msecs_to_jiffies(1);
+
+        next_run = gc_work->next_gc_run;
+    }

     gc_work->last_bucket = i;
-    schedule_delayed_work(&gc_work->dwork, next_run);
+    queue_delayed_work(system_long_wq, &gc_work->dwork, next_run);
 }

 static void conntrack_gc_work_init(struct conntrack_gc_work *gc_work)
 {
     INIT_DELAYED_WORK(&gc_work->dwork, gc_worker);
+    gc_work->next_gc_run = GC_INTERVAL_MAX;
     gc_work->exiting = false;
 }
@@ -1885,7 +1918,7 @@ int nf_conntrack_init_start(void)
     nf_ct_untracked_status_or(IPS_CONFIRMED | IPS_UNTRACKED);

     conntrack_gc_work_init(&conntrack_gc_work);
-    schedule_delayed_work(&conntrack_gc_work.dwork, GC_INTERVAL);
+    queue_delayed_work(system_long_wq, &conntrack_gc_work.dwork, GC_INTERVAL_MAX);

     return 0;
......
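The gc heuristic above can be read as a standalone backoff policy: rescan immediately when at least 90% of scanned entries had expired or the eviction budget (GC_MAX_EVICTS) was exhausted, halve the interval while any expirations are still being found, and creep back toward the 2-second cap once the table looks clean. A plain-C sketch of that policy, with jiffies replaced by milliseconds and hypothetical constant names:

    #include <stdio.h>

    #define INTERVAL_MAX_MS 2000u   /* stands in for GC_INTERVAL_MAX */
    #define MAX_EVICTS      256u    /* stands in for GC_MAX_EVICTS */

    static unsigned long next_gc_run = INTERVAL_MAX_MS;

    static unsigned long next_interval(unsigned int scanned, unsigned int expired)
    {
        unsigned int ratio = scanned ? expired * 100 / scanned : 0;

        if (ratio >= 90 || expired == MAX_EVICTS) {
            next_gc_run = 0;            /* mostly stale: rescan right away */
            return 0;
        }
        if (expired) {
            next_gc_run /= 2;           /* some expiry seen: ramp up scanning */
            return 1;                   /* ~1ms, like msecs_to_jiffies(1) */
        }
        if (next_gc_run < INTERVAL_MAX_MS)
            next_gc_run += 1;           /* idle: back off toward the cap */
        return next_gc_run;
    }

    int main(void)
    {
        printf("%lu\n", next_interval(1000, 950));  /* 0: immediate rescan */
        printf("%lu\n", next_interval(1000, 10));   /* 1: interval halved */
        printf("%lu\n", next_interval(1000, 0));    /* idle: backoff grows again */
        return 0;
    }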
@@ -138,9 +138,14 @@ __nf_conntrack_helper_find(const char *name, u16 l3num, u8 protonum)
     for (i = 0; i < nf_ct_helper_hsize; i++) {
         hlist_for_each_entry_rcu(h, &nf_ct_helper_hash[i], hnode) {
-            if (!strcmp(h->name, name) &&
-                h->tuple.src.l3num == l3num &&
-                h->tuple.dst.protonum == protonum)
+            if (strcmp(h->name, name))
+                continue;
+
+            if (h->tuple.src.l3num != NFPROTO_UNSPEC &&
+                h->tuple.src.l3num != l3num)
+                continue;
+
+            if (h->tuple.dst.protonum == protonum)
                 return h;
         }
     }
......
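The lookup now treats a helper registered with NFPROTO_UNSPEC as matching any l3num, which is what lets a family-agnostic helper such as H.245 be found by the CT target. A small sketch of the relaxed predicate, with hypothetical types standing in for the conntrack tuple:

    #include <stdbool.h>
    #include <string.h>

    #define L3_UNSPEC 0   /* stands in for NFPROTO_UNSPEC */

    struct helper {
        const char    *name;
        unsigned short l3num;
        unsigned char  protonum;
    };

    static bool helper_matches(const struct helper *h, const char *name,
                               unsigned short l3num, unsigned char protonum)
    {
        if (strcmp(h->name, name))
            return false;
        /* A family-agnostic registration matches every l3num. */
        if (h->l3num != L3_UNSPEC && h->l3num != l3num)
            return false;
        return h->protonum == protonum;
    }

    int main(void)
    {
        struct helper h245 = { "H.245", L3_UNSPEC, 6 /* TCP */ };
        return helper_matches(&h245, "H.245", 2 /* IPv4 */, 6) ? 0 : 1;
    }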
@@ -1436,9 +1436,12 @@ static int process_sip_request(struct sk_buff *skb, unsigned int protoff,
         handler = &sip_handlers[i];
         if (handler->request == NULL)
             continue;
-        if (*datalen < handler->len ||
+        if (*datalen < handler->len + 2 ||
             strncasecmp(*dptr, handler->method, handler->len))
             continue;
+        if ((*dptr)[handler->len] != ' ' ||
+            !isalpha((*dptr)[handler->len+1]))
+            continue;

         if (ct_sip_get_header(ct, *dptr, 0, *datalen, SIP_HDR_CSEQ,
                               &matchoff, &matchlen) <= 0) {
......
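Requiring the method token to be followed by a space and a letter means a real request line like "INVITE sip:..." still matches, while a bare "INVITE" inside the Allow header of a segmented TCP message no longer seeds bogus expectations. A userspace sketch of the same check:

    #include <ctype.h>
    #include <stdbool.h>
    #include <stdio.h>
    #include <string.h>
    #include <strings.h>

    /* Accept only a real request line: METHOD, one space, then a letter. */
    static bool is_sip_request(const char *data, size_t datalen,
                               const char *method, size_t mlen)
    {
        if (datalen < mlen + 2 ||
            strncasecmp(data, method, mlen))
            return false;
        return data[mlen] == ' ' && isalpha((unsigned char)data[mlen + 1]);
    }

    int main(void)
    {
        const char *req = "INVITE sip:user@example.com SIP/2.0";
        const char *hdr = "Allow: INVITE, ACK, BYE";

        printf("%d\n", is_sip_request(req, strlen(req), "INVITE", 6));          /* 1 */
        printf("%d\n", is_sip_request(hdr + 7, strlen(hdr + 7), "INVITE", 6));  /* 0 */
        return 0;
    }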
@@ -2956,12 +2956,14 @@ static int nf_tables_newset(struct net *net, struct sock *nlsk,
     err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
     if (err < 0)
-        goto err2;
+        goto err3;

     list_add_tail_rcu(&set->list, &table->sets);
     table->use++;
     return 0;

+err3:
+    ops->destroy(set);
 err2:
     kfree(set);
 err1:
@@ -3452,14 +3454,15 @@ void *nft_set_elem_init(const struct nft_set *set,
     return elem;
 }

-void nft_set_elem_destroy(const struct nft_set *set, void *elem)
+void nft_set_elem_destroy(const struct nft_set *set, void *elem,
+                          bool destroy_expr)
 {
     struct nft_set_ext *ext = nft_set_elem_ext(set, elem);

     nft_data_uninit(nft_set_ext_key(ext), NFT_DATA_VALUE);
     if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
         nft_data_uninit(nft_set_ext_data(ext), set->dtype);
-    if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
+    if (destroy_expr && nft_set_ext_exists(ext, NFT_SET_EXT_EXPR))
         nf_tables_expr_destroy(NULL, nft_set_ext_expr(ext));

     kfree(elem);
@@ -3565,6 +3568,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
         dreg = nft_type_to_reg(set->dtype);
         list_for_each_entry(binding, &set->bindings, list) {
             struct nft_ctx bind_ctx = {
+                .net   = ctx->net,
                 .afi   = ctx->afi,
                 .table = ctx->table,
                 .chain = (struct nft_chain *)binding->chain,
@@ -3812,7 +3816,7 @@ void nft_set_gc_batch_release(struct rcu_head *rcu)
     gcb = container_of(rcu, struct nft_set_gc_batch, head.rcu);
     for (i = 0; i < gcb->head.cnt; i++)
-        nft_set_elem_destroy(gcb->head.set, gcb->elems[i]);
+        nft_set_elem_destroy(gcb->head.set, gcb->elems[i], true);
     kfree(gcb);
 }
 EXPORT_SYMBOL_GPL(nft_set_gc_batch_release);
@@ -4030,7 +4034,7 @@ static void nf_tables_commit_release(struct nft_trans *trans)
         break;
     case NFT_MSG_DELSETELEM:
         nft_set_elem_destroy(nft_trans_elem_set(trans),
-                             nft_trans_elem(trans).priv);
+                             nft_trans_elem(trans).priv, true);
         break;
     }
     kfree(trans);
@@ -4171,7 +4175,7 @@ static void nf_tables_abort_release(struct nft_trans *trans)
         break;
     case NFT_MSG_NEWSETELEM:
         nft_set_elem_destroy(nft_trans_elem_set(trans),
-                             nft_trans_elem(trans).priv);
+                             nft_trans_elem(trans).priv, true);
         break;
     }
     kfree(trans);
@@ -4421,7 +4425,7 @@ static int nf_tables_check_loops(const struct nft_ctx *ctx,
  * Otherwise a 0 is returned and the attribute value is stored in the
  * destination variable.
  */
-unsigned int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
+int nft_parse_u32_check(const struct nlattr *attr, int max, u32 *dest)
 {
     u32 val;
......
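The signature change above is the whole bug: with an unsigned return type, a negative errno such as -ERANGE is converted to a huge positive value, so the callers' err < 0 tests can never fire. A tiny standalone demonstration (not the kernel function):

    #include <errno.h>
    #include <stdio.h>

    /* Buggy: -ERANGE becomes a large positive number on return. */
    static unsigned int parse_u32_buggy(unsigned long val, unsigned int max)
    {
        if (val > max)
            return -ERANGE;  /* wraps to UINT_MAX - 33 */
        return 0;
    }

    static int parse_u32_fixed(unsigned long val, unsigned int max)
    {
        if (val > max)
            return -ERANGE;  /* stays negative, caller's err < 0 works */
        return 0;
    }

    int main(void)
    {
        printf("buggy < 0? %d\n", (int)(parse_u32_buggy(10, 5) < 0));  /* 0: error lost */
        printf("fixed < 0? %d\n", parse_u32_fixed(10, 5) < 0);         /* 1 */
        return 0;
    }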
@@ -44,18 +44,22 @@ static void *nft_dynset_new(struct nft_set *set, const struct nft_expr *expr,
                             &regs->data[priv->sreg_key],
                             &regs->data[priv->sreg_data],
                             timeout, GFP_ATOMIC);
-    if (elem == NULL) {
-        if (set->size)
-            atomic_dec(&set->nelems);
-        return NULL;
-    }
+    if (elem == NULL)
+        goto err1;

     ext = nft_set_elem_ext(set, elem);
     if (priv->expr != NULL &&
         nft_expr_clone(nft_set_ext_expr(ext), priv->expr) < 0)
-        return NULL;
+        goto err2;

     return elem;
+
+err2:
+    nft_set_elem_destroy(set, elem, false);
+err1:
+    if (set->size)
+        atomic_dec(&set->nelems);
+    return NULL;
 }

 static void nft_dynset_eval(const struct nft_expr *expr,
@@ -139,6 +143,9 @@ static int nft_dynset_init(const struct nft_ctx *ctx,
         return PTR_ERR(set);
     }

+    if (set->ops->update == NULL)
+        return -EOPNOTSUPP;
+
     if (set->flags & NFT_SET_CONSTANT)
         return -EBUSY;
......
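The restructured error path is a classic goto unwind: err2 destroys the element without running expression destructors (destroy_expr is false, since the clone never completed), and err1 releases the slot that was reserved against the set size. A minimal userspace sketch of the same unwind shape (names hypothetical, malloc standing in for the fallible steps):

    #include <stdlib.h>

    struct elem { void *expr_state; };

    static struct elem *new_elem(int *nelems, int size_limited)
    {
        struct elem *e = calloc(1, sizeof(*e));
        if (e == NULL)
            goto err1;
        e->expr_state = malloc(32);      /* stands in for nft_expr_clone() */
        if (e->expr_state == NULL)
            goto err2;
        return e;
    err2:
        free(e);                         /* element freed; no expr to destroy */
    err1:
        if (size_limited)
            (*nelems)--;                 /* give back the reserved slot */
        return NULL;
    }

    int main(void)
    {
        int nelems = 1;                  /* slot already reserved by the caller */
        struct elem *e = new_elem(&nelems, 1);
        return e ? 0 : 1;
    }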
@@ -98,7 +98,7 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
                             const struct nft_set_ext **ext)
 {
     struct nft_hash *priv = nft_set_priv(set);
-    struct nft_hash_elem *he;
+    struct nft_hash_elem *he, *prev;
     struct nft_hash_cmp_arg arg = {
         .genmask = NFT_GENMASK_ANY,
         .set     = set,
@@ -112,15 +112,24 @@ static bool nft_hash_update(struct nft_set *set, const u32 *key,
     he = new(set, expr, regs);
     if (he == NULL)
         goto err1;
-    if (rhashtable_lookup_insert_key(&priv->ht, &arg, &he->node,
-                                     nft_hash_params))
+
+    prev = rhashtable_lookup_get_insert_key(&priv->ht, &arg, &he->node,
+                                            nft_hash_params);
+    if (IS_ERR(prev))
         goto err2;
+
+    /* Another cpu may race to insert the element with the same key */
+    if (prev) {
+        nft_set_elem_destroy(set, he, true);
+        he = prev;
+    }
+
 out:
     *ext = &he->ext;
     return true;
+
 err2:
-    nft_set_elem_destroy(set, he);
+    nft_set_elem_destroy(set, he, true);
 err1:
     return false;
 }
@@ -332,7 +341,7 @@ static int nft_hash_init(const struct nft_set *set,

 static void nft_hash_elem_destroy(void *ptr, void *arg)
 {
-    nft_set_elem_destroy((const struct nft_set *)arg, ptr);
+    nft_set_elem_destroy((const struct nft_set *)arg, ptr, true);
 }

 static void nft_hash_destroy(const struct nft_set *set)
......
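rhashtable_lookup_get_insert_key() makes lookup-and-insert a single atomic operation and hands back the entry that won a concurrent race, so the loser destroys its own copy and adopts the winner's. A self-contained sketch of that insert-or-get contract, using a mutex-protected list as a stand-in for the rhashtable (hypothetical names; compile with -lpthread):

    #include <pthread.h>
    #include <stdlib.h>

    struct node { struct node *next; int key; };

    static struct node *head;
    static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;

    /* Insert n, or return the existing node if another thread won the race. */
    static struct node *insert_or_get(struct node *n)
    {
        struct node *cur;

        pthread_mutex_lock(&lock);
        for (cur = head; cur; cur = cur->next) {
            if (cur->key == n->key) {
                pthread_mutex_unlock(&lock);
                return cur;              /* lost the race: winner's node */
            }
        }
        n->next = head;
        head = n;
        pthread_mutex_unlock(&lock);
        return NULL;                     /* inserted; no previous entry */
    }

    static struct node *update(int key)
    {
        struct node *he = malloc(sizeof(*he)), *prev;

        if (he == NULL)
            return NULL;
        he->key = key;
        he->next = NULL;
        prev = insert_or_get(he);
        if (prev) {                      /* another cpu/thread raced us */
            free(he);                    /* destroy our copy ... */
            he = prev;                   /* ... and use the winner's */
        }
        return he;
    }

    int main(void)
    {
        struct node *a = update(7), *b = update(7);
        return (a == b) ? 0 : 1;         /* second caller reuses first node */
    }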
@@ -266,7 +266,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
     while ((node = priv->root.rb_node) != NULL) {
         rb_erase(node, &priv->root);
         rbe = rb_entry(node, struct nft_rbtree_elem, node);
-        nft_set_elem_destroy(set, rbe);
+        nft_set_elem_destroy(set, rbe, true);
     }
 }
......
@@ -44,7 +44,7 @@ connmark_tg(struct sk_buff *skb, const struct xt_action_param *par)
     u_int32_t newmark;

     ct = nf_ct_get(skb, &ctinfo);
-    if (ct == NULL)
+    if (ct == NULL || nf_ct_is_untracked(ct))
         return XT_CONTINUE;

     switch (info->mode) {
@@ -97,7 +97,7 @@ connmark_mt(const struct sk_buff *skb, struct xt_action_param *par)
     const struct nf_conn *ct;

     ct = nf_ct_get(skb, &ctinfo);
-    if (ct == NULL)
+    if (ct == NULL || nf_ct_is_untracked(ct))
         return false;

     return ((ct->mark & info->mask) == info->mark) ^ info->invert;
......