提交 8fb91c35 编写于 作者: D David S. Miller

Merge branch 'inet-frags-avoid-possible-races-at-netns-dismantle'

Eric Dumazet says:

====================
inet: frags: avoid possible races at netns dismantle

This patch series fixes a race happening on netns dismantle with
frag queues. While rhashtable_free_and_destroy() is running,
concurrent timers might run inet_frag_kill() and attempt
rhashtable_remove_fast() calls. This is not allowed by
rhashtable logic.

Since I do not want to add expensive synchronize_rcu() calls
in the netns dismantle path, I had to no longer inline
netns_frags structures, but dynamically allocate them.

The ten first patches make this preparation, so that
the last patch clearly shows the fix.

As this patch series is not exactly trivial, I chose to
target 5.3. We will backport it once soaked a bit.
====================
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
......@@ -4,18 +4,22 @@
#include <linux/rhashtable-types.h>
struct netns_frags {
/* Per netns frag queues directory */
struct fqdir {
/* sysctls */
long high_thresh;
long low_thresh;
int timeout;
int max_dist;
struct inet_frags *f;
struct net *net;
bool dead;
struct rhashtable rhashtable ____cacheline_aligned_in_smp;
/* Keep atomic mem on separate cachelines in structs that include it */
atomic_long_t mem ____cacheline_aligned_in_smp;
struct rcu_work destroy_rwork;
};
/**
......@@ -24,11 +28,13 @@ struct netns_frags {
* @INET_FRAG_FIRST_IN: first fragment has arrived
* @INET_FRAG_LAST_IN: final fragment has arrived
* @INET_FRAG_COMPLETE: frag queue has been processed and is due for destruction
* @INET_FRAG_HASH_DEAD: inet_frag_kill() has not removed fq from rhashtable
*/
enum {
INET_FRAG_FIRST_IN = BIT(0),
INET_FRAG_LAST_IN = BIT(1),
INET_FRAG_COMPLETE = BIT(2),
INET_FRAG_HASH_DEAD = BIT(3),
};
struct frag_v4_compare_key {
......@@ -64,7 +70,7 @@ struct frag_v6_compare_key {
* @meat: length of received fragments so far
* @flags: fragment queue flags
* @max_size: maximum received fragment size
* @net: namespace that this frag belongs to
* @fqdir: pointer to struct fqdir
* @rcu: rcu head for freeing deferall
*/
struct inet_frag_queue {
......@@ -84,7 +90,7 @@ struct inet_frag_queue {
int meat;
__u8 flags;
u16 max_size;
struct netns_frags *net;
struct fqdir *fqdir;
struct rcu_head rcu;
};
......@@ -103,16 +109,30 @@ struct inet_frags {
int inet_frags_init(struct inet_frags *);
void inet_frags_fini(struct inet_frags *);
static inline int inet_frags_init_net(struct netns_frags *nf)
static inline int fqdir_init(struct fqdir **fqdirp, struct inet_frags *f,
struct net *net)
{
atomic_long_set(&nf->mem, 0);
return rhashtable_init(&nf->rhashtable, &nf->f->rhash_params);
struct fqdir *fqdir = kzalloc(sizeof(*fqdir), GFP_KERNEL);
int res;
if (!fqdir)
return -ENOMEM;
fqdir->f = f;
fqdir->net = net;
res = rhashtable_init(&fqdir->rhashtable, &fqdir->f->rhash_params);
if (res < 0) {
kfree(fqdir);
return res;
}
*fqdirp = fqdir;
return 0;
}
void inet_frags_exit_net(struct netns_frags *nf);
void fqdir_exit(struct fqdir *fqdir);
void inet_frag_kill(struct inet_frag_queue *q);
void inet_frag_destroy(struct inet_frag_queue *q);
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key);
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key);
/* Free all skbs in the queue; return the sum of their truesizes. */
unsigned int inet_frag_rbtree_purge(struct rb_root *root);
......@@ -125,19 +145,19 @@ static inline void inet_frag_put(struct inet_frag_queue *q)
/* Memory Tracking Functions. */
static inline long frag_mem_limit(const struct netns_frags *nf)
static inline long frag_mem_limit(const struct fqdir *fqdir)
{
return atomic_long_read(&nf->mem);
return atomic_long_read(&fqdir->mem);
}
static inline void sub_frag_mem_limit(struct netns_frags *nf, long val)
static inline void sub_frag_mem_limit(struct fqdir *fqdir, long val)
{
atomic_long_sub(val, &nf->mem);
atomic_long_sub(val, &fqdir->mem);
}
static inline void add_frag_mem_limit(struct netns_frags *nf, long val)
static inline void add_frag_mem_limit(struct fqdir *fqdir, long val)
{
atomic_long_add(val, &nf->mem);
atomic_long_add(val, &fqdir->mem);
}
/* RFC 3168 support :
......
......@@ -16,7 +16,7 @@ struct netns_sysctl_lowpan {
struct netns_ieee802154_lowpan {
struct netns_sysctl_lowpan sysctl;
struct netns_frags frags;
struct fqdir *fqdir;
};
#endif
......@@ -72,7 +72,7 @@ struct netns_ipv4 {
struct inet_peer_base *peers;
struct sock * __percpu *tcp_sk;
struct netns_frags frags;
struct fqdir *fqdir;
#ifdef CONFIG_NETFILTER
struct xt_table *iptable_filter;
struct xt_table *iptable_mangle;
......
......@@ -58,7 +58,7 @@ struct netns_ipv6 {
struct ipv6_devconf *devconf_all;
struct ipv6_devconf *devconf_dflt;
struct inet_peer_base *peers;
struct netns_frags frags;
struct fqdir *fqdir;
#ifdef CONFIG_NETFILTER
struct xt_table *ip6table_filter;
struct xt_table *ip6table_mangle;
......@@ -116,7 +116,7 @@ struct netns_ipv6 {
#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6)
struct netns_nf_frag {
struct netns_frags frags;
struct fqdir *fqdir;
};
#endif
......
......@@ -79,7 +79,7 @@ fq_find(struct net *net, const struct lowpan_802154_cb *cb,
key.src = *src;
key.dst = *dst;
q = inet_frag_find(&ieee802154_lowpan->frags, &key);
q = inet_frag_find(ieee802154_lowpan->fqdir, &key);
if (!q)
return NULL;
......@@ -139,7 +139,7 @@ static int lowpan_frag_queue(struct lowpan_frag_queue *fq,
fq->q.flags |= INET_FRAG_FIRST_IN;
fq->q.meat += skb->len;
add_frag_mem_limit(fq->q.net, skb->truesize);
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
if (fq->q.flags == (INET_FRAG_FIRST_IN | INET_FRAG_LAST_IN) &&
fq->q.meat == fq->q.len) {
......@@ -326,23 +326,18 @@ int lowpan_frag_rcv(struct sk_buff *skb, u8 frag_type)
static struct ctl_table lowpan_frags_ns_ctl_table[] = {
{
.procname = "6lowpanfrag_high_thresh",
.data = &init_net.ieee802154_lowpan.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ieee802154_lowpan.frags.low_thresh
},
{
.procname = "6lowpanfrag_low_thresh",
.data = &init_net.ieee802154_lowpan.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ieee802154_lowpan.frags.high_thresh
},
{
.procname = "6lowpanfrag_time",
.data = &init_net.ieee802154_lowpan.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
......@@ -377,17 +372,17 @@ static int __net_init lowpan_frags_ns_sysctl_register(struct net *net)
if (table == NULL)
goto err_alloc;
table[0].data = &ieee802154_lowpan->frags.high_thresh;
table[0].extra1 = &ieee802154_lowpan->frags.low_thresh;
table[1].data = &ieee802154_lowpan->frags.low_thresh;
table[1].extra2 = &ieee802154_lowpan->frags.high_thresh;
table[2].data = &ieee802154_lowpan->frags.timeout;
/* Don't export sysctls to unprivileged users */
if (net->user_ns != &init_user_ns)
table[0].procname = NULL;
}
table[0].data = &ieee802154_lowpan->fqdir->high_thresh;
table[0].extra1 = &ieee802154_lowpan->fqdir->low_thresh;
table[1].data = &ieee802154_lowpan->fqdir->low_thresh;
table[1].extra2 = &ieee802154_lowpan->fqdir->high_thresh;
table[2].data = &ieee802154_lowpan->fqdir->timeout;
hdr = register_net_sysctl(net, "net/ieee802154/6lowpan", table);
if (hdr == NULL)
goto err_reg;
......@@ -454,17 +449,18 @@ static int __net_init lowpan_frags_init_net(struct net *net)
net_ieee802154_lowpan(net);
int res;
ieee802154_lowpan->frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->frags.low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->frags.timeout = IPV6_FRAG_TIMEOUT;
ieee802154_lowpan->frags.f = &lowpan_frags;
res = inet_frags_init_net(&ieee802154_lowpan->frags);
res = fqdir_init(&ieee802154_lowpan->fqdir, &lowpan_frags, net);
if (res < 0)
return res;
ieee802154_lowpan->fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
ieee802154_lowpan->fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
ieee802154_lowpan->fqdir->timeout = IPV6_FRAG_TIMEOUT;
res = lowpan_frags_ns_sysctl_register(net);
if (res < 0)
inet_frags_exit_net(&ieee802154_lowpan->frags);
fqdir_exit(ieee802154_lowpan->fqdir);
return res;
}
......@@ -474,7 +470,7 @@ static void __net_exit lowpan_frags_exit_net(struct net *net)
net_ieee802154_lowpan(net);
lowpan_frags_ns_sysctl_unregister(net);
inet_frags_exit_net(&ieee802154_lowpan->frags);
fqdir_exit(ieee802154_lowpan->fqdir);
}
static struct pernet_operations lowpan_frags_ops = {
......
......@@ -124,34 +124,50 @@ void inet_frags_fini(struct inet_frags *f)
}
EXPORT_SYMBOL(inet_frags_fini);
/* called from rhashtable_free_and_destroy() at netns_frags dismantle */
static void inet_frags_free_cb(void *ptr, void *arg)
{
struct inet_frag_queue *fq = ptr;
int count;
/* If we can not cancel the timer, it means this frag_queue
* is already disappearing, we have nothing to do.
* Otherwise, we own a refcount until the end of this function.
*/
if (!del_timer(&fq->timer))
return;
count = del_timer_sync(&fq->timer) ? 1 : 0;
spin_lock_bh(&fq->lock);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
fq->flags |= INET_FRAG_COMPLETE;
refcount_dec(&fq->refcnt);
count++;
} else if (fq->flags & INET_FRAG_HASH_DEAD) {
count++;
}
spin_unlock_bh(&fq->lock);
inet_frag_put(fq);
if (refcount_sub_and_test(count, &fq->refcnt))
inet_frag_destroy(fq);
}
static void fqdir_rwork_fn(struct work_struct *work)
{
struct fqdir *fqdir = container_of(to_rcu_work(work),
struct fqdir, destroy_rwork);
rhashtable_free_and_destroy(&fqdir->rhashtable, inet_frags_free_cb, NULL);
kfree(fqdir);
}
void inet_frags_exit_net(struct netns_frags *nf)
void fqdir_exit(struct fqdir *fqdir)
{
nf->high_thresh = 0; /* prevent creation of new frags */
fqdir->high_thresh = 0; /* prevent creation of new frags */
/* paired with READ_ONCE() in inet_frag_kill() :
* We want to prevent rhashtable_remove_fast() calls
*/
smp_store_release(&fqdir->dead, true);
INIT_RCU_WORK(&fqdir->destroy_rwork, fqdir_rwork_fn);
queue_rcu_work(system_wq, &fqdir->destroy_rwork);
rhashtable_free_and_destroy(&nf->rhashtable, inet_frags_free_cb, NULL);
}
EXPORT_SYMBOL(inet_frags_exit_net);
EXPORT_SYMBOL(fqdir_exit);
void inet_frag_kill(struct inet_frag_queue *fq)
{
......@@ -159,11 +175,21 @@ void inet_frag_kill(struct inet_frag_queue *fq)
refcount_dec(&fq->refcnt);
if (!(fq->flags & INET_FRAG_COMPLETE)) {
struct netns_frags *nf = fq->net;
struct fqdir *fqdir = fq->fqdir;
fq->flags |= INET_FRAG_COMPLETE;
rhashtable_remove_fast(&nf->rhashtable, &fq->node, nf->f->rhash_params);
refcount_dec(&fq->refcnt);
rcu_read_lock();
/* This READ_ONCE() is paired with smp_store_release()
* in inet_frags_exit_net().
*/
if (!READ_ONCE(fqdir->dead)) {
rhashtable_remove_fast(&fqdir->rhashtable, &fq->node,
fqdir->f->rhash_params);
refcount_dec(&fq->refcnt);
} else {
fq->flags |= INET_FRAG_HASH_DEAD;
}
rcu_read_unlock();
}
}
EXPORT_SYMBOL(inet_frag_kill);
......@@ -172,7 +198,7 @@ static void inet_frag_destroy_rcu(struct rcu_head *head)
{
struct inet_frag_queue *q = container_of(head, struct inet_frag_queue,
rcu);
struct inet_frags *f = q->net->f;
struct inet_frags *f = q->fqdir->f;
if (f->destructor)
f->destructor(q);
......@@ -203,7 +229,7 @@ EXPORT_SYMBOL(inet_frag_rbtree_purge);
void inet_frag_destroy(struct inet_frag_queue *q)
{
struct netns_frags *nf;
struct fqdir *fqdir;
unsigned int sum, sum_truesize = 0;
struct inet_frags *f;
......@@ -211,18 +237,18 @@ void inet_frag_destroy(struct inet_frag_queue *q)
WARN_ON(del_timer(&q->timer) != 0);
/* Release all fragment data. */
nf = q->net;
f = nf->f;
fqdir = q->fqdir;
f = fqdir->f;
sum_truesize = inet_frag_rbtree_purge(&q->rb_fragments);
sum = sum_truesize + f->qsize;
call_rcu(&q->rcu, inet_frag_destroy_rcu);
sub_frag_mem_limit(nf, sum);
sub_frag_mem_limit(fqdir, sum);
}
EXPORT_SYMBOL(inet_frag_destroy);
static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
static struct inet_frag_queue *inet_frag_alloc(struct fqdir *fqdir,
struct inet_frags *f,
void *arg)
{
......@@ -232,9 +258,9 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
if (!q)
return NULL;
q->net = nf;
q->fqdir = fqdir;
f->constructor(q, arg);
add_frag_mem_limit(nf, f->qsize);
add_frag_mem_limit(fqdir, f->qsize);
timer_setup(&q->timer, f->frag_expire, 0);
spin_lock_init(&q->lock);
......@@ -243,21 +269,21 @@ static struct inet_frag_queue *inet_frag_alloc(struct netns_frags *nf,
return q;
}
static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
static struct inet_frag_queue *inet_frag_create(struct fqdir *fqdir,
void *arg,
struct inet_frag_queue **prev)
{
struct inet_frags *f = nf->f;
struct inet_frags *f = fqdir->f;
struct inet_frag_queue *q;
q = inet_frag_alloc(nf, f, arg);
q = inet_frag_alloc(fqdir, f, arg);
if (!q) {
*prev = ERR_PTR(-ENOMEM);
return NULL;
}
mod_timer(&q->timer, jiffies + nf->timeout);
mod_timer(&q->timer, jiffies + fqdir->timeout);
*prev = rhashtable_lookup_get_insert_key(&nf->rhashtable, &q->key,
*prev = rhashtable_lookup_get_insert_key(&fqdir->rhashtable, &q->key,
&q->node, f->rhash_params);
if (*prev) {
q->flags |= INET_FRAG_COMPLETE;
......@@ -269,18 +295,18 @@ static struct inet_frag_queue *inet_frag_create(struct netns_frags *nf,
}
/* TODO : call from rcu_read_lock() and no longer use refcount_inc_not_zero() */
struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, void *key)
struct inet_frag_queue *inet_frag_find(struct fqdir *fqdir, void *key)
{
struct inet_frag_queue *fq = NULL, *prev;
if (!nf->high_thresh || frag_mem_limit(nf) > nf->high_thresh)
if (!fqdir->high_thresh || frag_mem_limit(fqdir) > fqdir->high_thresh)
return NULL;
rcu_read_lock();
prev = rhashtable_lookup(&nf->rhashtable, key, nf->f->rhash_params);
prev = rhashtable_lookup(&fqdir->rhashtable, key, fqdir->f->rhash_params);
if (!prev)
fq = inet_frag_create(nf, key, &prev);
fq = inet_frag_create(fqdir, key, &prev);
if (prev && !IS_ERR(prev)) {
fq = prev;
if (!refcount_inc_not_zero(&fq->refcnt))
......@@ -391,7 +417,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
delta += head->truesize;
if (delta)
add_frag_mem_limit(q->net, delta);
add_frag_mem_limit(q->fqdir, delta);
/* If the first fragment is fragmented itself, we split
* it to two chunks: the first with data and paged part
......@@ -413,7 +439,7 @@ void *inet_frag_reasm_prepare(struct inet_frag_queue *q, struct sk_buff *skb,
head->truesize += clone->truesize;
clone->csum = 0;
clone->ip_summed = head->ip_summed;
add_frag_mem_limit(q->net, clone->truesize);
add_frag_mem_limit(q->fqdir, clone->truesize);
skb_shinfo(head)->frag_list = clone;
nextp = &clone->next;
} else {
......@@ -466,7 +492,7 @@ void inet_frag_reasm_finish(struct inet_frag_queue *q, struct sk_buff *head,
rbn = rbnext;
}
}
sub_frag_mem_limit(q->net, head->truesize);
sub_frag_mem_limit(q->fqdir, head->truesize);
*nextp = NULL;
skb_mark_not_on_list(head);
......@@ -494,7 +520,7 @@ struct sk_buff *inet_frag_pull_head(struct inet_frag_queue *q)
if (head == q->fragments_tail)
q->fragments_tail = NULL;
sub_frag_mem_limit(q->net, head->truesize);
sub_frag_mem_limit(q->fqdir, head->truesize);
return head;
}
......
......@@ -82,15 +82,13 @@ static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
static void ip4_frag_init(struct inet_frag_queue *q, const void *a)
{
struct ipq *qp = container_of(q, struct ipq, q);
struct netns_ipv4 *ipv4 = container_of(q->net, struct netns_ipv4,
frags);
struct net *net = container_of(ipv4, struct net, ipv4);
struct net *net = q->fqdir->net;
const struct frag_v4_compare_key *key = a;
q->key.v4 = *key;
qp->ecn = 0;
qp->peer = q->net->max_dist ?
qp->peer = q->fqdir->max_dist ?
inet_getpeer_v4(net->ipv4.peers, key->saddr, key->vif, 1) :
NULL;
}
......@@ -142,7 +140,7 @@ static void ip_expire(struct timer_list *t)
int err;
qp = container_of(frag, struct ipq, q);
net = container_of(qp->q.net, struct net, ipv4.frags);
net = qp->q.fqdir->net;
rcu_read_lock();
spin_lock(&qp->q.lock);
......@@ -211,7 +209,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
};
struct inet_frag_queue *q;
q = inet_frag_find(&net->ipv4.frags, &key);
q = inet_frag_find(net->ipv4.fqdir, &key);
if (!q)
return NULL;
......@@ -222,7 +220,7 @@ static struct ipq *ip_find(struct net *net, struct iphdr *iph,
static int ip_frag_too_far(struct ipq *qp)
{
struct inet_peer *peer = qp->peer;
unsigned int max = qp->q.net->max_dist;
unsigned int max = qp->q.fqdir->max_dist;
unsigned int start, end;
int rc;
......@@ -236,12 +234,8 @@ static int ip_frag_too_far(struct ipq *qp)
rc = qp->q.fragments_tail && (end - start) > max;
if (rc) {
struct net *net;
net = container_of(qp->q.net, struct net, ipv4.frags);
__IP_INC_STATS(net, IPSTATS_MIB_REASMFAILS);
}
if (rc)
__IP_INC_STATS(qp->q.fqdir->net, IPSTATS_MIB_REASMFAILS);
return rc;
}
......@@ -250,13 +244,13 @@ static int ip_frag_reinit(struct ipq *qp)
{
unsigned int sum_truesize = 0;
if (!mod_timer(&qp->q.timer, jiffies + qp->q.net->timeout)) {
if (!mod_timer(&qp->q.timer, jiffies + qp->q.fqdir->timeout)) {
refcount_inc(&qp->q.refcnt);
return -ETIMEDOUT;
}
sum_truesize = inet_frag_rbtree_purge(&qp->q.rb_fragments);
sub_frag_mem_limit(qp->q.net, sum_truesize);
sub_frag_mem_limit(qp->q.fqdir, sum_truesize);
qp->q.flags = 0;
qp->q.len = 0;
......@@ -273,7 +267,7 @@ static int ip_frag_reinit(struct ipq *qp)
/* Add new segment to existing queue. */
static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
struct net *net = qp->q.fqdir->net;
int ihl, end, flags, offset;
struct sk_buff *prev_tail;
struct net_device *dev;
......@@ -352,7 +346,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
qp->q.stamp = skb->tstamp;
qp->q.meat += skb->len;
qp->ecn |= ecn;
add_frag_mem_limit(qp->q.net, skb->truesize);
add_frag_mem_limit(qp->q.fqdir, skb->truesize);
if (offset == 0)
qp->q.flags |= INET_FRAG_FIRST_IN;
......@@ -399,7 +393,7 @@ static int ip_frag_queue(struct ipq *qp, struct sk_buff *skb)
static int ip_frag_reasm(struct ipq *qp, struct sk_buff *skb,
struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(qp->q.net, struct net, ipv4.frags);
struct net *net = qp->q.fqdir->net;
struct iphdr *iph;
void *reasm_data;
int len, err;
......@@ -544,30 +538,24 @@ static int dist_min;
static struct ctl_table ip4_frags_ns_ctl_table[] = {
{
.procname = "ipfrag_high_thresh",
.data = &init_net.ipv4.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv4.frags.low_thresh
},
{
.procname = "ipfrag_low_thresh",
.data = &init_net.ipv4.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv4.frags.high_thresh
},
{
.procname = "ipfrag_time",
.data = &init_net.ipv4.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "ipfrag_max_dist",
.data = &init_net.ipv4.frags.max_dist,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_minmax,
......@@ -600,13 +588,13 @@ static int __net_init ip4_frags_ns_ctl_register(struct net *net)
if (!table)
goto err_alloc;
table[0].data = &net->ipv4.frags.high_thresh;
table[0].extra1 = &net->ipv4.frags.low_thresh;
table[1].data = &net->ipv4.frags.low_thresh;
table[1].extra2 = &net->ipv4.frags.high_thresh;
table[2].data = &net->ipv4.frags.timeout;
table[3].data = &net->ipv4.frags.max_dist;
}
table[0].data = &net->ipv4.fqdir->high_thresh;
table[0].extra1 = &net->ipv4.fqdir->low_thresh;
table[1].data = &net->ipv4.fqdir->low_thresh;
table[1].extra2 = &net->ipv4.fqdir->high_thresh;
table[2].data = &net->ipv4.fqdir->timeout;
table[3].data = &net->ipv4.fqdir->max_dist;
hdr = register_net_sysctl(net, "net/ipv4", table);
if (!hdr)
......@@ -654,6 +642,9 @@ static int __net_init ipv4_frags_init_net(struct net *net)
{
int res;
res = fqdir_init(&net->ipv4.fqdir, &ip4_frags, net);
if (res < 0)
return res;
/* Fragment cache limits.
*
* The fragment memory accounting code, (tries to) account for
......@@ -668,31 +659,27 @@ static int __net_init ipv4_frags_init_net(struct net *net)
* we will prune down to 3MB, making room for approx 8 big 64K
* fragments 8x128k.
*/
net->ipv4.frags.high_thresh = 4 * 1024 * 1024;
net->ipv4.frags.low_thresh = 3 * 1024 * 1024;
net->ipv4.fqdir->high_thresh = 4 * 1024 * 1024;
net->ipv4.fqdir->low_thresh = 3 * 1024 * 1024;
/*
* Important NOTE! Fragment queue must be destroyed before MSL expires.
* RFC791 is wrong proposing to prolongate timer each fragment arrival
* by TTL.
*/
net->ipv4.frags.timeout = IP_FRAG_TIME;
net->ipv4.fqdir->timeout = IP_FRAG_TIME;
net->ipv4.frags.max_dist = 64;
net->ipv4.frags.f = &ip4_frags;
net->ipv4.fqdir->max_dist = 64;
res = inet_frags_init_net(&net->ipv4.frags);
if (res < 0)
return res;
res = ip4_frags_ns_ctl_register(net);
if (res < 0)
inet_frags_exit_net(&net->ipv4.frags);
fqdir_exit(net->ipv4.fqdir);
return res;
}
static void __net_exit ipv4_frags_exit_net(struct net *net)
{
ip4_frags_ns_ctl_unregister(net);
inet_frags_exit_net(&net->ipv4.frags);
fqdir_exit(net->ipv4.fqdir);
}
static struct pernet_operations ip4_frags_ops = {
......
......@@ -72,8 +72,8 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW: inuse %d\n",
sock_prot_inuse_get(net, &raw_prot));
seq_printf(seq, "FRAG: inuse %u memory %lu\n",
atomic_read(&net->ipv4.frags.rhashtable.nelems),
frag_mem_limit(&net->ipv4.frags));
atomic_read(&net->ipv4.fqdir->rhashtable.nelems),
frag_mem_limit(net->ipv4.fqdir));
return 0;
}
......
......@@ -58,26 +58,21 @@ static struct inet_frags nf_frags;
static struct ctl_table nf_ct_frag6_sysctl_table[] = {
{
.procname = "nf_conntrack_frag6_timeout",
.data = &init_net.nf_frag.frags.timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{
.procname = "nf_conntrack_frag6_low_thresh",
.data = &init_net.nf_frag.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.nf_frag.frags.high_thresh
},
{
.procname = "nf_conntrack_frag6_high_thresh",
.data = &init_net.nf_frag.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.nf_frag.frags.low_thresh
},
{ }
};
......@@ -93,15 +88,15 @@ static int nf_ct_frag6_sysctl_register(struct net *net)
GFP_KERNEL);
if (table == NULL)
goto err_alloc;
table[0].data = &net->nf_frag.frags.timeout;
table[1].data = &net->nf_frag.frags.low_thresh;
table[1].extra2 = &net->nf_frag.frags.high_thresh;
table[2].data = &net->nf_frag.frags.high_thresh;
table[2].extra1 = &net->nf_frag.frags.low_thresh;
table[2].extra2 = &init_net.nf_frag.frags.high_thresh;
}
table[0].data = &net->nf_frag.fqdir->timeout;
table[1].data = &net->nf_frag.fqdir->low_thresh;
table[1].extra2 = &net->nf_frag.fqdir->high_thresh;
table[2].data = &net->nf_frag.fqdir->high_thresh;
table[2].extra1 = &net->nf_frag.fqdir->low_thresh;
table[2].extra2 = &init_net.nf_frag.fqdir->high_thresh;
hdr = register_net_sysctl(net, "net/netfilter", table);
if (hdr == NULL)
goto err_reg;
......@@ -148,12 +143,10 @@ static void nf_ct_frag6_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, nf_frag.frags);
ip6frag_expire_frag_queue(net, fq);
ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
/* Creation primitives. */
......@@ -169,7 +162,7 @@ static struct frag_queue *fq_find(struct net *net, __be32 id, u32 user,
};
struct inet_frag_queue *q;
q = inet_frag_find(&net->nf_frag.frags, &key);
q = inet_frag_find(net->nf_frag.fqdir, &key);
if (!q)
return NULL;
......@@ -276,7 +269,7 @@ static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->ecn |= ecn;
if (payload_len > fq->q.max_size)
fq->q.max_size = payload_len;
add_frag_mem_limit(fq->q.net, skb->truesize);
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
/* The first fragment.
* nhoffset is obtained from the first fragment, of course.
......@@ -496,24 +489,24 @@ static int nf_ct_net_init(struct net *net)
{
int res;
net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT;
net->nf_frag.frags.f = &nf_frags;
res = inet_frags_init_net(&net->nf_frag.frags);
res = fqdir_init(&net->nf_frag.fqdir, &nf_frags, net);
if (res < 0)
return res;
net->nf_frag.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
net->nf_frag.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
net->nf_frag.fqdir->timeout = IPV6_FRAG_TIMEOUT;
res = nf_ct_frag6_sysctl_register(net);
if (res < 0)
inet_frags_exit_net(&net->nf_frag.frags);
fqdir_exit(net->nf_frag.fqdir);
return res;
}
static void nf_ct_net_exit(struct net *net)
{
nf_ct_frags6_sysctl_unregister(net);
inet_frags_exit_net(&net->nf_frag.frags);
fqdir_exit(net->nf_frag.fqdir);
}
static struct pernet_operations nf_ct_net_ops = {
......
......@@ -48,8 +48,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(net, &rawv6_prot));
seq_printf(seq, "FRAG6: inuse %u memory %lu\n",
atomic_read(&net->ipv6.frags.rhashtable.nelems),
frag_mem_limit(&net->ipv6.frags));
atomic_read(&net->ipv6.fqdir->rhashtable.nelems),
frag_mem_limit(net->ipv6.fqdir));
return 0;
}
......
......@@ -76,12 +76,10 @@ static void ip6_frag_expire(struct timer_list *t)
{
struct inet_frag_queue *frag = from_timer(frag, t, timer);
struct frag_queue *fq;
struct net *net;
fq = container_of(frag, struct frag_queue, q);
net = container_of(fq->q.net, struct net, ipv6.frags);
ip6frag_expire_frag_queue(net, fq);
ip6frag_expire_frag_queue(fq->q.fqdir->net, fq);
}
static struct frag_queue *
......@@ -100,7 +98,7 @@ fq_find(struct net *net, __be32 id, const struct ipv6hdr *hdr, int iif)
IPV6_ADDR_LINKLOCAL)))
key.iif = 0;
q = inet_frag_find(&net->ipv6.frags, &key);
q = inet_frag_find(net->ipv6.fqdir, &key);
if (!q)
return NULL;
......@@ -200,7 +198,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
fq->q.stamp = skb->tstamp;
fq->q.meat += skb->len;
fq->ecn |= ecn;
add_frag_mem_limit(fq->q.net, skb->truesize);
add_frag_mem_limit(fq->q.fqdir, skb->truesize);
fragsize = -skb_network_offset(skb) + skb->len;
if (fragsize > fq->q.max_size)
......@@ -254,7 +252,7 @@ static int ip6_frag_queue(struct frag_queue *fq, struct sk_buff *skb,
static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *skb,
struct sk_buff *prev_tail, struct net_device *dev)
{
struct net *net = container_of(fq->q.net, struct net, ipv6.frags);
struct net *net = fq->q.fqdir->net;
unsigned int nhoff;
void *reasm_data;
int payload_len;
......@@ -401,23 +399,18 @@ static const struct inet6_protocol frag_protocol = {
static struct ctl_table ip6_frags_ns_ctl_table[] = {
{
.procname = "ip6frag_high_thresh",
.data = &init_net.ipv6.frags.high_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra1 = &init_net.ipv6.frags.low_thresh
},
{
.procname = "ip6frag_low_thresh",
.data = &init_net.ipv6.frags.low_thresh,
.maxlen = sizeof(unsigned long),
.mode = 0644,
.proc_handler = proc_doulongvec_minmax,
.extra2 = &init_net.ipv6.frags.high_thresh
},
{
.procname = "ip6frag_time",
.data = &init_net.ipv6.frags.timeout,
.maxlen = sizeof(int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
......@@ -449,12 +442,12 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net)
if (!table)
goto err_alloc;
table[0].data = &net->ipv6.frags.high_thresh;
table[0].extra1 = &net->ipv6.frags.low_thresh;
table[1].data = &net->ipv6.frags.low_thresh;
table[1].extra2 = &net->ipv6.frags.high_thresh;
table[2].data = &net->ipv6.frags.timeout;
}
table[0].data = &net->ipv6.fqdir->high_thresh;
table[0].extra1 = &net->ipv6.fqdir->low_thresh;
table[1].data = &net->ipv6.fqdir->low_thresh;
table[1].extra2 = &net->ipv6.fqdir->high_thresh;
table[2].data = &net->ipv6.fqdir->timeout;
hdr = register_net_sysctl(net, "net/ipv6", table);
if (!hdr)
......@@ -517,25 +510,24 @@ static int __net_init ipv6_frags_init_net(struct net *net)
{
int res;
net->ipv6.frags.high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.frags.low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.frags.timeout = IPV6_FRAG_TIMEOUT;
net->ipv6.frags.f = &ip6_frags;
res = inet_frags_init_net(&net->ipv6.frags);
res = fqdir_init(&net->ipv6.fqdir, &ip6_frags, net);
if (res < 0)
return res;
net->ipv6.fqdir->high_thresh = IPV6_FRAG_HIGH_THRESH;
net->ipv6.fqdir->low_thresh = IPV6_FRAG_LOW_THRESH;
net->ipv6.fqdir->timeout = IPV6_FRAG_TIMEOUT;
res = ip6_frags_ns_sysctl_register(net);
if (res < 0)
inet_frags_exit_net(&net->ipv6.frags);
fqdir_exit(net->ipv6.fqdir);
return res;
}
static void __net_exit ipv6_frags_exit_net(struct net *net)
{
ip6_frags_ns_sysctl_unregister(net);
inet_frags_exit_net(&net->ipv6.frags);
fqdir_exit(net->ipv6.fqdir);
}
static struct pernet_operations ip6_frags_ops = {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册