提交 eaae44d2 编写于 作者: D David S. Miller
......@@ -56,6 +56,18 @@ static inline int hlist_nulls_empty(const struct hlist_nulls_head *h)
return is_a_nulls(h->first);
}
/*
 * hlist_nulls_add_head - insert @n as the first entry of list @h.
 * @n: node to insert
 * @h: list head receiving the node
 *
 * The previous first entry only gets its back-pointer rewritten when
 * is_a_nulls() reports that it is not a nulls value.
 */
static inline void hlist_nulls_add_head(struct hlist_nulls_node *n,
					struct hlist_nulls_head *h)
{
	struct hlist_nulls_node *old_head = h->first;

	/* Hook the new node in front of whatever used to be first. */
	n->pprev = &h->first;
	n->next = old_head;
	h->first = n;

	if (is_a_nulls(old_head))
		return;

	old_head->pprev = &n->next;
}
static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
{
struct hlist_nulls_node *next = n->next;
......@@ -65,6 +77,12 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n)
next->pprev = pprev;
}
/*
 * hlist_nulls_del - unlink @n from its list and poison its back-pointer.
 * @n: node to remove
 *
 * The unlinking itself is delegated to __hlist_nulls_del(); afterwards only
 * ->pprev is overwritten with LIST_POISON2. This function does not write
 * ->next.
 */
static inline void hlist_nulls_del(struct hlist_nulls_node *n)
{
__hlist_nulls_del(n);
n->pprev = LIST_POISON2;
}
/**
* hlist_nulls_for_each_entry - iterate over list of given type
* @tpos: the type * to use as a loop cursor.
......
......@@ -201,6 +201,8 @@ extern struct nf_conntrack_tuple_hash *
__nf_conntrack_find(struct net *net, const struct nf_conntrack_tuple *tuple);
extern void nf_conntrack_hash_insert(struct nf_conn *ct);
extern void nf_ct_delete_from_lists(struct nf_conn *ct);
extern void nf_ct_insert_dying_list(struct nf_conn *ct);
extern void nf_conntrack_flush_report(struct net *net, u32 pid, int report);
......
......@@ -6,61 +6,54 @@
#define _NF_CONNTRACK_ECACHE_H
#include <net/netfilter/nf_conntrack.h>
#include <linux/interrupt.h>
#include <net/net_namespace.h>
#include <net/netfilter/nf_conntrack_expect.h>
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/netfilter/nf_conntrack_extend.h>
/* Connection tracking event bits */
/* Connection tracking event types */
enum ip_conntrack_events
{
/* New conntrack */
IPCT_NEW_BIT = 0,
IPCT_NEW = (1 << IPCT_NEW_BIT),
/* Expected connection */
IPCT_RELATED_BIT = 1,
IPCT_RELATED = (1 << IPCT_RELATED_BIT),
/* Destroyed conntrack */
IPCT_DESTROY_BIT = 2,
IPCT_DESTROY = (1 << IPCT_DESTROY_BIT),
/* Status has changed */
IPCT_STATUS_BIT = 3,
IPCT_STATUS = (1 << IPCT_STATUS_BIT),
IPCT_NEW = 0, /* new conntrack */
IPCT_RELATED = 1, /* related conntrack */
IPCT_DESTROY = 2, /* destroyed conntrack */
IPCT_STATUS = 3, /* status has changed */
IPCT_PROTOINFO = 4, /* protocol information has changed */
IPCT_HELPER = 5, /* new helper has been set */
IPCT_MARK = 6, /* new mark has been set */
IPCT_NATSEQADJ = 7, /* NAT is doing sequence adjustment */
IPCT_SECMARK = 8, /* new security mark has been set */
};
/* Update of protocol info */
IPCT_PROTOINFO_BIT = 4,
IPCT_PROTOINFO = (1 << IPCT_PROTOINFO_BIT),
enum ip_conntrack_expect_events {
IPEXP_NEW = 0, /* new expectation */
};
/* New helper for conntrack */
IPCT_HELPER_BIT = 5,
IPCT_HELPER = (1 << IPCT_HELPER_BIT),
/* Per-conntrack event state, looked up through the NF_CT_EXT_ECACHE
 * extension id (see nf_ct_ecache_find()). */
struct nf_conntrack_ecache {
unsigned long cache; /* pending event bits; long because bitops want long */
unsigned long missed; /* event bits whose delivery failed earlier */
u32 pid; /* netlink pid of destroyer */
};
/* Mark is set */
IPCT_MARK_BIT = 6,
IPCT_MARK = (1 << IPCT_MARK_BIT),
/*
 * nf_ct_ecache_find - look up the event cache extension of @ct.
 *
 * Returns whatever nf_ct_ext_find() yields for NF_CT_EXT_ECACHE; callers in
 * this header treat a NULL result as "no event cache attached".
 */
static inline struct nf_conntrack_ecache *
nf_ct_ecache_find(const struct nf_conn *ct)
{
return nf_ct_ext_find(ct, NF_CT_EXT_ECACHE);
}
/* NAT sequence adjustment */
IPCT_NATSEQADJ_BIT = 7,
IPCT_NATSEQADJ = (1 << IPCT_NATSEQADJ_BIT),
static inline struct nf_conntrack_ecache *
nf_ct_ecache_ext_add(struct nf_conn *ct, gfp_t gfp)
{
struct net *net = nf_ct_net(ct);
/* Secmark is set */
IPCT_SECMARK_BIT = 8,
IPCT_SECMARK = (1 << IPCT_SECMARK_BIT),
};
if (!net->ct.sysctl_events)
return NULL;
enum ip_conntrack_expect_events {
IPEXP_NEW_BIT = 0,
IPEXP_NEW = (1 << IPEXP_NEW_BIT),
return nf_ct_ext_add(ct, NF_CT_EXT_ECACHE, gfp);
};
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache {
struct nf_conn *ct;
unsigned int events;
};
/* This structure is passed to event handler */
struct nf_ct_event {
struct nf_conn *ct;
......@@ -76,53 +69,88 @@ extern struct nf_ct_event_notifier *nf_conntrack_event_cb;
extern int nf_conntrack_register_notifier(struct nf_ct_event_notifier *nb);
extern void nf_conntrack_unregister_notifier(struct nf_ct_event_notifier *nb);
extern void nf_ct_deliver_cached_events(const struct nf_conn *ct);
extern void __nf_ct_event_cache_init(struct nf_conn *ct);
extern void nf_ct_event_cache_flush(struct net *net);
extern void nf_ct_deliver_cached_events(struct nf_conn *ct);
static inline void
nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
local_bh_disable();
ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
if (ct != ecache->ct)
__nf_ct_event_cache_init(ct);
ecache->events |= event;
local_bh_enable();
struct nf_conntrack_ecache *e;
if (nf_conntrack_event_cb == NULL)
return;
e = nf_ct_ecache_find(ct);
if (e == NULL)
return;
set_bit(event, &e->cache);
}
static inline void
nf_conntrack_event_report(enum ip_conntrack_events event,
static inline int
nf_conntrack_eventmask_report(unsigned int eventmask,
struct nf_conn *ct,
u32 pid,
int report)
{
int ret = 0;
struct net *net = nf_ct_net(ct);
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
notify = rcu_dereference(nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
if (!net->ct.sysctl_events)
goto out_unlock;
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) {
struct nf_ct_event item = {
.ct = ct,
.pid = pid,
.pid = e->pid ? e->pid : pid,
.report = report
};
notify->fcn(event, &item);
/* This is a resent of a destroy event? If so, skip missed */
unsigned long missed = e->pid ? 0 : e->missed;
ret = notify->fcn(eventmask | missed, &item);
if (unlikely(ret < 0 || missed)) {
spin_lock_bh(&ct->lock);
if (ret < 0) {
/* This is a destroy event that has been
* triggered by a process, we store the PID
* to include it in the retransmission. */
if (eventmask & (1 << IPCT_DESTROY) &&
e->pid == 0 && pid != 0)
e->pid = pid;
else
e->missed |= eventmask;
} else
e->missed &= ~missed;
spin_unlock_bh(&ct->lock);
}
}
out_unlock:
rcu_read_unlock();
return ret;
}
static inline void
/*
 * nf_conntrack_event_report - report a single event for @ct.
 * @event:  event number (enum value, not a mask)
 * @ct:     conntrack the event refers to
 * @pid:    passed through to nf_conntrack_eventmask_report()
 * @report: passed through to nf_conntrack_eventmask_report()
 *
 * Converts @event into a one-bit mask (1 << event) and returns the result of
 * nf_conntrack_eventmask_report().
 */
static inline int
nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct,
u32 pid, int report)
{
return nf_conntrack_eventmask_report(1 << event, ct, pid, report);
}
static inline int
nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct)
{
nf_conntrack_event_report(event, ct, 0, 0);
return nf_conntrack_eventmask_report(1 << event, ct, 0, 0);
}
struct nf_exp_event {
......@@ -145,6 +173,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
u32 pid,
int report)
{
struct net *net = nf_ct_exp_net(exp);
struct nf_exp_event_notifier *notify;
rcu_read_lock();
......@@ -152,13 +181,16 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event,
if (notify == NULL)
goto out_unlock;
if (!net->ct.sysctl_events)
goto out_unlock;
{
struct nf_exp_event item = {
.exp = exp,
.pid = pid,
.report = report
};
notify->fcn(event, &item);
notify->fcn(1 << event, &item);
}
out_unlock:
rcu_read_unlock();
......@@ -178,12 +210,16 @@ extern void nf_conntrack_ecache_fini(struct net *net);
static inline void nf_conntrack_event_cache(enum ip_conntrack_events event,
struct nf_conn *ct) {}
static inline void nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct) {}
static inline void nf_conntrack_event_report(enum ip_conntrack_events event,
static inline int nf_conntrack_eventmask_report(unsigned int eventmask,
struct nf_conn *ct,
u32 pid,
int report) {}
int report) { return 0; }
static inline int nf_conntrack_event(enum ip_conntrack_events event,
struct nf_conn *ct) { return 0; }
static inline int nf_conntrack_event_report(enum ip_conntrack_events event,
struct nf_conn *ct,
u32 pid,
int report) { return 0; }
static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {}
static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event,
struct nf_conntrack_expect *exp) {}
......@@ -191,7 +227,6 @@ static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e,
struct nf_conntrack_expect *exp,
u32 pid,
int report) {}
static inline void nf_ct_event_cache_flush(struct net *net) {}
static inline int nf_conntrack_ecache_init(struct net *net)
{
......
......@@ -8,12 +8,14 @@ enum nf_ct_ext_id
NF_CT_EXT_HELPER,
NF_CT_EXT_NAT,
NF_CT_EXT_ACCT,
NF_CT_EXT_ECACHE,
NF_CT_EXT_NUM,
};
#define NF_CT_EXT_HELPER_TYPE struct nf_conn_help
#define NF_CT_EXT_NAT_TYPE struct nf_conn_nat
#define NF_CT_EXT_ACCT_TYPE struct nf_conn_counter
#define NF_CT_EXT_ECACHE_TYPE struct nf_conntrack_ecache
/* Extensions: optional stuff which isn't permanently in struct. */
struct nf_ct_ext {
......
......@@ -50,6 +50,8 @@ extern struct nf_conn_help *nf_ct_helper_ext_add(struct nf_conn *ct, gfp_t gfp);
extern int __nf_ct_try_assign_helper(struct nf_conn *ct, gfp_t flags);
extern void nf_ct_helper_destroy(struct nf_conn *ct);
static inline struct nf_conn_help *nfct_help(const struct nf_conn *ct)
{
return nf_ct_ext_find(ct, NF_CT_EXT_HELPER);
......
......@@ -14,16 +14,17 @@ struct netns_ct {
struct hlist_nulls_head *hash;
struct hlist_head *expect_hash;
struct hlist_nulls_head unconfirmed;
struct hlist_nulls_head dying;
struct ip_conntrack_stat *stat;
#ifdef CONFIG_NF_CONNTRACK_EVENTS
struct nf_conntrack_ecache *ecache;
#endif
int sysctl_events;
unsigned int sysctl_events_retry_timeout;
int sysctl_acct;
int sysctl_checksum;
unsigned int sysctl_log_invalid; /* Log invalid packets */
#ifdef CONFIG_SYSCTL
struct ctl_table_header *sysctl_header;
struct ctl_table_header *acct_sysctl_header;
struct ctl_table_header *event_sysctl_header;
#endif
int hash_vmalloc;
int expect_vmalloc;
......
......@@ -39,6 +39,7 @@
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
#include <net/netfilter/nf_conntrack_acct.h>
#include <net/netfilter/nf_conntrack_ecache.h>
#include <net/netfilter/nf_nat.h>
#include <net/netfilter/nf_nat_core.h>
......@@ -182,10 +183,6 @@ destroy_conntrack(struct nf_conntrack *nfct)
NF_CT_ASSERT(atomic_read(&nfct->use) == 0);
NF_CT_ASSERT(!timer_pending(&ct->timeout));
if (!test_bit(IPS_DYING_BIT, &ct->status))
nf_conntrack_event(IPCT_DESTROY, ct);
set_bit(IPS_DYING_BIT, &ct->status);
/* To make sure we don't get any weird locking issues here:
* destroy_conntrack() MUST NOT be called with a write lock
* to nf_conntrack_lock!!! -HW */
......@@ -219,27 +216,70 @@ destroy_conntrack(struct nf_conntrack *nfct)
nf_conntrack_free(ct);
}
static void death_by_timeout(unsigned long ul_conntrack)
void nf_ct_delete_from_lists(struct nf_conn *ct)
{
struct nf_conn *ct = (void *)ul_conntrack;
struct net *net = nf_ct_net(ct);
struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_helper *helper;
if (help) {
rcu_read_lock();
helper = rcu_dereference(help->helper);
if (helper && helper->destroy)
helper->destroy(ct);
rcu_read_unlock();
}
nf_ct_helper_destroy(ct);
spin_lock_bh(&nf_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
NF_CT_STAT_INC(net, delete_list);
clean_from_lists(ct);
spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(nf_ct_delete_from_lists);
/*
 * death_by_event - timer callback that retries delivery of the destroy event.
 * @ul_conntrack: the struct nf_conn pointer, cast to unsigned long
 *
 * Installed as the timer function by nf_ct_insert_dying_list(). If the
 * IPCT_DESTROY event still cannot be delivered, the timer is re-armed with a
 * random delay below sysctl_events_retry_timeout. Once delivery succeeds the
 * conntrack is marked dying, unlinked under nf_conntrack_lock and its
 * reference is dropped.
 */
static void death_by_event(unsigned long ul_conntrack)
{
	struct nf_conn *ct = (void *)ul_conntrack;
	struct net *net = nf_ct_net(ct);

	if (nf_conntrack_event(IPCT_DESTROY, ct) < 0) {
		/* Read the sysctl once: it is writable at runtime and may be
		 * set to 0, which must not be used as a modulo divisor. */
		unsigned int retry = net->ct.sysctl_events_retry_timeout;

		/* bad luck, let's retry again */
		ct->timeout.expires = jiffies +
			(retry ? random32() % retry : 0);
		add_timer(&ct->timeout);
		return;
	}
	/* we've got the event delivered, now it's dying */
	set_bit(IPS_DYING_BIT, &ct->status);
	spin_lock(&nf_conntrack_lock);
	hlist_nulls_del(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode);
	spin_unlock(&nf_conntrack_lock);
	nf_ct_put(ct);
}
/*
 * nf_ct_insert_dying_list - queue @ct for a later retry of its destroy event.
 * @ct: conntrack whose IPCT_DESTROY event could not be delivered
 *
 * Links the original-direction tuple hash node into the per-namespace dying
 * list under nf_conntrack_lock, then re-purposes ct->timeout to run
 * death_by_event() after a random delay below sysctl_events_retry_timeout.
 */
void nf_ct_insert_dying_list(struct nf_conn *ct)
{
	struct net *net = nf_ct_net(ct);
	/* Read the sysctl once: it is writable at runtime and may be set to
	 * 0, which must not be used as a modulo divisor. */
	unsigned int retry = net->ct.sysctl_events_retry_timeout;

	/* add this conntrack to the dying list */
	spin_lock_bh(&nf_conntrack_lock);
	hlist_nulls_add_head(&ct->tuplehash[IP_CT_DIR_ORIGINAL].hnnode,
			     &net->ct.dying);
	spin_unlock_bh(&nf_conntrack_lock);
	/* set a new timer to retry event delivery */
	setup_timer(&ct->timeout, death_by_event, (unsigned long)ct);
	ct->timeout.expires = jiffies + (retry ? random32() % retry : 0);
	add_timer(&ct->timeout);
}
EXPORT_SYMBOL_GPL(nf_ct_insert_dying_list);
/*
 * death_by_timeout - timer callback run when a conntrack entry expires.
 * @ul_conntrack: the struct nf_conn pointer, cast to unsigned long
 *
 * Unless the entry is already flagged dying, the destroy event is sent
 * first. If that fails, the entry is taken off the lists and parked on the
 * dying list for a later retry; otherwise it is flagged dying, taken off the
 * lists and its reference is dropped.
 */
static void death_by_timeout(unsigned long ul_conntrack)
{
	struct nf_conn *conn = (void *)ul_conntrack;
	int undelivered = 0;

	if (!test_bit(IPS_DYING_BIT, &conn->status))
		undelivered = unlikely(nf_conntrack_event(IPCT_DESTROY,
							  conn) < 0);

	if (!undelivered)
		set_bit(IPS_DYING_BIT, &conn->status);

	nf_ct_delete_from_lists(conn);

	if (undelivered) {
		/* destroy event was not delivered */
		nf_ct_insert_dying_list(conn);
	} else {
		nf_ct_put(conn);
	}
}
......@@ -577,6 +617,7 @@ init_conntrack(struct net *net,
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
spin_lock_bh(&nf_conntrack_lock);
exp = nf_ct_find_expectation(net, tuple);
......@@ -807,8 +848,6 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
NF_CT_ASSERT(ct->timeout.data == (unsigned long)ct);
NF_CT_ASSERT(skb);
spin_lock_bh(&nf_conntrack_lock);
/* Only update if this is not a fixed timeout */
if (test_bit(IPS_FIXED_TIMEOUT_BIT, &ct->status))
goto acct;
......@@ -822,11 +861,8 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
/* Only update the timeout if the new timeout is at least
HZ jiffies from the old timeout. Need del_timer for race
avoidance (may already be dying). */
if (newtime - ct->timeout.expires >= HZ
&& del_timer(&ct->timeout)) {
ct->timeout.expires = newtime;
add_timer(&ct->timeout);
}
if (newtime - ct->timeout.expires >= HZ)
mod_timer_pending(&ct->timeout, newtime);
}
acct:
......@@ -835,13 +871,13 @@ void __nf_ct_refresh_acct(struct nf_conn *ct,
acct = nf_conn_acct_find(ct);
if (acct) {
spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes +=
skb->len - skb_network_offset(skb);
spin_unlock_bh(&ct->lock);
}
}
spin_unlock_bh(&nf_conntrack_lock);
}
EXPORT_SYMBOL_GPL(__nf_ct_refresh_acct);
......@@ -853,14 +889,14 @@ bool __nf_ct_kill_acct(struct nf_conn *ct,
if (do_acct) {
struct nf_conn_counter *acct;
spin_lock_bh(&nf_conntrack_lock);
acct = nf_conn_acct_find(ct);
if (acct) {
spin_lock_bh(&ct->lock);
acct[CTINFO2DIR(ctinfo)].packets++;
acct[CTINFO2DIR(ctinfo)].bytes +=
skb->len - skb_network_offset(skb);
spin_unlock_bh(&ct->lock);
}
spin_unlock_bh(&nf_conntrack_lock);
}
if (del_timer(&ct->timeout)) {
......@@ -994,11 +1030,13 @@ static int kill_report(struct nf_conn *i, void *data)
{
struct __nf_ct_flush_report *fr = (struct __nf_ct_flush_report *)data;
/* get_next_corpse sets the dying bit for us */
nf_conntrack_event_report(IPCT_DESTROY,
i,
fr->pid,
fr->report);
/* If we fail to deliver the event, death_by_timeout() will retry */
if (nf_conntrack_event_report(IPCT_DESTROY, i,
fr->pid, fr->report) < 0)
return 1;
/* Avoid the delivery of the destroy event in death_by_timeout(). */
set_bit(IPS_DYING_BIT, &i->status);
return 1;
}
......@@ -1027,6 +1065,21 @@ void nf_conntrack_flush_report(struct net *net, u32 pid, int report)
}
EXPORT_SYMBOL_GPL(nf_conntrack_flush_report);
/*
 * nf_ct_release_dying_list - kill every conntrack parked on the dying list.
 *
 * Walks the dying list with nf_conntrack_lock held (bottom halves disabled)
 * and calls nf_ct_kill() on each entry.
 *
 * NOTE(review): the list walked is always init_net.ct.dying, although the
 * caller is the per-namespace cleanup path; entries on another namespace's
 * dying list are presumably never released here - confirm and consider
 * taking a struct net * argument.
 * NOTE(review): nf_ct_kill() is invoked while nf_conntrack_lock is held; if
 * it ends up running the entry's timer function (death_by_event(), which
 * takes the same lock), this would self-deadlock - verify against
 * nf_ct_kill()'s implementation.
 */
static void nf_ct_release_dying_list(void)
{
struct nf_conntrack_tuple_hash *h;
struct nf_conn *ct;
struct hlist_nulls_node *n;
spin_lock_bh(&nf_conntrack_lock);
hlist_nulls_for_each_entry(h, n, &init_net.ct.dying, hnnode) {
ct = nf_ct_tuplehash_to_ctrack(h);
/* never fails to remove them, no listeners at this point */
nf_ct_kill(ct);
}
spin_unlock_bh(&nf_conntrack_lock);
}
static void nf_conntrack_cleanup_init_net(void)
{
nf_conntrack_helper_fini();
......@@ -1036,10 +1089,9 @@ static void nf_conntrack_cleanup_init_net(void)
static void nf_conntrack_cleanup_net(struct net *net)
{
nf_ct_event_cache_flush(net);
nf_conntrack_ecache_fini(net);
i_see_dead_people:
nf_ct_iterate_cleanup(net, kill_all, NULL);
nf_ct_release_dying_list();
if (atomic_read(&net->ct.count) != 0) {
schedule();
goto i_see_dead_people;
......@@ -1050,6 +1102,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
nf_conntrack_ecache_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
free_percpu(net->ct.stat);
......@@ -1220,14 +1273,12 @@ static int nf_conntrack_init_net(struct net *net)
atomic_set(&net->ct.count, 0);
INIT_HLIST_NULLS_HEAD(&net->ct.unconfirmed, 0);
INIT_HLIST_NULLS_HEAD(&net->ct.dying, 0);
net->ct.stat = alloc_percpu(struct ip_conntrack_stat);
if (!net->ct.stat) {
ret = -ENOMEM;
goto err_stat;
}
ret = nf_conntrack_ecache_init(net);
if (ret < 0)
goto err_ecache;
net->ct.hash = nf_ct_alloc_hashtable(&nf_conntrack_htable_size,
&net->ct.hash_vmalloc, 1);
if (!net->ct.hash) {
......@@ -1241,6 +1292,9 @@ static int nf_conntrack_init_net(struct net *net)
ret = nf_conntrack_acct_init(net);
if (ret < 0)
goto err_acct;
ret = nf_conntrack_ecache_init(net);
if (ret < 0)
goto err_ecache;
/* Set up fake conntrack:
- to never be deleted, not in any hashes */
......@@ -1253,14 +1307,14 @@ static int nf_conntrack_init_net(struct net *net)
return 0;
err_ecache:
nf_conntrack_acct_fini(net);
err_acct:
nf_conntrack_expect_fini(net);
err_expect:
nf_ct_free_hashtable(net->ct.hash, net->ct.hash_vmalloc,
nf_conntrack_htable_size);
err_hash:
nf_conntrack_ecache_fini(net);
err_ecache:
free_percpu(net->ct.stat);
err_stat:
return ret;
......
......@@ -21,6 +21,7 @@
#include <net/netfilter/nf_conntrack.h>
#include <net/netfilter/nf_conntrack_core.h>
#include <net/netfilter/nf_conntrack_extend.h>
static DEFINE_MUTEX(nf_ct_ecache_mutex);
......@@ -32,94 +33,51 @@ EXPORT_SYMBOL_GPL(nf_expect_event_cb);
/* deliver cached events and clear cache entry - must be called with locally
* disabled softirqs */
static inline void
__nf_ct_deliver_cached_events(struct nf_conntrack_ecache *ecache)
void nf_ct_deliver_cached_events(struct nf_conn *ct)
{
unsigned long events;
struct nf_ct_event_notifier *notify;
struct nf_conntrack_ecache *e;
rcu_read_lock();
notify = rcu_dereference(nf_conntrack_event_cb);
if (notify == NULL)
goto out_unlock;
if (nf_ct_is_confirmed(ecache->ct) && !nf_ct_is_dying(ecache->ct)
&& ecache->events) {
e = nf_ct_ecache_find(ct);
if (e == NULL)
goto out_unlock;
events = xchg(&e->cache, 0);
if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct) && events) {
struct nf_ct_event item = {
.ct = ecache->ct,
.ct = ct,
.pid = 0,
.report = 0
};
int ret;
/* We make a copy of the missed event cache without taking
* the lock, thus we may send missed events twice. However,
* this does not harm and it happens very rarely. */
unsigned long missed = e->missed;
notify->fcn(ecache->events, &item);
ret = notify->fcn(events | missed, &item);
if (unlikely(ret < 0 || missed)) {
spin_lock_bh(&ct->lock);
if (ret < 0)
e->missed |= events;
else
e->missed &= ~missed;
spin_unlock_bh(&ct->lock);
}
}
ecache->events = 0;
nf_ct_put(ecache->ct);
ecache->ct = NULL;
out_unlock:
rcu_read_unlock();
}
/* Deliver all cached events for a particular conntrack. This is called
 * by code prior to async packet handling for freeing the skb.
 *
 * Events are only delivered when the current CPU's cache slot holds @ct;
 * bottom halves are disabled around the per-CPU lookup and the delivery. */
void nf_ct_deliver_cached_events(const struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
local_bh_disable();
ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
if (ecache->ct == ct)
__nf_ct_deliver_cached_events(ecache);
local_bh_enable();
}
EXPORT_SYMBOL_GPL(nf_ct_deliver_cached_events);
/* Deliver cached events for old pending events, if current conntrack != old.
 *
 * Makes @ct the owner of the current CPU's event cache slot and takes a
 * reference on it. The caller must not pass the conntrack that already owns
 * the slot (enforced with BUG_ON). */
void __nf_ct_event_cache_init(struct nf_conn *ct)
{
struct net *net = nf_ct_net(ct);
struct nf_conntrack_ecache *ecache;
/* take care of delivering potentially old events */
ecache = per_cpu_ptr(net->ct.ecache, raw_smp_processor_id());
BUG_ON(ecache->ct == ct);
if (ecache->ct)
__nf_ct_deliver_cached_events(ecache);
/* initialize for this conntrack/packet */
ecache->ct = ct;
nf_conntrack_get(&ct->ct_general);
}
EXPORT_SYMBOL_GPL(__nf_ct_event_cache_init);
/* flush the event cache - touches other CPU's data and must not be called
 * while packets are still passing through the code.
 *
 * Drops the reference held by each possible CPU's cache slot of @net; the
 * slot's ct pointer itself is not cleared here. */
void nf_ct_event_cache_flush(struct net *net)
{
struct nf_conntrack_ecache *ecache;
int cpu;
for_each_possible_cpu(cpu) {
ecache = per_cpu_ptr(net->ct.ecache, cpu);
if (ecache->ct)
nf_ct_put(ecache->ct);
}
}
/* Allocate the per-CPU event cache of @net.
 * Returns 0 on success, -ENOMEM if the per-CPU allocation fails. */
int nf_conntrack_ecache_init(struct net *net)
{
net->ct.ecache = alloc_percpu(struct nf_conntrack_ecache);
if (!net->ct.ecache)
return -ENOMEM;
return 0;
}
/* Release the per-CPU event cache of @net. */
void nf_conntrack_ecache_fini(struct net *net)
{
free_percpu(net->ct.ecache);
}
int nf_conntrack_register_notifier(struct nf_ct_event_notifier *new)
{
int ret = 0;
......@@ -185,3 +143,118 @@ void nf_ct_expect_unregister_notifier(struct nf_exp_event_notifier *new)
mutex_unlock(&nf_ct_ecache_mutex);
}
EXPORT_SYMBOL_GPL(nf_ct_expect_unregister_notifier);
#define NF_CT_EVENTS_DEFAULT 1
static int nf_ct_events __read_mostly = NF_CT_EVENTS_DEFAULT;
static int nf_ct_events_retry_timeout __read_mostly = 15*HZ;
#ifdef CONFIG_SYSCTL
/*
 * Template for the per-namespace event sysctls. The table is duplicated for
 * each namespace by nf_conntrack_event_init_sysctl(), which repoints the
 * .data members of entries 0 and 1 at that namespace's own fields, so the
 * entry order here must stay in sync with that function.
 *
 * NOTE(review): no lower bound is enforced on
 * nf_conntrack_events_retry_timeout, while the value is used as a modulo
 * divisor when re-arming the retry timer - confirm that 0 cannot be written.
 */
static struct ctl_table event_sysctl_table[] = {
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nf_conntrack_events",
.data = &init_net.ct.sysctl_events,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec,
},
{
.ctl_name = CTL_UNNUMBERED,
.procname = "nf_conntrack_events_retry_timeout",
.data = &init_net.ct.sysctl_events_retry_timeout,
.maxlen = sizeof(unsigned int),
.mode = 0644,
.proc_handler = proc_dointvec_jiffies,
},
{}
};
#endif /* CONFIG_SYSCTL */
/* Extension type descriptor for the event cache: size and alignment of
 * struct nf_conntrack_ecache, registered under NF_CT_EXT_ECACHE by
 * nf_conntrack_ecache_init(). */
static struct nf_ct_ext_type event_extend __read_mostly = {
.len = sizeof(struct nf_conntrack_ecache),
.align = __alignof__(struct nf_conntrack_ecache),
.id = NF_CT_EXT_ECACHE,
};
#ifdef CONFIG_SYSCTL
static int nf_conntrack_event_init_sysctl(struct net *net)
{
struct ctl_table *table;
table = kmemdup(event_sysctl_table, sizeof(event_sysctl_table),
GFP_KERNEL);
if (!table)
goto out;
table[0].data = &net->ct.sysctl_events;
table[1].data = &net->ct.sysctl_events_retry_timeout;
net->ct.event_sysctl_header =
register_net_sysctl_table(net,
nf_net_netfilter_sysctl_path, table);
if (!net->ct.event_sysctl_header) {
printk(KERN_ERR "nf_ct_event: can't register to sysctl.\n");
goto out_register;
}
return 0;
out_register:
kfree(table);
out:
return -ENOMEM;
}
/*
 * nf_conntrack_event_fini_sysctl - unregister @net's event sysctls and free
 * the table copy made by nf_conntrack_event_init_sysctl().
 */
static void nf_conntrack_event_fini_sysctl(struct net *net)
{
struct ctl_table *table;
/* Fetch the table pointer before unregistering: it is read through the
 * header that is handed to unregister_net_sysctl_table() below. */
table = net->ct.event_sysctl_header->ctl_table_arg;
unregister_net_sysctl_table(net->ct.event_sysctl_header);
kfree(table);
}
#else
/* !CONFIG_SYSCTL stub: nothing to register, always reports success. */
static int nf_conntrack_event_init_sysctl(struct net *net)
{
return 0;
}
/* !CONFIG_SYSCTL stub: nothing was registered, so nothing to undo. */
static void nf_conntrack_event_fini_sysctl(struct net *net)
{
}
#endif /* CONFIG_SYSCTL */
int nf_conntrack_ecache_init(struct net *net)
{
int ret;
net->ct.sysctl_events = nf_ct_events;
net->ct.sysctl_events_retry_timeout = nf_ct_events_retry_timeout;
if (net_eq(net, &init_net)) {
ret = nf_ct_extend_register(&event_extend);
if (ret < 0) {
printk(KERN_ERR "nf_ct_event: Unable to register "
"event extension.\n");
goto out_extend_register;
}
}
ret = nf_conntrack_event_init_sysctl(net);
if (ret < 0)
goto out_sysctl;
return 0;
out_sysctl:
if (net_eq(net, &init_net))
nf_ct_extend_unregister(&event_extend);
out_extend_register:
return ret;
}
/*
 * nf_conntrack_ecache_fini - undo nf_conntrack_ecache_init() for @net.
 *
 * Removes the namespace's event sysctls first, then unregisters the event
 * cache extension when @net is init_net (the only namespace that registered
 * it).
 */
void nf_conntrack_ecache_fini(struct net *net)
{
nf_conntrack_event_fini_sysctl(net);
if (net_eq(net, &init_net))
nf_ct_extend_unregister(&event_extend);
}
......@@ -136,6 +136,20 @@ static inline int unhelp(struct nf_conntrack_tuple_hash *i,
return 0;
}
void nf_ct_helper_destroy(struct nf_conn *ct)
{
struct nf_conn_help *help = nfct_help(ct);
struct nf_conntrack_helper *helper;
if (help) {
rcu_read_lock();
helper = rcu_dereference(help->helper);
if (helper && helper->destroy)
helper->destroy(ct);
rcu_read_unlock();
}
}
int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
{
unsigned int h = helper_hash(&me->tuple);
......
......@@ -463,15 +463,16 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
struct sk_buff *skb;
unsigned int type;
unsigned int flags = 0, group;
int err;
/* ignore our fake conntrack entry */
if (ct == &nf_conntrack_untracked)
return 0;
if (events & IPCT_DESTROY) {
if (events & (1 << IPCT_DESTROY)) {
type = IPCTNL_MSG_CT_DELETE;
group = NFNLGRP_CONNTRACK_DESTROY;
} else if (events & (IPCT_NEW | IPCT_RELATED)) {
} else if (events & ((1 << IPCT_NEW) | (1 << IPCT_RELATED))) {
type = IPCTNL_MSG_CT_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
group = NFNLGRP_CONNTRACK_NEW;
......@@ -519,7 +520,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
if (ctnetlink_dump_status(skb, ct) < 0)
goto nla_put_failure;
if (events & IPCT_DESTROY) {
if (events & (1 << IPCT_DESTROY)) {
if (ctnetlink_dump_counters(skb, ct, IP_CT_DIR_ORIGINAL) < 0 ||
ctnetlink_dump_counters(skb, ct, IP_CT_DIR_REPLY) < 0)
goto nla_put_failure;
......@@ -527,38 +528,41 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item)
if (ctnetlink_dump_timeout(skb, ct) < 0)
goto nla_put_failure;
if (events & IPCT_PROTOINFO
if (events & (1 << IPCT_PROTOINFO)
&& ctnetlink_dump_protoinfo(skb, ct) < 0)
goto nla_put_failure;
if ((events & IPCT_HELPER || nfct_help(ct))
if ((events & (1 << IPCT_HELPER) || nfct_help(ct))
&& ctnetlink_dump_helpinfo(skb, ct) < 0)
goto nla_put_failure;
#ifdef CONFIG_NF_CONNTRACK_SECMARK
if ((events & IPCT_SECMARK || ct->secmark)
if ((events & (1 << IPCT_SECMARK) || ct->secmark)
&& ctnetlink_dump_secmark(skb, ct) < 0)
goto nla_put_failure;
#endif
if (events & IPCT_RELATED &&
if (events & (1 << IPCT_RELATED) &&
ctnetlink_dump_master(skb, ct) < 0)
goto nla_put_failure;
if (events & IPCT_NATSEQADJ &&
if (events & (1 << IPCT_NATSEQADJ) &&
ctnetlink_dump_nat_seq_adj(skb, ct) < 0)
goto nla_put_failure;
}
#ifdef CONFIG_NF_CONNTRACK_MARK
if ((events & IPCT_MARK || ct->mark)
if ((events & (1 << IPCT_MARK) || ct->mark)
&& ctnetlink_dump_mark(skb, ct) < 0)
goto nla_put_failure;
#endif
rcu_read_unlock();
nlmsg_end(skb, nlh);
nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
err = nfnetlink_send(skb, item->pid, group, item->report, GFP_ATOMIC);
if (err == -ENOBUFS || err == -EAGAIN)
return -ENOBUFS;
return 0;
nla_put_failure:
......@@ -798,10 +802,15 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb,
}
}
nf_conntrack_event_report(IPCT_DESTROY,
ct,
if (nf_conntrack_event_report(IPCT_DESTROY, ct,
NETLINK_CB(skb).pid,
nlmsg_report(nlh));
nlmsg_report(nlh)) < 0) {
nf_ct_delete_from_lists(ct);
/* we failed to report the event, try later */
nf_ct_insert_dying_list(ct);
nf_ct_put(ct);
return 0;
}
/* death_by_timeout would report the event again */
set_bit(IPS_DYING_BIT, &ct->status);
......@@ -1253,6 +1262,7 @@ ctnetlink_create_conntrack(struct nlattr *cda[],
}
nf_ct_acct_ext_add(ct, GFP_ATOMIC);
nf_ct_ecache_ext_add(ct, GFP_ATOMIC);
#if defined(CONFIG_NF_CONNTRACK_MARK)
if (cda[CTA_MARK])
......@@ -1340,11 +1350,11 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
else
events = IPCT_NEW;
nf_conntrack_event_report(IPCT_STATUS |
IPCT_HELPER |
IPCT_PROTOINFO |
IPCT_NATSEQADJ |
IPCT_MARK | events,
nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
(1 << IPCT_MARK) | events,
ct, NETLINK_CB(skb).pid,
nlmsg_report(nlh));
nf_ct_put(ct);
......@@ -1365,11 +1375,11 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb,
if (err == 0) {
nf_conntrack_get(&ct->ct_general);
spin_unlock_bh(&nf_conntrack_lock);
nf_conntrack_event_report(IPCT_STATUS |
IPCT_HELPER |
IPCT_PROTOINFO |
IPCT_NATSEQADJ |
IPCT_MARK,
nf_conntrack_eventmask_report((1 << IPCT_STATUS) |
(1 << IPCT_HELPER) |
(1 << IPCT_PROTOINFO) |
(1 << IPCT_NATSEQADJ) |
(1 << IPCT_MARK),
ct, NETLINK_CB(skb).pid,
nlmsg_report(nlh));
nf_ct_put(ct);
......@@ -1515,7 +1525,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item)
unsigned int type;
int flags = 0;
if (events & IPEXP_NEW) {
if (events & (1 << IPEXP_NEW)) {
type = IPCTNL_MSG_EXP_NEW;
flags = NLM_F_CREATE|NLM_F_EXCL;
} else
......
......@@ -248,14 +248,14 @@ static int nf_log_proc_dostring(ctl_table *table, int write, struct file *filp,
rcu_assign_pointer(nf_loggers[tindex], logger);
mutex_unlock(&nf_log_mutex);
} else {
rcu_read_lock();
logger = rcu_dereference(nf_loggers[tindex]);
mutex_lock(&nf_log_mutex);
logger = nf_loggers[tindex];
if (!logger)
table->data = "NONE";
else
table->data = logger->name;
r = proc_dostring(table, write, filp, buffer, lenp, ppos);
rcu_read_unlock();
mutex_unlock(&nf_log_mutex);
}
return r;
......
......@@ -364,14 +364,14 @@ int xt_check_match(struct xt_mtchk_param *par,
* ebt_among is exempt from centralized matchsize checking
* because it uses a dynamic-size data set.
*/
printk("%s_tables: %s match: invalid size %Zu != %u\n",
pr_err("%s_tables: %s match: invalid size %Zu != %u\n",
xt_prefix[par->family], par->match->name,
XT_ALIGN(par->match->matchsize), size);
return -EINVAL;
}
if (par->match->table != NULL &&
strcmp(par->match->table, par->table) != 0) {
printk("%s_tables: %s match: only valid in %s table, not %s\n",
pr_err("%s_tables: %s match: only valid in %s table, not %s\n",
xt_prefix[par->family], par->match->name,
par->match->table, par->table);
return -EINVAL;
......@@ -379,7 +379,7 @@ int xt_check_match(struct xt_mtchk_param *par,
if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) {
char used[64], allow[64];
printk("%s_tables: %s match: used from hooks %s, but only "
pr_err("%s_tables: %s match: used from hooks %s, but only "
"valid from %s\n",
xt_prefix[par->family], par->match->name,
textify_hooks(used, sizeof(used), par->hook_mask),
......@@ -387,7 +387,7 @@ int xt_check_match(struct xt_mtchk_param *par,
return -EINVAL;
}
if (par->match->proto && (par->match->proto != proto || inv_proto)) {
printk("%s_tables: %s match: only valid for protocol %u\n",
pr_err("%s_tables: %s match: only valid for protocol %u\n",
xt_prefix[par->family], par->match->name,
par->match->proto);
return -EINVAL;
......@@ -514,14 +514,14 @@ int xt_check_target(struct xt_tgchk_param *par,
unsigned int size, u_int8_t proto, bool inv_proto)
{
if (XT_ALIGN(par->target->targetsize) != size) {
printk("%s_tables: %s target: invalid size %Zu != %u\n",
pr_err("%s_tables: %s target: invalid size %Zu != %u\n",
xt_prefix[par->family], par->target->name,
XT_ALIGN(par->target->targetsize), size);
return -EINVAL;
}
if (par->target->table != NULL &&
strcmp(par->target->table, par->table) != 0) {
printk("%s_tables: %s target: only valid in %s table, not %s\n",
pr_err("%s_tables: %s target: only valid in %s table, not %s\n",
xt_prefix[par->family], par->target->name,
par->target->table, par->table);
return -EINVAL;
......@@ -529,7 +529,7 @@ int xt_check_target(struct xt_tgchk_param *par,
if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) {
char used[64], allow[64];
printk("%s_tables: %s target: used from hooks %s, but only "
pr_err("%s_tables: %s target: used from hooks %s, but only "
"usable from %s\n",
xt_prefix[par->family], par->target->name,
textify_hooks(used, sizeof(used), par->hook_mask),
......@@ -537,7 +537,7 @@ int xt_check_target(struct xt_tgchk_param *par,
return -EINVAL;
}
if (par->target->proto && (par->target->proto != proto || inv_proto)) {
printk("%s_tables: %s target: only valid for protocol %u\n",
pr_err("%s_tables: %s target: only valid for protocol %u\n",
xt_prefix[par->family], par->target->name,
par->target->proto);
return -EINVAL;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册