提交 31111c26 编写于 作者: D David S. Miller

Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/kaber/nf-next-2.6

Conflicts:
	Documentation/feature-removal-schedule.txt
......@@ -637,3 +637,12 @@ Why: The original implementation of memsw feature enabled by
Who: Michal Hocko <mhocko@suse.cz>
----------------------------
What: ipt_addrtype match include file
When: 2012
Why: superseded by xt_addrtype
Who: Florian Westphal <fw@strlen.de>
Files: include/linux/netfilter_ipv4/ipt_addrtype.h
>>>>>>> 2f5dc63123905a89d4260ab8ee08d19ec104db04
----------------------------
......@@ -29,6 +29,7 @@ header-y += xt_TCPMSS.h
header-y += xt_TCPOPTSTRIP.h
header-y += xt_TEE.h
header-y += xt_TPROXY.h
header-y += xt_addrtype.h
header-y += xt_cluster.h
header-y += xt_comment.h
header-y += xt_connbytes.h
......
#ifndef _XT_ADDRTYPE_H
#define _XT_ADDRTYPE_H
#include <linux/types.h>
enum {
XT_ADDRTYPE_INVERT_SOURCE = 0x0001,
XT_ADDRTYPE_INVERT_DEST = 0x0002,
XT_ADDRTYPE_LIMIT_IFACE_IN = 0x0004,
XT_ADDRTYPE_LIMIT_IFACE_OUT = 0x0008,
};
/* rtn_type enum values from rtnetlink.h, but shifted */
enum {
XT_ADDRTYPE_UNSPEC = 1 << 0,
XT_ADDRTYPE_UNICAST = 1 << 1, /* 1 << RTN_UNICAST */
XT_ADDRTYPE_LOCAL = 1 << 2, /* 1 << RTN_LOCAL, etc */
XT_ADDRTYPE_BROADCAST = 1 << 3,
XT_ADDRTYPE_ANYCAST = 1 << 4,
XT_ADDRTYPE_MULTICAST = 1 << 5,
XT_ADDRTYPE_BLACKHOLE = 1 << 6,
XT_ADDRTYPE_UNREACHABLE = 1 << 7,
XT_ADDRTYPE_PROHIBIT = 1 << 8,
XT_ADDRTYPE_THROW = 1 << 9,
XT_ADDRTYPE_NAT = 1 << 10,
XT_ADDRTYPE_XRESOLVE = 1 << 11,
};
struct xt_addrtype_info_v1 {
__u16 source; /* source-type mask */
__u16 dest; /* dest-type mask */
__u32 flags;
};
/* revision 0 */
struct xt_addrtype_info {
__u16 source; /* source-type mask */
__u16 dest; /* dest-type mask */
__u32 invert_source;
__u32 invert_dest;
};
#endif
......@@ -374,24 +374,9 @@ struct ip_vs_stats {
struct ip_vs_estimator est; /* estimator */
struct ip_vs_cpu_stats *cpustats; /* per cpu counters */
spinlock_t lock; /* spin lock */
struct ip_vs_stats_user ustats0; /* reset values */
};
/*
* Helper Macros for per cpu
* ipvs->tot_stats->ustats.count
*/
#define IPVS_STAT_INC(ipvs, count) \
__this_cpu_inc((ipvs)->ustats->count)
#define IPVS_STAT_ADD(ipvs, count, value) \
do {\
write_seqcount_begin(per_cpu_ptr((ipvs)->ustats_seq, \
raw_smp_processor_id())); \
__this_cpu_add((ipvs)->ustats->count, value); \
write_seqcount_end(per_cpu_ptr((ipvs)->ustats_seq, \
raw_smp_processor_id())); \
} while (0)
struct dst_entry;
struct iphdr;
struct ip_vs_conn;
......@@ -803,6 +788,171 @@ struct ip_vs_app {
void (*timeout_change)(struct ip_vs_app *app, int flags);
};
/* IPVS in network namespace */
struct netns_ipvs {
int gen; /* Generation */
/*
* Hash table: for real service lookups
*/
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
struct mutex app_mutex;
struct lock_class_key app_key; /* mutex debuging */
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
/* ip_vs_proto_tcp */
#ifdef CONFIG_IP_VS_PROTO_TCP
#define TCP_APP_TAB_BITS 4
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
spinlock_t tcp_app_lock;
#endif
/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
#define UDP_APP_TAB_BITS 4
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
struct list_head udp_apps[UDP_APP_TAB_SIZE];
spinlock_t udp_app_lock;
#endif
/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
#define SCTP_APP_TAB_BITS 4
#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
/* Hash table for SCTP application incarnations */
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
spinlock_t sctp_app_lock;
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
/* ip_vs_ctl */
struct ip_vs_stats tot_stats; /* Statistics & est. */
int num_services; /* no of virtual services */
rwlock_t rs_lock; /* real services table */
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
struct lock_class_key ctl_key; /* ctl_mutex debuging */
/* Trash for destinations */
struct list_head dest_trash;
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
#ifdef CONFIG_SYSCTL
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
atomic_t dropentry;
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
spinlock_t securetcp_lock; /* state and timeout tables */
/* sys-ctl struct */
struct ctl_table_header *sysctl_hdr;
struct ctl_table *sysctl_tbl;
#endif
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
int sysctl_conntrack;
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2];
int sysctl_nat_icmp_send;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
struct ctl_table *lblc_ctl_table;
/* ip_vs_lblcr */
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
/* ip_vs_est */
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
struct list_head sync_queue;
spinlock_t sync_lock;
struct ip_vs_sync_buff *sync_buff;
spinlock_t sync_buff_lock;
struct sockaddr_in sync_mcast_addr;
struct task_struct *master_thread;
struct task_struct *backup_thread;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state;
volatile int master_syncid;
volatile int backup_syncid;
/* multicast interface name */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */
struct net *net; /* Needed by timer routines */
};
#define DEFAULT_SYNC_THRESHOLD 3
#define DEFAULT_SYNC_PERIOD 50
#define DEFAULT_SYNC_VER 1
#ifdef CONFIG_SYSCTL
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_threshold[0];
}
static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_threshold[1];
}
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_sync_ver;
}
#else
static inline int sysctl_sync_threshold(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_THRESHOLD;
}
static inline int sysctl_sync_period(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_PERIOD;
}
static inline int sysctl_sync_ver(struct netns_ipvs *ipvs)
{
return DEFAULT_SYNC_VER;
}
#endif
/*
* IPVS core functions
......@@ -1071,9 +1221,11 @@ extern void ip_vs_sync_cleanup(void);
*/
extern int ip_vs_estimator_init(void);
extern void ip_vs_estimator_cleanup(void);
extern void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats);
extern void ip_vs_zero_estimator(struct ip_vs_stats *stats);
extern void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
struct ip_vs_stats *stats);
/*
* Various IPVS packet transmitters (from ip_vs_xmit.c)
......@@ -1106,6 +1258,7 @@ extern int ip_vs_icmp_xmit_v6
int offset);
#endif
#ifdef CONFIG_SYSCTL
/*
* This is a simple mechanism to ignore packets when
* we are loaded. Just set ip_vs_drop_rate to 'n' and
......@@ -1121,6 +1274,9 @@ static inline int ip_vs_todrop(struct netns_ipvs *ipvs)
ipvs->drop_counter = ipvs->drop_rate;
return 1;
}
#else
static inline int ip_vs_todrop(struct netns_ipvs *ipvs) { return 0; }
#endif
/*
* ip_vs_fwd_tag returns the forwarding tag of the connection
......@@ -1190,7 +1346,7 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
{
#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE)
enum ip_conntrack_info ctinfo;
struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo);
struct nf_conn *ct = nf_ct_get(skb, &ctinfo);
if (!ct || !nf_ct_is_untracked(ct)) {
nf_reset(skb);
......@@ -1208,7 +1364,11 @@ static inline void ip_vs_notrack(struct sk_buff *skb)
*/
static inline int ip_vs_conntrack_enabled(struct netns_ipvs *ipvs)
{
#ifdef CONFIG_SYSCTL
return ipvs->sysctl_conntrack;
#else
return 0;
#endif
}
extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp,
......
......@@ -20,7 +20,6 @@
#include <net/netns/conntrack.h>
#endif
#include <net/netns/xfrm.h>
#include <net/netns/ip_vs.h>
struct proc_dir_entry;
struct net_device;
......@@ -28,6 +27,7 @@ struct sock;
struct ctl_table_header;
struct net_generic;
struct sock;
struct netns_ipvs;
#define NETDEV_HASHBITS 8
......
/*
* IP Virtual Server
* Data structure for network namspace
*
*/
#ifndef IP_VS_H_
#define IP_VS_H_
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/list_nulls.h>
#include <linux/ip_vs.h>
#include <asm/atomic.h>
#include <linux/in.h>
struct ip_vs_stats;
struct ip_vs_sync_buff;
struct ctl_table_header;
struct netns_ipvs {
int gen; /* Generation */
/*
* Hash table: for real service lookups
*/
#define IP_VS_RTAB_BITS 4
#define IP_VS_RTAB_SIZE (1 << IP_VS_RTAB_BITS)
#define IP_VS_RTAB_MASK (IP_VS_RTAB_SIZE - 1)
struct list_head rs_table[IP_VS_RTAB_SIZE];
/* ip_vs_app */
struct list_head app_list;
struct mutex app_mutex;
struct lock_class_key app_key; /* mutex debuging */
/* ip_vs_proto */
#define IP_VS_PROTO_TAB_SIZE 32 /* must be power of 2 */
struct ip_vs_proto_data *proto_data_table[IP_VS_PROTO_TAB_SIZE];
/* ip_vs_proto_tcp */
#ifdef CONFIG_IP_VS_PROTO_TCP
#define TCP_APP_TAB_BITS 4
#define TCP_APP_TAB_SIZE (1 << TCP_APP_TAB_BITS)
#define TCP_APP_TAB_MASK (TCP_APP_TAB_SIZE - 1)
struct list_head tcp_apps[TCP_APP_TAB_SIZE];
spinlock_t tcp_app_lock;
#endif
/* ip_vs_proto_udp */
#ifdef CONFIG_IP_VS_PROTO_UDP
#define UDP_APP_TAB_BITS 4
#define UDP_APP_TAB_SIZE (1 << UDP_APP_TAB_BITS)
#define UDP_APP_TAB_MASK (UDP_APP_TAB_SIZE - 1)
struct list_head udp_apps[UDP_APP_TAB_SIZE];
spinlock_t udp_app_lock;
#endif
/* ip_vs_proto_sctp */
#ifdef CONFIG_IP_VS_PROTO_SCTP
#define SCTP_APP_TAB_BITS 4
#define SCTP_APP_TAB_SIZE (1 << SCTP_APP_TAB_BITS)
#define SCTP_APP_TAB_MASK (SCTP_APP_TAB_SIZE - 1)
/* Hash table for SCTP application incarnations */
struct list_head sctp_apps[SCTP_APP_TAB_SIZE];
spinlock_t sctp_app_lock;
#endif
/* ip_vs_conn */
atomic_t conn_count; /* connection counter */
/* ip_vs_ctl */
struct ip_vs_stats *tot_stats; /* Statistics & est. */
struct ip_vs_cpu_stats __percpu *cpustats; /* Stats per cpu */
seqcount_t *ustats_seq; /* u64 read retry */
int num_services; /* no of virtual services */
/* 1/rate drop and drop-entry variables */
struct delayed_work defense_work; /* Work handler */
int drop_rate;
int drop_counter;
atomic_t dropentry;
/* locks in ctl.c */
spinlock_t dropentry_lock; /* drop entry handling */
spinlock_t droppacket_lock; /* drop packet handling */
spinlock_t securetcp_lock; /* state and timeout tables */
rwlock_t rs_lock; /* real services table */
/* semaphore for IPVS sockopts. And, [gs]etsockopt may sleep. */
struct lock_class_key ctl_key; /* ctl_mutex debuging */
/* Trash for destinations */
struct list_head dest_trash;
/* Service counters */
atomic_t ftpsvc_counter;
atomic_t nullsvc_counter;
/* sys-ctl struct */
struct ctl_table_header *sysctl_hdr;
struct ctl_table *sysctl_tbl;
/* sysctl variables */
int sysctl_amemthresh;
int sysctl_am_droprate;
int sysctl_drop_entry;
int sysctl_drop_packet;
int sysctl_secure_tcp;
#ifdef CONFIG_IP_VS_NFCT
int sysctl_conntrack;
#endif
int sysctl_snat_reroute;
int sysctl_sync_ver;
int sysctl_cache_bypass;
int sysctl_expire_nodest_conn;
int sysctl_expire_quiescent_template;
int sysctl_sync_threshold[2];
int sysctl_nat_icmp_send;
/* ip_vs_lblc */
int sysctl_lblc_expiration;
struct ctl_table_header *lblc_ctl_header;
struct ctl_table *lblc_ctl_table;
/* ip_vs_lblcr */
int sysctl_lblcr_expiration;
struct ctl_table_header *lblcr_ctl_header;
struct ctl_table *lblcr_ctl_table;
/* ip_vs_est */
struct list_head est_list; /* estimator list */
spinlock_t est_lock;
struct timer_list est_timer; /* Estimation timer */
/* ip_vs_sync */
struct list_head sync_queue;
spinlock_t sync_lock;
struct ip_vs_sync_buff *sync_buff;
spinlock_t sync_buff_lock;
struct sockaddr_in sync_mcast_addr;
struct task_struct *master_thread;
struct task_struct *backup_thread;
int send_mesg_maxlen;
int recv_mesg_maxlen;
volatile int sync_state;
volatile int master_syncid;
volatile int backup_syncid;
/* multicast interface name */
char master_mcast_ifn[IP_VS_IFNAME_MAXLEN];
char backup_mcast_ifn[IP_VS_IFNAME_MAXLEN];
/* net name space ptr */
struct net *net; /* Needed by timer routines */
};
#endif /* IP_VS_H_ */
......@@ -64,16 +64,6 @@ config IP_NF_IPTABLES
if IP_NF_IPTABLES
# The matches.
config IP_NF_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
depends on NETFILTER_ADVANCED
help
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config IP_NF_MATCH_AH
tristate '"ah" match support'
depends on NETFILTER_ADVANCED
......
......@@ -48,7 +48,6 @@ obj-$(CONFIG_IP_NF_RAW) += iptable_raw.o
obj-$(CONFIG_IP_NF_SECURITY) += iptable_security.o
# matches
obj-$(CONFIG_IP_NF_MATCH_ADDRTYPE) += ipt_addrtype.o
obj-$(CONFIG_IP_NF_MATCH_AH) += ipt_ah.o
obj-$(CONFIG_IP_NF_MATCH_ECN) += ipt_ecn.o
......
......@@ -1066,6 +1066,7 @@ static int do_replace(struct net *net, const void __user *user,
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
......@@ -1488,6 +1489,7 @@ static int compat_do_replace(struct net *net, void __user *user,
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
......@@ -1740,6 +1742,7 @@ static int do_arpt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len
ret = -EFAULT;
break;
}
rev.name[sizeof(rev.name)-1] = 0;
try_then_request_module(xt_find_revision(NFPROTO_ARP, rev.name,
rev.revision, 1, &ret),
......
......@@ -1262,6 +1262,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
......@@ -1807,6 +1808,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
......@@ -2036,6 +2038,7 @@ do_ipt_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EFAULT;
break;
}
rev.name[sizeof(rev.name)-1] = 0;
if (cmd == IPT_SO_GET_REVISION_TARGET)
target = 1;
......
......@@ -1275,6 +1275,7 @@ do_replace(struct net *net, const void __user *user, unsigned int len)
/* overflow check */
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
......@@ -1822,6 +1823,7 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len)
return -ENOMEM;
if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters))
return -ENOMEM;
tmp.name[sizeof(tmp.name)-1] = 0;
newinfo = xt_alloc_table_info(tmp.size);
if (!newinfo)
......@@ -2051,6 +2053,7 @@ do_ip6t_get_ctl(struct sock *sk, int cmd, void __user *user, int *len)
ret = -EFAULT;
break;
}
rev.name[sizeof(rev.name)-1] = 0;
if (cmd == IP6T_SO_GET_REVISION_TARGET)
target = 1;
......
......@@ -649,6 +649,17 @@ config NETFILTER_XT_TARGET_TCPOPTSTRIP
comment "Xtables matches"
config NETFILTER_XT_MATCH_ADDRTYPE
tristate '"addrtype" address type match support'
depends on NETFILTER_ADVANCED
depends on (IPV6 || IPV6=n)
---help---
This option allows you to match what routing thinks of an address,
eg. UNICAST, LOCAL, BROADCAST, ...
If you want to compile it as a module, say M here and read
<file:Documentation/kbuild/modules.txt>. If unsure, say `N'.
config NETFILTER_XT_MATCH_CLUSTER
tristate '"cluster" match support'
depends on NF_CONNTRACK
......
......@@ -70,6 +70,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_TRACE) += xt_TRACE.o
obj-$(CONFIG_NETFILTER_XT_TARGET_IDLETIMER) += xt_IDLETIMER.o
# matches
obj-$(CONFIG_NETFILTER_XT_MATCH_ADDRTYPE) += xt_addrtype.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CLUSTER) += xt_cluster.o
obj-$(CONFIG_NETFILTER_XT_MATCH_COMMENT) += xt_comment.o
obj-$(CONFIG_NETFILTER_XT_MATCH_CONNBYTES) += xt_connbytes.o
......
......@@ -612,7 +612,7 @@ ip_set_create(struct sock *ctnl, struct sk_buff *skb,
const struct nlmsghdr *nlh,
const struct nlattr * const attr[])
{
struct ip_set *set, *clash;
struct ip_set *set, *clash = NULL;
ip_set_id_t index = IPSET_INVALID_ID;
struct nlattr *tb[IPSET_ATTR_CREATE_MAX+1] = {};
const char *name, *typename;
......
......@@ -680,6 +680,16 @@ static inline void ip_vs_unbind_dest(struct ip_vs_conn *cp)
atomic_dec(&dest->refcnt);
}
static int expire_quiescent_template(struct netns_ipvs *ipvs,
struct ip_vs_dest *dest)
{
#ifdef CONFIG_SYSCTL
return ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0);
#else
return 0;
#endif
}
/*
* Checking if the destination of a connection template is available.
......@@ -696,8 +706,7 @@ int ip_vs_check_template(struct ip_vs_conn *ct)
*/
if ((dest == NULL) ||
!(dest->flags & IP_VS_DEST_F_AVAILABLE) ||
(ipvs->sysctl_expire_quiescent_template &&
(atomic_read(&dest->weight) == 0))) {
expire_quiescent_template(ipvs, dest)) {
IP_VS_DBG_BUF(9, "check_template: dest not available for "
"protocol %s s:%s:%d v:%s:%d "
"-> d:%s:%d\n",
......
......@@ -132,7 +132,7 @@ ip_vs_in_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.inbytes += skb->len;
u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->cpustats);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.inpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.inbytes += skb->len;
......@@ -162,7 +162,7 @@ ip_vs_out_stats(struct ip_vs_conn *cp, struct sk_buff *skb)
s->ustats.outbytes += skb->len;
u64_stats_update_end(&s->syncp);
s = this_cpu_ptr(ipvs->cpustats);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.outpkts++;
u64_stats_update_begin(&s->syncp);
s->ustats.outbytes += skb->len;
......@@ -183,7 +183,7 @@ ip_vs_conn_stats(struct ip_vs_conn *cp, struct ip_vs_service *svc)
s = this_cpu_ptr(svc->stats.cpustats);
s->ustats.conns++;
s = this_cpu_ptr(ipvs->cpustats);
s = this_cpu_ptr(ipvs->tot_stats.cpustats);
s->ustats.conns++;
}
......@@ -499,11 +499,13 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb,
int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
struct ip_vs_proto_data *pd)
{
struct net *net;
struct netns_ipvs *ipvs;
__be16 _ports[2], *pptr;
struct ip_vs_iphdr iph;
#ifdef CONFIG_SYSCTL
struct net *net;
struct netns_ipvs *ipvs;
int unicast;
#endif
ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph);
......@@ -512,6 +514,8 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_service_put(svc);
return NF_DROP;
}
#ifdef CONFIG_SYSCTL
net = skb_net(skb);
#ifdef CONFIG_IP_VS_IPV6
......@@ -563,6 +567,7 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
ip_vs_conn_put(cp);
return ret;
}
#endif
/*
* When the virtual ftp service is presented, packets destined
......@@ -599,6 +604,33 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb,
return NF_DROP;
}
#ifdef CONFIG_SYSCTL
static int sysctl_snat_reroute(struct sk_buff *skb)
{
struct netns_ipvs *ipvs = net_ipvs(skb_net(skb));
return ipvs->sysctl_snat_reroute;
}
static int sysctl_nat_icmp_send(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
return ipvs->sysctl_nat_icmp_send;
}
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs)
{
return ipvs->sysctl_expire_nodest_conn;
}
#else
static int sysctl_snat_reroute(struct sk_buff *skb) { return 0; }
static int sysctl_nat_icmp_send(struct net *net) { return 0; }
static int sysctl_expire_nodest_conn(struct netns_ipvs *ipvs) { return 0; }
#endif
__sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset)
{
return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0));
......@@ -631,6 +663,22 @@ static inline int ip_vs_gather_frags_v6(struct sk_buff *skb, u_int32_t user)
}
#endif
static int ip_vs_route_me_harder(int af, struct sk_buff *skb)
{
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (sysctl_snat_reroute(skb) && ip6_route_me_harder(skb) != 0)
return 1;
} else
#endif
if ((sysctl_snat_reroute(skb) ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
return 1;
return 0;
}
/*
* Packet has been made sufficiently writable in caller
* - inout: 1=in->out, 0=out->in
......@@ -737,7 +785,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
struct ip_vs_protocol *pp,
unsigned int offset, unsigned int ihl)
{
struct netns_ipvs *ipvs;
unsigned int verdict = NF_DROP;
if (IP_VS_FWD_METHOD(cp) != 0) {
......@@ -759,8 +806,6 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
if (!skb_make_writable(skb, offset))
goto out;
ipvs = net_ipvs(skb_net(skb));
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6)
ip_vs_nat_icmp_v6(skb, pp, cp, 1);
......@@ -768,16 +813,8 @@ static int handle_response_icmp(int af, struct sk_buff *skb,
#endif
ip_vs_nat_icmp(skb, pp, cp, 1);
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto out;
} else
#endif
if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto out;
if (ip_vs_route_me_harder(af, skb))
goto out;
/* do the statistics and put it back */
ip_vs_out_stats(cp, skb);
......@@ -985,7 +1022,6 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
struct ip_vs_conn *cp, int ihl)
{
struct ip_vs_protocol *pp = pd->pp;
struct netns_ipvs *ipvs;
IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet");
......@@ -1021,18 +1057,8 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_proto_data *pd,
* if it came from this machine itself. So re-compute
* the routing information.
*/
ipvs = net_ipvs(skb_net(skb));
#ifdef CONFIG_IP_VS_IPV6
if (af == AF_INET6) {
if (ipvs->sysctl_snat_reroute && ip6_route_me_harder(skb) != 0)
goto drop;
} else
#endif
if ((ipvs->sysctl_snat_reroute ||
skb_rtable(skb)->rt_flags & RTCF_LOCAL) &&
ip_route_me_harder(skb, RTN_LOCAL) != 0)
goto drop;
if (ip_vs_route_me_harder(af, skb))
goto drop;
IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT");
......@@ -1066,7 +1092,6 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
struct ip_vs_protocol *pp;
struct ip_vs_proto_data *pd;
struct ip_vs_conn *cp;
struct netns_ipvs *ipvs;
EnterFunction(11);
......@@ -1141,11 +1166,10 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af)
* Check if the packet belongs to an existing entry
*/
cp = pp->conn_out_get(af, skb, &iph, iph.len, 0);
ipvs = net_ipvs(net);
if (likely(cp))
return handle_response(af, skb, pd, cp, iph.len);
if (ipvs->sysctl_nat_icmp_send &&
if (sysctl_nat_icmp_send(net) &&
(pp->protocol == IPPROTO_TCP ||
pp->protocol == IPPROTO_UDP ||
pp->protocol == IPPROTO_SCTP)) {
......@@ -1570,7 +1594,7 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) {
/* the destination server is not available */
if (ipvs->sysctl_expire_nodest_conn) {
if (sysctl_expire_nodest_conn(ipvs)) {
/* try to expire the connection immediately */
ip_vs_conn_expire_now(cp);
}
......@@ -1600,15 +1624,15 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
*/
if (cp->flags & IP_VS_CONN_F_ONE_PACKET)
pkts = ipvs->sysctl_sync_threshold[0];
pkts = sysctl_sync_threshold(ipvs);
else
pkts = atomic_add_return(1, &cp->in_pkts);
if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
cp->protocol == IPPROTO_SCTP) {
if ((cp->state == IP_VS_SCTP_S_ESTABLISHED &&
(pkts % ipvs->sysctl_sync_threshold[1]
== ipvs->sysctl_sync_threshold[0])) ||
(pkts % sysctl_sync_period(ipvs)
== sysctl_sync_threshold(ipvs))) ||
(cp->old_state != cp->state &&
((cp->state == IP_VS_SCTP_S_CLOSED) ||
(cp->state == IP_VS_SCTP_S_SHUT_ACK_CLI) ||
......@@ -1622,8 +1646,8 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af)
else if ((ipvs->sync_state & IP_VS_STATE_MASTER) &&
(((cp->protocol != IPPROTO_TCP ||
cp->state == IP_VS_TCP_S_ESTABLISHED) &&
(pkts % ipvs->sysctl_sync_threshold[1]
== ipvs->sysctl_sync_threshold[0])) ||
(pkts % sysctl_sync_period(ipvs)
== sysctl_sync_threshold(ipvs))) ||
((cp->protocol == IPPROTO_TCP) && (cp->old_state != cp->state) &&
((cp->state == IP_VS_TCP_S_FIN_WAIT) ||
(cp->state == IP_VS_TCP_S_CLOSE) ||
......
......@@ -86,6 +86,8 @@ static int __ip_vs_addr_is_local_v6(struct net *net,
return 0;
}
#endif
#ifdef CONFIG_SYSCTL
/*
* update_defense_level is called from keventd and from sysctl,
* so it needs to protect itself from softirqs
......@@ -227,6 +229,7 @@ static void defense_work_handler(struct work_struct *work)
ip_vs_random_dropentry(ipvs->net);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
}
#endif
int
ip_vs_use_count_inc(void)
......@@ -409,9 +412,11 @@ ip_vs_service_get(struct net *net, int af, __u32 fwmark, __u16 protocol,
/*
* Check the table hashed by fwmark first
*/
svc = __ip_vs_svc_fwm_find(net, af, fwmark);
if (fwmark && svc)
goto out;
if (fwmark) {
svc = __ip_vs_svc_fwm_find(net, af, fwmark);
if (svc)
goto out;
}
/*
* Check the table hashed by <protocol,addr,port>
......@@ -707,13 +712,39 @@ static void ip_vs_trash_cleanup(struct net *net)
}
}
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
#define IP_VS_SHOW_STATS_COUNTER(c) dst->c = src->ustats.c - src->ustats0.c
spin_lock_bh(&src->lock);
IP_VS_SHOW_STATS_COUNTER(conns);
IP_VS_SHOW_STATS_COUNTER(inpkts);
IP_VS_SHOW_STATS_COUNTER(outpkts);
IP_VS_SHOW_STATS_COUNTER(inbytes);
IP_VS_SHOW_STATS_COUNTER(outbytes);
ip_vs_read_estimator(dst, src);
spin_unlock_bh(&src->lock);
}
static void
ip_vs_zero_stats(struct ip_vs_stats *stats)
{
spin_lock_bh(&stats->lock);
memset(&stats->ustats, 0, sizeof(stats->ustats));
/* get current counters as zero point, rates are zeroed */
#define IP_VS_ZERO_STATS_COUNTER(c) stats->ustats0.c = stats->ustats.c
IP_VS_ZERO_STATS_COUNTER(conns);
IP_VS_ZERO_STATS_COUNTER(inpkts);
IP_VS_ZERO_STATS_COUNTER(outpkts);
IP_VS_ZERO_STATS_COUNTER(inbytes);
IP_VS_ZERO_STATS_COUNTER(outbytes);
ip_vs_zero_estimator(stats);
spin_unlock_bh(&stats->lock);
......@@ -772,7 +803,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest,
spin_unlock_bh(&dest->dst_lock);
if (add)
ip_vs_new_estimator(svc->net, &dest->stats);
ip_vs_start_estimator(svc->net, &dest->stats);
write_lock_bh(&__ip_vs_svc_lock);
......@@ -978,7 +1009,7 @@ static void __ip_vs_del_dest(struct net *net, struct ip_vs_dest *dest)
{
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_kill_estimator(net, &dest->stats);
ip_vs_stop_estimator(net, &dest->stats);
/*
* Remove it from the d-linked list with the real services.
......@@ -1171,7 +1202,7 @@ ip_vs_add_service(struct net *net, struct ip_vs_service_user_kern *u,
else if (svc->port == 0)
atomic_inc(&ipvs->nullsvc_counter);
ip_vs_new_estimator(net, &svc->stats);
ip_vs_start_estimator(net, &svc->stats);
/* Count only IPv4 services for old get/setsockopt interface */
if (svc->af == AF_INET)
......@@ -1323,7 +1354,7 @@ static void __ip_vs_del_service(struct ip_vs_service *svc)
if (svc->af == AF_INET)
ipvs->num_services--;
ip_vs_kill_estimator(svc->net, &svc->stats);
ip_vs_stop_estimator(svc->net, &svc->stats);
/* Unbind scheduler */
old_sched = svc->scheduler;
......@@ -1477,11 +1508,11 @@ static int ip_vs_zero_all(struct net *net)
}
}
ip_vs_zero_stats(net_ipvs(net)->tot_stats);
ip_vs_zero_stats(&net_ipvs(net)->tot_stats);
return 0;
}
#ifdef CONFIG_SYSCTL
static int
proc_do_defense_mode(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
......@@ -1503,7 +1534,6 @@ proc_do_defense_mode(ctl_table *table, int write,
return rc;
}
static int
proc_do_sync_threshold(ctl_table *table, int write,
void __user *buffer, size_t *lenp, loff_t *ppos)
......@@ -1737,6 +1767,7 @@ const struct ctl_path net_vs_ctl_path[] = {
{ }
};
EXPORT_SYMBOL_GPL(net_vs_ctl_path);
#endif
#ifdef CONFIG_PROC_FS
......@@ -1959,7 +1990,7 @@ static const struct file_operations ip_vs_info_fops = {
static int ip_vs_stats_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
struct ip_vs_stats_user show;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
......@@ -1967,22 +1998,18 @@ static int ip_vs_stats_show(struct seq_file *seq, void *v)
seq_printf(seq,
" Conns Packets Packets Bytes Bytes\n");
spin_lock_bh(&tot_stats->lock);
seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", tot_stats->ustats.conns,
tot_stats->ustats.inpkts, tot_stats->ustats.outpkts,
(unsigned long long) tot_stats->ustats.inbytes,
(unsigned long long) tot_stats->ustats.outbytes);
ip_vs_copy_stats(&show, &net_ipvs(net)->tot_stats);
seq_printf(seq, "%8X %8X %8X %16LX %16LX\n\n", show.conns,
show.inpkts, show.outpkts,
(unsigned long long) show.inbytes,
(unsigned long long) show.outbytes);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq,"%8X %8X %8X %16X %16X\n",
tot_stats->ustats.cps,
tot_stats->ustats.inpps,
tot_stats->ustats.outpps,
tot_stats->ustats.inbps,
tot_stats->ustats.outbps);
spin_unlock_bh(&tot_stats->lock);
seq_printf(seq, "%8X %8X %8X %16X %16X\n",
show.cps, show.inpps, show.outpps,
show.inbps, show.outbps);
return 0;
}
......@@ -2003,7 +2030,9 @@ static const struct file_operations ip_vs_stats_fops = {
static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
{
struct net *net = seq_file_single_net(seq);
struct ip_vs_stats *tot_stats = net_ipvs(net)->tot_stats;
struct ip_vs_stats *tot_stats = &net_ipvs(net)->tot_stats;
struct ip_vs_cpu_stats *cpustats = tot_stats->cpustats;
struct ip_vs_stats_user rates;
int i;
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
......@@ -2013,30 +2042,43 @@ static int ip_vs_stats_percpu_show(struct seq_file *seq, void *v)
"CPU Conns Packets Packets Bytes Bytes\n");
for_each_possible_cpu(i) {
struct ip_vs_cpu_stats *u = per_cpu_ptr(net->ipvs->cpustats, i);
struct ip_vs_cpu_stats *u = per_cpu_ptr(cpustats, i);
unsigned int start;
__u64 inbytes, outbytes;
do {
start = u64_stats_fetch_begin_bh(&u->syncp);
inbytes = u->ustats.inbytes;
outbytes = u->ustats.outbytes;
} while (u64_stats_fetch_retry_bh(&u->syncp, start));
seq_printf(seq, "%3X %8X %8X %8X %16LX %16LX\n",
i, u->ustats.conns, u->ustats.inpkts,
u->ustats.outpkts, (__u64)u->ustats.inbytes,
(__u64)u->ustats.outbytes);
i, u->ustats.conns, u->ustats.inpkts,
u->ustats.outpkts, (__u64)inbytes,
(__u64)outbytes);
}
spin_lock_bh(&tot_stats->lock);
seq_printf(seq, " ~ %8X %8X %8X %16LX %16LX\n\n",
tot_stats->ustats.conns, tot_stats->ustats.inpkts,
tot_stats->ustats.outpkts,
(unsigned long long) tot_stats->ustats.inbytes,
(unsigned long long) tot_stats->ustats.outbytes);
ip_vs_read_estimator(&rates, tot_stats);
spin_unlock_bh(&tot_stats->lock);
/* 01234567 01234567 01234567 0123456701234567 0123456701234567 */
seq_puts(seq,
" Conns/s Pkts/s Pkts/s Bytes/s Bytes/s\n");
seq_printf(seq, " %8X %8X %8X %16X %16X\n",
tot_stats->ustats.cps,
tot_stats->ustats.inpps,
tot_stats->ustats.outpps,
tot_stats->ustats.inbps,
tot_stats->ustats.outbps);
spin_unlock_bh(&tot_stats->lock);
rates.cps,
rates.inpps,
rates.outpps,
rates.inbps,
rates.outbps);
return 0;
}
......@@ -2283,14 +2325,6 @@ do_ip_vs_set_ctl(struct sock *sk, int cmd, void __user *user, unsigned int len)
}
static void
ip_vs_copy_stats(struct ip_vs_stats_user *dst, struct ip_vs_stats *src)
{
spin_lock_bh(&src->lock);
memcpy(dst, &src->ustats, sizeof(*dst));
spin_unlock_bh(&src->lock);
}
static void
ip_vs_copy_service(struct ip_vs_service_entry *dst, struct ip_vs_service *src)
{
......@@ -2677,31 +2711,29 @@ static const struct nla_policy ip_vs_dest_policy[IPVS_DEST_ATTR_MAX + 1] = {
static int ip_vs_genl_fill_stats(struct sk_buff *skb, int container_type,
struct ip_vs_stats *stats)
{
struct ip_vs_stats_user ustats;
struct nlattr *nl_stats = nla_nest_start(skb, container_type);
if (!nl_stats)
return -EMSGSIZE;
spin_lock_bh(&stats->lock);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, stats->ustats.conns);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, stats->ustats.inpkts);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, stats->ustats.outpkts);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, stats->ustats.inbytes);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, stats->ustats.outbytes);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, stats->ustats.cps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, stats->ustats.inpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, stats->ustats.outpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, stats->ustats.inbps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, stats->ustats.outbps);
ip_vs_copy_stats(&ustats, stats);
spin_unlock_bh(&stats->lock);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CONNS, ustats.conns);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPKTS, ustats.inpkts);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPKTS, ustats.outpkts);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_INBYTES, ustats.inbytes);
NLA_PUT_U64(skb, IPVS_STATS_ATTR_OUTBYTES, ustats.outbytes);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_CPS, ustats.cps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INPPS, ustats.inpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTPPS, ustats.outpps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_INBPS, ustats.inbps);
NLA_PUT_U32(skb, IPVS_STATS_ATTR_OUTBPS, ustats.outbps);
nla_nest_end(skb, nl_stats);
return 0;
nla_put_failure:
spin_unlock_bh(&stats->lock);
nla_nest_cancel(skb, nl_stats);
return -EMSGSIZE;
}
......@@ -3480,7 +3512,8 @@ static void ip_vs_genl_unregister(void)
/*
* per netns intit/exit func.
*/
int __net_init __ip_vs_control_init(struct net *net)
#ifdef CONFIG_SYSCTL
int __net_init __ip_vs_control_init_sysctl(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
......@@ -3490,38 +3523,11 @@ int __net_init __ip_vs_control_init(struct net *net)
spin_lock_init(&ipvs->dropentry_lock);
spin_lock_init(&ipvs->droppacket_lock);
spin_lock_init(&ipvs->securetcp_lock);
ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
INIT_LIST_HEAD(&ipvs->rs_table[idx]);
INIT_LIST_HEAD(&ipvs->dest_trash);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
/* procfs stats */
ipvs->tot_stats = kzalloc(sizeof(struct ip_vs_stats), GFP_KERNEL);
if (ipvs->tot_stats == NULL) {
pr_err("%s(): no memory.\n", __func__);
return -ENOMEM;
}
ipvs->cpustats = alloc_percpu(struct ip_vs_cpu_stats);
if (!ipvs->cpustats) {
pr_err("%s() alloc_percpu failed\n", __func__);
goto err_alloc;
}
spin_lock_init(&ipvs->tot_stats->lock);
proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
&ip_vs_stats_percpu_fops);
if (!net_eq(net, &init_net)) {
tbl = kmemdup(vs_vars, sizeof(vs_vars), GFP_KERNEL);
if (tbl == NULL)
goto err_dup;
return -ENOMEM;
} else
tbl = vs_vars;
/* Initialize sysctl defaults */
......@@ -3543,52 +3549,94 @@ int __net_init __ip_vs_control_init(struct net *net)
tbl[idx++].data = &ipvs->sysctl_cache_bypass;
tbl[idx++].data = &ipvs->sysctl_expire_nodest_conn;
tbl[idx++].data = &ipvs->sysctl_expire_quiescent_template;
ipvs->sysctl_sync_threshold[0] = 3;
ipvs->sysctl_sync_threshold[1] = 50;
ipvs->sysctl_sync_threshold[0] = DEFAULT_SYNC_THRESHOLD;
ipvs->sysctl_sync_threshold[1] = DEFAULT_SYNC_PERIOD;
tbl[idx].data = &ipvs->sysctl_sync_threshold;
tbl[idx++].maxlen = sizeof(ipvs->sysctl_sync_threshold);
tbl[idx++].data = &ipvs->sysctl_nat_icmp_send;
#ifdef CONFIG_SYSCTL
ipvs->sysctl_hdr = register_net_sysctl_table(net, net_vs_ctl_path,
tbl);
if (ipvs->sysctl_hdr == NULL) {
if (!net_eq(net, &init_net))
kfree(tbl);
goto err_dup;
return -ENOMEM;
}
#endif
ip_vs_new_estimator(net, ipvs->tot_stats);
ip_vs_start_estimator(net, &ipvs->tot_stats);
ipvs->sysctl_tbl = tbl;
/* Schedule defense work */
INIT_DELAYED_WORK(&ipvs->defense_work, defense_work_handler);
schedule_delayed_work(&ipvs->defense_work, DEFENSE_TIMER_PERIOD);
return 0;
err_dup:
free_percpu(ipvs->cpustats);
err_alloc:
kfree(ipvs->tot_stats);
return -ENOMEM;
return 0;
}
static void __net_exit __ip_vs_control_cleanup(struct net *net)
void __net_init __ip_vs_control_cleanup_sysctl(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
ip_vs_kill_estimator(net, ipvs->tot_stats);
cancel_delayed_work_sync(&ipvs->defense_work);
cancel_work_sync(&ipvs->defense_work.work);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->sysctl_hdr);
}
#else
int __net_init __ip_vs_control_init_sysctl(struct net *net) { return 0; }
void __net_init __ip_vs_control_cleanup_sysctl(struct net *net) { }
#endif
int __net_init __ip_vs_control_init(struct net *net)
{
int idx;
struct netns_ipvs *ipvs = net_ipvs(net);
ipvs->rs_lock = __RW_LOCK_UNLOCKED(ipvs->rs_lock);
/* Initialize rs_table */
for (idx = 0; idx < IP_VS_RTAB_SIZE; idx++)
INIT_LIST_HEAD(&ipvs->rs_table[idx]);
INIT_LIST_HEAD(&ipvs->dest_trash);
atomic_set(&ipvs->ftpsvc_counter, 0);
atomic_set(&ipvs->nullsvc_counter, 0);
/* procfs stats */
ipvs->tot_stats.cpustats = alloc_percpu(struct ip_vs_cpu_stats);
if (ipvs->tot_stats.cpustats) {
pr_err("%s(): alloc_percpu.\n", __func__);
return -ENOMEM;
}
spin_lock_init(&ipvs->tot_stats.lock);
proc_net_fops_create(net, "ip_vs", 0, &ip_vs_info_fops);
proc_net_fops_create(net, "ip_vs_stats", 0, &ip_vs_stats_fops);
proc_net_fops_create(net, "ip_vs_stats_percpu", 0,
&ip_vs_stats_percpu_fops);
if (__ip_vs_control_init_sysctl(net))
goto err;
return 0;
err:
free_percpu(ipvs->tot_stats.cpustats);
return -ENOMEM;
}
static void __net_exit __ip_vs_control_cleanup(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
ip_vs_trash_cleanup(net);
ip_vs_stop_estimator(net, &ipvs->tot_stats);
__ip_vs_control_cleanup_sysctl(net);
proc_net_remove(net, "ip_vs_stats_percpu");
proc_net_remove(net, "ip_vs_stats");
proc_net_remove(net, "ip_vs");
free_percpu(ipvs->cpustats);
kfree(ipvs->tot_stats);
free_percpu(ipvs->tot_stats.cpustats);
}
static struct pernet_operations ipvs_control_ops = {
......
......@@ -69,10 +69,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
sum->inpkts += s->ustats.inpkts;
sum->outpkts += s->ustats.outpkts;
do {
start = u64_stats_fetch_begin_bh(&s->syncp);
start = u64_stats_fetch_begin(&s->syncp);
inbytes = s->ustats.inbytes;
outbytes = s->ustats.outbytes;
} while (u64_stats_fetch_retry_bh(&s->syncp, start));
} while (u64_stats_fetch_retry(&s->syncp, start));
sum->inbytes += inbytes;
sum->outbytes += outbytes;
} else {
......@@ -80,10 +80,10 @@ static void ip_vs_read_cpu_stats(struct ip_vs_stats_user *sum,
sum->inpkts = s->ustats.inpkts;
sum->outpkts = s->ustats.outpkts;
do {
start = u64_stats_fetch_begin_bh(&s->syncp);
start = u64_stats_fetch_begin(&s->syncp);
sum->inbytes = s->ustats.inbytes;
sum->outbytes = s->ustats.outbytes;
} while (u64_stats_fetch_retry_bh(&s->syncp, start));
} while (u64_stats_fetch_retry(&s->syncp, start));
}
}
}
......@@ -101,13 +101,12 @@ static void estimation_timer(unsigned long arg)
struct netns_ipvs *ipvs;
ipvs = net_ipvs(net);
ip_vs_read_cpu_stats(&ipvs->tot_stats->ustats, ipvs->cpustats);
spin_lock(&ipvs->est_lock);
list_for_each_entry(e, &ipvs->est_list, list) {
s = container_of(e, struct ip_vs_stats, est);
ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
spin_lock(&s->lock);
ip_vs_read_cpu_stats(&s->ustats, s->cpustats);
n_conns = s->ustats.conns;
n_inpkts = s->ustats.inpkts;
n_outpkts = s->ustats.outpkts;
......@@ -118,61 +117,41 @@ static void estimation_timer(unsigned long arg)
rate = (n_conns - e->last_conns) << 9;
e->last_conns = n_conns;
e->cps += ((long)rate - (long)e->cps) >> 2;
s->ustats.cps = (e->cps + 0x1FF) >> 10;
rate = (n_inpkts - e->last_inpkts) << 9;
e->last_inpkts = n_inpkts;
e->inpps += ((long)rate - (long)e->inpps) >> 2;
s->ustats.inpps = (e->inpps + 0x1FF) >> 10;
rate = (n_outpkts - e->last_outpkts) << 9;
e->last_outpkts = n_outpkts;
e->outpps += ((long)rate - (long)e->outpps) >> 2;
s->ustats.outpps = (e->outpps + 0x1FF) >> 10;
rate = (n_inbytes - e->last_inbytes) << 4;
e->last_inbytes = n_inbytes;
e->inbps += ((long)rate - (long)e->inbps) >> 2;
s->ustats.inbps = (e->inbps + 0xF) >> 5;
rate = (n_outbytes - e->last_outbytes) << 4;
e->last_outbytes = n_outbytes;
e->outbps += ((long)rate - (long)e->outbps) >> 2;
s->ustats.outbps = (e->outbps + 0xF) >> 5;
spin_unlock(&s->lock);
}
spin_unlock(&ipvs->est_lock);
mod_timer(&ipvs->est_timer, jiffies + 2*HZ);
}
void ip_vs_new_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_start_estimator(struct net *net, struct ip_vs_stats *stats)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
INIT_LIST_HEAD(&est->list);
est->last_conns = stats->ustats.conns;
est->cps = stats->ustats.cps<<10;
est->last_inpkts = stats->ustats.inpkts;
est->inpps = stats->ustats.inpps<<10;
est->last_outpkts = stats->ustats.outpkts;
est->outpps = stats->ustats.outpps<<10;
est->last_inbytes = stats->ustats.inbytes;
est->inbps = stats->ustats.inbps<<5;
est->last_outbytes = stats->ustats.outbytes;
est->outbps = stats->ustats.outbps<<5;
spin_lock_bh(&ipvs->est_lock);
list_add(&est->list, &ipvs->est_list);
spin_unlock_bh(&ipvs->est_lock);
}
void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_stop_estimator(struct net *net, struct ip_vs_stats *stats)
{
struct netns_ipvs *ipvs = net_ipvs(net);
struct ip_vs_estimator *est = &stats->est;
......@@ -185,13 +164,14 @@ void ip_vs_kill_estimator(struct net *net, struct ip_vs_stats *stats)
void ip_vs_zero_estimator(struct ip_vs_stats *stats)
{
struct ip_vs_estimator *est = &stats->est;
/* set counters zero, caller must hold the stats->lock lock */
est->last_inbytes = 0;
est->last_outbytes = 0;
est->last_conns = 0;
est->last_inpkts = 0;
est->last_outpkts = 0;
struct ip_vs_stats_user *u = &stats->ustats;
/* reset counters, caller must hold the stats->lock lock */
est->last_inbytes = u->inbytes;
est->last_outbytes = u->outbytes;
est->last_conns = u->conns;
est->last_inpkts = u->inpkts;
est->last_outpkts = u->outpkts;
est->cps = 0;
est->inpps = 0;
est->outpps = 0;
......@@ -199,6 +179,19 @@ void ip_vs_zero_estimator(struct ip_vs_stats *stats)
est->outbps = 0;
}
/* Get decoded rates */
void ip_vs_read_estimator(struct ip_vs_stats_user *dst,
struct ip_vs_stats *stats)
{
struct ip_vs_estimator *e = &stats->est;
dst->cps = (e->cps + 0x1FF) >> 10;
dst->inpps = (e->inpps + 0x1FF) >> 10;
dst->outpps = (e->outpps + 0x1FF) >> 10;
dst->inbps = (e->inbps + 0xF) >> 5;
dst->outbps = (e->outbps + 0xF) >> 5;
}
static int __net_init __ip_vs_estimator_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
......
......@@ -63,6 +63,8 @@
#define CHECK_EXPIRE_INTERVAL (60*HZ)
#define ENTRY_TIMEOUT (6*60*HZ)
#define DEFAULT_EXPIRATION (24*60*60*HZ)
/*
* It is for full expiration check.
* When there is no partial expiration check (garbage collection)
......@@ -112,7 +114,7 @@ struct ip_vs_lblc_table {
/*
* IPVS LBLC sysctl table
*/
#ifdef CONFIG_SYSCTL
static ctl_table vs_vars_table[] = {
{
.procname = "lblc_expiration",
......@@ -123,6 +125,7 @@ static ctl_table vs_vars_table[] = {
},
{ }
};
#endif
static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en)
{
......@@ -238,6 +241,15 @@ static void ip_vs_lblc_flush(struct ip_vs_lblc_table *tbl)
}
}
static int sysctl_lblc_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
struct netns_ipvs *ipvs = net_ipvs(svc->net);
return ipvs->sysctl_lblc_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
}
static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
{
......@@ -245,7 +257,6 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
struct ip_vs_lblc_entry *en, *nxt;
unsigned long now = jiffies;
int i, j;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLC_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLC_TAB_MASK;
......@@ -254,7 +265,7 @@ static inline void ip_vs_lblc_full_check(struct ip_vs_service *svc)
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_before(now,
en->lastuse +
ipvs->sysctl_lblc_expiration))
sysctl_lblc_expiration(svc)))
continue;
ip_vs_lblc_free(en);
......@@ -538,6 +549,7 @@ static struct ip_vs_scheduler ip_vs_lblc_scheduler =
/*
* per netns init.
*/
#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblc_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
......@@ -550,10 +562,9 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
return -ENOMEM;
} else
ipvs->lblc_ctl_table = vs_vars_table;
ipvs->sysctl_lblc_expiration = 24*60*60*HZ;
ipvs->sysctl_lblc_expiration = DEFAULT_EXPIRATION;
ipvs->lblc_ctl_table[0].data = &ipvs->sysctl_lblc_expiration;
#ifdef CONFIG_SYSCTL
ipvs->lblc_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblc_ctl_table);
......@@ -562,7 +573,6 @@ static int __net_init __ip_vs_lblc_init(struct net *net)
kfree(ipvs->lblc_ctl_table);
return -ENOMEM;
}
#endif
return 0;
}
......@@ -571,14 +581,19 @@ static void __net_exit __ip_vs_lblc_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->lblc_ctl_header);
#endif
if (!net_eq(net, &init_net))
kfree(ipvs->lblc_ctl_table);
}
#else
static int __net_init __ip_vs_lblc_init(struct net *net) { return 0; }
static void __net_exit __ip_vs_lblc_exit(struct net *net) { }
#endif
static struct pernet_operations ip_vs_lblc_ops = {
.init = __ip_vs_lblc_init,
.exit = __ip_vs_lblc_exit,
......
......@@ -63,6 +63,8 @@
#define CHECK_EXPIRE_INTERVAL (60*HZ)
#define ENTRY_TIMEOUT (6*60*HZ)
#define DEFAULT_EXPIRATION (24*60*60*HZ)
/*
* It is for full expiration check.
* When there is no partial expiration check (garbage collection)
......@@ -283,6 +285,7 @@ struct ip_vs_lblcr_table {
};
#ifdef CONFIG_SYSCTL
/*
* IPVS LBLCR sysctl table
*/
......@@ -297,6 +300,7 @@ static ctl_table vs_vars_table[] = {
},
{ }
};
#endif
static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en)
{
......@@ -410,6 +414,15 @@ static void ip_vs_lblcr_flush(struct ip_vs_lblcr_table *tbl)
}
}
static int sysctl_lblcr_expiration(struct ip_vs_service *svc)
{
#ifdef CONFIG_SYSCTL
struct netns_ipvs *ipvs = net_ipvs(svc->net);
return ipvs->sysctl_lblcr_expiration;
#else
return DEFAULT_EXPIRATION;
#endif
}
static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
{
......@@ -417,15 +430,14 @@ static inline void ip_vs_lblcr_full_check(struct ip_vs_service *svc)
unsigned long now = jiffies;
int i, j;
struct ip_vs_lblcr_entry *en, *nxt;
struct netns_ipvs *ipvs = net_ipvs(svc->net);
for (i=0, j=tbl->rover; i<IP_VS_LBLCR_TAB_SIZE; i++) {
j = (j + 1) & IP_VS_LBLCR_TAB_MASK;
write_lock(&svc->sched_lock);
list_for_each_entry_safe(en, nxt, &tbl->bucket[j], list) {
if (time_after(en->lastuse
+ ipvs->sysctl_lblcr_expiration, now))
if (time_after(en->lastuse +
sysctl_lblcr_expiration(svc), now))
continue;
ip_vs_lblcr_free(en);
......@@ -650,7 +662,6 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
read_lock(&svc->sched_lock);
en = ip_vs_lblcr_get(svc->af, tbl, &iph.daddr);
if (en) {
struct netns_ipvs *ipvs = net_ipvs(svc->net);
/* We only hold a read lock, but this is atomic */
en->lastuse = jiffies;
......@@ -662,7 +673,7 @@ ip_vs_lblcr_schedule(struct ip_vs_service *svc, const struct sk_buff *skb)
/* More than one destination + enough time passed by, cleanup */
if (atomic_read(&en->set.size) > 1 &&
time_after(jiffies, en->set.lastmod +
ipvs->sysctl_lblcr_expiration)) {
sysctl_lblcr_expiration(svc))) {
struct ip_vs_dest *m;
write_lock(&en->set.lock);
......@@ -734,6 +745,7 @@ static struct ip_vs_scheduler ip_vs_lblcr_scheduler =
/*
* per netns init.
*/
#ifdef CONFIG_SYSCTL
static int __net_init __ip_vs_lblcr_init(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
......@@ -746,10 +758,9 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
return -ENOMEM;
} else
ipvs->lblcr_ctl_table = vs_vars_table;
ipvs->sysctl_lblcr_expiration = 24*60*60*HZ;
ipvs->sysctl_lblcr_expiration = DEFAULT_EXPIRATION;
ipvs->lblcr_ctl_table[0].data = &ipvs->sysctl_lblcr_expiration;
#ifdef CONFIG_SYSCTL
ipvs->lblcr_ctl_header =
register_net_sysctl_table(net, net_vs_ctl_path,
ipvs->lblcr_ctl_table);
......@@ -758,7 +769,6 @@ static int __net_init __ip_vs_lblcr_init(struct net *net)
kfree(ipvs->lblcr_ctl_table);
return -ENOMEM;
}
#endif
return 0;
}
......@@ -767,14 +777,19 @@ static void __net_exit __ip_vs_lblcr_exit(struct net *net)
{
struct netns_ipvs *ipvs = net_ipvs(net);
#ifdef CONFIG_SYSCTL
unregister_net_sysctl_table(ipvs->lblcr_ctl_header);
#endif
if (!net_eq(net, &init_net))
kfree(ipvs->lblcr_ctl_table);
}
#else
static int __net_init __ip_vs_lblcr_init(struct net *net) { return 0; }
static void __net_exit __ip_vs_lblcr_exit(struct net *net) { }
#endif
static struct pernet_operations ip_vs_lblcr_ops = {
.init = __ip_vs_lblcr_init,
.exit = __ip_vs_lblcr_exit,
......
......@@ -92,14 +92,13 @@ ip_vs_sip_fill_param(struct ip_vs_conn_param *p, struct sk_buff *skb)
if (get_callid(dptr, dataoff, datalen, &matchoff, &matchlen))
return -EINVAL;
p->pe_data = kmalloc(matchlen, GFP_ATOMIC);
if (!p->pe_data)
return -ENOMEM;
/* N.B: pe_data is only set on success,
* this allows fallback to the default persistence logic on failure
*/
memcpy(p->pe_data, dptr + matchoff, matchlen);
p->pe_data = kmemdup(dptr + matchoff, matchlen, GFP_ATOMIC);
if (!p->pe_data)
return -ENOMEM;
p->pe_data_len = matchlen;
return 0;
......
......@@ -394,7 +394,7 @@ void ip_vs_sync_switch_mode(struct net *net, int mode)
if (!(ipvs->sync_state & IP_VS_STATE_MASTER))
return;
if (mode == ipvs->sysctl_sync_ver || !ipvs->sync_buff)
if (mode == sysctl_sync_ver(ipvs) || !ipvs->sync_buff)
return;
spin_lock_bh(&ipvs->sync_buff_lock);
......@@ -521,7 +521,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
unsigned int len, pe_name_len, pad;
/* Handle old version of the protocol */
if (ipvs->sysctl_sync_ver == 0) {
if (sysctl_sync_ver(ipvs) == 0) {
ip_vs_sync_conn_v0(net, cp);
return;
}
......@@ -650,7 +650,7 @@ void ip_vs_sync_conn(struct net *net, struct ip_vs_conn *cp)
if (cp->flags & IP_VS_CONN_F_TEMPLATE) {
int pkts = atomic_add_return(1, &cp->in_pkts);
if (pkts % ipvs->sysctl_sync_threshold[1] != 1)
if (pkts % sysctl_sync_period(ipvs) != 1)
return;
}
goto sloop;
......@@ -697,13 +697,12 @@ ip_vs_conn_fill_param_sync(struct net *net, int af, union ip_vs_sync_conn *sc,
return 1;
}
p->pe_data = kmalloc(pe_data_len, GFP_ATOMIC);
p->pe_data = kmemdup(pe_data, pe_data_len, GFP_ATOMIC);
if (!p->pe_data) {
if (p->pe->module)
module_put(p->pe->module);
return -ENOMEM;
}
memcpy(p->pe_data, pe_data, pe_data_len);
p->pe_data_len = pe_data_len;
}
return 0;
......@@ -795,7 +794,7 @@ static void ip_vs_proc_conn(struct net *net, struct ip_vs_conn_param *param,
if (opt)
memcpy(&cp->in_seq, opt, sizeof(*opt));
atomic_set(&cp->in_pkts, ipvs->sysctl_sync_threshold[0]);
atomic_set(&cp->in_pkts, sysctl_sync_threshold(ipvs));
cp->state = state;
cp->old_state = cp->state;
/*
......
......@@ -1301,6 +1301,7 @@ static void nf_conntrack_cleanup_net(struct net *net)
nf_ct_free_hashtable(net->ct.hash, net->ct.htable_size);
nf_conntrack_ecache_fini(net);
nf_conntrack_tstamp_fini(net);
nf_conntrack_acct_fini(net);
nf_conntrack_expect_fini(net);
kmem_cache_destroy(net->ct.nf_conntrack_cachep);
......
......@@ -183,14 +183,14 @@ EXPORT_SYMBOL(xt_unregister_matches);
/*
* These are weird, but module loading must not be done with mutex
* held (since they will register), and we have to have a single
* function to use try_then_request_module().
* function to use.
*/
/* Find match, grabs ref. Returns ERR_PTR() on error. */
struct xt_match *xt_find_match(u8 af, const char *name, u8 revision)
{
struct xt_match *m;
int err = 0;
int err = -ENOENT;
if (mutex_lock_interruptible(&xt[af].mutex) != 0)
return ERR_PTR(-EINTR);
......@@ -221,9 +221,13 @@ xt_request_find_match(uint8_t nfproto, const char *name, uint8_t revision)
{
struct xt_match *match;
match = try_then_request_module(xt_find_match(nfproto, name, revision),
"%st_%s", xt_prefix[nfproto], name);
return (match != NULL) ? match : ERR_PTR(-ENOENT);
match = xt_find_match(nfproto, name, revision);
if (IS_ERR(match)) {
request_module("%st_%s", xt_prefix[nfproto], name);
match = xt_find_match(nfproto, name, revision);
}
return match;
}
EXPORT_SYMBOL_GPL(xt_request_find_match);
......@@ -231,7 +235,7 @@ EXPORT_SYMBOL_GPL(xt_request_find_match);
struct xt_target *xt_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *t;
int err = 0;
int err = -ENOENT;
if (mutex_lock_interruptible(&xt[af].mutex) != 0)
return ERR_PTR(-EINTR);
......@@ -261,9 +265,13 @@ struct xt_target *xt_request_find_target(u8 af, const char *name, u8 revision)
{
struct xt_target *target;
target = try_then_request_module(xt_find_target(af, name, revision),
"%st_%s", xt_prefix[af], name);
return (target != NULL) ? target : ERR_PTR(-ENOENT);
target = xt_find_target(af, name, revision);
if (IS_ERR(target)) {
request_module("%st_%s", xt_prefix[af], name);
target = xt_find_target(af, name, revision);
}
return target;
}
EXPORT_SYMBOL_GPL(xt_request_find_target);
......
......@@ -16,12 +16,86 @@
#include <linux/ip.h>
#include <net/route.h>
#include <linux/netfilter_ipv4/ipt_addrtype.h>
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
#include <net/ipv6.h>
#include <net/ip6_route.h>
#include <net/ip6_fib.h>
#endif
#include <linux/netfilter/xt_addrtype.h>
#include <linux/netfilter/x_tables.h>
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Patrick McHardy <kaber@trash.net>");
MODULE_DESCRIPTION("Xtables: address type match for IPv4");
MODULE_DESCRIPTION("Xtables: address type match");
MODULE_ALIAS("ipt_addrtype");
MODULE_ALIAS("ip6t_addrtype");
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
static u32 xt_addrtype_rt6_to_type(const struct rt6_info *rt)
{
u32 ret;
if (!rt)
return XT_ADDRTYPE_UNREACHABLE;
if (rt->rt6i_flags & RTF_REJECT)
ret = XT_ADDRTYPE_UNREACHABLE;
else
ret = 0;
if (rt->rt6i_flags & RTF_LOCAL)
ret |= XT_ADDRTYPE_LOCAL;
if (rt->rt6i_flags & RTF_ANYCAST)
ret |= XT_ADDRTYPE_ANYCAST;
return ret;
}
static bool match_type6(struct net *net, const struct net_device *dev,
const struct in6_addr *addr, u16 mask)
{
int addr_type = ipv6_addr_type(addr);
if ((mask & XT_ADDRTYPE_MULTICAST) &&
!(addr_type & IPV6_ADDR_MULTICAST))
return false;
if ((mask & XT_ADDRTYPE_UNICAST) && !(addr_type & IPV6_ADDR_UNICAST))
return false;
if ((mask & XT_ADDRTYPE_UNSPEC) && addr_type != IPV6_ADDR_ANY)
return false;
if ((XT_ADDRTYPE_LOCAL | XT_ADDRTYPE_ANYCAST |
XT_ADDRTYPE_UNREACHABLE) & mask) {
struct rt6_info *rt;
u32 type;
int ifindex = dev ? dev->ifindex : 0;
rt = rt6_lookup(net, addr, NULL, ifindex, !!dev);
type = xt_addrtype_rt6_to_type(rt);
dst_release(&rt->dst);
return !!(mask & type);
}
return true;
}
static bool
addrtype_mt6(struct net *net, const struct net_device *dev,
const struct sk_buff *skb, const struct xt_addrtype_info_v1 *info)
{
const struct ipv6hdr *iph = ipv6_hdr(skb);
bool ret = true;
if (info->source)
ret &= match_type6(net, dev, &iph->saddr, info->source) ^
(info->flags & XT_ADDRTYPE_INVERT_SOURCE);
if (ret && info->dest)
ret &= match_type6(net, dev, &iph->daddr, info->dest) ^
!!(info->flags & XT_ADDRTYPE_INVERT_DEST);
return ret;
}
#endif
static inline bool match_type(struct net *net, const struct net_device *dev,
__be32 addr, u_int16_t mask)
......@@ -33,7 +107,7 @@ static bool
addrtype_mt_v0(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = dev_net(par->in ? par->in : par->out);
const struct ipt_addrtype_info *info = par->matchinfo;
const struct xt_addrtype_info *info = par->matchinfo;
const struct iphdr *iph = ip_hdr(skb);
bool ret = true;
......@@ -51,31 +125,36 @@ static bool
addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par)
{
struct net *net = dev_net(par->in ? par->in : par->out);
const struct ipt_addrtype_info_v1 *info = par->matchinfo;
const struct iphdr *iph = ip_hdr(skb);
const struct xt_addrtype_info_v1 *info = par->matchinfo;
const struct iphdr *iph;
const struct net_device *dev = NULL;
bool ret = true;
if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN)
if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN)
dev = par->in;
else if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT)
else if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT)
dev = par->out;
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
if (par->family == NFPROTO_IPV6)
return addrtype_mt6(net, dev, skb, info);
#endif
iph = ip_hdr(skb);
if (info->source)
ret &= match_type(net, dev, iph->saddr, info->source) ^
(info->flags & IPT_ADDRTYPE_INVERT_SOURCE);
(info->flags & XT_ADDRTYPE_INVERT_SOURCE);
if (ret && info->dest)
ret &= match_type(net, dev, iph->daddr, info->dest) ^
!!(info->flags & IPT_ADDRTYPE_INVERT_DEST);
!!(info->flags & XT_ADDRTYPE_INVERT_DEST);
return ret;
}
static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
{
struct ipt_addrtype_info_v1 *info = par->matchinfo;
struct xt_addrtype_info_v1 *info = par->matchinfo;
if (info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN &&
info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN &&
info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
pr_info("both incoming and outgoing "
"interface limitation cannot be selected\n");
return -EINVAL;
......@@ -83,7 +162,7 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN)) &&
info->flags & IPT_ADDRTYPE_LIMIT_IFACE_OUT) {
info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) {
pr_info("output interface limitation "
"not valid in PREROUTING and INPUT\n");
return -EINVAL;
......@@ -91,12 +170,28 @@ static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par)
if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) |
(1 << NF_INET_LOCAL_OUT)) &&
info->flags & IPT_ADDRTYPE_LIMIT_IFACE_IN) {
info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) {
pr_info("input interface limitation "
"not valid in POSTROUTING and OUTPUT\n");
return -EINVAL;
}
#if defined(CONFIG_IP6_NF_IPTABLES) || defined(CONFIG_IP6_NF_IPTABLES_MODULE)
if (par->family == NFPROTO_IPV6) {
if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) {
pr_err("ipv6 BLACKHOLE matching not supported\n");
return -EINVAL;
}
if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) {
pr_err("ipv6 PROHIBT (THROW, NAT ..) matching not supported\n");
return -EINVAL;
}
if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) {
pr_err("ipv6 does not support BROADCAST matching\n");
return -EINVAL;
}
}
#endif
return 0;
}
......@@ -105,16 +200,16 @@ static struct xt_match addrtype_mt_reg[] __read_mostly = {
.name = "addrtype",
.family = NFPROTO_IPV4,
.match = addrtype_mt_v0,
.matchsize = sizeof(struct ipt_addrtype_info),
.matchsize = sizeof(struct xt_addrtype_info),
.me = THIS_MODULE
},
{
.name = "addrtype",
.family = NFPROTO_IPV4,
.family = NFPROTO_UNSPEC,
.revision = 1,
.match = addrtype_mt_v1,
.checkentry = addrtype_mt_checkentry_v1,
.matchsize = sizeof(struct ipt_addrtype_info_v1),
.matchsize = sizeof(struct xt_addrtype_info_v1),
.me = THIS_MODULE
}
};
......
......@@ -33,17 +33,17 @@
/* we will save the tuples of all connections we care about */
struct xt_connlimit_conn {
struct list_head list;
struct nf_conntrack_tuple tuple;
struct hlist_node node;
struct nf_conntrack_tuple tuple;
union nf_inet_addr addr;
};
struct xt_connlimit_data {
struct list_head iphash[256];
spinlock_t lock;
struct hlist_head iphash[256];
spinlock_t lock;
};
static u_int32_t connlimit_rnd __read_mostly;
static bool connlimit_rnd_inited __read_mostly;
static inline unsigned int connlimit_iphash(__be32 addr)
{
......@@ -101,9 +101,9 @@ static int count_them(struct net *net,
{
const struct nf_conntrack_tuple_hash *found;
struct xt_connlimit_conn *conn;
struct xt_connlimit_conn *tmp;
struct hlist_node *pos, *n;
struct nf_conn *found_ct;
struct list_head *hash;
struct hlist_head *hash;
bool addit = true;
int matches = 0;
......@@ -115,7 +115,7 @@ static int count_them(struct net *net,
rcu_read_lock();
/* check the saved connections */
list_for_each_entry_safe(conn, tmp, hash, list) {
hlist_for_each_entry_safe(conn, pos, n, hash, node) {
found = nf_conntrack_find_get(net, NF_CT_DEFAULT_ZONE,
&conn->tuple);
found_ct = NULL;
......@@ -135,7 +135,7 @@ static int count_them(struct net *net,
if (found == NULL) {
/* this one is gone */
list_del(&conn->list);
hlist_del(&conn->node);
kfree(conn);
continue;
}
......@@ -146,12 +146,12 @@ static int count_them(struct net *net,
* closed already -> ditch it
*/
nf_ct_put(found_ct);
list_del(&conn->list);
hlist_del(&conn->node);
kfree(conn);
continue;
}
if (same_source_net(addr, mask, &conn->tuple.src.u3, family))
if (same_source_net(addr, mask, &conn->addr, family))
/* same source network -> be counted! */
++matches;
nf_ct_put(found_ct);
......@@ -161,11 +161,12 @@ static int count_them(struct net *net,
if (addit) {
/* save the new connection in our list */
conn = kzalloc(sizeof(*conn), GFP_ATOMIC);
conn = kmalloc(sizeof(*conn), GFP_ATOMIC);
if (conn == NULL)
return -ENOMEM;
conn->tuple = *tuple;
list_add(&conn->list, hash);
conn->addr = *addr;
hlist_add_head(&conn->node, hash);
++matches;
}
......@@ -185,15 +186,11 @@ connlimit_mt(const struct sk_buff *skb, struct xt_action_param *par)
int connections;
ct = nf_ct_get(skb, &ctinfo);
if (ct != NULL) {
if (info->flags & XT_CONNLIMIT_DADDR)
tuple_ptr = &ct->tuplehash[IP_CT_DIR_REPLY].tuple;
else
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
} else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
par->family, &tuple)) {
if (ct != NULL)
tuple_ptr = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple;
else if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb),
par->family, &tuple))
goto hotdrop;
}
if (par->family == NFPROTO_IPV6) {
const struct ipv6hdr *iph = ipv6_hdr(skb);
......@@ -228,9 +225,13 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
unsigned int i;
int ret;
if (unlikely(!connlimit_rnd_inited)) {
get_random_bytes(&connlimit_rnd, sizeof(connlimit_rnd));
connlimit_rnd_inited = true;
if (unlikely(!connlimit_rnd)) {
u_int32_t rand;
do {
get_random_bytes(&rand, sizeof(rand));
} while (!rand);
cmpxchg(&connlimit_rnd, 0, rand);
}
ret = nf_ct_l3proto_try_module_get(par->family);
if (ret < 0) {
......@@ -248,7 +249,7 @@ static int connlimit_mt_check(const struct xt_mtchk_param *par)
spin_lock_init(&info->data->lock);
for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i)
INIT_LIST_HEAD(&info->data->iphash[i]);
INIT_HLIST_HEAD(&info->data->iphash[i]);
return 0;
}
......@@ -257,15 +258,15 @@ static void connlimit_mt_destroy(const struct xt_mtdtor_param *par)
{
const struct xt_connlimit_info *info = par->matchinfo;
struct xt_connlimit_conn *conn;
struct xt_connlimit_conn *tmp;
struct list_head *hash = info->data->iphash;
struct hlist_node *pos, *n;
struct hlist_head *hash = info->data->iphash;
unsigned int i;
nf_ct_l3proto_module_put(par->family);
for (i = 0; i < ARRAY_SIZE(info->data->iphash); ++i) {
list_for_each_entry_safe(conn, tmp, &hash[i], list) {
list_del(&conn->list);
hlist_for_each_entry_safe(conn, pos, n, &hash[i], node) {
hlist_del(&conn->node);
kfree(conn);
}
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册