Commit 4a4f8fdb authored by Linus Torvalds
......@@ -75,12 +75,6 @@ enum nf_ip_hook_priorities {
#define SO_ORIGINAL_DST 80
#ifdef __KERNEL__
#ifdef CONFIG_NETFILTER_DEBUG
void nf_debug_ip_local_deliver(struct sk_buff *skb);
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb);
void nf_debug_ip_finish_output2(struct sk_buff *skb);
#endif /*CONFIG_NETFILTER_DEBUG*/
extern int ip_route_me_harder(struct sk_buff **pskb);
/* Call this before modifying an existing IP packet: ensures it is
......
#ifndef _IP_CONNTRACK_CORE_H
#define _IP_CONNTRACK_CORE_H
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/lockhelp.h>
/* This header is used to share core functionality between the
standalone connection tracking module, and the compatibility layer's use
......@@ -47,6 +46,6 @@ static inline int ip_conntrack_confirm(struct sk_buff **pskb)
extern struct list_head *ip_conntrack_hash;
extern struct list_head ip_conntrack_expect_list;
DECLARE_RWLOCK_EXTERN(ip_conntrack_lock);
extern rwlock_t ip_conntrack_lock;
#endif /* _IP_CONNTRACK_CORE_H */
......@@ -50,10 +50,9 @@ struct ip_nat_multi_range_compat
#ifdef __KERNEL__
#include <linux/list.h>
#include <linux/netfilter_ipv4/lockhelp.h>
/* Protects NAT hash tables, and NAT-private part of conntracks. */
DECLARE_RWLOCK_EXTERN(ip_nat_lock);
extern rwlock_t ip_nat_lock;
/* The structure embedded in the conntrack structure. */
struct ip_nat_info
......
......@@ -2,7 +2,6 @@
#define _LISTHELP_H
#include <linux/config.h>
#include <linux/list.h>
#include <linux/netfilter_ipv4/lockhelp.h>
/* Header that does a more comprehensive job than linux/list.h; assumes
the list is the first entry in the structure. */
......
#ifndef _LOCKHELP_H
#define _LOCKHELP_H
#include <linux/config.h>
#include <linux/spinlock.h>
#include <asm/atomic.h>
#include <linux/interrupt.h>
#include <linux/smp.h>
/* Header to help with lock debugging. */
#ifdef CONFIG_NETFILTER_DEBUG
struct spinlock_debug
{
spinlock_t l;
atomic_t locked_by;
};
struct rwlock_debug
{
rwlock_t l;
long read_locked_map;
long write_locked_map;
};
#define DECLARE_LOCK(l) \
struct spinlock_debug l = { SPIN_LOCK_UNLOCKED, ATOMIC_INIT(-1) }
#define DECLARE_LOCK_EXTERN(l) \
extern struct spinlock_debug l
#define DECLARE_RWLOCK(l) \
struct rwlock_debug l = { RW_LOCK_UNLOCKED, 0, 0 }
#define DECLARE_RWLOCK_EXTERN(l) \
extern struct rwlock_debug l
#define MUST_BE_LOCKED(l) \
do { if (atomic_read(&(l)->locked_by) != smp_processor_id()) \
printk("ASSERT %s:%u %s unlocked\n", __FILE__, __LINE__, #l); \
} while(0)
#define MUST_BE_UNLOCKED(l) \
do { if (atomic_read(&(l)->locked_by) == smp_processor_id()) \
printk("ASSERT %s:%u %s locked\n", __FILE__, __LINE__, #l); \
} while(0)
/* Write locked OK as well. */
#define MUST_BE_READ_LOCKED(l) \
do { if (!((l)->read_locked_map & (1UL << smp_processor_id())) \
&& !((l)->write_locked_map & (1UL << smp_processor_id()))) \
printk("ASSERT %s:%u %s not readlocked\n", __FILE__, __LINE__, #l); \
} while(0)
#define MUST_BE_WRITE_LOCKED(l) \
do { if (!((l)->write_locked_map & (1UL << smp_processor_id()))) \
printk("ASSERT %s:%u %s not writelocked\n", __FILE__, __LINE__, #l); \
} while(0)
#define MUST_BE_READ_WRITE_UNLOCKED(l) \
do { if ((l)->read_locked_map & (1UL << smp_processor_id())) \
printk("ASSERT %s:%u %s readlocked\n", __FILE__, __LINE__, #l); \
else if ((l)->write_locked_map & (1UL << smp_processor_id())) \
printk("ASSERT %s:%u %s writelocked\n", __FILE__, __LINE__, #l); \
} while(0)
#define LOCK_BH(lk) \
do { \
MUST_BE_UNLOCKED(lk); \
spin_lock_bh(&(lk)->l); \
atomic_set(&(lk)->locked_by, smp_processor_id()); \
} while(0)
#define UNLOCK_BH(lk) \
do { \
MUST_BE_LOCKED(lk); \
atomic_set(&(lk)->locked_by, -1); \
spin_unlock_bh(&(lk)->l); \
} while(0)
#define READ_LOCK(lk) \
do { \
MUST_BE_READ_WRITE_UNLOCKED(lk); \
read_lock_bh(&(lk)->l); \
set_bit(smp_processor_id(), &(lk)->read_locked_map); \
} while(0)
#define WRITE_LOCK(lk) \
do { \
MUST_BE_READ_WRITE_UNLOCKED(lk); \
write_lock_bh(&(lk)->l); \
set_bit(smp_processor_id(), &(lk)->write_locked_map); \
} while(0)
#define READ_UNLOCK(lk) \
do { \
if (!((lk)->read_locked_map & (1UL << smp_processor_id()))) \
printk("ASSERT: %s:%u %s not readlocked\n", \
__FILE__, __LINE__, #lk); \
clear_bit(smp_processor_id(), &(lk)->read_locked_map); \
read_unlock_bh(&(lk)->l); \
} while(0)
#define WRITE_UNLOCK(lk) \
do { \
MUST_BE_WRITE_LOCKED(lk); \
clear_bit(smp_processor_id(), &(lk)->write_locked_map); \
write_unlock_bh(&(lk)->l); \
} while(0)
#else
#define DECLARE_LOCK(l) spinlock_t l = SPIN_LOCK_UNLOCKED
#define DECLARE_LOCK_EXTERN(l) extern spinlock_t l
#define DECLARE_RWLOCK(l) rwlock_t l = RW_LOCK_UNLOCKED
#define DECLARE_RWLOCK_EXTERN(l) extern rwlock_t l
#define MUST_BE_LOCKED(l)
#define MUST_BE_UNLOCKED(l)
#define MUST_BE_READ_LOCKED(l)
#define MUST_BE_WRITE_LOCKED(l)
#define MUST_BE_READ_WRITE_UNLOCKED(l)
#define LOCK_BH(l) spin_lock_bh(l)
#define UNLOCK_BH(l) spin_unlock_bh(l)
#define READ_LOCK(l) read_lock_bh(l)
#define WRITE_LOCK(l) write_lock_bh(l)
#define READ_UNLOCK(l) read_unlock_bh(l)
#define WRITE_UNLOCK(l) write_unlock_bh(l)
#endif /*CONFIG_NETFILTER_DEBUG*/
#endif /* _LOCKHELP_H */
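For context, an illustrative before/after sketch of a call site (editorial, not part of this diff): with CONFIG_NETFILTER_DEBUG the macros above wrap the lock in a debug structure and assert ownership on every acquire and release; this commit replaces such call sites with the bare rwlock API, keeping the same _bh semantics.
/* Before: lockhelp.h wrappers with ownership assertions. */
DECLARE_RWLOCK(ip_conntrack_lock);
WRITE_LOCK(&ip_conntrack_lock);   /* asserts MUST_BE_READ_WRITE_UNLOCKED */
/* ... modify the conntrack hash ... */
WRITE_UNLOCK(&ip_conntrack_lock); /* asserts MUST_BE_WRITE_LOCKED */
/* After: plain kernel primitives, as in the hunks above. */
rwlock_t ip_conntrack_lock = RW_LOCK_UNLOCKED;
write_lock_bh(&ip_conntrack_lock);
/* ... */
write_unlock_bh(&ip_conntrack_lock);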
......@@ -147,7 +147,7 @@ struct netlink_callback
int (*dump)(struct sk_buff * skb, struct netlink_callback *cb);
int (*done)(struct netlink_callback *cb);
int family;
long args[4];
long args[5];
};
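A netlink dump callback treats args[] as resume cursors: when the skb fills up it records how far it got, returns, and netlink invokes it again. The fn_trie_dump functions later in this patch use args[1]..args[3] for their nested loops; widening the array from four to five longs gives callbacks one more slot. A minimal sketch of the pattern (my_fill_entry and MY_TABLE_SIZE are hypothetical, not from this patch):
static int my_dump(struct sk_buff *skb, struct netlink_callback *cb)
{
long i;
for (i = cb->args[0]; i < MY_TABLE_SIZE; i++) {   /* hypothetical table */
if (my_fill_entry(skb, i) < 0) {  /* skb full */
cb->args[0] = i;                  /* resume here on the next call */
return skb->len;                  /* non-zero: call us again */
}
}
return 0;                                 /* zero: dump finished */
}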
struct netlink_notify
......
......@@ -193,7 +193,6 @@ struct skb_shared_info {
* @nfcache: Cache info
* @nfct: Associated connection, if any
* @nfctinfo: Relationship of this skb to the connection
* @nf_debug: Netfilter debugging
* @nf_bridge: Saved data about a bridged frame - see br_netfilter.c
* @private: Data which is private to the HIPPI implementation
* @tc_index: Traffic control index
......@@ -264,9 +263,6 @@ struct sk_buff {
__u32 nfcache;
__u32 nfctinfo;
struct nf_conntrack *nfct;
#ifdef CONFIG_NETFILTER_DEBUG
unsigned int nf_debug;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
struct nf_bridge_info *nf_bridge;
#endif
......@@ -1219,15 +1215,6 @@ static inline void nf_reset(struct sk_buff *skb)
{
nf_conntrack_put(skb->nfct);
skb->nfct = NULL;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug = 0;
#endif
}
static inline void nf_reset_debug(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug = 0;
#endif
}
#ifdef CONFIG_BRIDGE_NETFILTER
......
......@@ -167,14 +167,17 @@ extern int fib6_walk_continue(struct fib6_walker_t *w);
extern int fib6_add(struct fib6_node *root,
struct rt6_info *rt,
struct nlmsghdr *nlh,
void *rtattr);
void *rtattr,
struct netlink_skb_parms *req);
extern int fib6_del(struct rt6_info *rt,
struct nlmsghdr *nlh,
void *rtattr);
void *rtattr,
struct netlink_skb_parms *req);
extern void inet6_rt_notify(int event, struct rt6_info *rt,
struct nlmsghdr *nlh);
struct nlmsghdr *nlh,
struct netlink_skb_parms *req);
extern void fib6_run_gc(unsigned long dummy);
......
......@@ -41,13 +41,16 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg);
extern int ip6_route_add(struct in6_rtmsg *rtmsg,
struct nlmsghdr *,
void *rtattr);
void *rtattr,
struct netlink_skb_parms *req);
extern int ip6_ins_rt(struct rt6_info *,
struct nlmsghdr *,
void *rtattr);
void *rtattr,
struct netlink_skb_parms *req);
extern int ip6_del_rt(struct rt6_info *,
struct nlmsghdr *,
void *rtattr);
void *rtattr,
struct netlink_skb_parms *req);
extern int ip6_rt_addr_add(struct in6_addr *addr,
struct net_device *dev,
......
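The extra struct netlink_skb_parms *req parameter threads the originating request's socket information down to where the route-change notification is built, so the message can be attributed to the requester. A hedged sketch of a caller adaptation (the real call sites are outside these hunks):
/* In an rtnetlink handler, skb is the request message: */
struct netlink_skb_parms *req = &NETLINK_CB(skb);
err = ip6_route_add(&rtmsg, nlh, rtattr, req);
/* ... inet6_rt_notify() can later use req (e.g. req->pid)
when emitting the RTM_NEWROUTE notification. */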
......@@ -57,9 +57,6 @@ int br_forward_finish(struct sk_buff *skb)
static void __br_deliver(const struct net_bridge_port *to, struct sk_buff *skb)
{
skb->dev = to->dev;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug = 0;
#endif
NF_HOOK(PF_BRIDGE, NF_BR_LOCAL_OUT, skb, NULL, skb->dev,
br_forward_finish);
}
......
......@@ -23,11 +23,7 @@ const unsigned char bridge_ula[6] = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 };
static int br_pass_frame_up_finish(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug = 0;
#endif
netif_receive_skb(skb);
return 0;
}
......
......@@ -102,10 +102,6 @@ static int br_nf_pre_routing_finish_ipv6(struct sk_buff *skb)
{
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
#endif
if (nf_bridge->mask & BRNF_PKT_TYPE) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->mask ^= BRNF_PKT_TYPE;
......@@ -182,10 +178,6 @@ static void __br_dnat_complain(void)
* --Bart, 20021007 (updated) */
static int br_nf_pre_routing_finish_bridge(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug |= (1 << NF_BR_PRE_ROUTING) | (1 << NF_BR_FORWARD);
#endif
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
skb->nf_bridge->mask |= BRNF_PKT_TYPE;
......@@ -207,10 +199,6 @@ static int br_nf_pre_routing_finish(struct sk_buff *skb)
struct iphdr *iph = skb->nh.iph;
struct nf_bridge_info *nf_bridge = skb->nf_bridge;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_BR_PRE_ROUTING);
#endif
if (nf_bridge->mask & BRNF_PKT_TYPE) {
skb->pkt_type = PACKET_OTHERHOST;
nf_bridge->mask ^= BRNF_PKT_TYPE;
......@@ -382,9 +370,6 @@ static unsigned int br_nf_pre_routing_ipv6(unsigned int hook,
if (hdr->nexthdr == NEXTHDR_HOP && check_hbh_len(skb))
goto inhdr_error;
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_IP6_PRE_ROUTING);
#endif
if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
return NF_DROP;
setup_pre_routing(skb);
......@@ -468,9 +453,6 @@ static unsigned int br_nf_pre_routing(unsigned int hook, struct sk_buff **pskb,
skb->ip_summed = CHECKSUM_NONE;
}
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_IP_PRE_ROUTING);
#endif
if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
return NF_DROP;
setup_pre_routing(skb);
......@@ -517,10 +499,6 @@ static int br_nf_forward_finish(struct sk_buff *skb)
struct net_device *in;
struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_BR_FORWARD);
#endif
if (skb->protocol != __constant_htons(ETH_P_ARP) && !IS_VLAN_ARP) {
in = nf_bridge->physindev;
if (nf_bridge->mask & BRNF_PKT_TYPE) {
......@@ -566,9 +544,6 @@ static unsigned int br_nf_forward_ip(unsigned int hook, struct sk_buff **pskb,
(*pskb)->nh.raw += VLAN_HLEN;
}
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_BR_FORWARD);
#endif
nf_bridge = skb->nf_bridge;
if (skb->pkt_type == PACKET_OTHERHOST) {
skb->pkt_type = PACKET_HOST;
......@@ -605,10 +580,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
(*pskb)->nh.raw += VLAN_HLEN;
}
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_BR_FORWARD);
#endif
if (skb->nh.arph->ar_pln != 4) {
if (IS_VLAN_ARP) {
skb_push(*pskb, VLAN_HLEN);
......@@ -627,9 +598,6 @@ static unsigned int br_nf_forward_arp(unsigned int hook, struct sk_buff **pskb,
/* PF_BRIDGE/LOCAL_OUT ***********************************************/
static int br_nf_local_out_finish(struct sk_buff *skb)
{
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug &= ~(1 << NF_BR_LOCAL_OUT);
#endif
if (skb->protocol == __constant_htons(ETH_P_8021Q)) {
skb_push(skb, VLAN_HLEN);
skb->nh.raw -= VLAN_HLEN;
......@@ -731,10 +699,6 @@ static unsigned int br_nf_local_out(unsigned int hook, struct sk_buff **pskb,
realoutdev, br_nf_local_out_finish,
NF_IP_PRI_BRIDGE_SABOTAGE_FORWARD + 1);
} else {
#ifdef CONFIG_NETFILTER_DEBUG
skb->nf_debug ^= (1 << NF_IP_LOCAL_OUT);
#endif
NF_HOOK_THRESH(pf, NF_IP_LOCAL_OUT, skb, realindev,
realoutdev, br_nf_local_out_finish,
NF_IP_PRI_BRIDGE_SABOTAGE_LOCAL_OUT + 1);
......@@ -779,8 +743,6 @@ static unsigned int br_nf_post_routing(unsigned int hook, struct sk_buff **pskb,
printk(KERN_CRIT "br_netfilter: skb->dst == NULL.");
goto print_error;
}
skb->nf_debug ^= (1 << NF_IP_POST_ROUTING);
#endif
/* We assume any code from br_dev_queue_push_xmit onwards doesn't care
......
......@@ -141,136 +141,6 @@ void nf_unregister_sockopt(struct nf_sockopt_ops *reg)
up(&nf_sockopt_mutex);
}
#ifdef CONFIG_NETFILTER_DEBUG
#include <net/ip.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4.h>
static void debug_print_hooks_ip(unsigned int nf_debug)
{
if (nf_debug & (1 << NF_IP_PRE_ROUTING)) {
printk("PRE_ROUTING ");
nf_debug ^= (1 << NF_IP_PRE_ROUTING);
}
if (nf_debug & (1 << NF_IP_LOCAL_IN)) {
printk("LOCAL_IN ");
nf_debug ^= (1 << NF_IP_LOCAL_IN);
}
if (nf_debug & (1 << NF_IP_FORWARD)) {
printk("FORWARD ");
nf_debug ^= (1 << NF_IP_FORWARD);
}
if (nf_debug & (1 << NF_IP_LOCAL_OUT)) {
printk("LOCAL_OUT ");
nf_debug ^= (1 << NF_IP_LOCAL_OUT);
}
if (nf_debug & (1 << NF_IP_POST_ROUTING)) {
printk("POST_ROUTING ");
nf_debug ^= (1 << NF_IP_POST_ROUTING);
}
if (nf_debug)
printk("Crap bits: 0x%04X", nf_debug);
printk("\n");
}
static void nf_dump_skb(int pf, struct sk_buff *skb)
{
printk("skb: pf=%i %s dev=%s len=%u\n",
pf,
skb->sk ? "(owned)" : "(unowned)",
skb->dev ? skb->dev->name : "(no dev)",
skb->len);
switch (pf) {
case PF_INET: {
const struct iphdr *ip = skb->nh.iph;
__u32 *opt = (__u32 *) (ip + 1);
int opti;
__u16 src_port = 0, dst_port = 0;
if (ip->protocol == IPPROTO_TCP
|| ip->protocol == IPPROTO_UDP) {
struct tcphdr *tcp=(struct tcphdr *)((__u32 *)ip+ip->ihl);
src_port = ntohs(tcp->source);
dst_port = ntohs(tcp->dest);
}
printk("PROTO=%d %u.%u.%u.%u:%hu %u.%u.%u.%u:%hu"
" L=%hu S=0x%2.2hX I=%hu F=0x%4.4hX T=%hu",
ip->protocol, NIPQUAD(ip->saddr),
src_port, NIPQUAD(ip->daddr),
dst_port,
ntohs(ip->tot_len), ip->tos, ntohs(ip->id),
ntohs(ip->frag_off), ip->ttl);
for (opti = 0; opti < (ip->ihl - sizeof(struct iphdr) / 4); opti++)
printk(" O=0x%8.8X", *opt++);
printk("\n");
}
}
}
void nf_debug_ip_local_deliver(struct sk_buff *skb)
{
/* If it's a loopback packet, it must have come through
* NF_IP_LOCAL_OUT, NF_IP_RAW_INPUT, NF_IP_PRE_ROUTING and
* NF_IP_LOCAL_IN. Otherwise, must have gone through
* NF_IP_RAW_INPUT and NF_IP_PRE_ROUTING. */
if (!skb->dev) {
printk("ip_local_deliver: skb->dev is NULL.\n");
} else {
if (skb->nf_debug != ((1<<NF_IP_PRE_ROUTING)
| (1<<NF_IP_LOCAL_IN))) {
printk("ip_local_deliver: bad skb: ");
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
}
void nf_debug_ip_loopback_xmit(struct sk_buff *newskb)
{
if (newskb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))) {
printk("ip_dev_loopback_xmit: bad owned skb = %p: ",
newskb);
debug_print_hooks_ip(newskb->nf_debug);
nf_dump_skb(PF_INET, newskb);
}
}
void nf_debug_ip_finish_output2(struct sk_buff *skb)
{
/* If it's owned, it must have gone through the
* NF_IP_LOCAL_OUT and NF_IP_POST_ROUTING.
* Otherwise, must have gone through
* NF_IP_PRE_ROUTING, NF_IP_FORWARD and NF_IP_POST_ROUTING.
*/
if (skb->sk) {
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))) {
printk("ip_finish_output: bad owned skb = %p: ", skb);
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
} else {
if (skb->nf_debug != ((1 << NF_IP_PRE_ROUTING)
| (1 << NF_IP_FORWARD)
| (1 << NF_IP_POST_ROUTING))) {
/* Fragments, tunnelled packets, and TCP RSTs
generated by ipt_REJECT will have no
owners, but may still be local */
if (skb->nf_debug != ((1 << NF_IP_LOCAL_OUT)
| (1 << NF_IP_POST_ROUTING))){
printk("ip_finish_output:"
" bad unowned skb = %p: ",skb);
debug_print_hooks_ip(skb->nf_debug);
nf_dump_skb(PF_INET, skb);
}
}
}
}
#endif /*CONFIG_NETFILTER_DEBUG*/
/* Call get/setsockopt() */
static int nf_sockopt(struct sock *sk, int pf, int val,
char __user *opt, int *len, int get)
......@@ -488,14 +358,6 @@ int nf_hook_slow(int pf, unsigned int hook, struct sk_buff **pskb,
/* We may already have this, but read-locks nest anyway */
rcu_read_lock();
#ifdef CONFIG_NETFILTER_DEBUG
if (unlikely((*pskb)->nf_debug & (1 << hook))) {
printk("nf_hook: hook %i already set.\n", hook);
nf_dump_skb(pf, *pskb);
}
(*pskb)->nf_debug |= (1 << hook);
#endif
elem = &nf_hooks[pf][hook];
next_hook:
verdict = nf_iterate(&nf_hooks[pf][hook], pskb, hook, indev,
......
......@@ -365,9 +365,6 @@ struct sk_buff *skb_clone(struct sk_buff *skb, int gfp_mask)
C(nfct);
nf_conntrack_get(skb->nfct);
C(nfctinfo);
#ifdef CONFIG_NETFILTER_DEBUG
C(nf_debug);
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
C(nf_bridge);
nf_bridge_get(skb->nf_bridge);
......@@ -432,9 +429,6 @@ static void copy_skb_header(struct sk_buff *new, const struct sk_buff *old)
new->nfct = old->nfct;
nf_conntrack_get(old->nfct);
new->nfctinfo = old->nfctinfo;
#ifdef CONFIG_NETFILTER_DEBUG
new->nf_debug = old->nf_debug;
#endif
#ifdef CONFIG_BRIDGE_NETFILTER
new->nf_bridge = old->nf_bridge;
nf_bridge_get(old->nf_bridge);
......
#
# IP configuration
#
choice
prompt "Choose IP: FIB lookup""
depends on INET
default IP_FIB_HASH
config IP_FIB_HASH
bool "FIB_HASH"
---help---
The current FIB hash implementation is well proven and good enough for most users.
config IP_FIB_TRIE
bool "FIB_TRIE"
---help---
Use the new experimental LC-trie as the FIB lookup algorithm.
This improves lookup performance.
LC-trie is described in:
IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
An experimental study of compression methods for dynamic tries
Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
endchoice
config IP_MULTICAST
bool "IP: multicasting"
depends on INET
......
......@@ -7,8 +7,10 @@ obj-y := utils.o route.o inetpeer.o protocol.o \
ip_output.o ip_sockglue.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o tcp_minisocks.o \
datagram.o raw.o udp.o arp.o icmp.o devinet.o af_inet.o igmp.o \
sysctl_net_ipv4.o fib_frontend.o fib_semantics.o fib_hash.o
sysctl_net_ipv4.o fib_frontend.o fib_semantics.o
obj-$(CONFIG_IP_FIB_HASH) += fib_hash.o
obj-$(CONFIG_IP_FIB_TRIE) += fib_trie.o
obj-$(CONFIG_PROC_FS) += proc.o
obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o
obj-$(CONFIG_IP_MROUTE) += ipmr.o
......
......@@ -1119,6 +1119,10 @@ module_init(inet_init);
#ifdef CONFIG_PROC_FS
extern int fib_proc_init(void);
extern void fib_proc_exit(void);
#ifdef CONFIG_IP_FIB_TRIE
extern int fib_stat_proc_init(void);
extern void fib_stat_proc_exit(void);
#endif
extern int ip_misc_proc_init(void);
extern int raw_proc_init(void);
extern void raw_proc_exit(void);
......@@ -1139,11 +1143,19 @@ static int __init ipv4_proc_init(void)
goto out_udp;
if (fib_proc_init())
goto out_fib;
#ifdef CONFIG_IP_FIB_TRIE
if (fib_stat_proc_init())
goto out_fib_stat;
#endif
if (ip_misc_proc_init())
goto out_misc;
out:
return rc;
out_misc:
#ifdef CONFIG_IP_FIB_TRIE
fib_stat_proc_exit();
out_fib_stat:
#endif
fib_proc_exit();
out_fib:
udp4_proc_exit();
......
/*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Robert Olsson <robert.olsson@its.uu.se> Uppsala Universitet
* & Swedish University of Agricultural Sciences.
*
* Jens Laas <jens.laas@data.slu.se> Swedish University of
* Agricultural Sciences.
*
* Hans Liss <hans.liss@its.uu.se> Uppsala Universitet
*
* This work is based on the LPC-trie, which is originally described in:
*
* An experimental study of compression methods for dynamic tries
* Stefan Nilsson and Matti Tikkanen. Algorithmica, 33(1):19-33, 2002.
* http://www.nada.kth.se/~snilsson/public/papers/dyntrie2/
*
*
* IP-address lookup using LC-tries. Stefan Nilsson and Gunnar Karlsson
* IEEE Journal on Selected Areas in Communications, 17(6):1083-1092, June 1999
*
* Version: $Id: fib_trie.c,v 1.3 2005/06/08 14:20:01 robert Exp $
*
*
* Code from fib_hash has been reused which includes the following header:
*
*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* IPv4 FIB: lookup engine and maintenance routines.
*
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#define VERSION "0.323"
#include <linux/config.h>
#include <asm/uaccess.h>
#include <asm/system.h>
#include <asm/bitops.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/mm.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/errno.h>
#include <linux/in.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/proc_fs.h>
#include <linux/skbuff.h>
#include <linux/netlink.h>
#include <linux/init.h>
#include <linux/list.h>
#include <net/ip.h>
#include <net/protocol.h>
#include <net/route.h>
#include <net/tcp.h>
#include <net/sock.h>
#include <net/ip_fib.h>
#include "fib_lookup.h"
#undef CONFIG_IP_FIB_TRIE_STATS
#define MAX_CHILDS 16384
#define EXTRACT(p, n, str) ((str)<<(p)>>(32-(n)))
#define KEYLENGTH (8*sizeof(t_key))
#define MASK_PFX(k, l) (((l)==0)?0:(k >> (KEYLENGTH-l)) << (KEYLENGTH-l))
#define TKEY_GET_MASK(offset, bits) (((bits)==0)?0:((t_key)(-1) << (KEYLENGTH - bits) >> offset))
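/* Worked example (editorial illustration, not in the original file):
with a key k = 0xC0A80100 (192.168.1.0) and KEYLENGTH = 32,
MASK_PFX(k, 16) == 0xC0A80000 (keep the top 16 bits),
EXTRACT(0, 8, k) == 0xC0 (8 bits starting at the MSB), and
TKEY_GET_MASK(7, 3) == 0x01C00000 (a 3-bit mask 7 bits down from the
MSB). All offsets count from the most significant bit of the key. */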
static DEFINE_RWLOCK(fib_lock);
typedef unsigned int t_key;
#define T_TNODE 0
#define T_LEAF 1
#define NODE_TYPE_MASK 0x1UL
#define NODE_PARENT(_node) \
((struct tnode *)((_node)->_parent & ~NODE_TYPE_MASK))
#define NODE_SET_PARENT(_node, _ptr) \
((_node)->_parent = (((unsigned long)(_ptr)) | \
((_node)->_parent & NODE_TYPE_MASK)))
#define NODE_INIT_PARENT(_node, _type) \
((_node)->_parent = (_type))
#define NODE_TYPE(_node) \
((_node)->_parent & NODE_TYPE_MASK)
#define IS_TNODE(n) (!(n->_parent & T_LEAF))
#define IS_LEAF(n) (n->_parent & T_LEAF)
struct node {
t_key key;
unsigned long _parent;
};
struct leaf {
t_key key;
unsigned long _parent;
struct hlist_head list;
};
struct leaf_info {
struct hlist_node hlist;
int plen;
struct list_head falh;
};
struct tnode {
t_key key;
unsigned long _parent;
unsigned short pos:5; /* 2log(KEYLENGTH) bits needed */
unsigned short bits:5; /* 2log(KEYLENGTH) bits needed */
unsigned short full_children; /* KEYLENGTH bits needed */
unsigned short empty_children; /* KEYLENGTH bits needed */
struct node *child[0];
};
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats {
unsigned int gets;
unsigned int backtrack;
unsigned int semantic_match_passed;
unsigned int semantic_match_miss;
unsigned int null_node_hit;
};
#endif
struct trie_stat {
unsigned int totdepth;
unsigned int maxdepth;
unsigned int tnodes;
unsigned int leaves;
unsigned int nullpointers;
unsigned int nodesizes[MAX_CHILDS];
};
struct trie {
struct node *trie;
#ifdef CONFIG_IP_FIB_TRIE_STATS
struct trie_use_stats stats;
#endif
int size;
unsigned int revision;
};
static int trie_debug = 0;
static int tnode_full(struct tnode *tn, struct node *n);
static void put_child(struct trie *t, struct tnode *tn, int i, struct node *n);
static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull);
static int tnode_child_length(struct tnode *tn);
static struct node *resize(struct trie *t, struct tnode *tn);
static struct tnode *inflate(struct trie *t, struct tnode *tn);
static struct tnode *halve(struct trie *t, struct tnode *tn);
static void tnode_free(struct tnode *tn);
static void trie_dump_seq(struct seq_file *seq, struct trie *t);
extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio);
extern int fib_detect_death(struct fib_info *fi, int order,
struct fib_info **last_resort, int *last_idx, int *dflt);
extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, int z, int tb_id,
struct nlmsghdr *n, struct netlink_skb_parms *req);
static kmem_cache_t *fn_alias_kmem;
static struct trie *trie_local = NULL, *trie_main = NULL;
static void trie_bug(char *err)
{
printk("Trie Bug: %s\n", err);
BUG();
}
static inline struct node *tnode_get_child(struct tnode *tn, int i)
{
if (i >= 1<<tn->bits)
trie_bug("tnode_get_child");
return tn->child[i];
}
static inline int tnode_child_length(struct tnode *tn)
{
return 1<<tn->bits;
}
/*
_________________________________________________________________
| i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
----------------------------------------------------------------
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
_________________________________________________________________
| C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
-----------------------------------------------------------------
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
tp->pos = 7
tp->bits = 3
n->pos = 15
n->bits=4
KEYLENGTH=32
*/
static inline t_key tkey_extract_bits(t_key a, int offset, int bits)
{
if (offset < KEYLENGTH)
return ((t_key)(a << offset)) >> (KEYLENGTH - bits);
else
return 0;
}
static inline int tkey_equals(t_key a, t_key b)
{
return a == b;
}
static inline int tkey_sub_equals(t_key a, int offset, int bits, t_key b)
{
if (bits == 0 || offset >= KEYLENGTH)
return 1;
bits = bits > KEYLENGTH ? KEYLENGTH : bits;
return ((a ^ b) << offset) >> (KEYLENGTH - bits) == 0;
}
static inline int tkey_mismatch(t_key a, int offset, t_key b)
{
t_key diff = a ^ b;
int i = offset;
if(!diff)
return 0;
while((diff << i) >> (KEYLENGTH-1) == 0)
i++;
return i;
}
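/* Illustration (editorial): for a = 0xC0A80100 and b = 0xC0A80180 the
keys first differ at bit 24 counting from the MSB, so
tkey_mismatch(a, 0, b) == 24; consequently tkey_sub_equals(a, 0, 24, b)
holds while tkey_sub_equals(a, 0, 25, b) does not. */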
/* Candidate for fib_semantics */
static void fn_free_alias(struct fib_alias *fa)
{
fib_release_info(fa->fa_info);
kmem_cache_free(fn_alias_kmem, fa);
}
/*
To understand this stuff, an understanding of keys and all their bits is
necessary. Every node in the trie has a key associated with it, but not
all of the bits in that key are significant.
Consider a node 'n' and its parent 'tp'.
If n is a leaf, every bit in its key is significant. Its presence is
necessitated by path compression, since during a tree traversal (when
searching for a leaf - unless we are doing an insertion) we will completely
ignore all skipped bits we encounter. Thus we need to verify, at the end of
a potentially successful search, that we have indeed been walking the
correct key path.
Note that we can never "miss" the correct key in the tree if present by
following the wrong path. Path compression ensures that segments of the key
that are the same for all keys with a given prefix are skipped, but the
skipped part *is* identical for each node in the subtrie below the skipped
bit! trie_insert() in this implementation takes care of that - note the
call to tkey_sub_equals() in trie_insert().
If n is an internal node - a 'tnode' here - the various parts of its key
have many different meanings.
Example:
_________________________________________________________________
| i | i | i | i | i | i | i | N | N | N | S | S | S | S | S | C |
-----------------------------------------------------------------
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
_________________________________________________________________
| C | C | C | u | u | u | u | u | u | u | u | u | u | u | u | u |
-----------------------------------------------------------------
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
tp->pos = 7
tp->bits = 3
n->pos = 15
n->bits=4
First, let's just ignore the bits that come before the parent tp, that is
the bits from 0 to (tp->pos-1). They are *known* but at this point we do
not use them for anything.
The bits from (tp->pos) to (tp->pos + tp->bits - 1) - "N", above - are the
index into the parent's child array. That is, they will be used to find
'n' among tp's children.
The bits from (tp->pos + tp->bits) to (n->pos - 1) - "S" - are skipped bits
for the node n.
All the bits we have seen so far are significant to the node n. The rest
of the bits are really not needed or indeed known in n->key.
The bits from (n->pos) to (n->pos + n->bits - 1) - "C" - are the index into
n's child array, and will of course be different for each child.
The rest of the bits, from (n->pos + n->bits) onward, are completely unknown
at this point.
*/
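/* Editorial note tying the example above to the code: with tp->pos = 7
and tp->bits = 3, node n sits at index tkey_extract_bits(n->key, 7, 3)
of tp->child[]; with n->pos = 15 and n->bits = 4, bits 10..14 (the "S"
positions) are exactly the ones path compression skips over. */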
static void check_tnode(struct tnode *tn)
{
if(tn && tn->pos+tn->bits > 32) {
printk("TNODE ERROR tn=%p, pos=%d, bits=%d\n", tn, tn->pos, tn->bits);
}
}
static int halve_threshold = 25;
static int inflate_threshold = 50;
static struct leaf *leaf_new(void)
{
struct leaf *l = kmalloc(sizeof(struct leaf), GFP_KERNEL);
if(l) {
NODE_INIT_PARENT(l, T_LEAF);
INIT_HLIST_HEAD(&l->list);
}
return l;
}
static struct leaf_info *leaf_info_new(int plen)
{
struct leaf_info *li = kmalloc(sizeof(struct leaf_info), GFP_KERNEL);
if (li) {
li->plen = plen;
INIT_LIST_HEAD(&li->falh);
}
return li;
}
static inline void free_leaf(struct leaf *l)
{
kfree(l);
}
static inline void free_leaf_info(struct leaf_info *li)
{
kfree(li);
}
static struct tnode* tnode_new(t_key key, int pos, int bits)
{
int nchildren = 1<<bits;
int sz = sizeof(struct tnode) + nchildren * sizeof(struct node *);
struct tnode *tn = kmalloc(sz, GFP_KERNEL);
if(tn) {
memset(tn, 0, sz);
NODE_INIT_PARENT(tn, T_TNODE);
tn->pos = pos;
tn->bits = bits;
tn->key = key;
tn->full_children = 0;
tn->empty_children = 1<<bits;
}
if(trie_debug > 0)
printk("AT %p s=%u %u\n", tn, (unsigned int) sizeof(struct tnode),
(unsigned int) (sizeof(struct node) * 1<<bits));
return tn;
}
static void tnode_free(struct tnode *tn)
{
if(!tn) {
trie_bug("tnode_free\n");
}
if(IS_LEAF(tn)) {
free_leaf((struct leaf *)tn);
if(trie_debug > 0 )
printk("FL %p \n", tn);
}
else if(IS_TNODE(tn)) {
kfree(tn);
if(trie_debug > 0 )
printk("FT %p \n", tn);
}
else {
trie_bug("tnode_free\n");
}
}
/*
* Check whether a tnode 'n' is "full", i.e. it is an internal node
* and no bits are skipped. See discussion in dyntree paper p. 6
*/
static inline int tnode_full(struct tnode *tn, struct node *n)
{
if(n == NULL || IS_LEAF(n))
return 0;
return ((struct tnode *) n)->pos == tn->pos + tn->bits;
}
static inline void put_child(struct trie *t, struct tnode *tn, int i, struct node *n)
{
tnode_put_child_reorg(tn, i, n, -1);
}
/*
* Add a child at position i overwriting the old value.
* Update the value of full_children and empty_children.
*/
static void tnode_put_child_reorg(struct tnode *tn, int i, struct node *n, int wasfull)
{
struct node *chi;
int isfull;
if(i >= 1<<tn->bits) {
printk("bits=%d, i=%d\n", tn->bits, i);
trie_bug("tnode_put_child_reorg bits");
}
write_lock_bh(&fib_lock);
chi = tn->child[i];
/* update emptyChildren */
if (n == NULL && chi != NULL)
tn->empty_children++;
else if (n != NULL && chi == NULL)
tn->empty_children--;
/* update fullChildren */
if (wasfull == -1)
wasfull = tnode_full(tn, chi);
isfull = tnode_full(tn, n);
if (wasfull && !isfull)
tn->full_children--;
else if (!wasfull && isfull)
tn->full_children++;
if(n)
NODE_SET_PARENT(n, tn);
tn->child[i] = n;
write_unlock_bh(&fib_lock);
}
static struct node *resize(struct trie *t, struct tnode *tn)
{
int i;
if (!tn)
return NULL;
if(trie_debug)
printk("In tnode_resize %p inflate_threshold=%d threshold=%d\n",
tn, inflate_threshold, halve_threshold);
/* No children */
if (tn->empty_children == tnode_child_length(tn)) {
tnode_free(tn);
return NULL;
}
/* One child */
if (tn->empty_children == tnode_child_length(tn) - 1)
for (i = 0; i < tnode_child_length(tn); i++) {
write_lock_bh(&fib_lock);
if (tn->child[i] != NULL) {
/* compress one level */
struct node *n = tn->child[i];
if(n)
NODE_INIT_PARENT(n, NODE_TYPE(n));
write_unlock_bh(&fib_lock);
tnode_free(tn);
return n;
}
write_unlock_bh(&fib_lock);
}
/*
* Double as long as the resulting node's ratio of nonempty
* children stays above the threshold.
*/
/*
* From "Implementing a dynamic compressed trie" by Stefan Nilsson of
* the Helsinki University of Technology and Matti Tikkanen of Nokia
* Telecommunications, page 6:
* "A node is doubled if the ratio of non-empty children to all
* children in the *doubled* node is at least 'high'."
*
* 'high' in this instance is the variable 'inflate_threshold'. It
* is expressed as a percentage, so we multiply it with
* tnode_child_length() and instead of multiplying by 2 (since the
* child array will be doubled by inflate()) and multiplying
* the left-hand side by 100 (to handle the percentage thing) we
* multiply the left-hand side by 50.
*
* The left-hand side may look a bit weird: tnode_child_length(tn)
* - tn->empty_children is of course the number of non-null children
* in the current node. tn->full_children is the number of "full"
* children, that is non-null tnodes with a skip value of 0.
* All of those will be doubled in the resulting inflated tnode, so
* we just count them one extra time here.
*
* A clearer way to write this would be:
*
* to_be_doubled = tn->full_children;
* not_to_be_doubled = tnode_child_length(tn) - tn->empty_children -
* tn->full_children;
*
* new_child_length = tnode_child_length(tn) * 2;
*
* new_fill_factor = 100 * (not_to_be_doubled + 2*to_be_doubled) /
* new_child_length;
* if (new_fill_factor >= inflate_threshold)
*
* ...and so on, though it would mess up the while() loop.
*
* anyway,
* 100 * (not_to_be_doubled + 2*to_be_doubled) / new_child_length >=
* inflate_threshold
*
* avoid a division:
* 100 * (not_to_be_doubled + 2*to_be_doubled) >=
* inflate_threshold * new_child_length
*
* expand not_to_be_doubled and to_be_doubled, and shorten:
* 100 * (tnode_child_length(tn) - tn->empty_children +
* tn->full_children ) >= inflate_threshold * new_child_length
*
* expand new_child_length:
* 100 * (tnode_child_length(tn) - tn->empty_children +
* tn->full_children ) >=
* inflate_threshold * tnode_child_length(tn) * 2
*
* shorten again:
* 50 * (tn->full_children + tnode_child_length(tn) -
* tn->empty_children ) >= inflate_threshold *
* tnode_child_length(tn)
*
*/
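/* Numeric check (editorial): a tnode with tnode_child_length(tn) == 16,
empty_children == 6 and full_children == 4 gives
50 * (4 + 16 - 6) = 700 < 50 * 16 = 800, so it is not inflated;
equivalently, the doubled node would be 100 * (6 + 2*4) / 32 = 43.75%
full, below the 50% inflate_threshold. */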
check_tnode(tn);
while ((tn->full_children > 0 &&
50 * (tn->full_children + tnode_child_length(tn) - tn->empty_children) >=
inflate_threshold * tnode_child_length(tn))) {
tn = inflate(t, tn);
}
check_tnode(tn);
/*
* Halve as long as the number of empty children in this
* node is above threshold.
*/
while (tn->bits > 1 &&
100 * (tnode_child_length(tn) - tn->empty_children) <
halve_threshold * tnode_child_length(tn))
tn = halve(t, tn);
/* Only one child remains */
if (tn->empty_children == tnode_child_length(tn) - 1)
for (i = 0; i < tnode_child_length(tn); i++) {
write_lock_bh(&fib_lock);
if (tn->child[i] != NULL) {
/* compress one level */
struct node *n = tn->child[i];
if(n)
NODE_INIT_PARENT(n, NODE_TYPE(n));
write_unlock_bh(&fib_lock);
tnode_free(tn);
return n;
}
write_unlock_bh(&fib_lock);
}
return (struct node *) tn;
}
static struct tnode *inflate(struct trie *t, struct tnode *tn)
{
struct tnode *inode;
struct tnode *oldtnode = tn;
int olen = tnode_child_length(tn);
int i;
if(trie_debug)
printk("In inflate\n");
tn = tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits + 1);
if (!tn)
trie_bug("tnode_new failed");
for(i = 0; i < olen; i++) {
struct node *node = tnode_get_child(oldtnode, i);
/* An empty child */
if (node == NULL)
continue;
/* A leaf or an internal node with skipped bits */
if(IS_LEAF(node) || ((struct tnode *) node)->pos >
tn->pos + tn->bits - 1) {
if(tkey_extract_bits(node->key, tn->pos + tn->bits - 1,
1) == 0)
put_child(t, tn, 2*i, node);
else
put_child(t, tn, 2*i+1, node);
continue;
}
/* An internal node with two children */
inode = (struct tnode *) node;
if (inode->bits == 1) {
put_child(t, tn, 2*i, inode->child[0]);
put_child(t, tn, 2*i+1, inode->child[1]);
tnode_free(inode);
}
/* An internal node with more than two children */
else {
struct tnode *left, *right;
int size, j;
/* We will replace this node 'inode' with two new
* ones, 'left' and 'right', each with half of the
* original children. The two new nodes will have
* a position one bit further down the key and this
* means that the "significant" part of their keys
* (see the discussion near the top of this file)
* will differ by one bit, which will be "0" in
* left's key and "1" in right's key. Since we are
* moving the key position by one step, the bit that
* we are moving away from - the bit at position
* (inode->pos) - is the one that will differ between
* left and right. So... we synthesize that bit in the
* two new keys.
* The mask 'm' below will be a single "one" bit at
* the position (inode->pos)
*/
t_key m = TKEY_GET_MASK(inode->pos, 1);
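/* E.g. (editorial): if inode->pos == 5, m == 0x04000000, a single
one-bit five places down from the MSB. */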
/* Use the old key, but set the new significant
* bit to zero.
*/
left = tnode_new(inode->key&(~m), inode->pos + 1,
inode->bits - 1);
if(!left)
trie_bug("tnode_new failed");
/* Use the old key, but set the new significant
* bit to one.
*/
right = tnode_new(inode->key|m, inode->pos + 1,
inode->bits - 1);
if(!right)
trie_bug("tnode_new failed");
size = tnode_child_length(left);
for(j = 0; j < size; j++) {
put_child(t, left, j, inode->child[j]);
put_child(t, right, j, inode->child[j + size]);
}
put_child(t, tn, 2*i, resize(t, left));
put_child(t, tn, 2*i+1, resize(t, right));
tnode_free(inode);
}
}
tnode_free(oldtnode);
return tn;
}
static struct tnode *halve(struct trie *t, struct tnode *tn)
{
struct tnode *oldtnode = tn;
struct node *left, *right;
int i;
int olen = tnode_child_length(tn);
if(trie_debug) printk("In halve\n");
tn=tnode_new(oldtnode->key, oldtnode->pos, oldtnode->bits - 1);
if(!tn)
trie_bug("tnode_new failed");
for(i = 0; i < olen; i += 2) {
left = tnode_get_child(oldtnode, i);
right = tnode_get_child(oldtnode, i+1);
/* At least one of the children is empty */
if (left == NULL) {
if (right == NULL) /* Both are empty */
continue;
put_child(t, tn, i/2, right);
} else if (right == NULL)
put_child(t, tn, i/2, left);
/* Two nonempty children */
else {
struct tnode *newBinNode =
tnode_new(left->key, tn->pos + tn->bits, 1);
if(!newBinNode)
trie_bug("tnode_new failed");
put_child(t, newBinNode, 0, left);
put_child(t, newBinNode, 1, right);
put_child(t, tn, i/2, resize(t, newBinNode));
}
}
tnode_free(oldtnode);
return tn;
}
static void *trie_init(struct trie *t)
{
if(t) {
t->size = 0;
t->trie = NULL;
t->revision = 0;
#ifdef CONFIG_IP_FIB_TRIE_STATS
memset(&t->stats, 0, sizeof(struct trie_use_stats));
#endif
}
return t;
}
static struct leaf_info *find_leaf_info(struct hlist_head *head, int plen)
{
struct hlist_node *node;
struct leaf_info *li;
hlist_for_each_entry(li, node, head, hlist) {
if ( li->plen == plen )
return li;
}
return NULL;
}
static inline struct list_head * get_fa_head(struct leaf *l, int plen)
{
struct list_head *fa_head=NULL;
struct leaf_info *li = find_leaf_info(&l->list, plen);
if(li)
fa_head = &li->falh;
return fa_head;
}
static void insert_leaf_info(struct hlist_head *head, struct leaf_info *new)
{
struct leaf_info *li=NULL, *last=NULL;
struct hlist_node *node, *tmp;
write_lock_bh(&fib_lock);
if(hlist_empty(head))
hlist_add_head(&new->hlist, head);
else {
hlist_for_each_entry_safe(li, node, tmp, head, hlist) {
if (new->plen > li->plen)
break;
last = li;
}
if(last)
hlist_add_after(&last->hlist, &new->hlist);
else
hlist_add_before(&new->hlist, &li->hlist);
}
write_unlock_bh(&fib_lock);
}
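/* Editorial note: the walk above keeps the list sorted by decreasing
prefix length, so the hlist scan in check_leaf() below tries the
longest prefix first - which is what gives the lookup its
longest-prefix-match semantics. */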
static struct leaf *
fib_find_node(struct trie *t, u32 key)
{
int pos;
struct tnode *tn;
struct node *n;
pos = 0;
n=t->trie;
while (n != NULL && NODE_TYPE(n) == T_TNODE) {
tn = (struct tnode *) n;
check_tnode(tn);
if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
pos=tn->pos + tn->bits;
n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
}
else
break;
}
/* Case we have found a leaf. Compare prefixes */
if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
struct leaf *l = (struct leaf *) n;
return l;
}
return NULL;
}
static struct node *trie_rebalance(struct trie *t, struct tnode *tn)
{
int i = 0;
int wasfull;
t_key cindex, key;
struct tnode *tp = NULL;
if(!tn)
BUG();
key = tn->key;
i = 0;
while (tn != NULL && NODE_PARENT(tn) != NULL) {
if( i > 10 ) {
printk("Rebalance tn=%p \n", tn);
if(tn) printk("tn->parent=%p \n", NODE_PARENT(tn));
printk("Rebalance tp=%p \n", tp);
if(tp) printk("tp->parent=%p \n", NODE_PARENT(tp));
}
if( i > 12 ) BUG();
i++;
tp = NODE_PARENT(tn);
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
wasfull = tnode_full(tp, tnode_get_child(tp, cindex));
tn = (struct tnode *) resize (t, (struct tnode *)tn);
tnode_put_child_reorg((struct tnode *)tp, cindex,(struct node*)tn, wasfull);
if(!NODE_PARENT(tn))
break;
tn = NODE_PARENT(tn);
}
/* Handle last (top) tnode */
if (IS_TNODE(tn))
tn = (struct tnode*) resize(t, (struct tnode *)tn);
return (struct node*) tn;
}
static struct list_head *
fib_insert_node(struct trie *t, u32 key, int plen)
{
int pos, newpos;
struct tnode *tp = NULL, *tn = NULL;
struct node *n;
struct leaf *l;
int missbit;
struct list_head *fa_head=NULL;
struct leaf_info *li;
t_key cindex;
pos = 0;
n=t->trie;
/* If we point to NULL, stop. Either the tree is empty and we should
* just put a new leaf in it, or we have reached an empty child slot,
* and we should just put our new leaf in that.
* If we point to a T_TNODE, check if it matches our key. Note that
* a T_TNODE might be skipping any number of bits - its 'pos' need
* not be the parent's 'pos'+'bits'!
*
* If it does match the current key, get pos/bits from it, extract
* the index from our key, push the T_TNODE and walk the tree.
*
* If it doesn't, we have to replace it with a new T_TNODE.
*
* If we point to a T_LEAF, it might or might not have the same key
* as we do. If it does, just change the value, update the T_LEAF's
* value, and return it.
* If it doesn't, we need to replace it with a T_TNODE.
*/
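/* Illustration (editorial): inserting into an empty trie falls through
to the final else below with n == NULL and creates the first 1-bit
tnode at pos 0, with the new leaf as one child; walking to an empty
child slot of an existing tnode instead takes Case 2 and simply parks
the new leaf there. */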
while (n != NULL && NODE_TYPE(n) == T_TNODE) {
tn = (struct tnode *) n;
check_tnode(tn);
if(tkey_sub_equals(tn->key, pos, tn->pos-pos, key)) {
tp = tn;
pos=tn->pos + tn->bits;
n = tnode_get_child(tn, tkey_extract_bits(key, tn->pos, tn->bits));
if(n && NODE_PARENT(n) != tn) {
printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
BUG();
}
}
else
break;
}
/*
* n ----> NULL, LEAF or TNODE
*
* tp is n's (parent) ----> NULL or TNODE
*/
if(tp && IS_LEAF(tp))
BUG();
t->revision++;
/* Case 1: n is a leaf. Compare prefixes */
if (n != NULL && IS_LEAF(n) && tkey_equals(key, n->key)) {
struct leaf *l = ( struct leaf *) n;
li = leaf_info_new(plen);
if(! li)
BUG();
fa_head = &li->falh;
insert_leaf_info(&l->list, li);
goto done;
}
t->size++;
l = leaf_new();
if(! l)
BUG();
l->key = key;
li = leaf_info_new(plen);
if(! li)
BUG();
fa_head = &li->falh;
insert_leaf_info(&l->list, li);
/* Case 2: n is NULL, and will just insert a new leaf */
if (t->trie && n == NULL) {
NODE_SET_PARENT(l, tp);
if (!tp)
BUG();
else {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
put_child(t, (struct tnode *)tp, cindex, (struct node *)l);
}
}
/* Case 3: n is a LEAF or a TNODE and the key doesn't match. */
else {
/*
* Add a new tnode here
* the first tnode needs some special handling
*/
if (tp)
pos=tp->pos+tp->bits;
else
pos=0;
if(n) {
newpos = tkey_mismatch(key, pos, n->key);
tn = tnode_new(n->key, newpos, 1);
}
else {
newpos = 0;
tn = tnode_new(key, newpos, 1); /* First tnode */
}
if(!tn)
trie_bug("tnode_pfx_new failed");
NODE_SET_PARENT(tn, tp);
missbit=tkey_extract_bits(key, newpos, 1);
put_child(t, tn, missbit, (struct node *)l);
put_child(t, tn, 1-missbit, n);
if(tp) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
put_child(t, (struct tnode *)tp, cindex, (struct node *)tn);
}
else {
t->trie = (struct node*) tn; /* First tnode */
tp = tn;
}
}
if(tp && tp->pos+tp->bits > 32) {
printk("ERROR tp=%p pos=%d, bits=%d, key=%0x plen=%d\n",
tp, tp->pos, tp->bits, key, plen);
}
/* Rebalance the trie */
t->trie = trie_rebalance(t, tp);
done:;
return fa_head;
}
static int
fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
{
struct trie *t = (struct trie *) tb->tb_data;
struct fib_alias *fa, *new_fa;
struct list_head *fa_head=NULL;
struct fib_info *fi;
int plen = r->rtm_dst_len;
int type = r->rtm_type;
u8 tos = r->rtm_tos;
u32 key, mask;
int err;
struct leaf *l;
if (plen > 32)
return -EINVAL;
key = 0;
if (rta->rta_dst)
memcpy(&key, rta->rta_dst, 4);
key = ntohl(key);
if(trie_debug)
printk("Insert table=%d %08x/%d\n", tb->tb_id, key, plen);
mask = ntohl( inet_make_mask(plen) );
if(key & ~mask)
return -EINVAL;
key = key & mask;
if ((fi = fib_create_info(r, rta, nlhdr, &err)) == NULL)
goto err;
l = fib_find_node(t, key);
fa = NULL;
if(l) {
fa_head = get_fa_head(l, plen);
fa = fib_find_alias(fa_head, tos, fi->fib_priority);
}
/* Now fa, if non-NULL, points to the first fib alias
* with the same keys [prefix,tos,priority], if such key already
* exists or to the node before which we will insert new one.
*
* If fa is NULL, we will need to allocate a new one and
* insert to the head of f.
*
* If f is NULL, no fib node matched the destination key
* and we need to allocate a new one of those as well.
*/
if (fa &&
fa->fa_info->fib_priority == fi->fib_priority) {
struct fib_alias *fa_orig;
err = -EEXIST;
if (nlhdr->nlmsg_flags & NLM_F_EXCL)
goto out;
if (nlhdr->nlmsg_flags & NLM_F_REPLACE) {
struct fib_info *fi_drop;
u8 state;
write_lock_bh(&fib_lock);
fi_drop = fa->fa_info;
fa->fa_info = fi;
fa->fa_type = type;
fa->fa_scope = r->rtm_scope;
state = fa->fa_state;
fa->fa_state &= ~FA_S_ACCESSED;
write_unlock_bh(&fib_lock);
fib_release_info(fi_drop);
if (state & FA_S_ACCESSED)
rt_cache_flush(-1);
goto succeeded;
}
/* Error if we find a perfect match which
* uses the same scope, type, and nexthop
* information.
*/
fa_orig = fa;
list_for_each_entry(fa, fa_orig->fa_list.prev, fa_list) {
if (fa->fa_tos != tos)
break;
if (fa->fa_info->fib_priority != fi->fib_priority)
break;
if (fa->fa_type == type &&
fa->fa_scope == r->rtm_scope &&
fa->fa_info == fi) {
goto out;
}
}
if (!(nlhdr->nlmsg_flags & NLM_F_APPEND))
fa = fa_orig;
}
err = -ENOENT;
if (!(nlhdr->nlmsg_flags&NLM_F_CREATE))
goto out;
err = -ENOBUFS;
new_fa = kmem_cache_alloc(fn_alias_kmem, SLAB_KERNEL);
if (new_fa == NULL)
goto out;
new_fa->fa_info = fi;
new_fa->fa_tos = tos;
new_fa->fa_type = type;
new_fa->fa_scope = r->rtm_scope;
new_fa->fa_state = 0;
#if 0
new_fa->dst = NULL;
#endif
/*
* Insert new entry to the list.
*/
if(!fa_head)
fa_head = fib_insert_node(t, key, plen);
write_lock_bh(&fib_lock);
list_add_tail(&new_fa->fa_list,
(fa ? &fa->fa_list : fa_head));
write_unlock_bh(&fib_lock);
rt_cache_flush(-1);
rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req);
succeeded:
return 0;
out:
fib_release_info(fi);
err:;
return err;
}
static inline int check_leaf(struct trie *t, struct leaf *l, t_key key, int *plen, const struct flowi *flp,
struct fib_result *res, int *err)
{
int i;
t_key mask;
struct leaf_info *li;
struct hlist_head *hhead = &l->list;
struct hlist_node *node;
hlist_for_each_entry(li, node, hhead, hlist) {
i = li->plen;
mask = ntohl(inet_make_mask(i));
if (l->key != (key & mask))
continue;
if (((*err) = fib_semantic_match(&li->falh, flp, res, l->key, mask, i)) == 0) {
*plen = i;
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.semantic_match_passed++;
#endif
return 1;
}
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.semantic_match_miss++;
#endif
}
return 0;
}
static int
fn_trie_lookup(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
struct trie *t = (struct trie *) tb->tb_data;
int plen, ret = 0;
struct node *n;
struct tnode *pn;
int pos, bits;
t_key key=ntohl(flp->fl4_dst);
int chopped_off;
t_key cindex = 0;
int current_prefix_length = KEYLENGTH;
n = t->trie;
read_lock(&fib_lock);
if(!n)
goto failed;
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.gets++;
#endif
/* Just a leaf? */
if (IS_LEAF(n)) {
if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret) )
goto found;
goto failed;
}
pn = (struct tnode *) n;
chopped_off = 0;
while (pn) {
pos = pn->pos;
bits = pn->bits;
if(!chopped_off)
cindex = tkey_extract_bits(MASK_PFX(key, current_prefix_length), pos, bits);
n = tnode_get_child(pn, cindex);
if (n == NULL) {
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.null_node_hit++;
#endif
goto backtrace;
}
if (IS_TNODE(n)) {
#define HL_OPTIMIZE
#ifdef HL_OPTIMIZE
struct tnode *cn = (struct tnode *)n;
t_key node_prefix, key_prefix, pref_mismatch;
int mp;
/*
* It's a tnode, and we can do some extra checks here if we
* like, to avoid descending into a dead-end branch.
* This tnode is in the parent's child array at index
* key[p_pos..p_pos+p_bits] but potentially with some bits
* chopped off, so in reality the index may be just a
* subprefix, padded with zero at the end.
* We can also take a look at any skipped bits in this
* tnode - everything up to p_pos is supposed to be ok,
* and the non-chopped bits of the index (see previous
* paragraph) are also guaranteed ok, but the rest is
* considered unknown.
*
* The skipped bits are key[pos+bits..cn->pos].
*/
/* If current_prefix_length < pos+bits, we are already doing
* actual prefix matching, which means everything from
* pos+(bits-chopped_off) onward must be zero along some
* branch of this subtree - otherwise there is *no* valid
* prefix present. Here we can only check the skipped
* bits. Remember, since we have already indexed into the
* parent's child array, we know that the bits we chopped off
* *are* zero.
*/
/* NOTA BENE: CHECKING ONLY SKIPPED BITS FOR THE NEW NODE HERE */
if (current_prefix_length < pos+bits) {
if (tkey_extract_bits(cn->key, current_prefix_length,
cn->pos - current_prefix_length) != 0 ||
!(cn->child[0]))
goto backtrace;
}
/*
* If chopped_off=0, the index is fully validated and we
* only need to look at the skipped bits for this, the new,
* tnode. What we actually want to do is to find out if
* these skipped bits match our key perfectly, or if we will
* have to count on finding a matching prefix further down,
* because if we do, we would like to have some way of
* verifying the existence of such a prefix at this point.
*/
/* The only thing we can do at this point is to verify that
* any such matching prefix can indeed be a prefix to our
* key, and if the bits in the node we are inspecting that
* do not match our key are not ZERO, this cannot be true.
* Thus, find out where there is a mismatch (before cn->pos)
* and verify that all the mismatching bits are zero in the
* new tnode's key.
*/
/* Note: We aren't very concerned about the part of the key
* that precedes pn->pos+pn->bits, since those bits have already been
* checked. The bits after cn->pos aren't checked since these are
* by definition "unknown" at this point. Thus, what we want to
* see is if we are about to enter the "prefix matching" state,
* and in that case verify that the skipped bits that will prevail
* throughout this subtree are zero, as they have to be if we are
* to find a matching prefix.
*/
node_prefix = MASK_PFX(cn->key, cn->pos);
key_prefix = MASK_PFX(key, cn->pos);
pref_mismatch = key_prefix^node_prefix;
mp = 0;
/* In short: If skipped bits in this node do not match the search
* key, enter the "prefix matching" state directly.
*/
if (pref_mismatch) {
while (!(pref_mismatch & (1<<(KEYLENGTH-1)))) {
mp++;
pref_mismatch = pref_mismatch <<1;
}
key_prefix = tkey_extract_bits(cn->key, mp, cn->pos-mp);
if (key_prefix != 0)
goto backtrace;
if (current_prefix_length >= cn->pos)
current_prefix_length=mp;
}
#endif
pn = (struct tnode *)n; /* Descend */
chopped_off = 0;
continue;
}
if (IS_LEAF(n)) {
if( check_leaf(t, (struct leaf *)n, key, &plen, flp, res, &ret))
goto found;
}
backtrace:
chopped_off++;
/* Zero bits don't change the child key (cindex), so skip over them */
while ((chopped_off <= pn->bits) && !(cindex & (1<<(chopped_off-1)))) {
chopped_off++;
}
/* Decrease current_... with bits chopped off */
if (current_prefix_length > pn->pos + pn->bits - chopped_off)
current_prefix_length = pn->pos + pn->bits - chopped_off;
/*
* Either we apply the chop-off to cindex, or, if we have
* chopped off all bits in this tnode, walk up to our parent.
*/
if(chopped_off <= pn->bits)
cindex &= ~(1 << (chopped_off-1));
else {
if( NODE_PARENT(pn) == NULL)
goto failed;
/* Get Child's index */
cindex = tkey_extract_bits(pn->key, NODE_PARENT(pn)->pos, NODE_PARENT(pn)->bits);
pn = NODE_PARENT(pn);
chopped_off = 0;
#ifdef CONFIG_IP_FIB_TRIE_STATS
t->stats.backtrack++;
#endif
goto backtrace;
}
}
failed:
ret = 1;
found:
read_unlock(&fib_lock);
return ret;
}
static int trie_leaf_remove(struct trie *t, t_key key)
{
t_key cindex;
struct tnode *tp = NULL;
struct node *n = t->trie;
struct leaf *l;
if(trie_debug)
printk("entering trie_leaf_remove(%p)\n", n);
/* Note that in the case of skipped bits, those bits are *not* checked!
* When we finish this, we will have NULL or a T_LEAF, and the
* T_LEAF may or may not match our key.
*/
while (n != NULL && IS_TNODE(n)) {
struct tnode *tn = (struct tnode *) n;
check_tnode(tn);
n = tnode_get_child(tn ,tkey_extract_bits(key, tn->pos, tn->bits));
if(n && NODE_PARENT(n) != tn) {
printk("BUG tn=%p, n->parent=%p\n", tn, NODE_PARENT(n));
BUG();
}
}
l = (struct leaf *) n;
if(!n || !tkey_equals(l->key, key))
return 0;
/*
* Key found.
* Remove the leaf and rebalance the tree
*/
t->revision++;
t->size--;
tp = NODE_PARENT(n);
tnode_free((struct tnode *) n);
if(tp) {
cindex = tkey_extract_bits(key, tp->pos, tp->bits);
put_child(t, (struct tnode *)tp, cindex, NULL);
t->trie = trie_rebalance(t, tp);
}
else
t->trie = NULL;
return 1;
}
static int
fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta,
struct nlmsghdr *nlhdr, struct netlink_skb_parms *req)
{
struct trie *t = (struct trie *) tb->tb_data;
u32 key, mask;
int plen = r->rtm_dst_len;
u8 tos = r->rtm_tos;
struct fib_alias *fa, *fa_to_delete;
struct list_head *fa_head;
struct leaf *l;
if (plen > 32)
return -EINVAL;
key = 0;
if (rta->rta_dst)
memcpy(&key, rta->rta_dst, 4);
key = ntohl(key);
mask = ntohl( inet_make_mask(plen) );
if(key & ~mask)
return -EINVAL;
key = key & mask;
l = fib_find_node(t, key);
if(!l)
return -ESRCH;
fa_head = get_fa_head(l, plen);
fa = fib_find_alias(fa_head, tos, 0);
if (!fa)
return -ESRCH;
if (trie_debug)
printk("Deleting %08x/%d tos=%d t=%p\n", key, plen, tos, t);
fa_to_delete = NULL;
fa_head = fa->fa_list.prev;
list_for_each_entry(fa, fa_head, fa_list) {
struct fib_info *fi = fa->fa_info;
if (fa->fa_tos != tos)
break;
if ((!r->rtm_type ||
fa->fa_type == r->rtm_type) &&
(r->rtm_scope == RT_SCOPE_NOWHERE ||
fa->fa_scope == r->rtm_scope) &&
(!r->rtm_protocol ||
fi->fib_protocol == r->rtm_protocol) &&
fib_nh_match(r, nlhdr, rta, fi) == 0) {
fa_to_delete = fa;
break;
}
}
if (fa_to_delete) {
int kill_li = 0;
struct leaf_info *li;
fa = fa_to_delete;
rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req);
l = fib_find_node(t, key);
li = find_leaf_info(&l->list, plen);
write_lock_bh(&fib_lock);
list_del(&fa->fa_list);
if(list_empty(fa_head)) {
hlist_del(&li->hlist);
kill_li = 1;
}
write_unlock_bh(&fib_lock);
if(kill_li)
free_leaf_info(li);
if(hlist_empty(&l->list))
trie_leaf_remove(t, key);
if (fa->fa_state & FA_S_ACCESSED)
rt_cache_flush(-1);
fn_free_alias(fa);
return 0;
}
return -ESRCH;
}
static int trie_flush_list(struct trie *t, struct list_head *head)
{
struct fib_alias *fa, *fa_node;
int found = 0;
list_for_each_entry_safe(fa, fa_node, head, fa_list) {
struct fib_info *fi = fa->fa_info;
if (fi && (fi->fib_flags&RTNH_F_DEAD)) {
write_lock_bh(&fib_lock);
list_del(&fa->fa_list);
write_unlock_bh(&fib_lock);
fn_free_alias(fa);
found++;
}
}
return found;
}
static int trie_flush_leaf(struct trie *t, struct leaf *l)
{
int found = 0;
struct hlist_head *lih = &l->list;
struct hlist_node *node, *tmp;
struct leaf_info *li = NULL;
hlist_for_each_entry_safe(li, node, tmp, lih, hlist) {
found += trie_flush_list(t, &li->falh);
if (list_empty(&li->falh)) {
write_lock_bh(&fib_lock);
hlist_del(&li->hlist);
write_unlock_bh(&fib_lock);
free_leaf_info(li);
}
}
return found;
}
static struct leaf *nextleaf(struct trie *t, struct leaf *thisleaf)
{
struct node *c = (struct node *) thisleaf;
struct tnode *p;
int idx;
if(c == NULL) {
if(t->trie == NULL)
return NULL;
if (IS_LEAF(t->trie)) /* trie w. just a leaf */
return (struct leaf *) t->trie;
p = (struct tnode*) t->trie; /* Start */
}
else
p = (struct tnode *) NODE_PARENT(c);
while (p) {
int pos, last;
/* Find the next child of the parent */
if(c)
pos = 1 + tkey_extract_bits(c->key, p->pos, p->bits);
else
pos = 0;
last = 1 << p->bits;
for(idx = pos; idx < last ; idx++) {
if( p->child[idx]) {
/* Descend if tnode */
while (IS_TNODE(p->child[idx])) {
p = (struct tnode*) p->child[idx];
idx = 0;
/* Leftmost non-NULL branch */
if( p && IS_TNODE(p) )
while (idx < (1 << p->bits) && p->child[idx] == NULL) idx++;
/* Done with this tnode? */
if( idx >= (1 << p->bits) || p->child[idx] == NULL )
goto up;
}
return (struct leaf*) p->child[idx];
}
}
up:
/* No more children go up one step */
c = (struct node*) p;
p = (struct tnode *) NODE_PARENT(p);
}
return NULL; /* Ready. Root of trie */
}
static int fn_trie_flush(struct fib_table *tb)
{
struct trie *t = (struct trie *) tb->tb_data;
struct leaf *ll = NULL, *l = NULL;
int found = 0, h;
t->revision++;
for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
found += trie_flush_leaf(t, l);
if (ll && hlist_empty(&ll->list))
trie_leaf_remove(t, ll->key);
ll = l;
}
if (ll && hlist_empty(&ll->list))
trie_leaf_remove(t, ll->key);
if(trie_debug)
printk("trie_flush found=%d\n", found);
return found;
}
static int trie_last_dflt=-1;
static void
fn_trie_select_default(struct fib_table *tb, const struct flowi *flp, struct fib_result *res)
{
struct trie *t = (struct trie *) tb->tb_data;
int order, last_idx;
struct fib_info *fi = NULL;
struct fib_info *last_resort;
struct fib_alias *fa = NULL;
struct list_head *fa_head;
struct leaf *l;
last_idx = -1;
last_resort = NULL;
order = -1;
read_lock(&fib_lock);
l = fib_find_node(t, 0);
if(!l)
goto out;
fa_head = get_fa_head(l, 0);
if(!fa_head)
goto out;
if (list_empty(fa_head))
goto out;
list_for_each_entry(fa, fa_head, fa_list) {
struct fib_info *next_fi = fa->fa_info;
if (fa->fa_scope != res->scope ||
fa->fa_type != RTN_UNICAST)
continue;
if (next_fi->fib_priority > res->fi->fib_priority)
break;
if (!next_fi->fib_nh[0].nh_gw ||
next_fi->fib_nh[0].nh_scope != RT_SCOPE_LINK)
continue;
fa->fa_state |= FA_S_ACCESSED;
if (fi == NULL) {
if (next_fi != res->fi)
break;
} else if (!fib_detect_death(fi, order, &last_resort,
&last_idx, &trie_last_dflt)) {
if (res->fi)
fib_info_put(res->fi);
res->fi = fi;
atomic_inc(&fi->fib_clntref);
trie_last_dflt = order;
goto out;
}
fi = next_fi;
order++;
}
if (order <= 0 || fi == NULL) {
trie_last_dflt = -1;
goto out;
}
if (!fib_detect_death(fi, order, &last_resort, &last_idx, &trie_last_dflt)) {
if (res->fi)
fib_info_put(res->fi);
res->fi = fi;
atomic_inc(&fi->fib_clntref);
trie_last_dflt = order;
goto out;
}
if (last_idx >= 0) {
if (res->fi)
fib_info_put(res->fi);
res->fi = last_resort;
if (last_resort)
atomic_inc(&last_resort->fib_clntref);
}
trie_last_dflt = last_idx;
out:;
read_unlock(&fib_lock);
}
static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fib_table *tb,
struct sk_buff *skb, struct netlink_callback *cb)
{
int i, s_i;
struct fib_alias *fa;
u32 xkey=htonl(key);
s_i=cb->args[3];
i = 0;
list_for_each_entry(fa, fah, fa_list) {
if (i < s_i) {
i++;
continue;
}
/* Check fa_info before dereferencing it for the fib_nh test. */
if (fa->fa_info == NULL) {
printk("Trie error fa_info=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
i++;
continue;
}
if (fa->fa_info->fib_nh == NULL) {
printk("Trie error _fib_nh=NULL in fa[%d] k=%08x plen=%d\n", i, key, plen);
i++;
continue;
}
if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid,
cb->nlh->nlmsg_seq,
RTM_NEWROUTE,
tb->tb_id,
fa->fa_type,
fa->fa_scope,
&xkey,
plen,
fa->fa_tos,
fa->fa_info, 0) < 0) {
cb->args[3] = i;
return -1;
}
i++;
}
cb->args[3]=i;
return skb->len;
}
static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, struct sk_buff *skb,
struct netlink_callback *cb)
{
int h, s_h;
struct list_head *fa_head;
struct leaf *l = NULL;
s_h=cb->args[2];
for (h=0; (l = nextleaf(t, l)) != NULL; h++) {
if (h < s_h)
continue;
if (h > s_h)
memset(&cb->args[3], 0,
sizeof(cb->args) - 3*sizeof(cb->args[0]));
fa_head = get_fa_head(l, plen);
if(!fa_head)
continue;
if(list_empty(fa_head))
continue;
if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) {
cb->args[2]=h;
return -1;
}
}
cb->args[2]=h;
return skb->len;
}
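/*
 * Dump cursor layout (as used by fn_trie_dump_fa()/_plen() above and
 * fn_trie_dump() below): cb->args[1] holds the prefix-length index m,
 * cb->args[2] the leaf counter h, cb->args[3] the alias index i. Each
 * level memset()s the deeper cursors once it advances past the saved
 * position, so an interrupted netlink dump resumes where it stopped.
 */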
static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlink_callback *cb)
{
int m, s_m;
struct trie *t = (struct trie *) tb->tb_data;
s_m = cb->args[1];
read_lock(&fib_lock);
for (m=0; m<=32; m++) {
if (m < s_m)
continue;
if (m > s_m)
memset(&cb->args[2], 0,
sizeof(cb->args) - 2*sizeof(cb->args[0]));
if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) {
cb->args[1] = m;
goto out;
}
}
read_unlock(&fib_lock);
cb->args[1] = m;
return skb->len;
out:
read_unlock(&fib_lock);
return -1;
}
/* Fix more generic FIB names for init later */
#ifdef CONFIG_IP_MULTIPLE_TABLES
struct fib_table * fib_hash_init(int id)
#else
struct fib_table * __init fib_hash_init(int id)
#endif
{
struct fib_table *tb;
struct trie *t;
if (fn_alias_kmem == NULL)
fn_alias_kmem = kmem_cache_create("ip_fib_alias",
sizeof(struct fib_alias),
0, SLAB_HWCACHE_ALIGN,
NULL, NULL);
tb = kmalloc(sizeof(struct fib_table) + sizeof(struct trie),
GFP_KERNEL);
if (tb == NULL)
return NULL;
tb->tb_id = id;
tb->tb_lookup = fn_trie_lookup;
tb->tb_insert = fn_trie_insert;
tb->tb_delete = fn_trie_delete;
tb->tb_flush = fn_trie_flush;
tb->tb_select_default = fn_trie_select_default;
tb->tb_dump = fn_trie_dump;
memset(tb->tb_data, 0, sizeof(struct trie));
t = (struct trie *) tb->tb_data;
trie_init(t);
if (id == RT_TABLE_LOCAL)
trie_local=t;
else if (id == RT_TABLE_MAIN)
trie_main=t;
if (id == RT_TABLE_LOCAL)
printk("IPv4 FIB: Using LC-trie version %s\n", VERSION);
return tb;
}
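/*
 * Allocation sketch: tb_data is the zero-length tail of struct fib_table,
 * so the single kmalloc() above carries both objects back to back:
 *
 *	+------------------+--------------------+
 *	| struct fib_table | struct trie        |
 *	+------------------+--------------------+
 *	^ tb               ^ tb->tb_data == t
 */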
/* Trie dump functions */
static void putspace_seq(struct seq_file *seq, int n)
{
while (n--) seq_printf(seq, " ");
}
static void printbin_seq(struct seq_file *seq, unsigned int v, int bits)
{
while (bits--)
seq_printf(seq, "%s", (v & (1<<bits))?"1":"0");
}
static void printnode_seq(struct seq_file *seq, int indent, struct node *n,
int pend, int cindex, int bits)
{
putspace_seq(seq, indent);
if (IS_LEAF(n))
seq_printf(seq, "|");
else
seq_printf(seq, "+");
if (bits) {
seq_printf(seq, "%d/", cindex);
printbin_seq(seq, cindex, bits);
seq_printf(seq, ": ");
}
else
seq_printf(seq, "<root>: ");
seq_printf(seq, "%s:%p ", IS_LEAF(n)?"Leaf":"Internal node", n);
if (IS_LEAF(n))
seq_printf(seq, "key=%d.%d.%d.%d\n",
n->key >> 24, (n->key >> 16) % 256, (n->key >> 8) % 256, n->key % 256);
else {
int plen=((struct tnode *)n)->pos;
t_key prf=MASK_PFX(n->key, plen);
seq_printf(seq, "key=%d.%d.%d.%d/%d\n",
prf >> 24, (prf >> 16) % 256, (prf >> 8) % 256, prf % 256, plen);
}
if (IS_LEAF(n)) {
struct leaf *l=(struct leaf *)n;
struct fib_alias *fa;
int i;
for (i=32; i>=0; i--)
if(find_leaf_info(&l->list, i)) {
struct list_head *fa_head = get_fa_head(l, i);
if(!fa_head)
continue;
if(list_empty(fa_head))
continue;
putspace_seq(seq, indent+2);
seq_printf(seq, "{/%d...dumping}\n", i);
list_for_each_entry(fa, fa_head, fa_list) {
putspace_seq(seq, indent+2);
if (fa->fa_info == NULL) {
seq_printf(seq, "Error fa_info=NULL\n");
continue;
}
if (fa->fa_info->fib_nh == NULL) {
seq_printf(seq, "Error _fib_nh=NULL\n");
continue;
}
seq_printf(seq, "{type=%d scope=%d TOS=%d}\n",
fa->fa_type,
fa->fa_scope,
fa->fa_tos);
}
}
}
else if (IS_TNODE(n)) {
struct tnode *tn=(struct tnode *)n;
putspace_seq(seq, indent); seq_printf(seq, "| ");
seq_printf(seq, "{key prefix=%08x/", tn->key&TKEY_GET_MASK(0, tn->pos));
printbin_seq(seq, tkey_extract_bits(tn->key, 0, tn->pos), tn->pos);
seq_printf(seq, "}\n");
putspace_seq(seq, indent); seq_printf(seq, "| ");
seq_printf(seq, "{pos=%d", tn->pos);
seq_printf(seq, " (skip=%d bits)", tn->pos - pend);
seq_printf(seq, " bits=%d (%u children)}\n", tn->bits, (1 << tn->bits));
putspace_seq(seq, indent); seq_printf(seq, "| ");
seq_printf(seq, "{empty=%d full=%d}\n", tn->empty_children, tn->full_children);
}
}
static void trie_dump_seq(struct seq_file *seq, struct trie *t)
{
struct node *n=t->trie;
int cindex=0;
int indent=1;
int pend=0;
int depth = 0;
read_lock(&fib_lock);
seq_printf(seq, "------ trie_dump of t=%p ------\n", t);
if (n) {
printnode_seq(seq, indent, n, pend, cindex, 0);
if (IS_TNODE(n)) {
struct tnode *tn=(struct tnode *)n;
pend = tn->pos+tn->bits;
putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
indent += 3;
depth++;
while (tn && cindex < (1 << tn->bits)) {
if (tn->child[cindex]) {
/* Got a child */
printnode_seq(seq, indent, tn->child[cindex], pend, cindex, tn->bits);
if (IS_LEAF(tn->child[cindex])) {
cindex++;
}
else {
/*
* New tnode. Descend one level
*/
depth++;
n=tn->child[cindex];
tn=(struct tnode *)n;
pend=tn->pos+tn->bits;
putspace_seq(seq, indent); seq_printf(seq, "\\--\n");
indent+=3;
cindex=0;
}
}
else
cindex++;
/*
* Test if we are done
*/
while (cindex >= (1 << tn->bits)) {
/*
* Move upwards and test for root
* pop off all traversed nodes
*/
if (NODE_PARENT(tn) == NULL) {
tn = NULL;
n = NULL;
break;
}
else {
cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
tn = NODE_PARENT(tn);
cindex++;
n=(struct node *)tn;
pend=tn->pos+tn->bits;
indent-=3;
depth--;
}
}
}
}
else n = NULL;
}
else seq_printf(seq, "------ trie is empty\n");
read_unlock(&fib_lock);
}
static struct trie_stat *trie_stat_new(void)
{
struct trie_stat *s = kmalloc(sizeof(struct trie_stat), GFP_KERNEL);
int i;
if(s) {
s->totdepth = 0;
s->maxdepth = 0;
s->tnodes = 0;
s->leaves = 0;
s->nullpointers = 0;
for(i=0; i< MAX_CHILDS; i++)
s->nodesizes[i] = 0;
}
return s;
}
static struct trie_stat *trie_collect_stats(struct trie *t)
{
struct node *n=t->trie;
struct trie_stat *s = trie_stat_new();
int cindex = 0;
int indent = 1;
int pend = 0;
int depth = 0;
read_lock(&fib_lock);
if (s) {
if (n) {
if (IS_TNODE(n)) {
struct tnode *tn = (struct tnode *)n;
pend=tn->pos+tn->bits;
indent += 3;
s->nodesizes[tn->bits]++;
depth++;
while (tn && cindex < (1 << tn->bits)) {
if (tn->child[cindex]) {
/* Got a child */
if (IS_LEAF(tn->child[cindex])) {
cindex++;
/* stats */
if (depth > s->maxdepth)
s->maxdepth = depth;
s->totdepth += depth;
s->leaves++;
}
else {
/*
* New tnode. Descend one level
*/
s->tnodes++;
s->nodesizes[tn->bits]++;
depth++;
n = tn->child[cindex];
tn = (struct tnode *)n;
pend = tn->pos+tn->bits;
indent += 3;
cindex = 0;
}
}
else {
cindex++;
s->nullpointers++;
}
/*
* Test if we are done
*/
while (cindex >= (1 << tn->bits)) {
/*
* Move upwards and test for root
* pop off all traversed nodes
*/
if (NODE_PARENT(tn) == NULL) {
tn = NULL;
n = NULL;
break;
}
else {
cindex = tkey_extract_bits(tn->key, NODE_PARENT(tn)->pos, NODE_PARENT(tn)->bits);
tn = NODE_PARENT(tn);
cindex++;
n = (struct node *)tn;
pend=tn->pos+tn->bits;
indent -= 3;
depth--;
}
}
}
}
else n = NULL;
}
}
read_unlock(&fib_lock);
return s;
}
#ifdef CONFIG_PROC_FS
static struct fib_alias *fib_triestat_get_first(struct seq_file *seq)
{
return NULL;
}
static struct fib_alias *fib_triestat_get_next(struct seq_file *seq)
{
return NULL;
}
static void *fib_triestat_seq_start(struct seq_file *seq, loff_t *pos)
{
void *v = NULL;
if (ip_fib_main_table)
v = *pos ? fib_triestat_get_next(seq) : SEQ_START_TOKEN;
return v;
}
static void *fib_triestat_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
return v == SEQ_START_TOKEN ? fib_triestat_get_first(seq) : fib_triestat_get_next(seq);
}
static void fib_triestat_seq_stop(struct seq_file *seq, void *v)
{
}
/*
* This outputs /proc/net/fib_triestats
*
* It always works in backward compatibility mode.
* The format of the file is not supposed to be changed.
*/
static void collect_and_show(struct trie *t, struct seq_file *seq)
{
int bytes = 0; /* bytes used; a child ref is one pointer (4 bytes on 32-bit) */
int i, max, pointers;
struct trie_stat *stat;
int avdepth;
stat = trie_collect_stats(t);
bytes=0;
seq_printf(seq, "trie=%p\n", t);
if (stat) {
if (stat->leaves)
avdepth=stat->totdepth*100 / stat->leaves;
else
avdepth=0;
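/*
 * avdepth is the average leaf depth in fixed point with two decimal
 * places: e.g. totdepth=615 over leaves=250 gives avdepth=246, printed
 * below as "2.46".
 */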
seq_printf(seq, "Aver depth: %d.%02d\n", avdepth / 100, avdepth % 100 );
seq_printf(seq, "Max depth: %4d\n", stat->maxdepth);
seq_printf(seq, "Leaves: %d\n", stat->leaves);
bytes += sizeof(struct leaf) * stat->leaves;
seq_printf(seq, "Internal nodes: %d\n", stat->tnodes);
bytes += sizeof(struct tnode) * stat->tnodes;
max = MAX_CHILDS-1;
while (max >= 0 && stat->nodesizes[max] == 0)
max--;
pointers = 0;
for (i = 1; i <= max; i++)
if (stat->nodesizes[i] != 0) {
seq_printf(seq, " %d: %d", i, stat->nodesizes[i]);
pointers += (1<<i) * stat->nodesizes[i];
}
seq_printf(seq, "\n");
seq_printf(seq, "Pointers: %d\n", pointers);
bytes += sizeof(struct node *) * pointers;
seq_printf(seq, "Null ptrs: %d\n", stat->nullpointers);
seq_printf(seq, "Total size: %d kB\n", bytes / 1024);
kfree(stat);
}
#ifdef CONFIG_IP_FIB_TRIE_STATS
seq_printf(seq, "Counters:\n---------\n");
seq_printf(seq,"gets = %d\n", t->stats.gets);
seq_printf(seq,"backtracks = %d\n", t->stats.backtrack);
seq_printf(seq,"semantic match passed = %d\n", t->stats.semantic_match_passed);
seq_printf(seq,"semantic match miss = %d\n", t->stats.semantic_match_miss);
seq_printf(seq,"null node hit= %d\n", t->stats.null_node_hit);
#ifdef CLEAR_STATS
memset(&(t->stats), 0, sizeof(t->stats));
#endif
#endif /* CONFIG_IP_FIB_TRIE_STATS */
}
static int fib_triestat_seq_show(struct seq_file *seq, void *v)
{
char bf[128];
if (v == SEQ_START_TOKEN) {
seq_printf(seq, "Basic info: size of leaf: %Zd bytes, size of tnode: %Zd bytes.\n",
sizeof(struct leaf), sizeof(struct tnode));
if (trie_local)
collect_and_show(trie_local, seq);
if (trie_main)
collect_and_show(trie_main, seq);
}
else {
snprintf(bf, sizeof(bf),
"*\t%08X\t%08X", 200, 400);
seq_printf(seq, "%-127s\n", bf);
}
return 0;
}
static struct seq_operations fib_triestat_seq_ops = {
.start = fib_triestat_seq_start,
.next = fib_triestat_seq_next,
.stop = fib_triestat_seq_stop,
.show = fib_triestat_seq_show,
};
static int fib_triestat_seq_open(struct inode *inode, struct file *file)
{
return seq_open(file, &fib_triestat_seq_ops);
}
static struct file_operations fib_triestat_seq_fops = {
.owner = THIS_MODULE,
.open = fib_triestat_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
int __init fib_stat_proc_init(void)
{
if (!proc_net_fops_create("fib_triestat", S_IRUGO, &fib_triestat_seq_fops))
return -ENOMEM;
return 0;
}
void __exit fib_stat_proc_exit(void)
{
proc_net_remove("fib_triestat");
}
static struct fib_alias *fib_trie_get_first(struct seq_file *seq)
{
return NULL;
}
static struct fib_alias *fib_trie_get_next(struct seq_file *seq)
{
return NULL;
}
static void *fib_trie_seq_start(struct seq_file *seq, loff_t *pos)
{
void *v = NULL;
if (ip_fib_main_table)
v = *pos ? fib_trie_get_next(seq) : SEQ_START_TOKEN;
return v;
}
static void *fib_trie_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
++*pos;
return v == SEQ_START_TOKEN ? fib_trie_get_first(seq) : fib_trie_get_next(seq);
}
static void fib_trie_seq_stop(struct seq_file *seq, void *v)
{
}
/*
* This outputs /proc/net/fib_trie.
*
* It always works in backward compatibility mode.
* The format of the file is not supposed to be changed.
*/
static int fib_trie_seq_show(struct seq_file *seq, void *v)
{
char bf[128];
if (v == SEQ_START_TOKEN) {
if (trie_local)
trie_dump_seq(seq, trie_local);
if (trie_main)
trie_dump_seq(seq, trie_main);
}
else {
snprintf(bf, sizeof(bf),
"*\t%08X\t%08X", 200, 400);
seq_printf(seq, "%-127s\n", bf);
}
return 0;
}
static struct seq_operations fib_trie_seq_ops = {
.start = fib_trie_seq_start,
.next = fib_trie_seq_next,
.stop = fib_trie_seq_stop,
.show = fib_trie_seq_show,
};
static int fib_trie_seq_open(struct inode *inode, struct file *file)
{
return seq_open(file, &fib_trie_seq_ops);
}
static struct file_operations fib_trie_seq_fops = {
.owner = THIS_MODULE,
.open = fib_trie_seq_open,
.read = seq_read,
.llseek = seq_lseek,
.release = seq_release_private,
};
int __init fib_proc_init(void)
{
if (!proc_net_fops_create("fib_trie", S_IRUGO, &fib_trie_seq_fops))
return -ENOMEM;
return 0;
}
void __exit fib_proc_exit(void)
{
proc_net_remove("fib_trie");
}
#endif /* CONFIG_PROC_FS */
......@@ -184,6 +184,7 @@ int ip_call_ra_chain(struct sk_buff *skb)
raw_rcv(last, skb2);
}
last = sk;
nf_reset(skb);
}
}
......@@ -200,10 +201,6 @@ static inline int ip_local_deliver_finish(struct sk_buff *skb)
{
int ihl = skb->nh.iph->ihl*4;
#ifdef CONFIG_NETFILTER_DEBUG
nf_debug_ip_local_deliver(skb);
#endif /*CONFIG_NETFILTER_DEBUG*/
__skb_pull(skb, ihl);
/* Free reference early: we don't need it any more, and it may
......
......@@ -107,10 +107,6 @@ static int ip_dev_loopback_xmit(struct sk_buff *newskb)
newskb->pkt_type = PACKET_LOOPBACK;
newskb->ip_summed = CHECKSUM_UNNECESSARY;
BUG_TRAP(newskb->dst);
#ifdef CONFIG_NETFILTER_DEBUG
nf_debug_ip_loopback_xmit(newskb);
#endif
nf_reset(newskb);
netif_rx(newskb);
return 0;
......@@ -192,10 +188,6 @@ static inline int ip_finish_output2(struct sk_buff *skb)
skb = skb2;
}
#ifdef CONFIG_NETFILTER_DEBUG
nf_debug_ip_finish_output2(skb);
#endif /*CONFIG_NETFILTER_DEBUG*/
nf_reset(skb);
if (hh) {
......@@ -415,9 +407,6 @@ static void ip_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->nf_bridge = from->nf_bridge;
nf_bridge_get(to->nf_bridge);
#endif
#ifdef CONFIG_NETFILTER_DEBUG
to->nf_debug = from->nf_debug;
#endif
#endif
}
......
......@@ -1350,6 +1350,7 @@ int ip_mr_input(struct sk_buff *skb)
*/
read_lock(&mrt_lock);
if (mroute_socket) {
nf_reset(skb);
raw_rcv(mroute_socket, skb);
read_unlock(&mrt_lock);
return 0;
......
......@@ -127,7 +127,6 @@ ip_vs_dst_reset(struct ip_vs_dest *dest)
#define IP_VS_XMIT(skb, rt) \
do { \
nf_reset_debug(skb); \
(skb)->nfcache |= NFC_IPVS_PROPERTY; \
(skb)->ip_summed = CHECKSUM_NONE; \
NF_HOOK(PF_INET, NF_IP_LOCAL_OUT, (skb), NULL, \
......
......@@ -60,7 +60,6 @@ static DECLARE_MUTEX(arpt_mutex);
#define ASSERT_READ_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) ARP_NF_ASSERT(down_trylock(&arpt_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>
struct arpt_table_info {
......
......@@ -26,7 +26,6 @@
#include <net/checksum.h>
#include <net/udp.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_amanda.h>
......@@ -42,7 +41,7 @@ static char *conns[] = { "DATA ", "MESG ", "INDEX " };
/* This is slow, but it's simple. --RR */
static char amanda_buffer[65536];
static DECLARE_LOCK(amanda_buffer_lock);
static DEFINE_SPINLOCK(amanda_buffer_lock);
unsigned int (*ip_nat_amanda_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
......@@ -76,7 +75,7 @@ static int help(struct sk_buff **pskb,
return NF_ACCEPT;
}
LOCK_BH(&amanda_buffer_lock);
spin_lock_bh(&amanda_buffer_lock);
skb_copy_bits(*pskb, dataoff, amanda_buffer, (*pskb)->len - dataoff);
data = amanda_buffer;
data_limit = amanda_buffer + (*pskb)->len - dataoff;
......@@ -134,7 +133,7 @@ static int help(struct sk_buff **pskb,
}
out:
UNLOCK_BH(&amanda_buffer_lock);
spin_unlock_bh(&amanda_buffer_lock);
return ret;
}
......
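/*
 * Conversion pattern applied throughout this patch (sketch; my_lock is a
 * hypothetical name): in the non-debug build the lockhelp.h wrappers were
 * plain aliases for the standard _bh primitives, so every site maps
 * mechanically:
 *
 *	static DEFINE_SPINLOCK(my_lock);   // was: static DECLARE_LOCK(my_lock);
 *
 *	spin_lock_bh(&my_lock);            // was: LOCK_BH(&my_lock);
 *	// ... critical section ...
 *	spin_unlock_bh(&my_lock);          // was: UNLOCK_BH(&my_lock);
 *
 * and likewise DECLARE_RWLOCK -> DEFINE_RWLOCK, READ_LOCK/WRITE_LOCK ->
 * read_lock_bh()/write_lock_bh().
 */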
......@@ -38,10 +38,10 @@
#include <linux/percpu.h>
#include <linux/moduleparam.h>
/* This rwlock protects the main hash table, protocol/helper/expected
/* ip_conntrack_lock protects the main hash table, protocol/helper/expected
registrations, conntrack timers*/
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
......@@ -57,7 +57,7 @@
#define DEBUGP(format, args...)
#endif
DECLARE_RWLOCK(ip_conntrack_lock);
DEFINE_RWLOCK(ip_conntrack_lock);
/* ip_conntrack_standalone needs this */
atomic_t ip_conntrack_count = ATOMIC_INIT(0);
......@@ -147,7 +147,7 @@ static void destroy_expect(struct ip_conntrack_expect *exp)
static void unlink_expect(struct ip_conntrack_expect *exp)
{
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
ASSERT_WRITE_LOCK(&ip_conntrack_lock);
list_del(&exp->list);
/* Logically in destroy_expect, but we hold the lock here. */
exp->master->expecting--;
......@@ -157,9 +157,9 @@ static void expectation_timed_out(unsigned long ul_expect)
{
struct ip_conntrack_expect *exp = (void *)ul_expect;
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
unlink_expect(exp);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
destroy_expect(exp);
}
......@@ -209,7 +209,7 @@ clean_from_lists(struct ip_conntrack *ct)
unsigned int ho, hr;
DEBUGP("clean_from_lists(%p)\n", ct);
MUST_BE_WRITE_LOCKED(&ip_conntrack_lock);
ASSERT_WRITE_LOCK(&ip_conntrack_lock);
ho = hash_conntrack(&ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple);
hr = hash_conntrack(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
......@@ -240,7 +240,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
if (ip_conntrack_destroyed)
ip_conntrack_destroyed(ct);
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
/* Expectations will have been removed in clean_from_lists,
* except TFTP can create an expectation on the first packet,
* before connection is in the list, so we need to clean here,
......@@ -254,7 +254,7 @@ destroy_conntrack(struct nf_conntrack *nfct)
}
CONNTRACK_STAT_INC(delete);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
if (ct->master)
ip_conntrack_put(ct->master);
......@@ -268,12 +268,12 @@ static void death_by_timeout(unsigned long ul_conntrack)
{
struct ip_conntrack *ct = (void *)ul_conntrack;
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
/* Inside lock so preempt is disabled on module removal path.
* Otherwise we can get spurious warnings. */
CONNTRACK_STAT_INC(delete_list);
clean_from_lists(ct);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
ip_conntrack_put(ct);
}
......@@ -282,7 +282,7 @@ conntrack_tuple_cmp(const struct ip_conntrack_tuple_hash *i,
const struct ip_conntrack_tuple *tuple,
const struct ip_conntrack *ignored_conntrack)
{
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
ASSERT_READ_LOCK(&ip_conntrack_lock);
return tuplehash_to_ctrack(i) != ignored_conntrack
&& ip_ct_tuple_equal(tuple, &i->tuple);
}
......@@ -294,7 +294,7 @@ __ip_conntrack_find(const struct ip_conntrack_tuple *tuple,
struct ip_conntrack_tuple_hash *h;
unsigned int hash = hash_conntrack(tuple);
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
ASSERT_READ_LOCK(&ip_conntrack_lock);
list_for_each_entry(h, &ip_conntrack_hash[hash], list) {
if (conntrack_tuple_cmp(h, tuple, ignored_conntrack)) {
CONNTRACK_STAT_INC(found);
......@@ -313,11 +313,11 @@ ip_conntrack_find_get(const struct ip_conntrack_tuple *tuple,
{
struct ip_conntrack_tuple_hash *h;
READ_LOCK(&ip_conntrack_lock);
read_lock_bh(&ip_conntrack_lock);
h = __ip_conntrack_find(tuple, ignored_conntrack);
if (h)
atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
READ_UNLOCK(&ip_conntrack_lock);
read_unlock_bh(&ip_conntrack_lock);
return h;
}
......@@ -352,7 +352,7 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
IP_NF_ASSERT(!is_confirmed(ct));
DEBUGP("Confirming conntrack %p\n", ct);
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
/* See if there's one in the list already, including reverse:
NAT could have grabbed it without realizing, since we're
......@@ -380,12 +380,12 @@ __ip_conntrack_confirm(struct sk_buff **pskb)
atomic_inc(&ct->ct_general.use);
set_bit(IPS_CONFIRMED_BIT, &ct->status);
CONNTRACK_STAT_INC(insert);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
return NF_ACCEPT;
}
CONNTRACK_STAT_INC(insert_failed);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
return NF_DROP;
}
......@@ -398,9 +398,9 @@ ip_conntrack_tuple_taken(const struct ip_conntrack_tuple *tuple,
{
struct ip_conntrack_tuple_hash *h;
READ_LOCK(&ip_conntrack_lock);
read_lock_bh(&ip_conntrack_lock);
h = __ip_conntrack_find(tuple, ignored_conntrack);
READ_UNLOCK(&ip_conntrack_lock);
read_unlock_bh(&ip_conntrack_lock);
return h != NULL;
}
......@@ -419,13 +419,13 @@ static int early_drop(struct list_head *chain)
struct ip_conntrack *ct = NULL;
int dropped = 0;
READ_LOCK(&ip_conntrack_lock);
read_lock_bh(&ip_conntrack_lock);
h = LIST_FIND_B(chain, unreplied, struct ip_conntrack_tuple_hash *);
if (h) {
ct = tuplehash_to_ctrack(h);
atomic_inc(&ct->ct_general.use);
}
READ_UNLOCK(&ip_conntrack_lock);
read_unlock_bh(&ip_conntrack_lock);
if (!ct)
return dropped;
......@@ -508,7 +508,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
conntrack->timeout.data = (unsigned long)conntrack;
conntrack->timeout.function = death_by_timeout;
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
exp = find_expectation(tuple);
if (exp) {
......@@ -532,7 +532,7 @@ init_conntrack(const struct ip_conntrack_tuple *tuple,
list_add(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL].list, &unconfirmed);
atomic_inc(&ip_conntrack_count);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
if (exp) {
if (exp->expectfn)
......@@ -723,17 +723,17 @@ void ip_conntrack_unexpect_related(struct ip_conntrack_expect *exp)
{
struct ip_conntrack_expect *i;
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
/* choose the oldest expectation to evict */
list_for_each_entry_reverse(i, &ip_conntrack_expect_list, list) {
if (expect_matches(i, exp) && del_timer(&i->timeout)) {
unlink_expect(i);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
destroy_expect(i);
return;
}
}
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
}
struct ip_conntrack_expect *ip_conntrack_expect_alloc(void)
......@@ -760,15 +760,11 @@ static void ip_conntrack_expect_insert(struct ip_conntrack_expect *exp)
exp->master->expecting++;
list_add(&exp->list, &ip_conntrack_expect_list);
if (exp->master->helper->timeout) {
init_timer(&exp->timeout);
exp->timeout.data = (unsigned long)exp;
exp->timeout.function = expectation_timed_out;
exp->timeout.expires
= jiffies + exp->master->helper->timeout * HZ;
add_timer(&exp->timeout);
} else
exp->timeout.function = NULL;
init_timer(&exp->timeout);
exp->timeout.data = (unsigned long)exp;
exp->timeout.function = expectation_timed_out;
exp->timeout.expires = jiffies + exp->master->helper->timeout * HZ;
add_timer(&exp->timeout);
CONNTRACK_STAT_INC(expect_create);
}
......@@ -808,7 +804,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
DEBUGP("tuple: "); DUMP_TUPLE(&expect->tuple);
DEBUGP("mask: "); DUMP_TUPLE(&expect->mask);
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
list_for_each_entry(i, &ip_conntrack_expect_list, list) {
if (expect_matches(i, expect)) {
/* Refresh timer: if it's dying, ignore.. */
......@@ -832,7 +828,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
ip_conntrack_expect_insert(expect);
ret = 0;
out:
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
return ret;
}
......@@ -841,7 +837,7 @@ int ip_conntrack_expect_related(struct ip_conntrack_expect *expect)
void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
const struct ip_conntrack_tuple *newreply)
{
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
/* Should be unconfirmed, so not in hash table yet */
IP_NF_ASSERT(!is_confirmed(conntrack));
......@@ -851,15 +847,15 @@ void ip_conntrack_alter_reply(struct ip_conntrack *conntrack,
conntrack->tuplehash[IP_CT_DIR_REPLY].tuple = *newreply;
if (!conntrack->master && conntrack->expecting == 0)
conntrack->helper = ip_ct_find_helper(newreply);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
}
int ip_conntrack_helper_register(struct ip_conntrack_helper *me)
{
BUG_ON(me->timeout == 0);
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
list_prepend(&helpers, me);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
return 0;
}
......@@ -878,7 +874,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
struct ip_conntrack_expect *exp, *tmp;
/* Need write lock here, to delete helper. */
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
LIST_DELETE(&helpers, me);
/* Get rid of expectations */
......@@ -893,7 +889,7 @@ void ip_conntrack_helper_unregister(struct ip_conntrack_helper *me)
for (i = 0; i < ip_conntrack_htable_size; i++)
LIST_FIND_W(&ip_conntrack_hash[i], unhelp,
struct ip_conntrack_tuple_hash *, me);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
/* Someone could still be looking at the helper in a bh. */
synchronize_net();
......@@ -925,14 +921,14 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
ct->timeout.expires = extra_jiffies;
ct_add_counters(ct, ctinfo, skb);
} else {
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
/* Need del_timer for race avoidance (may already be dying). */
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
}
ct_add_counters(ct, ctinfo, skb);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
}
}
......@@ -940,10 +936,6 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
struct sk_buff *
ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
{
#ifdef CONFIG_NETFILTER_DEBUG
unsigned int olddebug = skb->nf_debug;
#endif
skb_orphan(skb);
local_bh_disable();
......@@ -953,12 +945,7 @@ ip_ct_gather_frags(struct sk_buff *skb, u_int32_t user)
if (skb) {
ip_send_check(skb->nh.iph);
skb->nfcache |= NFC_ALTERED;
#ifdef CONFIG_NETFILTER_DEBUG
/* Packet path as if nothing had happened. */
skb->nf_debug = olddebug;
#endif
}
return skb;
}
......@@ -997,7 +984,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
{
struct ip_conntrack_tuple_hash *h = NULL;
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
for (; *bucket < ip_conntrack_htable_size; (*bucket)++) {
h = LIST_FIND_W(&ip_conntrack_hash[*bucket], do_iter,
struct ip_conntrack_tuple_hash *, iter, data);
......@@ -1009,7 +996,7 @@ get_next_corpse(int (*iter)(struct ip_conntrack *i, void *data),
struct ip_conntrack_tuple_hash *, iter, data);
if (h)
atomic_inc(&tuplehash_to_ctrack(h)->ct_general.use);
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
return h;
}
......@@ -1201,14 +1188,14 @@ int __init ip_conntrack_init(void)
}
/* Don't NEED lock here, but good form anyway. */
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
for (i = 0; i < MAX_IP_CT_PROTO; i++)
ip_ct_protos[i] = &ip_conntrack_generic_protocol;
/* Sew in builtin protocols. */
ip_ct_protos[IPPROTO_TCP] = &ip_conntrack_protocol_tcp;
ip_ct_protos[IPPROTO_UDP] = &ip_conntrack_protocol_udp;
ip_ct_protos[IPPROTO_ICMP] = &ip_conntrack_protocol_icmp;
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
for (i = 0; i < ip_conntrack_htable_size; i++)
INIT_LIST_HEAD(&ip_conntrack_hash[i]);
......
......@@ -16,7 +16,6 @@
#include <net/checksum.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_ftp.h>
#include <linux/moduleparam.h>
......@@ -28,7 +27,7 @@ MODULE_DESCRIPTION("ftp connection tracking helper");
/* This is slow, but it's simple. --RR */
static char ftp_buffer[65536];
static DECLARE_LOCK(ip_ftp_lock);
static DEFINE_SPINLOCK(ip_ftp_lock);
#define MAX_PORTS 8
static int ports[MAX_PORTS];
......@@ -319,7 +318,7 @@ static int help(struct sk_buff **pskb,
}
datalen = (*pskb)->len - dataoff;
LOCK_BH(&ip_ftp_lock);
spin_lock_bh(&ip_ftp_lock);
fb_ptr = skb_header_pointer(*pskb, dataoff,
(*pskb)->len - dataoff, ftp_buffer);
BUG_ON(fb_ptr == NULL);
......@@ -442,7 +441,7 @@ static int help(struct sk_buff **pskb,
if (ends_in_nl)
update_nl_seq(seq, ct_ftp_info,dir);
out:
UNLOCK_BH(&ip_ftp_lock);
spin_unlock_bh(&ip_ftp_lock);
return ret;
}
......
......@@ -29,7 +29,6 @@
#include <net/checksum.h>
#include <net/tcp.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_conntrack_irc.h>
#include <linux/moduleparam.h>
......@@ -41,7 +40,7 @@ static int max_dcc_channels = 8;
static unsigned int dcc_timeout = 300;
/* This is slow, but it's simple. --RR */
static char irc_buffer[65536];
static DECLARE_LOCK(irc_buffer_lock);
static DEFINE_SPINLOCK(irc_buffer_lock);
unsigned int (*ip_nat_irc_hook)(struct sk_buff **pskb,
enum ip_conntrack_info ctinfo,
......@@ -141,7 +140,7 @@ static int help(struct sk_buff **pskb,
if (dataoff >= (*pskb)->len)
return NF_ACCEPT;
LOCK_BH(&irc_buffer_lock);
spin_lock_bh(&irc_buffer_lock);
ib_ptr = skb_header_pointer(*pskb, dataoff,
(*pskb)->len - dataoff, irc_buffer);
BUG_ON(ib_ptr == NULL);
......@@ -237,7 +236,7 @@ static int help(struct sk_buff **pskb,
} /* while data < ... */
out:
UNLOCK_BH(&irc_buffer_lock);
spin_unlock_bh(&irc_buffer_lock);
return ret;
}
......
......@@ -26,7 +26,6 @@
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#if 0
#define DEBUGP(format, ...) printk(format, ## __VA_ARGS__)
......@@ -35,7 +34,7 @@
#endif
/* Protects conntrack->proto.sctp */
static DECLARE_RWLOCK(sctp_lock);
static DEFINE_RWLOCK(sctp_lock);
/* FIXME: Examine ipfilter's timeouts and conntrack transitions more
closely. They're more complex. --RR
......@@ -199,9 +198,9 @@ static int sctp_print_conntrack(struct seq_file *s,
DEBUGP(__FUNCTION__);
DEBUGP("\n");
READ_LOCK(&sctp_lock);
read_lock_bh(&sctp_lock);
state = conntrack->proto.sctp.state;
READ_UNLOCK(&sctp_lock);
read_unlock_bh(&sctp_lock);
return seq_printf(s, "%s ", sctp_conntrack_names[state]);
}
......@@ -343,13 +342,13 @@ static int sctp_packet(struct ip_conntrack *conntrack,
oldsctpstate = newconntrack = SCTP_CONNTRACK_MAX;
for_each_sctp_chunk (skb, sch, _sch, offset, count) {
WRITE_LOCK(&sctp_lock);
write_lock_bh(&sctp_lock);
/* Special cases of Verification tag check (Sec 8.5.1) */
if (sch->type == SCTP_CID_INIT) {
/* Sec 8.5.1 (A) */
if (sh->vtag != 0) {
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
return -1;
}
} else if (sch->type == SCTP_CID_ABORT) {
......@@ -357,7 +356,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])
&& !(sh->vtag == conntrack->proto.sctp.vtag
[1 - CTINFO2DIR(ctinfo)])) {
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
return -1;
}
} else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) {
......@@ -366,13 +365,13 @@ static int sctp_packet(struct ip_conntrack *conntrack,
&& !(sh->vtag == conntrack->proto.sctp.vtag
[1 - CTINFO2DIR(ctinfo)]
&& (sch->flags & 1))) {
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
return -1;
}
} else if (sch->type == SCTP_CID_COOKIE_ECHO) {
/* Sec 8.5.1 (D) */
if (!(sh->vtag == conntrack->proto.sctp.vtag[CTINFO2DIR(ctinfo)])) {
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
return -1;
}
}
......@@ -384,7 +383,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
if (newconntrack == SCTP_CONNTRACK_MAX) {
DEBUGP("ip_conntrack_sctp: Invalid dir=%i ctype=%u conntrack=%u\n",
CTINFO2DIR(ctinfo), sch->type, oldsctpstate);
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
return -1;
}
......@@ -396,7 +395,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
ih = skb_header_pointer(skb, offset + sizeof(sctp_chunkhdr_t),
sizeof(_inithdr), &_inithdr);
if (ih == NULL) {
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
return -1;
}
DEBUGP("Setting vtag %x for dir %d\n",
......@@ -405,7 +404,7 @@ static int sctp_packet(struct ip_conntrack *conntrack,
}
conntrack->proto.sctp.state = newconntrack;
WRITE_UNLOCK(&sctp_lock);
write_unlock_bh(&sctp_lock);
}
ip_ct_refresh_acct(conntrack, ctinfo, skb, *sctp_timeouts[newconntrack]);
......
......@@ -36,7 +36,6 @@
#include <linux/netfilter_ipv4.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#if 0
#define DEBUGP printk
......@@ -46,7 +45,7 @@
#endif
/* Protects conntrack->proto.tcp */
static DECLARE_RWLOCK(tcp_lock);
static DEFINE_RWLOCK(tcp_lock);
/* "Be conservative in what you do,
be liberal in what you accept from others."
......@@ -330,9 +329,9 @@ static int tcp_print_conntrack(struct seq_file *s,
{
enum tcp_conntrack state;
READ_LOCK(&tcp_lock);
read_lock_bh(&tcp_lock);
state = conntrack->proto.tcp.state;
READ_UNLOCK(&tcp_lock);
read_unlock_bh(&tcp_lock);
return seq_printf(s, "%s ", tcp_conntrack_names[state]);
}
......@@ -738,14 +737,14 @@ void ip_conntrack_tcp_update(struct sk_buff *skb,
end = segment_seq_plus_len(ntohl(tcph->seq), skb->len, iph, tcph);
WRITE_LOCK(&tcp_lock);
write_lock_bh(&tcp_lock);
/*
* We have to worry for the ack in the reply packet only...
*/
if (after(end, conntrack->proto.tcp.seen[dir].td_end))
conntrack->proto.tcp.seen[dir].td_end = end;
conntrack->proto.tcp.last_end = end;
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
DEBUGP("tcp_update: sender end=%u maxend=%u maxwin=%u scale=%i "
"receiver end=%u maxend=%u maxwin=%u scale=%i\n",
sender->td_end, sender->td_maxend, sender->td_maxwin,
......@@ -857,7 +856,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
sizeof(_tcph), &_tcph);
BUG_ON(th == NULL);
WRITE_LOCK(&tcp_lock);
write_lock_bh(&tcp_lock);
old_state = conntrack->proto.tcp.state;
dir = CTINFO2DIR(ctinfo);
index = get_conntrack_index(th);
......@@ -879,7 +878,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
* that the client cannot but retransmit its SYN and
* thus initiate a clean new session.
*/
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: killing out of sync session ");
......@@ -894,7 +893,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->proto.tcp.last_end =
segment_seq_plus_len(ntohl(th->seq), skb->len, iph, th);
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid packet ignored ");
......@@ -904,7 +903,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
DEBUGP("ip_ct_tcp: Invalid dir=%i index=%u ostate=%u\n",
dir, get_conntrack_index(th),
old_state);
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid state ");
......@@ -918,13 +917,13 @@ static int tcp_packet(struct ip_conntrack *conntrack,
conntrack->proto.tcp.seen[dir].td_end)) {
/* Attempt to reopen a closed connection.
* Delete this connection and look up again. */
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
if (del_timer(&conntrack->timeout))
conntrack->timeout.function((unsigned long)
conntrack);
return -NF_REPEAT;
} else {
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
if (LOG_INVALID(IPPROTO_TCP))
nf_log_packet(PF_INET, 0, skb, NULL, NULL,
"ip_ct_tcp: invalid SYN");
......@@ -949,7 +948,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
if (!tcp_in_window(&conntrack->proto.tcp, dir, index,
skb, iph, th)) {
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
return -NF_ACCEPT;
}
in_window:
......@@ -972,7 +971,7 @@ static int tcp_packet(struct ip_conntrack *conntrack,
timeout = conntrack->proto.tcp.retrans >= ip_ct_tcp_max_retrans
&& *tcp_timeouts[new_state] > ip_ct_tcp_timeout_max_retrans
? ip_ct_tcp_timeout_max_retrans : *tcp_timeouts[new_state];
WRITE_UNLOCK(&tcp_lock);
write_unlock_bh(&tcp_lock);
if (!test_bit(IPS_SEEN_REPLY_BIT, &conntrack->status)) {
/* If only reply is a RST, we can consider ourselves not to
......
......@@ -120,6 +120,7 @@ static int udp_error(struct sk_buff *skb, enum ip_conntrack_info *ctinfo,
* and moreover root might send raw packets.
* FIXME: Source route IP option packets --RR */
if (hooknum == NF_IP_PRE_ROUTING
&& skb->ip_summed != CHECKSUM_UNNECESSARY
&& csum_tcpudp_magic(iph->saddr, iph->daddr, udplen, IPPROTO_UDP,
skb->ip_summed == CHECKSUM_HW ? skb->csum
: skb_checksum(skb, iph->ihl*4, udplen, 0))) {
......
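/*
 * Sketch of the check above (names as in the hunk): the payload sum is
 * taken from hardware when available, otherwise computed, and
 * csum_tcpudp_magic() folds in the IP pseudo-header, returning 0 iff the
 * UDP checksum is valid:
 *
 *	unsigned int sum = skb->ip_summed == CHECKSUM_HW
 *		? skb->csum
 *		: skb_checksum(skb, iph->ihl * 4, udplen, 0);
 *
 *	if (csum_tcpudp_magic(iph->saddr, iph->daddr, udplen,
 *			      IPPROTO_UDP, sum))
 *		// bad checksum
 */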
......@@ -28,8 +28,8 @@
#include <net/checksum.h>
#include <net/ip.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_conntrack_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_conntrack_lock)
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_protocol.h>
......@@ -119,7 +119,7 @@ static struct list_head *ct_get_idx(struct seq_file *seq, loff_t pos)
static void *ct_seq_start(struct seq_file *seq, loff_t *pos)
{
READ_LOCK(&ip_conntrack_lock);
read_lock_bh(&ip_conntrack_lock);
return ct_get_idx(seq, *pos);
}
......@@ -131,7 +131,7 @@ static void *ct_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void ct_seq_stop(struct seq_file *s, void *v)
{
READ_UNLOCK(&ip_conntrack_lock);
read_unlock_bh(&ip_conntrack_lock);
}
static int ct_seq_show(struct seq_file *s, void *v)
......@@ -140,7 +140,7 @@ static int ct_seq_show(struct seq_file *s, void *v)
const struct ip_conntrack *conntrack = tuplehash_to_ctrack(hash);
struct ip_conntrack_protocol *proto;
MUST_BE_READ_LOCKED(&ip_conntrack_lock);
ASSERT_READ_LOCK(&ip_conntrack_lock);
IP_NF_ASSERT(conntrack);
/* we only want to print DIR_ORIGINAL */
......@@ -239,7 +239,7 @@ static void *exp_seq_start(struct seq_file *s, loff_t *pos)
/* the seq_file API calls stop even when start fails,
* so we must take the lock here, since stop unlocks */
READ_LOCK(&ip_conntrack_lock);
read_lock_bh(&ip_conntrack_lock);
if (list_empty(e))
return NULL;
......@@ -267,7 +267,7 @@ static void *exp_seq_next(struct seq_file *s, void *v, loff_t *pos)
static void exp_seq_stop(struct seq_file *s, void *v)
{
READ_UNLOCK(&ip_conntrack_lock);
read_unlock_bh(&ip_conntrack_lock);
}
static int exp_seq_show(struct seq_file *s, void *v)
......@@ -921,22 +921,22 @@ int ip_conntrack_protocol_register(struct ip_conntrack_protocol *proto)
{
int ret = 0;
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
if (ip_ct_protos[proto->proto] != &ip_conntrack_generic_protocol) {
ret = -EBUSY;
goto out;
}
ip_ct_protos[proto->proto] = proto;
out:
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
return ret;
}
void ip_conntrack_protocol_unregister(struct ip_conntrack_protocol *proto)
{
WRITE_LOCK(&ip_conntrack_lock);
write_lock_bh(&ip_conntrack_lock);
ip_ct_protos[proto->proto] = &ip_conntrack_generic_protocol;
WRITE_UNLOCK(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);
/* Somebody could still be looking at the proto in a bh. */
synchronize_net();
......
......@@ -22,8 +22,8 @@
#include <linux/udp.h>
#include <linux/jhash.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_core.h>
......@@ -41,7 +41,7 @@
#define DEBUGP(format, args...)
#endif
DECLARE_RWLOCK(ip_nat_lock);
DEFINE_RWLOCK(ip_nat_lock);
/* Calculated at init based on memory size */
static unsigned int ip_nat_htable_size;
......@@ -65,9 +65,9 @@ static void ip_nat_cleanup_conntrack(struct ip_conntrack *conn)
if (!(conn->status & IPS_NAT_DONE_MASK))
return;
WRITE_LOCK(&ip_nat_lock);
write_lock_bh(&ip_nat_lock);
list_del(&conn->nat.info.bysource);
WRITE_UNLOCK(&ip_nat_lock);
write_unlock_bh(&ip_nat_lock);
}
/* We do checksum mangling, so if they were wrong before they're still
......@@ -142,7 +142,7 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
unsigned int h = hash_by_src(tuple);
struct ip_conntrack *ct;
READ_LOCK(&ip_nat_lock);
read_lock_bh(&ip_nat_lock);
list_for_each_entry(ct, &bysource[h], nat.info.bysource) {
if (same_src(ct, tuple)) {
/* Copy source part from reply tuple. */
......@@ -151,12 +151,12 @@ find_appropriate_src(const struct ip_conntrack_tuple *tuple,
result->dst = tuple->dst;
if (in_range(result, range)) {
READ_UNLOCK(&ip_nat_lock);
read_unlock_bh(&ip_nat_lock);
return 1;
}
}
}
READ_UNLOCK(&ip_nat_lock);
read_unlock_bh(&ip_nat_lock);
return 0;
}
......@@ -297,9 +297,9 @@ ip_nat_setup_info(struct ip_conntrack *conntrack,
unsigned int srchash
= hash_by_src(&conntrack->tuplehash[IP_CT_DIR_ORIGINAL]
.tuple);
WRITE_LOCK(&ip_nat_lock);
write_lock_bh(&ip_nat_lock);
list_add(&info->bysource, &bysource[srchash]);
WRITE_UNLOCK(&ip_nat_lock);
write_unlock_bh(&ip_nat_lock);
}
/* It's done. */
......@@ -474,23 +474,23 @@ int ip_nat_protocol_register(struct ip_nat_protocol *proto)
{
int ret = 0;
WRITE_LOCK(&ip_nat_lock);
write_lock_bh(&ip_nat_lock);
if (ip_nat_protos[proto->protonum] != &ip_nat_unknown_protocol) {
ret = -EBUSY;
goto out;
}
ip_nat_protos[proto->protonum] = proto;
out:
WRITE_UNLOCK(&ip_nat_lock);
write_unlock_bh(&ip_nat_lock);
return ret;
}
/* No one stores the protocol anywhere; simply delete it. */
void ip_nat_protocol_unregister(struct ip_nat_protocol *proto)
{
WRITE_LOCK(&ip_nat_lock);
write_lock_bh(&ip_nat_lock);
ip_nat_protos[proto->protonum] = &ip_nat_unknown_protocol;
WRITE_UNLOCK(&ip_nat_lock);
write_unlock_bh(&ip_nat_lock);
/* Someone could still be looking at the proto in a bh. */
synchronize_net();
......@@ -509,13 +509,13 @@ int __init ip_nat_init(void)
return -ENOMEM;
/* Sew in builtin protocols. */
WRITE_LOCK(&ip_nat_lock);
write_lock_bh(&ip_nat_lock);
for (i = 0; i < MAX_IP_NAT_PROTO; i++)
ip_nat_protos[i] = &ip_nat_unknown_protocol;
ip_nat_protos[IPPROTO_TCP] = &ip_nat_protocol_tcp;
ip_nat_protos[IPPROTO_UDP] = &ip_nat_protocol_udp;
ip_nat_protos[IPPROTO_ICMP] = &ip_nat_protocol_icmp;
WRITE_UNLOCK(&ip_nat_lock);
write_unlock_bh(&ip_nat_lock);
for (i = 0; i < ip_nat_htable_size; i++) {
INIT_LIST_HEAD(&bysource[i]);
......
......@@ -28,8 +28,8 @@
#include <net/tcp.h>
#include <net/udp.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
......@@ -47,7 +47,7 @@
#define DUMP_OFFSET(x)
#endif
static DECLARE_LOCK(ip_nat_seqofs_lock);
static DEFINE_SPINLOCK(ip_nat_seqofs_lock);
/* Setup TCP sequence correction given this change at this sequence */
static inline void
......@@ -70,7 +70,7 @@ adjust_tcp_sequence(u32 seq,
DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
DUMP_OFFSET(this_way);
LOCK_BH(&ip_nat_seqofs_lock);
spin_lock_bh(&ip_nat_seqofs_lock);
/* SYN adjust. If it's uninitialized, or this is after last
* correction, record it: we don't handle more than one
......@@ -82,7 +82,7 @@ adjust_tcp_sequence(u32 seq,
this_way->offset_before = this_way->offset_after;
this_way->offset_after += sizediff;
}
UNLOCK_BH(&ip_nat_seqofs_lock);
spin_unlock_bh(&ip_nat_seqofs_lock);
DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
DUMP_OFFSET(this_way);
......@@ -142,9 +142,6 @@ static int enlarge_skb(struct sk_buff **pskb, unsigned int extra)
/* Transfer socket to new skb. */
if ((*pskb)->sk)
skb_set_owner_w(nskb, (*pskb)->sk);
#ifdef CONFIG_NETFILTER_DEBUG
nskb->nf_debug = (*pskb)->nf_debug;
#endif
kfree_skb(*pskb);
*pskb = nskb;
return 1;
......
......@@ -19,8 +19,8 @@
#include <net/route.h>
#include <linux/bitops.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ip_nat.h>
......
......@@ -31,8 +31,8 @@
#include <net/checksum.h>
#include <linux/spinlock.h>
#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)
#define ASSERT_READ_LOCK(x)
#define ASSERT_WRITE_LOCK(x)
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_rule.h>
......@@ -373,7 +373,6 @@ static int init_or_cleanup(int init)
cleanup_rule_init:
ip_nat_rule_cleanup();
cleanup_nothing:
MUST_BE_READ_WRITE_UNLOCKED(&ip_nat_lock);
return ret;
}
......
......@@ -67,7 +67,6 @@ static DECLARE_MUTEX(ipt_mutex);
/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ipt_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>
#if 0
......
......@@ -29,7 +29,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#define CLUSTERIP_VERSION "0.6"
......@@ -41,6 +40,8 @@
#define DEBUGP
#endif
#define ASSERT_READ_LOCK(x)
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Harald Welte <laforge@netfilter.org>");
MODULE_DESCRIPTION("iptables target for CLUSTERIP");
......@@ -67,7 +68,7 @@ static LIST_HEAD(clusterip_configs);
/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
* data within all structures (num_local_nodes, local_nodes[]) */
static DECLARE_RWLOCK(clusterip_lock);
static DEFINE_RWLOCK(clusterip_lock);
#ifdef CONFIG_PROC_FS
static struct file_operations clusterip_proc_fops;
......@@ -82,9 +83,9 @@ clusterip_config_get(struct clusterip_config *c) {
static inline void
clusterip_config_put(struct clusterip_config *c) {
if (atomic_dec_and_test(&c->refcount)) {
WRITE_LOCK(&clusterip_lock);
write_lock_bh(&clusterip_lock);
list_del(&c->list);
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
dev_put(c->dev);
kfree(c);
......@@ -97,7 +98,7 @@ __clusterip_config_find(u_int32_t clusterip)
{
struct list_head *pos;
MUST_BE_READ_LOCKED(&clusterip_lock);
ASSERT_READ_LOCK(&clusterip_lock);
list_for_each(pos, &clusterip_configs) {
struct clusterip_config *c = list_entry(pos,
struct clusterip_config, list);
......@@ -114,14 +115,14 @@ clusterip_config_find_get(u_int32_t clusterip)
{
struct clusterip_config *c;
READ_LOCK(&clusterip_lock);
read_lock_bh(&clusterip_lock);
c = __clusterip_config_find(clusterip);
if (!c) {
READ_UNLOCK(&clusterip_lock);
read_unlock_bh(&clusterip_lock);
return NULL;
}
atomic_inc(&c->refcount);
READ_UNLOCK(&clusterip_lock);
read_unlock_bh(&clusterip_lock);
return c;
}
......@@ -160,9 +161,9 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
c->pde->data = c;
#endif
WRITE_LOCK(&clusterip_lock);
write_lock_bh(&clusterip_lock);
list_add(&c->list, &clusterip_configs);
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return c;
}
......@@ -172,25 +173,25 @@ clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{
int i;
WRITE_LOCK(&clusterip_lock);
write_lock_bh(&clusterip_lock);
if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
|| nodenum > CLUSTERIP_MAX_NODES) {
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return 1;
}
/* check if we already have this number in our array */
for (i = 0; i < c->num_local_nodes; i++) {
if (c->local_nodes[i] == nodenum) {
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return 1;
}
}
c->local_nodes[c->num_local_nodes++] = nodenum;
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return 0;
}
......@@ -199,10 +200,10 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
{
int i;
WRITE_LOCK(&clusterip_lock);
write_lock_bh(&clusterip_lock);
if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return 1;
}
......@@ -211,12 +212,12 @@ clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
c->num_local_nodes--;
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return 0;
}
}
WRITE_UNLOCK(&clusterip_lock);
write_unlock_bh(&clusterip_lock);
return 1;
}
......@@ -286,21 +287,21 @@ clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
{
int i;
READ_LOCK(&clusterip_lock);
read_lock_bh(&clusterip_lock);
if (config->num_local_nodes == 0) {
READ_UNLOCK(&clusterip_lock);
read_unlock_bh(&clusterip_lock);
return 0;
}
for (i = 0; i < config->num_local_nodes; i++) {
if (config->local_nodes[i] == hash) {
READ_UNLOCK(&clusterip_lock);
read_unlock_bh(&clusterip_lock);
return 1;
}
}
READ_UNLOCK(&clusterip_lock);
read_unlock_bh(&clusterip_lock);
return 0;
}
......@@ -578,7 +579,7 @@ static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
struct clusterip_config *c = pde->data;
unsigned int *nodeidx;
READ_LOCK(&clusterip_lock);
read_lock_bh(&clusterip_lock);
if (*pos >= c->num_local_nodes)
return NULL;
......@@ -608,7 +609,7 @@ static void clusterip_seq_stop(struct seq_file *s, void *v)
{
kfree(v);
READ_UNLOCK(&clusterip_lock);
read_unlock_bh(&clusterip_lock);
}
static int clusterip_seq_show(struct seq_file *s, void *v)
......
......@@ -33,7 +33,7 @@ MODULE_DESCRIPTION("iptables MASQUERADE target module");
#endif
/* Lock protects masq region inside conntrack */
static DECLARE_RWLOCK(masq_lock);
static DEFINE_RWLOCK(masq_lock);
/* FIXME: Multiple targets. --RR */
static int
......@@ -103,9 +103,9 @@ masquerade_target(struct sk_buff **pskb,
return NF_DROP;
}
WRITE_LOCK(&masq_lock);
write_lock_bh(&masq_lock);
ct->nat.masq_index = out->ifindex;
WRITE_UNLOCK(&masq_lock);
write_unlock_bh(&masq_lock);
/* Transfer from original range. */
newrange = ((struct ip_nat_range)
......@@ -122,9 +122,9 @@ device_cmp(struct ip_conntrack *i, void *ifindex)
{
int ret;
READ_LOCK(&masq_lock);
read_lock_bh(&masq_lock);
ret = (i->nat.masq_index == (int)(long)ifindex);
READ_UNLOCK(&masq_lock);
read_unlock_bh(&masq_lock);
return ret;
}
......
......@@ -104,10 +104,12 @@ static inline struct rtable *route_reverse(struct sk_buff *skb,
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
struct iphdr *iph = oldskb->nh.iph;
struct tcphdr _otcph, *oth, *tcph;
struct rtable *rt;
u_int16_t tmp_port;
u_int32_t tmp_addr;
unsigned int tcplen;
int needs_ack;
int hh_len;
......@@ -124,7 +126,16 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (oth->rst)
return;
/* FIXME: Check checksum --RR */
/* Check checksum */
tcplen = oldskb->len - iph->ihl * 4;
if (((hook != NF_IP_LOCAL_IN && oldskb->ip_summed != CHECKSUM_HW) ||
(hook == NF_IP_LOCAL_IN &&
oldskb->ip_summed != CHECKSUM_UNNECESSARY)) &&
csum_tcpudp_magic(iph->saddr, iph->daddr, tcplen, IPPROTO_TCP,
oldskb->ip_summed == CHECKSUM_HW ? oldskb->csum :
skb_checksum(oldskb, iph->ihl * 4, tcplen, 0)))
return;
if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
return;
......
......@@ -56,7 +56,6 @@
#include <linux/netfilter.h>
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_ULOG.h>
#include <linux/netfilter_ipv4/lockhelp.h>
#include <net/sock.h>
#include <linux/bitops.h>
......@@ -99,8 +98,8 @@ typedef struct {
static ulog_buff_t ulog_buffers[ULOG_MAXNLGROUPS]; /* array of buffers */
static struct sock *nflognl; /* our socket */
static DECLARE_LOCK(ulog_lock); /* spinlock */
static struct sock *nflognl; /* our socket */
static DEFINE_SPINLOCK(ulog_lock); /* spinlock */
/* send one ulog_buff_t to userspace */
static void ulog_send(unsigned int nlgroupnum)
......@@ -135,9 +134,9 @@ static void ulog_timer(unsigned long data)
/* lock to protect against somebody modifying our structure
* from ipt_ulog_target at the same time */
LOCK_BH(&ulog_lock);
spin_lock_bh(&ulog_lock);
ulog_send(data);
UNLOCK_BH(&ulog_lock);
spin_unlock_bh(&ulog_lock);
}
static struct sk_buff *ulog_alloc_skb(unsigned int size)
......@@ -193,7 +192,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
ub = &ulog_buffers[groupnum];
LOCK_BH(&ulog_lock);
spin_lock_bh(&ulog_lock);
if (!ub->skb) {
if (!(ub->skb = ulog_alloc_skb(size)))
......@@ -278,7 +277,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
ulog_send(groupnum);
}
UNLOCK_BH(&ulog_lock);
spin_unlock_bh(&ulog_lock);
return;
......@@ -288,7 +287,7 @@ static void ipt_ulog_packet(unsigned int hooknum,
alloc_failure:
PRINTR("ipt_ULOG: Error building netlink message\n");
UNLOCK_BH(&ulog_lock);
spin_unlock_bh(&ulog_lock);
}
static unsigned int ipt_ulog_target(struct sk_buff **pskb,
......
......@@ -37,7 +37,6 @@
#include <linux/netfilter_ipv4/ip_tables.h>
#include <linux/netfilter_ipv4/ipt_hashlimit.h>
#include <linux/netfilter_ipv4/lockhelp.h>
/* FIXME: this is just for IP_NF_ASSERT */
#include <linux/netfilter_ipv4/ip_conntrack.h>
......@@ -92,7 +91,7 @@ struct ipt_hashlimit_htable {
struct hlist_head hash[0]; /* hashtable itself */
};
static DECLARE_LOCK(hashlimit_lock); /* protects htables list */
static DEFINE_SPINLOCK(hashlimit_lock); /* protects htables list */
static DECLARE_MUTEX(hlimit_mutex); /* additional checkentry protection */
static HLIST_HEAD(hashlimit_htables);
static kmem_cache_t *hashlimit_cachep;
......@@ -233,9 +232,9 @@ static int htable_create(struct ipt_hashlimit_info *minfo)
hinfo->timer.function = htable_gc;
add_timer(&hinfo->timer);
LOCK_BH(&hashlimit_lock);
spin_lock_bh(&hashlimit_lock);
hlist_add_head(&hinfo->node, &hashlimit_htables);
UNLOCK_BH(&hashlimit_lock);
spin_unlock_bh(&hashlimit_lock);
return 0;
}
......@@ -301,15 +300,15 @@ static struct ipt_hashlimit_htable *htable_find_get(char *name)
struct ipt_hashlimit_htable *hinfo;
struct hlist_node *pos;
LOCK_BH(&hashlimit_lock);
spin_lock_bh(&hashlimit_lock);
hlist_for_each_entry(hinfo, pos, &hashlimit_htables, node) {
if (!strcmp(name, hinfo->pde->name)) {
atomic_inc(&hinfo->use);
UNLOCK_BH(&hashlimit_lock);
spin_unlock_bh(&hashlimit_lock);
return hinfo;
}
}
UNLOCK_BH(&hashlimit_lock);
spin_unlock_bh(&hashlimit_lock);
return NULL;
}
......@@ -317,9 +316,9 @@ static struct ipt_hashlimit_htable *htable_find_get(char *name)
static void htable_put(struct ipt_hashlimit_htable *hinfo)
{
if (atomic_dec_and_test(&hinfo->use)) {
LOCK_BH(&hashlimit_lock);
spin_lock_bh(&hashlimit_lock);
hlist_del(&hinfo->node);
UNLOCK_BH(&hashlimit_lock);
spin_unlock_bh(&hashlimit_lock);
htable_destroy(hinfo);
}
}
......
......@@ -53,7 +53,7 @@ match(const struct sk_buff *skb,
return ret;
}
READ_LOCK(&ip_conntrack_lock);
read_lock_bh(&ip_conntrack_lock);
if (!ct->master->helper) {
DEBUGP("ipt_helper: master ct %p has no helper\n",
exp->expectant);
......@@ -69,7 +69,7 @@ match(const struct sk_buff *skb,
ret ^= !strncmp(ct->master->helper->name, info->name,
strlen(ct->master->helper->name));
out_unlock:
READ_UNLOCK(&ip_conntrack_lock);
read_unlock_bh(&ip_conntrack_lock);
return ret;
}
......
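The ipt_helper hunk applies the same conversion to reader/writer locks: READ_LOCK/READ_UNLOCK on ip_conntrack_lock become the bottom-half-safe rwlock primitives. A sketch of the pairing, with the writer side assumed from the standard rwlock API rather than shown in this hunk:

read_lock_bh(&ip_conntrack_lock);
/* ... inspect ct->master->helper, which a writer may detach ... */
read_unlock_bh(&ip_conntrack_lock);

/* writers elsewhere in conntrack pair with */
write_lock_bh(&ip_conntrack_lock);
write_unlock_bh(&ip_conntrack_lock);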
......@@ -695,7 +695,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (onlink == 0) {
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
rt = NULL;
} else if (!(rt->rt6i_flags & RTF_EXPIRES)) {
rt->rt6i_expires = expires;
......@@ -1340,7 +1340,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev,
if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT))
rtmsg.rtmsg_flags |= RTF_NONEXTHOP;
ip6_route_add(&rtmsg, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL);
}
/* Create "default" multicast route to the interface */
......@@ -1357,7 +1357,7 @@ static void addrconf_add_mroute(struct net_device *dev)
rtmsg.rtmsg_ifindex = dev->ifindex;
rtmsg.rtmsg_flags = RTF_UP;
rtmsg.rtmsg_type = RTMSG_NEWROUTE;
ip6_route_add(&rtmsg, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL);
}
static void sit_route_add(struct net_device *dev)
......@@ -1374,7 +1374,7 @@ static void sit_route_add(struct net_device *dev)
rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP;
rtmsg.rtmsg_ifindex = dev->ifindex;
ip6_route_add(&rtmsg, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL);
}
static void addrconf_add_lroute(struct net_device *dev)
......@@ -1467,7 +1467,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len)
if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) {
if (rt->rt6i_flags&RTF_EXPIRES) {
if (valid_lft == 0) {
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
rt = NULL;
} else {
rt->rt6i_expires = rt_expires;
......@@ -3094,7 +3094,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
switch (event) {
case RTM_NEWADDR:
dst_hold(&ifp->rt->u.dst);
if (ip6_ins_rt(ifp->rt, NULL, NULL))
if (ip6_ins_rt(ifp->rt, NULL, NULL, NULL))
dst_release(&ifp->rt->u.dst);
if (ifp->idev->cnf.forwarding)
addrconf_join_anycast(ifp);
......@@ -3104,7 +3104,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
addrconf_leave_anycast(ifp);
addrconf_leave_solict(ifp->idev, &ifp->addr);
dst_hold(&ifp->rt->u.dst);
if (ip6_del_rt(ifp->rt, NULL, NULL))
if (ip6_del_rt(ifp->rt, NULL, NULL, NULL))
dst_free(&ifp->rt->u.dst);
else
dst_release(&ifp->rt->u.dst);
......
......@@ -337,7 +337,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr)
write_unlock_bh(&idev->lock);
dst_hold(&rt->u.dst);
if (ip6_ins_rt(rt, NULL, NULL))
if (ip6_ins_rt(rt, NULL, NULL, NULL))
dst_release(&rt->u.dst);
addrconf_join_solict(dev, &aca->aca_addr);
......@@ -380,7 +380,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, struct in6_addr *addr)
addrconf_leave_solict(idev, &aca->aca_addr);
dst_hold(&aca->aca_rt->u.dst);
if (ip6_del_rt(aca->aca_rt, NULL, NULL))
if (ip6_del_rt(aca->aca_rt, NULL, NULL, NULL))
dst_free(&aca->aca_rt->u.dst);
else
dst_release(&aca->aca_rt->u.dst);
......
......@@ -394,7 +394,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr,
*/
static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
struct nlmsghdr *nlh)
struct nlmsghdr *nlh, struct netlink_skb_parms *req)
{
struct rt6_info *iter = NULL;
struct rt6_info **ins;
......@@ -449,7 +449,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt,
*ins = rt;
rt->rt6i_node = fn;
atomic_inc(&rt->rt6i_ref);
inet6_rt_notify(RTM_NEWROUTE, rt, nlh);
inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req);
rt6_stats.fib_rt_entries++;
if ((fn->fn_flags & RTN_RTINFO) == 0) {
......@@ -479,7 +479,8 @@ void fib6_force_start_gc(void)
* with source addr info in sub-trees
*/
int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
int fib6_add(struct fib6_node *root, struct rt6_info *rt,
struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
struct fib6_node *fn;
int err = -ENOMEM;
......@@ -552,7 +553,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh,
}
#endif
err = fib6_add_rt2node(fn, rt, nlh);
err = fib6_add_rt2node(fn, rt, nlh, req);
if (err == 0) {
fib6_start_gc(rt);
......@@ -859,7 +860,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn)
}
static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
struct nlmsghdr *nlh, void *_rtattr)
struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
struct fib6_walker_t *w;
struct rt6_info *rt = *rtp;
......@@ -915,11 +916,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp,
if (atomic_read(&rt->rt6i_ref) != 1) BUG();
}
inet6_rt_notify(RTM_DELROUTE, rt, nlh);
inet6_rt_notify(RTM_DELROUTE, rt, nlh, req);
rt6_release(rt);
}
int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
struct fib6_node *fn = rt->rt6i_node;
struct rt6_info **rtp;
......@@ -944,7 +945,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) {
if (*rtp == rt) {
fib6_del_route(fn, rtp, nlh, _rtattr);
fib6_del_route(fn, rtp, nlh, _rtattr, req);
return 0;
}
}
......@@ -1073,7 +1074,7 @@ static int fib6_clean_node(struct fib6_walker_t *w)
res = c->func(rt, c->arg);
if (res < 0) {
w->leaf = rt;
res = fib6_del(rt, NULL, NULL);
res = fib6_del(rt, NULL, NULL, NULL);
if (res) {
#if RT6_DEBUG >= 2
printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res);
......
......@@ -484,9 +484,6 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
to->nf_bridge = from->nf_bridge;
nf_bridge_get(to->nf_bridge);
#endif
#ifdef CONFIG_NETFILTER_DEBUG
to->nf_debug = from->nf_debug;
#endif
#endif
}
......
......@@ -423,11 +423,12 @@ int ipv6_setsockopt(struct sock *sk, int level, int optname,
psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
retv = ipv6_sock_mc_join(sk, greqs.gsr_interface,
&psin6->sin6_addr);
if (retv)
/* prior join w/ different source is ok */
if (retv && retv != -EADDRINUSE)
break;
omode = MCAST_INCLUDE;
add = 1;
} else /*IP_DROP_SOURCE_MEMBERSHIP */ {
} else /* MCAST_LEAVE_SOURCE_GROUP */ {
omode = MCAST_INCLUDE;
add = 0;
}
......
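This setsockopt change makes MCAST_JOIN_SOURCE_GROUP tolerant of the implicit group join already existing: the -EADDRINUSE returned by ipv6_sock_mc_join for a repeated join no longer aborts the call, so multiple sources can be added to one membership. A hypothetical userspace sequence this enables (fd and the addresses filled into gsr are illustrative):

struct group_source_req gsr;
/* fill gsr.gsr_group with the multicast group, gsr.gsr_source with source A */
setsockopt(fd, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP, &gsr, sizeof(gsr));
/* same group, source B: the implicit group join already exists, which
   used to fail the whole call; now only a real error aborts */
setsockopt(fd, IPPROTO_IPV6, MCAST_JOIN_SOURCE_GROUP, &gsr, sizeof(gsr));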
......@@ -188,6 +188,16 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, struct in6_addr *addr)
if (!ipv6_addr_is_multicast(addr))
return -EINVAL;
read_lock_bh(&ipv6_sk_mc_lock);
for (mc_lst=np->ipv6_mc_list; mc_lst; mc_lst=mc_lst->next) {
if ((ifindex == 0 || mc_lst->ifindex == ifindex) &&
ipv6_addr_equal(&mc_lst->addr, addr)) {
read_unlock_bh(&ipv6_sk_mc_lock);
return -EADDRINUSE;
}
}
read_unlock_bh(&ipv6_sk_mc_lock);
mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL);
if (mc_lst == NULL)
......@@ -349,6 +359,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
struct ipv6_pinfo *inet6 = inet6_sk(sk);
struct ip6_sf_socklist *psl;
int i, j, rv;
int leavegroup = 0;
int err;
if (pgsr->gsr_group.ss_family != AF_INET6 ||
......@@ -368,6 +379,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
err = -EADDRNOTAVAIL;
read_lock_bh(&ipv6_sk_mc_lock);
for (pmc=inet6->ipv6_mc_list; pmc; pmc=pmc->next) {
if (pgsr->gsr_interface && pmc->ifindex != pgsr->gsr_interface)
continue;
......@@ -401,6 +413,12 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
if (rv) /* source not found */
goto done;
/* special case - (INCLUDE, empty) == LEAVE_GROUP */
if (psl->sl_count == 1 && omode == MCAST_INCLUDE) {
leavegroup = 1;
goto done;
}
/* update the interface filter */
ip6_mc_del_src(idev, group, omode, 1, source, 1);
......@@ -453,9 +471,12 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
/* update the interface list */
ip6_mc_add_src(idev, group, omode, 1, source, 1);
done:
read_unlock_bh(&ipv6_sk_mc_lock);
read_unlock_bh(&idev->lock);
in6_dev_put(idev);
dev_put(dev);
if (leavegroup)
return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);
return err;
}
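The leavegroup flag exists because the (INCLUDE, empty) == LEAVE_GROUP special case cannot drop the membership on the spot: ipv6_sock_mc_drop presumably takes ipv6_sk_mc_lock for writing while this path still holds it for reading, so the decision is recorded and acted on only after every lock and reference is released. Condensed, the added control flow is:

if (psl->sl_count == 1 && omode == MCAST_INCLUDE) {
        leavegroup = 1;    /* removing the last INCLUDE source == leave */
        goto done;         /* unwind locks and refcounts first */
}
/* ... */
done:
        /* read_unlock_bh(), in6_dev_put(), dev_put() ... */
        if (leavegroup)
                return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group);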
......@@ -1280,15 +1301,6 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
return NULL;
skb_reserve(skb, LL_RESERVED_SPACE(dev));
if (dev->hard_header) {
unsigned char ha[MAX_ADDR_LEN];
ndisc_mc_map(&mld2_all_mcr, ha, dev, 1);
if (dev->hard_header(skb, dev, ETH_P_IPV6,ha,NULL,size) < 0) {
kfree_skb(skb);
return NULL;
}
}
if (ipv6_get_lladdr(dev, &addr_buf)) {
/* <draft-ietf-magma-mld-source-05.txt>:
......@@ -1312,6 +1324,30 @@ static struct sk_buff *mld_newpack(struct net_device *dev, int size)
return skb;
}
static inline int mld_dev_queue_xmit2(struct sk_buff *skb)
{
struct net_device *dev = skb->dev;
if (dev->hard_header) {
unsigned char ha[MAX_ADDR_LEN];
int err;
ndisc_mc_map(&skb->nh.ipv6h->daddr, ha, dev, 1);
err = dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, skb->len);
if (err < 0) {
kfree_skb(skb);
return err;
}
}
return dev_queue_xmit(skb);
}
static inline int mld_dev_queue_xmit(struct sk_buff *skb)
{
return NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, skb, NULL, skb->dev,
mld_dev_queue_xmit2);
}
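With these two helpers, MLD packets traverse NF_IP6_POST_ROUTING before the link-layer header is attached, instead of having dev->hard_header() resolved at skb allocation time; the corresponding header-building blocks are deleted from mld_newpack() above and igmp6_send() below. The resulting call chain, as a sketch:

/*
 * mld_sendpack() / igmp6_send()
 *   -> NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, ..., mld_dev_queue_xmit)
 *        -> NF_HOOK(PF_INET6, NF_IP6_POST_ROUTING, ..., mld_dev_queue_xmit2)
 *             -> ndisc_mc_map() + dev->hard_header()   (link header added last)
 *             -> dev_queue_xmit()
 */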
static void mld_sendpack(struct sk_buff *skb)
{
struct ipv6hdr *pip6 = skb->nh.ipv6h;
......@@ -1329,7 +1365,7 @@ static void mld_sendpack(struct sk_buff *skb)
pmr->csum = csum_ipv6_magic(&pip6->saddr, &pip6->daddr, mldlen,
IPPROTO_ICMPV6, csum_partial(skb->h.raw, mldlen, 0));
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
dev_queue_xmit);
mld_dev_queue_xmit);
if (!err) {
ICMP6_INC_STATS(idev,ICMP6_MIB_OUTMSGS);
IP6_INC_STATS(IPSTATS_MIB_OUTMCASTPKTS);
......@@ -1635,12 +1671,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
}
skb_reserve(skb, LL_RESERVED_SPACE(dev));
if (dev->hard_header) {
unsigned char ha[MAX_ADDR_LEN];
ndisc_mc_map(snd_addr, ha, dev, 1);
if (dev->hard_header(skb, dev, ETH_P_IPV6, ha, NULL, full_len) < 0)
goto out;
}
if (ipv6_get_lladdr(dev, &addr_buf)) {
/* <draft-ietf-magma-mld-source-05.txt>:
......@@ -1668,7 +1698,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
idev = in6_dev_get(skb->dev);
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dev,
dev_queue_xmit);
mld_dev_queue_xmit);
if (!err) {
if (type == ICMPV6_MGM_REDUCTION)
ICMP6_INC_STATS(idev, ICMP6_MIB_OUTGROUPMEMBREDUCTIONS);
......@@ -1682,10 +1712,6 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type)
if (likely(idev != NULL))
in6_dev_put(idev);
return;
out:
IP6_INC_STATS(IPSTATS_MIB_OUTDISCARDS);
kfree_skb(skb);
}
static int ip6_mc_del1_src(struct ifmcaddr6 *pmc, int sfmode,
......
......@@ -955,7 +955,7 @@ static void ndisc_recv_na(struct sk_buff *skb)
struct rt6_info *rt;
rt = rt6_get_dflt_router(saddr, dev);
if (rt)
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
}
out:
......@@ -1096,7 +1096,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
if (rt && lifetime == 0) {
neigh_clone(neigh);
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
rt = NULL;
}
......
......@@ -71,7 +71,6 @@ static DECLARE_MUTEX(ip6t_mutex);
/* Must have mutex */
#define ASSERT_READ_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
#define ASSERT_WRITE_LOCK(x) IP_NF_ASSERT(down_trylock(&ip6t_mutex) != 0)
#include <linux/netfilter_ipv4/lockhelp.h>
#include <linux/netfilter_ipv4/listhelp.h>
#if 0
......
......@@ -366,8 +366,6 @@ ip6t_log_packet(unsigned int hooknum,
const char *level_string,
const char *prefix)
{
struct ipv6hdr *ipv6h = skb->nh.ipv6h;
spin_lock_bh(&log_lock);
printk(level_string);
printk("%sIN=%s OUT=%s ",
......@@ -377,39 +375,25 @@ ip6t_log_packet(unsigned int hooknum,
if (in && !out) {
/* MAC logging for input chain only. */
printk("MAC=");
if (skb->dev && skb->dev->hard_header_len && skb->mac.raw != (void*)ipv6h) {
if (skb->dev->type != ARPHRD_SIT){
int i;
unsigned char *p = skb->mac.raw;
for (i = 0; i < skb->dev->hard_header_len; i++,p++)
printk("%02x%c", *p,
i==skb->dev->hard_header_len - 1
? ' ':':');
} else {
int i;
unsigned char *p = skb->mac.raw;
if ( p - (ETH_ALEN*2+2) > skb->head ){
p -= (ETH_ALEN+2);
for (i = 0; i < (ETH_ALEN); i++,p++)
printk("%02x%s", *p,
i == ETH_ALEN-1 ? "->" : ":");
p -= (ETH_ALEN*2);
for (i = 0; i < (ETH_ALEN); i++,p++)
printk("%02x%c", *p,
i == ETH_ALEN-1 ? ' ' : ':');
}
if ((skb->dev->addr_len == 4) &&
skb->dev->hard_header_len > 20){
printk("TUNNEL=");
p = skb->mac.raw + 12;
for (i = 0; i < 4; i++,p++)
printk("%3d%s", *p,
i == 3 ? "->" : ".");
for (i = 0; i < 4; i++,p++)
printk("%3d%c", *p,
i == 3 ? ' ' : '.');
}
if (skb->dev && skb->dev->hard_header_len &&
skb->mac.raw != skb->nh.raw) {
unsigned char *p = skb->mac.raw;
int i;
if (skb->dev->type == ARPHRD_SIT &&
(p -= ETH_HLEN) < skb->head)
p = NULL;
if (p != NULL)
for (i = 0; i < skb->dev->hard_header_len; i++)
printk("%02x", p[i]);
printk(" ");
if (skb->dev->type == ARPHRD_SIT) {
struct iphdr *iph = (struct iphdr *)skb->mac.raw;
printk("TUNNEL=%u.%u.%u.%u->%u.%u.%u.%u ",
NIPQUAD(iph->saddr),
NIPQUAD(iph->daddr));
}
} else
printk(" ");
......
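The rewritten MAC-logging branch dumps the entire hard header as bare hex digits (no colon separators) and, on SIT devices, decodes the outer IPv4 header via NIPQUAD instead of hand-walking raw bytes. Illustrative shapes of the resulting log fragments (all values invented):

/* Ethernet, 14-byte header printed as one hex run: */
MAC=00112233445566778899aabb86dd
/* IPv6-in-IPv4 (SIT) tunnel, outer addresses decoded: */
TUNNEL=192.0.2.1->192.0.2.2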
......@@ -129,13 +129,15 @@ static struct nf_hook_ops ip6t_ops[] = {
.hook = ip6t_hook,
.pf = PF_INET6,
.hooknum = NF_IP6_PRE_ROUTING,
.priority = NF_IP6_PRI_FIRST
.priority = NF_IP6_PRI_FIRST,
.owner = THIS_MODULE,
},
{
.hook = ip6t_hook,
.pf = PF_INET6,
.hooknum = NF_IP6_LOCAL_OUT,
.priority = NF_IP6_PRI_FIRST
.priority = NF_IP6_PRI_FIRST,
.owner = THIS_MODULE,
},
};
......
......@@ -384,12 +384,13 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr,
be destroyed.
*/
int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req)
{
int err;
write_lock_bh(&rt6_lock);
err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr);
err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req);
write_unlock_bh(&rt6_lock);
return err;
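Every hunk in this file threads the new req argument down the same spine so the eventual netlink notification can identify the requester; kernel-internal callers with no originating socket pass NULL. The chain as wired up by this commit:

/*
 * inet6_rtm_newroute(skb, nlh, arg)
 *   -> ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb))
 *        -> ip6_ins_rt(rt, nlh, _rtattr, req)
 *             -> fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req)
 *                  -> fib6_add_rt2node(fn, rt, nlh, req)
 *                       -> inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req)
 */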
......@@ -400,7 +401,7 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
*/
static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
struct in6_addr *saddr)
struct in6_addr *saddr, struct netlink_skb_parms *req)
{
int err;
struct rt6_info *rt;
......@@ -432,7 +433,7 @@ static struct rt6_info *rt6_cow(struct rt6_info *ort, struct in6_addr *daddr,
dst_hold(&rt->u.dst);
err = ip6_ins_rt(rt, NULL, NULL);
err = ip6_ins_rt(rt, NULL, NULL, req);
if (err == 0)
return rt;
......@@ -491,7 +492,8 @@ void ip6_route_input(struct sk_buff *skb)
read_unlock_bh(&rt6_lock);
nrt = rt6_cow(rt, &skb->nh.ipv6h->daddr,
&skb->nh.ipv6h->saddr);
&skb->nh.ipv6h->saddr,
&NETLINK_CB(skb));
dst_release(&rt->u.dst);
rt = nrt;
......@@ -551,7 +553,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl)
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src);
nrt = rt6_cow(rt, &fl->fl6_dst, &fl->fl6_src, NULL);
dst_release(&rt->u.dst);
rt = nrt;
......@@ -598,7 +600,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst)
if (rt) {
if (rt->rt6i_flags & RTF_CACHE)
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
else
dst_release(dst);
}
......@@ -787,7 +789,8 @@ int ipv6_get_hoplimit(struct net_device *dev)
*
*/
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh,
void *_rtattr, struct netlink_skb_parms *req)
{
int err;
struct rtmsg *r;
......@@ -974,7 +977,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst));
rt->u.dst.dev = dev;
rt->rt6i_idev = idev;
return ip6_ins_rt(rt, nlh, _rtattr);
return ip6_ins_rt(rt, nlh, _rtattr, req);
out:
if (dev)
......@@ -986,7 +989,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
return err;
}
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
int err;
......@@ -994,7 +997,7 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
rt6_reset_dflt_pointer(NULL);
err = fib6_del(rt, nlh, _rtattr);
err = fib6_del(rt, nlh, _rtattr, req);
dst_release(&rt->u.dst);
write_unlock_bh(&rt6_lock);
......@@ -1002,7 +1005,7 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr)
return err;
}
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr)
static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req)
{
struct fib6_node *fn;
struct rt6_info *rt;
......@@ -1029,7 +1032,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r
dst_hold(&rt->u.dst);
read_unlock_bh(&rt6_lock);
return ip6_del_rt(rt, nlh, _rtattr);
return ip6_del_rt(rt, nlh, _rtattr, req);
}
}
read_unlock_bh(&rt6_lock);
......@@ -1136,11 +1139,11 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr,
nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev);
nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst));
if (ip6_ins_rt(nrt, NULL, NULL))
if (ip6_ins_rt(nrt, NULL, NULL, NULL))
goto out;
if (rt->rt6i_flags&RTF_CACHE) {
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
return;
}
......@@ -1204,7 +1207,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
2. It is gatewayed route or NONEXTHOP route. Action: clone it.
*/
if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) {
nrt = rt6_cow(rt, daddr, saddr);
nrt = rt6_cow(rt, daddr, saddr, NULL);
if (!nrt->u.dst.error) {
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
......@@ -1232,7 +1235,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr,
nrt->u.dst.metrics[RTAX_MTU-1] = pmtu;
if (allfrag)
nrt->u.dst.metrics[RTAX_FEATURES-1] |= RTAX_FEATURE_ALLFRAG;
ip6_ins_rt(nrt, NULL, NULL);
ip6_ins_rt(nrt, NULL, NULL, NULL);
}
out:
......@@ -1305,7 +1308,7 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr,
rtmsg.rtmsg_ifindex = dev->ifindex;
ip6_route_add(&rtmsg, NULL, NULL);
ip6_route_add(&rtmsg, NULL, NULL, NULL);
return rt6_get_dflt_router(gwaddr, dev);
}
......@@ -1323,7 +1326,7 @@ void rt6_purge_dflt_routers(void)
read_unlock_bh(&rt6_lock);
ip6_del_rt(rt, NULL, NULL);
ip6_del_rt(rt, NULL, NULL, NULL);
goto restart;
}
......@@ -1349,10 +1352,10 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg)
rtnl_lock();
switch (cmd) {
case SIOCADDRT:
err = ip6_route_add(&rtmsg, NULL, NULL);
err = ip6_route_add(&rtmsg, NULL, NULL, NULL);
break;
case SIOCDELRT:
err = ip6_route_del(&rtmsg, NULL, NULL);
err = ip6_route_del(&rtmsg, NULL, NULL, NULL);
break;
default:
err = -EINVAL;
......@@ -1546,7 +1549,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_del(&rtmsg, nlh, arg);
return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}
int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
......@@ -1556,7 +1559,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg)
if (inet6_rtm_to_rtmsg(r, arg, &rtmsg))
return -EINVAL;
return ip6_route_add(&rtmsg, nlh, arg);
return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb));
}
struct rt6_rtnl_dump_arg
......@@ -1566,12 +1569,9 @@ struct rt6_rtnl_dump_arg
};
static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
struct in6_addr *dst,
struct in6_addr *src,
int iif,
int type, u32 pid, u32 seq,
struct nlmsghdr *in_nlh, int prefix,
unsigned int flags)
struct in6_addr *dst, struct in6_addr *src,
int iif, int type, u32 pid, u32 seq,
int prefix, unsigned int flags)
{
struct rtmsg *rtm;
struct nlmsghdr *nlh;
......@@ -1585,10 +1585,6 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt,
}
}
if (!pid && in_nlh) {
pid = in_nlh->nlmsg_pid;
}
nlh = NLMSG_NEW(skb, pid, seq, type, sizeof(*rtm), flags);
rtm = NLMSG_DATA(nlh);
rtm->rtm_family = AF_INET6;
......@@ -1675,7 +1671,7 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg)
return rt6_fill_node(arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE,
NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq,
NULL, prefix, NLM_F_MULTI);
prefix, NLM_F_MULTI);
}
static int fib6_dump_node(struct fib6_walker_t *w)
......@@ -1823,7 +1819,7 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
&fl.fl6_dst, &fl.fl6_src,
iif,
RTM_NEWROUTE, NETLINK_CB(in_skb).pid,
nlh->nlmsg_seq, nlh, 0, 0);
nlh->nlmsg_seq, 0, 0);
if (err < 0) {
err = -EMSGSIZE;
goto out_free;
......@@ -1839,17 +1835,25 @@ int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg)
goto out;
}
void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh)
void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh,
struct netlink_skb_parms *req)
{
struct sk_buff *skb;
int size = NLMSG_SPACE(sizeof(struct rtmsg)+256);
u32 pid = current->pid;
u32 seq = 0;
if (req)
pid = req->pid;
if (nlh)
seq = nlh->nlmsg_seq;
skb = alloc_skb(size, gfp_any());
if (!skb) {
netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, ENOBUFS);
return;
}
if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, 0, 0, nlh, 0, 0) < 0) {
if (rt6_fill_node(skb, rt, NULL, NULL, 0, event, pid, seq, 0, 0) < 0) {
kfree_skb(skb);
netlink_set_err(rtnl, 0, RTMGRP_IPV6_ROUTE, EINVAL);
return;
......
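The practical effect of carrying pid/seq into inet6_rt_notify: a userspace routing daemon listening on RTMGRP_IPV6_ROUTE can now tell echoes of its own requests from changes made elsewhere. A hypothetical listener-side check, where my_pid and my_seq are the daemon's own bookkeeping:

struct nlmsghdr *h;   /* next message read from the RTMGRP_IPV6_ROUTE socket */
if (h->nlmsg_pid == my_pid && h->nlmsg_seq == my_seq)
        ;  /* notification echoes a request this daemon issued */
else
        ;  /* route changed by another process or by the kernel itself */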