提交 d002ec48 编写于 作者: L Linus Torvalds

Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

* master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6:
  [XFRM]: BEET mode
  [TCP]: Kill warning in tcp_clean_rtx_queue().
  [NET_SCHED]: Remove old estimator implementation
  [ATM]: [zatm] always *pcr in alloc_shaper()
  [ATM]: [ambassador] Change the return type to reflect reality
  [ATM]: kmalloc to kzalloc patches for drivers/atm
  [TIPC]: fix printk warning
  [XFRM]: Clearing xfrm_policy_count[] to zero during flush is incorrect.
  [XFRM] STATE: Use destination address for src hash.
  [NEIGH]: always use hash_mask under tbl lock
  [UDP]: Fix MSG_PROBE crash
  [UDP6]: Fix flowi clobbering
  [NET_SCHED]: Revert "HTB: fix incorrect use of RB_EMPTY_NODE"
  [NETFILTER]: ebt_mark: add or/and/xor action support to mark target
  [NETFILTER]: ipt_REJECT: remove largely duplicate route_reverse function
  [NETFILTER]: Honour source routing for LVS-NAT
  [NETFILTER]: add type parameter to ip_route_me_harder
  [NETFILTER]: Kconfig: fix xt_physdev dependencies
......@@ -113,15 +113,13 @@ static int __init adummy_init(void)
printk(KERN_ERR "adummy: version %s\n", DRV_VERSION);
adummy_dev = (struct adummy_dev *) kmalloc(sizeof(struct adummy_dev),
adummy_dev = kzalloc(sizeof(struct adummy_dev),
GFP_KERNEL);
if (!adummy_dev) {
printk(KERN_ERR DEV_LABEL ": kmalloc() failed\n");
printk(KERN_ERR DEV_LABEL ": kzalloc() failed\n");
err = -ENOMEM;
goto out;
}
memset(adummy_dev, 0, sizeof(struct adummy_dev));
atm_dev = atm_dev_register(DEV_LABEL, &adummy_ops, -1, NULL);
if (!atm_dev) {
printk(KERN_ERR DEV_LABEL ": atm_dev_register() failed\n");
......
......@@ -915,7 +915,7 @@ static irqreturn_t interrupt_handler(int irq, void *dev_id,
/********** make rate (not quite as much fun as Horizon) **********/
static unsigned int make_rate (unsigned int rate, rounding r,
static int make_rate (unsigned int rate, rounding r,
u16 * bits, unsigned int * actual) {
unsigned char exp = -1; // hush gcc
unsigned int man = -1; // hush gcc
......
......@@ -1784,7 +1784,7 @@ static int __devinit fs_init (struct fs_dev *dev)
write_fs (dev, RAM, (1 << (28 - FS155_VPI_BITS - FS155_VCI_BITS)) - 1);
dev->nchannels = FS155_NR_CHANNELS;
}
dev->atm_vccs = kmalloc (dev->nchannels * sizeof (struct atm_vcc *),
dev->atm_vccs = kcalloc (dev->nchannels, sizeof (struct atm_vcc *),
GFP_KERNEL);
fs_dprintk (FS_DEBUG_ALLOC, "Alloc atmvccs: %p(%Zd)\n",
dev->atm_vccs, dev->nchannels * sizeof (struct atm_vcc *));
......@@ -1794,9 +1794,8 @@ static int __devinit fs_init (struct fs_dev *dev)
/* XXX Clean up..... */
return 1;
}
memset (dev->atm_vccs, 0, dev->nchannels * sizeof (struct atm_vcc *));
dev->tx_inuse = kmalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
dev->tx_inuse = kzalloc (dev->nchannels / 8 /* bits/byte */ , GFP_KERNEL);
fs_dprintk (FS_DEBUG_ALLOC, "Alloc tx_inuse: %p(%d)\n",
dev->atm_vccs, dev->nchannels / 8);
......@@ -1805,8 +1804,6 @@ static int __devinit fs_init (struct fs_dev *dev)
/* XXX Clean up..... */
return 1;
}
memset (dev->tx_inuse, 0, dev->nchannels / 8);
/* -- RAS1 : FS155 and 50 differ. Default (0) should be OK for both */
/* -- RAS2 : FS50 only: Default is OK. */
......@@ -1893,14 +1890,11 @@ static int __devinit firestream_init_one (struct pci_dev *pci_dev,
if (pci_enable_device(pci_dev))
goto err_out;
fs_dev = kmalloc (sizeof (struct fs_dev), GFP_KERNEL);
fs_dev = kzalloc (sizeof (struct fs_dev), GFP_KERNEL);
fs_dprintk (FS_DEBUG_ALLOC, "Alloc fs-dev: %p(%Zd)\n",
fs_dev, sizeof (struct fs_dev));
if (!fs_dev)
goto err_out;
memset (fs_dev, 0, sizeof (struct fs_dev));
atm_dev = atm_dev_register("fs", &ops, -1, NULL);
if (!atm_dev)
goto err_out_free_fs_dev;
......
......@@ -383,14 +383,12 @@ he_init_one(struct pci_dev *pci_dev, const struct pci_device_id *pci_ent)
}
pci_set_drvdata(pci_dev, atm_dev);
he_dev = (struct he_dev *) kmalloc(sizeof(struct he_dev),
he_dev = kzalloc(sizeof(struct he_dev),
GFP_KERNEL);
if (!he_dev) {
err = -ENOMEM;
goto init_one_failure;
}
memset(he_dev, 0, sizeof(struct he_dev));
he_dev->pci_dev = pci_dev;
he_dev->atm_dev = atm_dev;
he_dev->atm_dev->dev_data = he_dev;
......
......@@ -2719,7 +2719,7 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
goto out_disable;
}
dev = kmalloc(sizeof(hrz_dev), GFP_KERNEL);
dev = kzalloc(sizeof(hrz_dev), GFP_KERNEL);
if (!dev) {
// perhaps we should be nice: deregister all adapters and abort?
PRINTD(DBG_ERR, "out of memory");
......@@ -2727,8 +2727,6 @@ static int __devinit hrz_probe(struct pci_dev *pci_dev, const struct pci_device_
goto out_release;
}
memset(dev, 0, sizeof(hrz_dev));
pci_set_drvdata(pci_dev, dev);
// grab IRQ and install handler - move this someplace more sensible
......
......@@ -642,11 +642,9 @@ alloc_scq(struct idt77252_dev *card, int class)
{
struct scq_info *scq;
scq = (struct scq_info *) kmalloc(sizeof(struct scq_info), GFP_KERNEL);
scq = kzalloc(sizeof(struct scq_info), GFP_KERNEL);
if (!scq)
return NULL;
memset(scq, 0, sizeof(struct scq_info));
scq->base = pci_alloc_consistent(card->pcidev, SCQ_SIZE,
&scq->paddr);
if (scq->base == NULL) {
......@@ -2142,11 +2140,9 @@ idt77252_init_est(struct vc_map *vc, int pcr)
{
struct rate_estimator *est;
est = kmalloc(sizeof(struct rate_estimator), GFP_KERNEL);
est = kzalloc(sizeof(struct rate_estimator), GFP_KERNEL);
if (!est)
return NULL;
memset(est, 0, sizeof(*est));
est->maxcps = pcr < 0 ? -pcr : pcr;
est->cps = est->maxcps;
est->avcps = est->cps << 5;
......@@ -2451,14 +2447,12 @@ idt77252_open(struct atm_vcc *vcc)
index = VPCI2VC(card, vpi, vci);
if (!card->vcs[index]) {
card->vcs[index] = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
card->vcs[index] = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
if (!card->vcs[index]) {
printk("%s: can't alloc vc in open()\n", card->name);
up(&card->mutex);
return -ENOMEM;
}
memset(card->vcs[index], 0, sizeof(struct vc_map));
card->vcs[index]->card = card;
card->vcs[index]->index = index;
......@@ -2926,13 +2920,11 @@ open_card_oam(struct idt77252_dev *card)
for (vci = 3; vci < 5; vci++) {
index = VPCI2VC(card, vpi, vci);
vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
if (!vc) {
printk("%s: can't alloc vc\n", card->name);
return -ENOMEM;
}
memset(vc, 0, sizeof(struct vc_map));
vc->index = index;
card->vcs[index] = vc;
......@@ -2995,12 +2987,11 @@ open_card_ubr0(struct idt77252_dev *card)
{
struct vc_map *vc;
vc = kmalloc(sizeof(struct vc_map), GFP_KERNEL);
vc = kzalloc(sizeof(struct vc_map), GFP_KERNEL);
if (!vc) {
printk("%s: can't alloc vc\n", card->name);
return -ENOMEM;
}
memset(vc, 0, sizeof(struct vc_map));
card->vcs[0] = vc;
vc->class = SCHED_UBR0;
......@@ -3695,14 +3686,12 @@ idt77252_init_one(struct pci_dev *pcidev, const struct pci_device_id *id)
goto err_out_disable_pdev;
}
card = kmalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
card = kzalloc(sizeof(struct idt77252_dev), GFP_KERNEL);
if (!card) {
printk("idt77252-%d: can't allocate private data\n", index);
err = -ENOMEM;
goto err_out_disable_pdev;
}
memset(card, 0, sizeof(struct idt77252_dev));
card->revision = revision;
card->index = index;
card->pcidev = pcidev;
......
......@@ -1482,16 +1482,10 @@ static inline void vcc_table_deallocate(const struct lanai_dev *lanai)
static inline struct lanai_vcc *new_lanai_vcc(void)
{
struct lanai_vcc *lvcc;
lvcc = (struct lanai_vcc *) kmalloc(sizeof(*lvcc), GFP_KERNEL);
lvcc = kzalloc(sizeof(*lvcc), GFP_KERNEL);
if (likely(lvcc != NULL)) {
lvcc->vbase = NULL;
lvcc->rx.atmvcc = lvcc->tx.atmvcc = NULL;
lvcc->nref = 0;
memset(&lvcc->stats, 0, sizeof lvcc->stats);
lvcc->rx.buf.start = lvcc->tx.buf.start = NULL;
skb_queue_head_init(&lvcc->tx.backlog);
#ifdef DEBUG
lvcc->tx.unqueue = NULL;
lvcc->vci = -1;
#endif
}
......
......@@ -603,9 +603,8 @@ static int start_rx(struct atm_dev *dev)
DPRINTK("start_rx\n");
zatm_dev = ZATM_DEV(dev);
size = sizeof(struct atm_vcc *)*zatm_dev->chans;
zatm_dev->rx_map = (struct atm_vcc **) kmalloc(size,GFP_KERNEL);
zatm_dev->rx_map = kzalloc(size,GFP_KERNEL);
if (!zatm_dev->rx_map) return -ENOMEM;
memset(zatm_dev->rx_map,0,size);
/* set VPI/VCI split (use all VCIs and give what's left to VPIs) */
zpokel(zatm_dev,(1 << dev->ci_range.vci_bits)-1,uPD98401_VRR);
/* prepare free buffer pools */
......@@ -801,6 +800,7 @@ static int alloc_shaper(struct atm_dev *dev,int *pcr,int min,int max,int ubr)
i = m = 1;
zatm_dev->ubr_ref_cnt++;
zatm_dev->ubr = shaper;
*pcr = 0;
}
else {
if (min) {
......@@ -951,9 +951,8 @@ static int open_tx_first(struct atm_vcc *vcc)
skb_queue_head_init(&zatm_vcc->tx_queue);
init_waitqueue_head(&zatm_vcc->tx_wait);
/* initialize ring */
zatm_vcc->ring = kmalloc(RING_SIZE,GFP_KERNEL);
zatm_vcc->ring = kzalloc(RING_SIZE,GFP_KERNEL);
if (!zatm_vcc->ring) return -ENOMEM;
memset(zatm_vcc->ring,0,RING_SIZE);
loop = zatm_vcc->ring+RING_ENTRIES*RING_WORDS;
loop[0] = uPD98401_TXPD_V;
loop[1] = loop[2] = 0;
......
......@@ -40,6 +40,7 @@ enum {
IPPROTO_ESP = 50, /* Encapsulation Security Payload protocol */
IPPROTO_AH = 51, /* Authentication Header protocol */
IPPROTO_BEETPH = 94, /* IP option pseudo header for BEET */
IPPROTO_PIM = 103, /* Protocol Independent Multicast */
IPPROTO_COMP = 108, /* Compression Header protocol */
......
......@@ -80,6 +80,8 @@
#define IPOPT_TS_TSANDADDR 1 /* timestamps and addresses */
#define IPOPT_TS_PRESPEC 3 /* specified modules only */
#define IPV4_BEET_PHMAXLEN 8
struct iphdr {
#if defined(__LITTLE_ENDIAN_BITFIELD)
__u8 ihl:4,
......@@ -123,4 +125,11 @@ struct ip_comp_hdr {
__be16 cpi;
};
struct ip_beet_phdr {
__u8 nexthdr;
__u8 hdrlen;
__u8 padlen;
__u8 reserved;
};
#endif /* _LINUX_IP_H */
......@@ -12,7 +12,8 @@
enum {
IPSEC_MODE_ANY = 0, /* We do not support this for SA */
IPSEC_MODE_TRANSPORT = 1,
IPSEC_MODE_TUNNEL = 2
IPSEC_MODE_TUNNEL = 2,
IPSEC_MODE_BEET = 3
};
enum {
......
#ifndef __LINUX_BRIDGE_EBT_MARK_T_H
#define __LINUX_BRIDGE_EBT_MARK_T_H
/* The target member is reused for adding new actions, the
* value of the real target is -1 to -NUM_STANDARD_TARGETS.
* For backward compatibility, the 4 lsb (2 would be enough,
* but let's play it safe) are kept to designate this target.
* The remaining bits designate the action. By making the set
* action 0xfffffff0, the result will look ok for older
* versions. [September 2006] */
#define MARK_SET_VALUE (0xfffffff0)
#define MARK_OR_VALUE (0xffffffe0)
#define MARK_AND_VALUE (0xffffffd0)
#define MARK_XOR_VALUE (0xffffffc0)
struct ebt_mark_t_info
{
unsigned long mark;
......
......@@ -77,7 +77,7 @@ enum nf_ip_hook_priorities {
#define SO_ORIGINAL_DST 80
#ifdef __KERNEL__
extern int ip_route_me_harder(struct sk_buff **pskb);
extern int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type);
extern int ip_xfrm_me_harder(struct sk_buff **pskb);
extern unsigned int nf_ip_checksum(struct sk_buff *skb, unsigned int hook,
unsigned int dataoff, u_int8_t protocol);
......
......@@ -129,7 +129,8 @@ enum
#define XFRM_MODE_TUNNEL 1
#define XFRM_MODE_ROUTEOPTIMIZATION 2
#define XFRM_MODE_IN_TRIGGER 3
#define XFRM_MODE_MAX 4
#define XFRM_MODE_BEET 4
#define XFRM_MODE_MAX 5
/* Netlink configuration messages. */
enum {
......
......@@ -22,24 +22,37 @@ static int ebt_target_mark(struct sk_buff **pskb, unsigned int hooknr,
const void *data, unsigned int datalen)
{
struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
int action = info->target & -16;
if ((*pskb)->nfmark != info->mark)
if (action == MARK_SET_VALUE)
(*pskb)->nfmark = info->mark;
else if (action == MARK_OR_VALUE)
(*pskb)->nfmark |= info->mark;
else if (action == MARK_AND_VALUE)
(*pskb)->nfmark &= info->mark;
else
(*pskb)->nfmark ^= info->mark;
return info->target;
return info->target | -16;
}
static int ebt_target_mark_check(const char *tablename, unsigned int hookmask,
const struct ebt_entry *e, void *data, unsigned int datalen)
{
struct ebt_mark_t_info *info = (struct ebt_mark_t_info *)data;
int tmp;
if (datalen != EBT_ALIGN(sizeof(struct ebt_mark_t_info)))
return -EINVAL;
if (BASE_CHAIN && info->target == EBT_RETURN)
tmp = info->target | -16;
if (BASE_CHAIN && tmp == EBT_RETURN)
return -EINVAL;
CLEAR_BASE_CHAIN_BIT;
if (INVALID_TARGET)
if (tmp < -NUM_STANDARD_TARGETS || tmp >= 0)
return -EINVAL;
tmp = info->target & -16;
if (tmp != MARK_SET_VALUE && tmp != MARK_OR_VALUE &&
tmp != MARK_AND_VALUE && tmp != MARK_XOR_VALUE)
return -EINVAL;
return 0;
}
......
......@@ -344,12 +344,12 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey,
{
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val = tbl->hash(pkey, dev) & tbl->hash_mask;
u32 hash_val = tbl->hash(pkey, dev);
NEIGH_CACHE_STAT_INC(tbl, lookups);
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
......@@ -364,12 +364,12 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, const void *pkey)
{
struct neighbour *n;
int key_len = tbl->key_len;
u32 hash_val = tbl->hash(pkey, NULL) & tbl->hash_mask;
u32 hash_val = tbl->hash(pkey, NULL);
NEIGH_CACHE_STAT_INC(tbl, lookups);
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[hash_val]; n; n = n->next) {
for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) {
if (!memcmp(n->primary_key, pkey, key_len)) {
neigh_hold(n);
NEIGH_CACHE_STAT_INC(tbl, hits);
......@@ -1998,12 +1998,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
int rc, h, s_h = cb->args[1];
int idx, s_idx = idx = cb->args[2];
read_lock_bh(&tbl->lock);
for (h = 0; h <= tbl->hash_mask; h++) {
if (h < s_h)
continue;
if (h > s_h)
s_idx = 0;
read_lock_bh(&tbl->lock);
for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next, idx++) {
if (idx < s_idx)
continue;
......@@ -2016,8 +2016,8 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb,
goto out;
}
}
read_unlock_bh(&tbl->lock);
}
read_unlock_bh(&tbl->lock);
rc = skb->len;
out:
cb->args[1] = h;
......
......@@ -434,6 +434,15 @@ config INET_XFRM_MODE_TUNNEL
If unsure, say Y.
config INET_XFRM_MODE_BEET
tristate "IP: IPsec BEET mode"
default y
select XFRM
---help---
Support for IPsec BEET mode.
If unsure, say Y.
config INET_DIAG
tristate "INET: socket monitoring interface"
default y
......
......@@ -23,6 +23,7 @@ obj-$(CONFIG_INET_AH) += ah4.o
obj-$(CONFIG_INET_ESP) += esp4.o
obj-$(CONFIG_INET_IPCOMP) += ipcomp.o
obj-$(CONFIG_INET_XFRM_TUNNEL) += xfrm4_tunnel.o
obj-$(CONFIG_INET_XFRM_MODE_BEET) += xfrm4_mode_beet.o
obj-$(CONFIG_INET_TUNNEL) += tunnel4.o
obj-$(CONFIG_INET_XFRM_MODE_TRANSPORT) += xfrm4_mode_transport.o
obj-$(CONFIG_INET_XFRM_MODE_TUNNEL) += xfrm4_mode_tunnel.o
......
......@@ -253,7 +253,8 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb)
* as per draft-ietf-ipsec-udp-encaps-06,
* section 3.1.2
*/
if (x->props.mode == XFRM_MODE_TRANSPORT)
if (x->props.mode == XFRM_MODE_TRANSPORT ||
x->props.mode == XFRM_MODE_BEET)
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
......@@ -271,17 +272,28 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu)
{
struct esp_data *esp = x->data;
u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4);
int enclen = 0;
if (x->props.mode == XFRM_MODE_TUNNEL) {
mtu = ALIGN(mtu + 2, blksize);
} else {
/* The worst case. */
switch (x->props.mode) {
case XFRM_MODE_TUNNEL:
mtu = ALIGN(mtu +2, blksize);
break;
default:
case XFRM_MODE_TRANSPORT:
/* The worst case */
mtu = ALIGN(mtu + 2, 4) + blksize - 4;
break;
case XFRM_MODE_BEET:
/* The worst case. */
enclen = IPV4_BEET_PHMAXLEN;
mtu = ALIGN(mtu + enclen + 2, blksize);
break;
}
if (esp->conf.padlen)
mtu = ALIGN(mtu, esp->conf.padlen);
return mtu + x->props.header_len + esp->auth.icv_trunc_len;
return mtu + x->props.header_len + esp->auth.icv_trunc_len - enclen;
}
static void esp4_err(struct sk_buff *skb, u32 info)
......
......@@ -206,6 +206,7 @@ static void ipcomp4_err(struct sk_buff *skb, u32 info)
static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
{
struct xfrm_state *t;
u8 mode = XFRM_MODE_TUNNEL;
t = xfrm_state_alloc();
if (t == NULL)
......@@ -216,7 +217,9 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x)
t->id.daddr.a4 = x->id.daddr.a4;
memcpy(&t->sel, &x->sel, sizeof(t->sel));
t->props.family = AF_INET;
t->props.mode = XFRM_MODE_TUNNEL;
if (x->props.mode == XFRM_MODE_BEET)
mode = x->props.mode;
t->props.mode = mode;
t->props.saddr.a4 = x->props.saddr.a4;
t->props.flags = x->props.flags;
......
......@@ -813,6 +813,16 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb,
skb->nh.iph->saddr = cp->vaddr;
ip_send_check(skb->nh.iph);
/* For policy routing, packets originating from this
* machine itself may be routed differently to packets
* passing through. We want this packet to be routed as
* if it came from this machine itself. So re-compute
* the routing information.
*/
if (ip_route_me_harder(pskb, RTN_LOCAL) != 0)
goto drop;
skb = *pskb;
IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT");
ip_vs_out_stats(cp, skb);
......
......@@ -8,7 +8,7 @@
#include <net/ip.h>
/* route_me_harder function, used by iptable_nat, iptable_mangle + ip_queue */
int ip_route_me_harder(struct sk_buff **pskb)
int ip_route_me_harder(struct sk_buff **pskb, unsigned addr_type)
{
struct iphdr *iph = (*pskb)->nh.iph;
struct rtable *rt;
......@@ -16,10 +16,13 @@ int ip_route_me_harder(struct sk_buff **pskb)
struct dst_entry *odst;
unsigned int hh_len;
if (addr_type == RTN_UNSPEC)
addr_type = inet_addr_type(iph->saddr);
/* some non-standard hacks like ipt_REJECT.c:send_reset() can cause
* packets with foreign saddr to appear on the NF_IP_LOCAL_OUT hook.
*/
if (inet_addr_type(iph->saddr) == RTN_LOCAL) {
if (addr_type == RTN_LOCAL) {
fl.nl_u.ip4_u.daddr = iph->daddr;
fl.nl_u.ip4_u.saddr = iph->saddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
......@@ -156,7 +159,7 @@ static int nf_ip_reroute(struct sk_buff **pskb, const struct nf_info *info)
if (!(iph->tos == rt_info->tos
&& iph->daddr == rt_info->daddr
&& iph->saddr == rt_info->saddr))
return ip_route_me_harder(pskb);
return ip_route_me_harder(pskb, RTN_UNSPEC);
}
return 0;
}
......
......@@ -265,7 +265,8 @@ ip_nat_local_fn(unsigned int hooknum,
ct->tuplehash[!dir].tuple.src.u.all
#endif
)
return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
if (ip_route_me_harder(pskb, RTN_UNSPEC))
ret = NF_DROP;
}
return ret;
}
......
......@@ -38,76 +38,16 @@ MODULE_DESCRIPTION("iptables REJECT target module");
#define DEBUGP(format, args...)
#endif
static inline struct rtable *route_reverse(struct sk_buff *skb,
struct tcphdr *tcph, int hook)
{
struct iphdr *iph = skb->nh.iph;
struct dst_entry *odst;
struct flowi fl = {};
struct rtable *rt;
/* We don't require ip forwarding to be enabled to be able to
* send a RST reply for bridged traffic. */
if (hook != NF_IP_FORWARD
#ifdef CONFIG_BRIDGE_NETFILTER
|| (skb->nf_bridge && skb->nf_bridge->mask & BRNF_BRIDGED)
#endif
) {
fl.nl_u.ip4_u.daddr = iph->saddr;
if (hook == NF_IP_LOCAL_IN)
fl.nl_u.ip4_u.saddr = iph->daddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
if (ip_route_output_key(&rt, &fl) != 0)
return NULL;
} else {
/* non-local src, find valid iif to satisfy
* rp-filter when calling ip_route_input. */
fl.nl_u.ip4_u.daddr = iph->daddr;
if (ip_route_output_key(&rt, &fl) != 0)
return NULL;
odst = skb->dst;
if (ip_route_input(skb, iph->saddr, iph->daddr,
RT_TOS(iph->tos), rt->u.dst.dev) != 0) {
dst_release(&rt->u.dst);
return NULL;
}
dst_release(&rt->u.dst);
rt = (struct rtable *)skb->dst;
skb->dst = odst;
fl.nl_u.ip4_u.daddr = iph->saddr;
fl.nl_u.ip4_u.saddr = iph->daddr;
fl.nl_u.ip4_u.tos = RT_TOS(iph->tos);
}
if (rt->u.dst.error) {
dst_release(&rt->u.dst);
return NULL;
}
fl.proto = IPPROTO_TCP;
fl.fl_ip_sport = tcph->dest;
fl.fl_ip_dport = tcph->source;
security_skb_classify_flow(skb, &fl);
xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0);
return rt;
}
/* Send RST reply */
static void send_reset(struct sk_buff *oldskb, int hook)
{
struct sk_buff *nskb;
struct iphdr *iph = oldskb->nh.iph;
struct tcphdr _otcph, *oth, *tcph;
struct rtable *rt;
__be16 tmp_port;
__be32 tmp_addr;
int needs_ack;
int hh_len;
unsigned int addr_type;
/* IP header checks: fragment. */
if (oldskb->nh.iph->frag_off & htons(IP_OFFSET))
......@@ -126,23 +66,13 @@ static void send_reset(struct sk_buff *oldskb, int hook)
if (nf_ip_checksum(oldskb, hook, iph->ihl * 4, IPPROTO_TCP))
return;
if ((rt = route_reverse(oldskb, oth, hook)) == NULL)
return;
hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
/* We need a linear, writeable skb. We also need to expand
headroom in case hh_len of incoming interface < hh_len of
outgoing interface */
nskb = skb_copy_expand(oldskb, hh_len, skb_tailroom(oldskb),
nskb = skb_copy_expand(oldskb, LL_MAX_HEADER, skb_tailroom(oldskb),
GFP_ATOMIC);
if (!nskb) {
dst_release(&rt->u.dst);
if (!nskb)
return;
}
dst_release(nskb->dst);
nskb->dst = &rt->u.dst;
/* This packet will not be the same as the other: clear nf fields */
nf_reset(nskb);
......@@ -184,6 +114,21 @@ static void send_reset(struct sk_buff *oldskb, int hook)
tcph->window = 0;
tcph->urg_ptr = 0;
/* Set DF, id = 0 */
nskb->nh.iph->frag_off = htons(IP_DF);
nskb->nh.iph->id = 0;
addr_type = RTN_UNSPEC;
if (hook != NF_IP_FORWARD
#ifdef CONFIG_BRIDGE_NETFILTER
|| (nskb->nf_bridge && nskb->nf_bridge->mask & BRNF_BRIDGED)
#endif
)
addr_type = RTN_LOCAL;
if (ip_route_me_harder(&nskb, addr_type))
goto free_nskb;
/* Adjust TCP checksum */
nskb->ip_summed = CHECKSUM_NONE;
tcph->check = 0;
......@@ -192,12 +137,8 @@ static void send_reset(struct sk_buff *oldskb, int hook)
nskb->nh.iph->daddr,
csum_partial((char *)tcph,
sizeof(struct tcphdr), 0));
/* Adjust IP TTL, DF */
/* Adjust IP TTL */
nskb->nh.iph->ttl = dst_metric(nskb->dst, RTAX_HOPLIMIT);
/* Set DF, id = 0 */
nskb->nh.iph->frag_off = htons(IP_DF);
nskb->nh.iph->id = 0;
/* Adjust IP checksum */
nskb->nh.iph->check = 0;
......
......@@ -157,7 +157,8 @@ ipt_local_hook(unsigned int hook,
|| (*pskb)->nfmark != nfmark
#endif
|| (*pskb)->nh.iph->tos != tos))
return ip_route_me_harder(pskb) == 0 ? ret : NF_DROP;
if (ip_route_me_harder(pskb, RTN_UNSPEC))
ret = NF_DROP;
return ret;
}
......
......@@ -2259,7 +2259,7 @@ static int tcp_clean_rtx_queue(struct sock *sk, __s32 *seq_rtt_p)
u32 pkts_acked = 0;
void (*rtt_sample)(struct sock *sk, u32 usrtt)
= icsk->icsk_ca_ops->rtt_sample;
struct timeval tv;
struct timeval tv = { .tv_sec = 0, .tv_usec = 0 };
while ((skb = skb_peek(&sk->sk_write_queue)) &&
skb != sk->sk_send_head) {
......
......@@ -675,6 +675,8 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
udp_flush_pending_frames(sk);
else if (!corkreq)
err = udp_push_pending_frames(sk, up);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
release_sock(sk);
out:
......
/*
* xfrm4_mode_beet.c - BEET mode encapsulation for IPv4.
*
* Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
* Miika Komu <miika@iki.fi>
* Herbert Xu <herbert@gondor.apana.org.au>
* Abhinav Pathak <abhinav.pathak@hiit.fi>
* Jeff Ahrenholz <ahrenholz@gmail.com>
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/stringify.h>
#include <net/dst.h>
#include <net/ip.h>
#include <net/xfrm.h>
/* Add encapsulation header.
*
* The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
* The following fields in it shall be filled in by x->type->output:
* tot_len
* check
*
* On exit, skb->h will be set to the start of the payload to be processed
* by x->type->output and skb->nh will be set to the top IP header.
*/
static int xfrm4_beet_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct iphdr *iph, *top_iph = NULL;
int hdrlen, optlen;
iph = skb->nh.iph;
skb->h.ipiph = iph;
hdrlen = 0;
optlen = iph->ihl * 4 - sizeof(*iph);
if (unlikely(optlen))
hdrlen += IPV4_BEET_PHMAXLEN - (optlen & 4);
skb->nh.raw = skb_push(skb, x->props.header_len + hdrlen);
top_iph = skb->nh.iph;
hdrlen = iph->ihl * 4 - optlen;
skb->h.raw += hdrlen;
memmove(top_iph, iph, hdrlen);
if (unlikely(optlen)) {
struct ip_beet_phdr *ph;
BUG_ON(optlen < 0);
ph = (struct ip_beet_phdr *)skb->h.raw;
ph->padlen = 4 - (optlen & 4);
ph->hdrlen = (optlen + ph->padlen + sizeof(*ph)) / 8;
ph->nexthdr = top_iph->protocol;
top_iph->protocol = IPPROTO_BEETPH;
top_iph->ihl = sizeof(struct iphdr) / 4;
}
top_iph->saddr = x->props.saddr.a4;
top_iph->daddr = x->id.daddr.a4;
return 0;
}
static int xfrm4_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct iphdr *iph = skb->nh.iph;
int phlen = 0;
int optlen = 0;
__u8 ph_nexthdr = 0, protocol = 0;
int err = -EINVAL;
protocol = iph->protocol;
if (unlikely(iph->protocol == IPPROTO_BEETPH)) {
struct ip_beet_phdr *ph = (struct ip_beet_phdr*)(iph + 1);
if (!pskb_may_pull(skb, sizeof(*ph)))
goto out;
phlen = ph->hdrlen * 8;
optlen = phlen - ph->padlen - sizeof(*ph);
if (optlen < 0 || optlen & 3 || optlen > 250)
goto out;
if (!pskb_may_pull(skb, phlen))
goto out;
ph_nexthdr = ph->nexthdr;
}
skb_push(skb, sizeof(*iph) - phlen + optlen);
memmove(skb->data, skb->nh.raw, sizeof(*iph));
skb->nh.raw = skb->data;
iph = skb->nh.iph;
iph->ihl = (sizeof(*iph) + optlen) / 4;
iph->tot_len = htons(skb->len);
iph->daddr = x->sel.daddr.a4;
iph->saddr = x->sel.saddr.a4;
if (ph_nexthdr)
iph->protocol = ph_nexthdr;
else
iph->protocol = protocol;
iph->check = 0;
iph->check = ip_fast_csum(skb->nh.raw, iph->ihl);
err = 0;
out:
return err;
}
static struct xfrm_mode xfrm4_beet_mode = {
.input = xfrm4_beet_input,
.output = xfrm4_beet_output,
.owner = THIS_MODULE,
.encap = XFRM_MODE_BEET,
};
static int __init xfrm4_beet_init(void)
{
return xfrm_register_mode(&xfrm4_beet_mode, AF_INET);
}
static void __exit xfrm4_beet_exit(void)
{
int err;
err = xfrm_unregister_mode(&xfrm4_beet_mode, AF_INET);
BUG_ON(err);
}
module_init(xfrm4_beet_init);
module_exit(xfrm4_beet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_MODE(AF_INET, XFRM_MODE_BEET);
......@@ -136,6 +136,16 @@ config INET6_XFRM_MODE_TUNNEL
If unsure, say Y.
config INET6_XFRM_MODE_BEET
tristate "IPv6: IPsec BEET mode"
depends on IPV6
default IPV6
select XFRM
---help---
Support for IPsec BEET mode.
If unsure, say Y.
config INET6_XFRM_MODE_ROUTEOPTIMIZATION
tristate "IPv6: MIPv6 route optimization mode (EXPERIMENTAL)"
depends on IPV6 && EXPERIMENTAL
......
......@@ -26,6 +26,7 @@ obj-$(CONFIG_INET6_TUNNEL) += tunnel6.o
obj-$(CONFIG_INET6_XFRM_MODE_TRANSPORT) += xfrm6_mode_transport.o
obj-$(CONFIG_INET6_XFRM_MODE_TUNNEL) += xfrm6_mode_tunnel.o
obj-$(CONFIG_INET6_XFRM_MODE_ROUTEOPTIMIZATION) += xfrm6_mode_ro.o
obj-$(CONFIG_INET6_XFRM_MODE_BEET) += xfrm6_mode_beet.o
obj-$(CONFIG_NETFILTER) += netfilter/
obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o
......
......@@ -199,6 +199,7 @@ static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
{
struct xfrm_state *t = NULL;
u8 mode = XFRM_MODE_TUNNEL;
t = xfrm_state_alloc();
if (!t)
......@@ -212,7 +213,9 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x)
memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr));
memcpy(&t->sel, &x->sel, sizeof(t->sel));
t->props.family = AF_INET6;
t->props.mode = XFRM_MODE_TUNNEL;
if (x->props.mode == XFRM_MODE_BEET)
mode = x->props.mode;
t->props.mode = mode;
memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr));
if (xfrm_init_state(t))
......
......@@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct in6_addr *daddr, *final_p = NULL, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi *fl = &inet->cork.fl;
struct flowi fl;
struct dst_entry *dst;
int addr_len = msg->msg_namelen;
int ulen = len;
......@@ -626,19 +626,19 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
}
ulen += sizeof(struct udphdr);
memset(fl, 0, sizeof(*fl));
memset(&fl, 0, sizeof(fl));
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
fl->fl_ip_dport = sin6->sin6_port;
fl.fl_ip_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
if (np->sndflow) {
fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
daddr = &flowlabel->dst;
......@@ -656,32 +656,32 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
fl->oif = sin6->sin6_scope_id;
fl.oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
fl->fl_ip_dport = inet->dport;
fl.fl_ip_dport = inet->dport;
daddr = &np->daddr;
fl->fl6_flowlabel = np->flow_label;
fl.fl6_flowlabel = np->flow_label;
connected = 1;
}
if (!fl->oif)
fl->oif = sk->sk_bound_dev_if;
if (!fl.oif)
fl.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
......@@ -695,39 +695,39 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
fl->proto = IPPROTO_UDP;
ipv6_addr_copy(&fl->fl6_dst, daddr);
if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl->fl6_src, &np->saddr);
fl->fl_ip_sport = inet->sport;
fl.proto = IPPROTO_UDP;
ipv6_addr_copy(&fl.fl6_dst, daddr);
if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
ipv6_addr_copy(&fl.fl6_src, &np->saddr);
fl.fl_ip_sport = inet->sport;
/* merge ip6_build_xmit from ip6_output */
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
ipv6_addr_copy(&final, &fl->fl6_dst);
ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
ipv6_addr_copy(&final, &fl.fl6_dst);
ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p = &final;
connected = 0;
}
if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
fl->oif = np->mcast_oif;
if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) {
fl.oif = np->mcast_oif;
connected = 0;
}
security_sk_classify_flow(sk, fl);
security_sk_classify_flow(sk, &fl);
err = ip6_sk_dst_lookup(sk, &dst, fl);
err = ip6_sk_dst_lookup(sk, &dst, &fl);
if (err)
goto out;
if (final_p)
ipv6_addr_copy(&fl->fl6_dst, final_p);
ipv6_addr_copy(&fl.fl6_dst, final_p);
if ((err = xfrm_lookup(&dst, fl, sk, 0)) < 0)
if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
goto out;
if (hlimit < 0) {
if (ipv6_addr_is_multicast(&fl->fl6_dst))
if (ipv6_addr_is_multicast(&fl.fl6_dst))
hlimit = np->mcast_hops;
else
hlimit = np->hop_limit;
......@@ -763,21 +763,23 @@ static int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk,
do_append_data:
up->len += ulen;
err = ip6_append_data(sk, ip_generic_getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), hlimit, tclass, opt, fl,
sizeof(struct udphdr), hlimit, tclass, opt, &fl,
(struct rt6_info*)dst,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
err = udp_v6_push_pending_frames(sk, up);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
if (dst) {
if (connected) {
ip6_dst_store(sk, dst,
ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ?
ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ?
&np->daddr : NULL,
#ifdef CONFIG_IPV6_SUBTREES
ipv6_addr_equal(&fl->fl6_src, &np->saddr) ?
ipv6_addr_equal(&fl.fl6_src, &np->saddr) ?
&np->saddr :
#endif
NULL);
......
/*
* xfrm6_mode_beet.c - BEET mode encapsulation for IPv6.
*
* Copyright (c) 2006 Diego Beltrami <diego.beltrami@gmail.com>
* Miika Komu <miika@iki.fi>
* Herbert Xu <herbert@gondor.apana.org.au>
* Abhinav Pathak <abhinav.pathak@hiit.fi>
* Jeff Ahrenholz <ahrenholz@gmail.com>
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/skbuff.h>
#include <linux/stringify.h>
#include <net/dsfield.h>
#include <net/dst.h>
#include <net/inet_ecn.h>
#include <net/ipv6.h>
#include <net/xfrm.h>
/* Add encapsulation header.
*
* The top IP header will be constructed per draft-nikander-esp-beet-mode-06.txt.
* The following fields in it shall be filled in by x->type->output:
* payload_len
*
* On exit, skb->h will be set to the start of the encapsulation header to be
* filled in by x->type->output and skb->nh will be set to the nextheader field
* of the extension header directly preceding the encapsulation header, or in
* its absence, that of the top IP header. The value of skb->data will always
* point to the top IP header.
*/
static int xfrm6_beet_output(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *iph, *top_iph;
u8 *prevhdr;
int hdr_len;
skb_push(skb, x->props.header_len);
iph = skb->nh.ipv6h;
hdr_len = ip6_find_1stfragopt(skb, &prevhdr);
skb->nh.raw = prevhdr - x->props.header_len;
skb->h.raw = skb->data + hdr_len;
memmove(skb->data, iph, hdr_len);
skb->nh.raw = skb->data;
top_iph = skb->nh.ipv6h;
skb->nh.raw = &top_iph->nexthdr;
skb->h.ipv6h = top_iph + 1;
ipv6_addr_copy(&top_iph->saddr, (struct in6_addr *)&x->props.saddr);
ipv6_addr_copy(&top_iph->daddr, (struct in6_addr *)&x->id.daddr);
return 0;
}
static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb)
{
struct ipv6hdr *ip6h;
int size = sizeof(struct ipv6hdr);
int err = -EINVAL;
if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
goto out;
skb_push(skb, size);
memmove(skb->data, skb->nh.raw, size);
skb->nh.raw = skb->data;
skb->mac.raw = memmove(skb->data - skb->mac_len,
skb->mac.raw, skb->mac_len);
ip6h = skb->nh.ipv6h;
ip6h->payload_len = htons(skb->len - size);
ipv6_addr_copy(&ip6h->daddr, (struct in6_addr *) &x->sel.daddr.a6);
ipv6_addr_copy(&ip6h->saddr, (struct in6_addr *) &x->sel.saddr.a6);
err = 0;
out:
return err;
}
static struct xfrm_mode xfrm6_beet_mode = {
.input = xfrm6_beet_input,
.output = xfrm6_beet_output,
.owner = THIS_MODULE,
.encap = XFRM_MODE_BEET,
};
static int __init xfrm6_beet_init(void)
{
return xfrm_register_mode(&xfrm6_beet_mode, AF_INET6);
}
static void __exit xfrm6_beet_exit(void)
{
int err;
err = xfrm_unregister_mode(&xfrm6_beet_mode, AF_INET6);
BUG_ON(err);
}
module_init(xfrm6_beet_init);
module_exit(xfrm6_beet_exit);
MODULE_LICENSE("GPL");
MODULE_ALIAS_XFRM_MODE(AF_INET6, XFRM_MODE_BEET);
......@@ -365,7 +365,7 @@ config NETFILTER_XT_MATCH_MULTIPORT
config NETFILTER_XT_MATCH_PHYSDEV
tristate '"physdev" match support'
depends on NETFILTER_XTABLES && BRIDGE_NETFILTER
depends on NETFILTER_XTABLES && BRIDGE && BRIDGE_NETFILTER
help
Physdev packet matching matches against the physical bridge ports
the IP packet arrived on or will leave by.
......
/*
* net/sched/estimator.c Simple rate estimator.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*
* Authors: Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
*/
#include <asm/uaccess.h>
#include <asm/system.h>
#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/string.h>
#include <linux/mm.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/interrupt.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <net/sock.h>
#include <net/pkt_sched.h>
/*
This code is NOT intended to be used for statistics collection,
its purpose is to provide a base for statistical multiplexing
for controlled load service.
If you need only statistics, run a user level daemon which
periodically reads byte counters.
Unfortunately, rate estimation is not a very easy task.
F.e. I did not find a simple way to estimate the current peak rate
and even failed to formulate the problem 8)8)
So I preferred not to built an estimator into the scheduler,
but run this task separately.
Ideally, it should be kernel thread(s), but for now it runs
from timers, which puts apparent top bounds on the number of rated
flows, has minimal overhead on small, but is enough
to handle controlled load service, sets of aggregates.
We measure rate over A=(1<<interval) seconds and evaluate EWMA:
avrate = avrate*(1-W) + rate*W
where W is chosen as negative power of 2: W = 2^(-ewma_log)
The resulting time constant is:
T = A/(-ln(1-W))
NOTES.
* The stored value for avbps is scaled by 2^5, so that maximal
rate is ~1Gbit, avpps is scaled by 2^10.
* Minimal interval is HZ/4=250msec (it is the greatest common divisor
for HZ=100 and HZ=1024 8)), maximal interval
is (HZ*2^EST_MAX_INTERVAL)/4 = 8sec. Shorter intervals
are too expensive, longer ones can be implemented
at user level painlessly.
*/
#define EST_MAX_INTERVAL 5
struct qdisc_estimator
{
struct qdisc_estimator *next;
struct tc_stats *stats;
spinlock_t *stats_lock;
unsigned interval;
int ewma_log;
u64 last_bytes;
u32 last_packets;
u32 avpps;
u32 avbps;
};
struct qdisc_estimator_head
{
struct timer_list timer;
struct qdisc_estimator *list;
};
static struct qdisc_estimator_head elist[EST_MAX_INTERVAL+1];
/* Estimator array lock */
static DEFINE_RWLOCK(est_lock);
static void est_timer(unsigned long arg)
{
int idx = (int)arg;
struct qdisc_estimator *e;
read_lock(&est_lock);
for (e = elist[idx].list; e; e = e->next) {
struct tc_stats *st = e->stats;
u64 nbytes;
u32 npackets;
u32 rate;
spin_lock(e->stats_lock);
nbytes = st->bytes;
npackets = st->packets;
rate = (nbytes - e->last_bytes)<<(7 - idx);
e->last_bytes = nbytes;
e->avbps += ((long)rate - (long)e->avbps) >> e->ewma_log;
st->bps = (e->avbps+0xF)>>5;
rate = (npackets - e->last_packets)<<(12 - idx);
e->last_packets = npackets;
e->avpps += ((long)rate - (long)e->avpps) >> e->ewma_log;
e->stats->pps = (e->avpps+0x1FF)>>10;
spin_unlock(e->stats_lock);
}
mod_timer(&elist[idx].timer, jiffies + ((HZ<<idx)/4));
read_unlock(&est_lock);
}
int qdisc_new_estimator(struct tc_stats *stats, spinlock_t *stats_lock, struct rtattr *opt)
{
struct qdisc_estimator *est;
struct tc_estimator *parm = RTA_DATA(opt);
if (RTA_PAYLOAD(opt) < sizeof(*parm))
return -EINVAL;
if (parm->interval < -2 || parm->interval > 3)
return -EINVAL;
est = kzalloc(sizeof(*est), GFP_KERNEL);
if (est == NULL)
return -ENOBUFS;
est->interval = parm->interval + 2;
est->stats = stats;
est->stats_lock = stats_lock;
est->ewma_log = parm->ewma_log;
est->last_bytes = stats->bytes;
est->avbps = stats->bps<<5;
est->last_packets = stats->packets;
est->avpps = stats->pps<<10;
est->next = elist[est->interval].list;
if (est->next == NULL) {
init_timer(&elist[est->interval].timer);
elist[est->interval].timer.data = est->interval;
elist[est->interval].timer.expires = jiffies + ((HZ<<est->interval)/4);
elist[est->interval].timer.function = est_timer;
add_timer(&elist[est->interval].timer);
}
write_lock_bh(&est_lock);
elist[est->interval].list = est;
write_unlock_bh(&est_lock);
return 0;
}
void qdisc_kill_estimator(struct tc_stats *stats)
{
int idx;
struct qdisc_estimator *est, **pest;
for (idx=0; idx <= EST_MAX_INTERVAL; idx++) {
int killed = 0;
pest = &elist[idx].list;
while ((est=*pest) != NULL) {
if (est->stats != stats) {
pest = &est->next;
continue;
}
write_lock_bh(&est_lock);
*pest = est->next;
write_unlock_bh(&est_lock);
kfree(est);
killed++;
}
if (killed && elist[idx].list == NULL)
del_timer(&elist[idx].timer);
}
}
EXPORT_SYMBOL(qdisc_kill_estimator);
EXPORT_SYMBOL(qdisc_new_estimator);
......@@ -391,7 +391,7 @@ static inline void htb_add_class_to_row(struct htb_sched *q,
/* If this triggers, it is a bug in this code, but it need not be fatal */
static void htb_safe_rb_erase(struct rb_node *rb, struct rb_root *root)
{
if (!RB_EMPTY_NODE(rb)) {
if (RB_EMPTY_NODE(rb)) {
WARN_ON(1);
} else {
rb_erase(rb, root);
......
......@@ -1666,7 +1666,8 @@ static void link_retransmit_failure(struct link *l_ptr, struct sk_buff *buf)
char addr_string[16];
tipc_printf(TIPC_OUTPUT, "Msg seq number: %u, ", msg_seqno(msg));
tipc_printf(TIPC_OUTPUT, "Outstanding acks: %u\n", (u32)TIPC_SKB_CB(buf)->handle);
tipc_printf(TIPC_OUTPUT, "Outstanding acks: %lu\n",
(unsigned long) TIPC_SKB_CB(buf)->handle);
n_ptr = l_ptr->owner->next;
tipc_node_lock(n_ptr);
......
......@@ -41,17 +41,18 @@ static inline unsigned int __xfrm_dst_hash(xfrm_address_t *daddr, xfrm_address_t
return (h ^ (h >> 16)) & hmask;
}
static inline unsigned __xfrm_src_hash(xfrm_address_t *saddr,
static inline unsigned __xfrm_src_hash(xfrm_address_t *daddr,
xfrm_address_t *saddr,
unsigned short family,
unsigned int hmask)
{
unsigned int h = family;
switch (family) {
case AF_INET:
h ^= __xfrm4_addr_hash(saddr);
h ^= __xfrm4_daddr_saddr_hash(daddr, saddr);
break;
case AF_INET6:
h ^= __xfrm6_addr_hash(saddr);
h ^= __xfrm6_daddr_saddr_hash(daddr, saddr);
break;
};
return (h ^ (h >> 16)) & hmask;
......
......@@ -778,8 +778,9 @@ void xfrm_policy_flush(u8 type)
for (dir = 0; dir < XFRM_POLICY_MAX; dir++) {
struct xfrm_policy *pol;
struct hlist_node *entry;
int i;
int i, killed;
killed = 0;
again1:
hlist_for_each_entry(pol, entry,
&xfrm_policy_inexact[dir], bydst) {
......@@ -790,6 +791,7 @@ void xfrm_policy_flush(u8 type)
write_unlock_bh(&xfrm_policy_lock);
xfrm_policy_kill(pol);
killed++;
write_lock_bh(&xfrm_policy_lock);
goto again1;
......@@ -807,13 +809,14 @@ void xfrm_policy_flush(u8 type)
write_unlock_bh(&xfrm_policy_lock);
xfrm_policy_kill(pol);
killed++;
write_lock_bh(&xfrm_policy_lock);
goto again2;
}
}
xfrm_policy_count[dir] = 0;
xfrm_policy_count[dir] -= killed;
}
atomic_inc(&flow_cache_genid);
write_unlock_bh(&xfrm_policy_lock);
......
......@@ -63,10 +63,11 @@ static inline unsigned int xfrm_dst_hash(xfrm_address_t *daddr,
return __xfrm_dst_hash(daddr, saddr, reqid, family, xfrm_state_hmask);
}
static inline unsigned int xfrm_src_hash(xfrm_address_t *addr,
static inline unsigned int xfrm_src_hash(xfrm_address_t *daddr,
xfrm_address_t *saddr,
unsigned short family)
{
return __xfrm_src_hash(addr, family, xfrm_state_hmask);
return __xfrm_src_hash(daddr, saddr, family, xfrm_state_hmask);
}
static inline unsigned int
......@@ -92,7 +93,8 @@ static void xfrm_hash_transfer(struct hlist_head *list,
nhashmask);
hlist_add_head(&x->bydst, ndsttable+h);
h = __xfrm_src_hash(&x->props.saddr, x->props.family,
h = __xfrm_src_hash(&x->id.daddr, &x->props.saddr,
x->props.family,
nhashmask);
hlist_add_head(&x->bysrc, nsrctable+h);
......@@ -458,7 +460,7 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi,
static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family)
{
unsigned int h = xfrm_src_hash(saddr, family);
unsigned int h = xfrm_src_hash(daddr, saddr, family);
struct xfrm_state *x;
struct hlist_node *entry;
......@@ -587,7 +589,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr,
if (km_query(x, tmpl, pol) == 0) {
x->km.state = XFRM_STATE_ACQ;
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
h = xfrm_src_hash(saddr, family);
h = xfrm_src_hash(daddr, saddr, family);
hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
if (x->id.spi) {
h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family);
......@@ -622,7 +624,7 @@ static void __xfrm_state_insert(struct xfrm_state *x)
x->props.reqid, x->props.family);
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
h = xfrm_src_hash(&x->props.saddr, x->props.family);
h = xfrm_src_hash(&x->id.daddr, &x->props.saddr, x->props.family);
hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
if (x->id.spi) {
......@@ -748,7 +750,7 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re
x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ;
add_timer(&x->timer);
hlist_add_head(&x->bydst, xfrm_state_bydst+h);
h = xfrm_src_hash(saddr, family);
h = xfrm_src_hash(daddr, saddr, family);
hlist_add_head(&x->bysrc, xfrm_state_bysrc+h);
wake_up(&km_waitq);
}
......
......@@ -211,6 +211,7 @@ static int verify_newsa_info(struct xfrm_usersa_info *p,
case XFRM_MODE_TRANSPORT:
case XFRM_MODE_TUNNEL:
case XFRM_MODE_ROUTEOPTIMIZATION:
case XFRM_MODE_BEET:
break;
default:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册