提交 21f130a2 编写于 作者: D David S. Miller

Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/net-2.6

......@@ -2211,6 +2211,15 @@ D: OV511 driver
S: (address available on request)
S: USA
N: Ian McDonald
E: iam4@cs.waikato.ac.nz
E: imcdnzl@gmail.com
W: http://wand.net.nz/~iam4
W: http://imcdnzl.blogspot.com
D: DCCP, CCID3
S: Hamilton
S: New Zealand
N: Patrick McHardy
E: kaber@trash.net
P: 1024D/12155E80 B128 7DE6 FF0A C2B2 48BE AB4C C9D4 964E 1215 5E80
......@@ -2246,19 +2255,12 @@ S: D-90453 Nuernberg
S: Germany
N: Arnaldo Carvalho de Melo
E: acme@conectiva.com.br
E: acme@kernel.org
E: acme@gnu.org
W: http://bazar2.conectiva.com.br/~acme
W: http://advogato.org/person/acme
E: acme@mandriva.com
E: acme@ghostprotocols.net
W: http://oops.ghostprotocols.net:81/blog/
P: 1024D/9224DF01 D5DF E3BB E3C8 BCBB F8AD 841A B6AB 4681 9224 DF01
D: wanrouter hacking
D: misc Makefile, Config.in, drivers and network stacks fixes
D: IPX & LLC network stacks maintainer
D: Cyclom 2X synchronous card driver
D: wl3501 PCMCIA wireless card driver
D: i18n for minicom, net-tools, util-linux, fetchmail, etc
S: Conectiva S.A.
D: IPX, LLC, DCCP, cyc2x, wl3501_cs, net/ hacks
S: Mandriva
S: R. Tocantins, 89 - Cristo Rei
S: 80050-430 - Curitiba - Paran
S: Brazil
......
......@@ -686,6 +686,13 @@ P: Guennadi Liakhovetski
M: g.liakhovetski@gmx.de
S: Maintained
DCCP PROTOCOL
P: Arnaldo Carvalho de Melo
M: acme@mandriva.com
L: dccp@vger.kernel.org
W: http://www.wlug.org.nz/DCCP
S: Maintained
DECnet NETWORK LAYER
P: Patrick Caulfield
M: patrick@tykepenguin.com
......@@ -2271,12 +2278,6 @@ M: R.E.Wolff@BitWizard.nl
L: linux-kernel@vger.kernel.org ?
S: Supported
SPX NETWORK LAYER
P: Jay Schulist
M: jschlst@samba.org
L: netdev@vger.kernel.org
S: Supported
SRM (Alpha) environment access
P: Jan-Benedict Glaw
M: jbglaw@lug-owl.de
......
......@@ -67,8 +67,8 @@
#define DRV_MODULE_NAME "tg3"
#define PFX DRV_MODULE_NAME ": "
#define DRV_MODULE_VERSION "3.39"
#define DRV_MODULE_RELDATE "September 5, 2005"
#define DRV_MODULE_VERSION "3.40"
#define DRV_MODULE_RELDATE "September 15, 2005"
#define TG3_DEF_MAC_MODE 0
#define TG3_DEF_RX_MODE 0
......@@ -3442,31 +3442,47 @@ static void tg3_tx_timeout(struct net_device *dev)
schedule_work(&tp->reset_task);
}
/* Test for DMA buffers crossing any 4GB boundaries: 4G, 8G, etc */
static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
{
u32 base = (u32) mapping & 0xffffffff;
return ((base > 0xffffdcc0) &&
(base + len + 8 < base));
}
static void tg3_set_txd(struct tg3 *, int, dma_addr_t, int, u32, u32);
static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
u32 guilty_entry, int guilty_len,
u32 last_plus_one, u32 *start, u32 mss)
u32 last_plus_one, u32 *start,
u32 base_flags, u32 mss)
{
struct sk_buff *new_skb = skb_copy(skb, GFP_ATOMIC);
dma_addr_t new_addr;
dma_addr_t new_addr = 0;
u32 entry = *start;
int i;
int i, ret = 0;
if (!new_skb) {
dev_kfree_skb(skb);
return -1;
ret = -1;
} else {
/* New SKB is guaranteed to be linear. */
entry = *start;
new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
PCI_DMA_TODEVICE);
/* Make sure new skb does not cross any 4G boundaries.
* Drop the packet if it does.
*/
if (tg3_4g_overflow_test(new_addr, new_skb->len)) {
ret = -1;
dev_kfree_skb(new_skb);
new_skb = NULL;
} else {
tg3_set_txd(tp, entry, new_addr, new_skb->len,
base_flags, 1 | (mss << 1));
*start = NEXT_TX(entry);
}
}
/* New SKB is guaranteed to be linear. */
entry = *start;
new_addr = pci_map_single(tp->pdev, new_skb->data, new_skb->len,
PCI_DMA_TODEVICE);
tg3_set_txd(tp, entry, new_addr, new_skb->len,
(skb->ip_summed == CHECKSUM_HW) ?
TXD_FLAG_TCPUDP_CSUM : 0, 1 | (mss << 1));
*start = NEXT_TX(entry);
/* Now clean up the sw ring entries. */
i = 0;
while (entry != last_plus_one) {
......@@ -3491,7 +3507,7 @@ static int tigon3_4gb_hwbug_workaround(struct tg3 *tp, struct sk_buff *skb,
dev_kfree_skb(skb);
return 0;
return ret;
}
static void tg3_set_txd(struct tg3 *tp, int entry,
......@@ -3517,19 +3533,10 @@ static void tg3_set_txd(struct tg3 *tp, int entry,
txd->vlan_tag = vlan_tag << TXD_VLAN_TAG_SHIFT;
}
static inline int tg3_4g_overflow_test(dma_addr_t mapping, int len)
{
u32 base = (u32) mapping & 0xffffffff;
return ((base > 0xffffdcc0) &&
(base + len + 8 < base));
}
static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
{
struct tg3 *tp = netdev_priv(dev);
dma_addr_t mapping;
unsigned int i;
u32 len, entry, base_flags, mss;
int would_hit_hwbug;
......@@ -3624,7 +3631,7 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
would_hit_hwbug = 0;
if (tg3_4g_overflow_test(mapping, len))
would_hit_hwbug = entry + 1;
would_hit_hwbug = 1;
tg3_set_txd(tp, entry, mapping, len, base_flags,
(skb_shinfo(skb)->nr_frags == 0) | (mss << 1));
......@@ -3648,12 +3655,8 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
tp->tx_buffers[entry].skb = NULL;
pci_unmap_addr_set(&tp->tx_buffers[entry], mapping, mapping);
if (tg3_4g_overflow_test(mapping, len)) {
/* Only one should match. */
if (would_hit_hwbug)
BUG();
would_hit_hwbug = entry + 1;
}
if (tg3_4g_overflow_test(mapping, len))
would_hit_hwbug = 1;
if (tp->tg3_flags2 & TG3_FLG2_HW_TSO)
tg3_set_txd(tp, entry, mapping, len,
......@@ -3669,34 +3672,15 @@ static int tg3_start_xmit(struct sk_buff *skb, struct net_device *dev)
if (would_hit_hwbug) {
u32 last_plus_one = entry;
u32 start;
unsigned int len = 0;
would_hit_hwbug -= 1;
entry = entry - 1 - skb_shinfo(skb)->nr_frags;
entry &= (TG3_TX_RING_SIZE - 1);
start = entry;
i = 0;
while (entry != last_plus_one) {
if (i == 0)
len = skb_headlen(skb);
else
len = skb_shinfo(skb)->frags[i-1].size;
if (entry == would_hit_hwbug)
break;
i++;
entry = NEXT_TX(entry);
}
start = entry - 1 - skb_shinfo(skb)->nr_frags;
start &= (TG3_TX_RING_SIZE - 1);
/* If the workaround fails due to memory/mapping
* failure, silently drop this packet.
*/
if (tigon3_4gb_hwbug_workaround(tp, skb,
entry, len,
last_plus_one,
&start, mss))
if (tigon3_4gb_hwbug_workaround(tp, skb, last_plus_one,
&start, base_flags, mss))
goto out_unlock;
entry = start;
......@@ -9271,6 +9255,8 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
static struct pci_device_id write_reorder_chipsets[] = {
{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
PCI_DEVICE_ID_AMD_FE_GATE_700C) },
{ PCI_DEVICE(PCI_VENDOR_ID_AMD,
PCI_DEVICE_ID_AMD_K8_NB) },
{ },
};
u32 misc_ctrl_reg;
......@@ -9285,7 +9271,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
tp->tg3_flags2 |= TG3_FLG2_SUN_570X;
#endif
/* If we have an AMD 762 chipset, write
/* If we have an AMD 762 or K8 chipset, write
* reordering to the mailbox registers done by the host
* controller can cause major troubles. We read back from
* every mailbox register write to force the writes to be
......@@ -9532,7 +9518,7 @@ static int __devinit tg3_get_invariants(struct tg3 *tp)
tp->write32_rx_mbox = tg3_write_indirect_mbox;
iounmap(tp->regs);
tp->regs = 0;
tp->regs = NULL;
pci_read_config_word(tp->pdev, PCI_COMMAND, &pci_cmd);
pci_cmd &= ~PCI_COMMAND_MEMORY;
......@@ -10680,7 +10666,7 @@ static int __devinit tg3_init_one(struct pci_dev *pdev,
err_out_iounmap:
if (tp->regs) {
iounmap(tp->regs);
tp->regs = 0;
tp->regs = NULL;
}
err_out_free_dev:
......@@ -10705,7 +10691,7 @@ static void __devexit tg3_remove_one(struct pci_dev *pdev)
unregister_netdev(dev);
if (tp->regs) {
iounmap(tp->regs);
tp->regs = 0;
tp->regs = NULL;
}
free_netdev(dev);
pci_release_regions(pdev);
......
......@@ -4,16 +4,6 @@
#include <linux/types.h>
#include <asm/byteorder.h>
/* Structure describing an Internet (DCCP) socket address. */
struct sockaddr_dccp {
__u16 sdccp_family; /* Address family */
__u16 sdccp_port; /* Port number */
__u32 sdccp_addr; /* Internet address */
__u32 sdccp_service; /* Service */
/* Pad to size of `struct sockaddr': 16 bytes . */
__u32 sdccp_pad;
};
/**
* struct dccp_hdr - generic part of DCCP packet header
*
......@@ -188,6 +178,9 @@ enum {
/* DCCP socket options */
#define DCCP_SOCKOPT_PACKET_SIZE 1
#define DCCP_SOCKOPT_SERVICE 2
#define DCCP_SERVICE_LIST_MAX_LEN 32
#ifdef __KERNEL__
......@@ -382,6 +375,25 @@ enum dccp_role {
DCCP_ROLE_SERVER,
};
struct dccp_service_list {
__u32 dccpsl_nr;
__u32 dccpsl_list[0];
};
#define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1)
static inline int dccp_list_has_service(const struct dccp_service_list *sl,
const u32 service)
{
if (likely(sl != NULL)) {
u32 i = sl->dccpsl_nr;
while (i--)
if (sl->dccpsl_list[i] == service)
return 1;
}
return 0;
}
/**
* struct dccp_sock - DCCP socket state
*
......@@ -417,7 +429,8 @@ struct dccp_sock {
__u64 dccps_gss;
__u64 dccps_gsr;
__u64 dccps_gar;
unsigned long dccps_service;
__u32 dccps_service;
struct dccp_service_list *dccps_service_list;
struct timeval dccps_timestamp_time;
__u32 dccps_timestamp_echo;
__u32 dccps_packet_size;
......@@ -443,6 +456,11 @@ static inline struct dccp_sock *dccp_sk(const struct sock *sk)
return (struct dccp_sock *)sk;
}
static inline int dccp_service_not_initialized(const struct sock *sk)
{
return dccp_sk(sk)->dccps_service == DCCP_SERVICE_INVALID_VALUE;
}
static inline const char *dccp_role(const struct sock *sk)
{
switch (dccp_sk(sk)->dccps_role) {
......
......@@ -491,6 +491,7 @@
#define PCI_DEVICE_ID_AMI_MEGARAID2 0x9060
#define PCI_VENDOR_ID_AMD 0x1022
#define PCI_DEVICE_ID_AMD_K8_NB 0x1100
#define PCI_DEVICE_ID_AMD_LANCE 0x2000
#define PCI_DEVICE_ID_AMD_LANCE_HOME 0x2001
#define PCI_DEVICE_ID_AMD_SCSI 0x2020
......
......@@ -140,6 +140,7 @@ config BRIDGE_NETFILTER
If unsure, say N.
source "net/netfilter/Kconfig"
source "net/ipv4/netfilter/Kconfig"
source "net/ipv6/netfilter/Kconfig"
source "net/decnet/netfilter/Kconfig"
......@@ -206,8 +207,6 @@ config NET_PKTGEN
To compile this code as a module, choose M here: the
module will be called pktgen.
source "net/netfilter/Kconfig"
endmenu
endmenu
......
......@@ -258,13 +258,12 @@ extern int dccp_v4_send_reset(struct sock *sk,
extern void dccp_send_close(struct sock *sk, const int active);
struct dccp_skb_cb {
__u8 dccpd_type;
__u8 dccpd_reset_code;
__u8 dccpd_service;
__u8 dccpd_ccval;
__u8 dccpd_type:4;
__u8 dccpd_ccval:4;
__u8 dccpd_reset_code;
__u16 dccpd_opt_len;
__u64 dccpd_seq;
__u64 dccpd_ack_seq;
int dccpd_opt_len;
};
#define DCCP_SKB_CB(__skb) ((struct dccp_skb_cb *)&((__skb)->cb[0]))
......
......@@ -384,9 +384,9 @@ static int dccp_rcv_request_sent_state_process(struct sock *sk,
}
out_invalid_packet:
return 1; /* dccp_v4_do_rcv will send a reset, but...
FIXME: the reset code should be
DCCP_RESET_CODE_PACKET_ERROR */
/* dccp_v4_do_rcv will send a reset */
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_PACKET_ERROR;
return 1;
}
static int dccp_rcv_respond_partopen_state_process(struct sock *sk,
......@@ -433,6 +433,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
struct dccp_hdr *dh, unsigned len)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
const int old_state = sk->sk_state;
int queued = 0;
......@@ -473,7 +474,8 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dh->dccph_type == DCCP_PKT_RESET)
goto discard;
/* Caller (dccp_v4_do_rcv) will send Reset(No Connection)*/
/* Caller (dccp_v4_do_rcv) will send Reset */
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1;
}
......@@ -487,8 +489,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
if (dccp_parse_options(sk, skb))
goto discard;
if (DCCP_SKB_CB(skb)->dccpd_ack_seq !=
DCCP_PKT_WITHOUT_ACK_SEQ)
if (dcb->dccpd_ack_seq != DCCP_PKT_WITHOUT_ACK_SEQ)
dccp_event_ack_recv(sk, skb);
ccid_hc_rx_packet_recv(dp->dccps_hc_rx_ccid, sk, skb);
......@@ -500,7 +501,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
*/
if (dp->dccps_options.dccpo_send_ack_vector) {
if (dccp_ackpkts_add(dp->dccps_hc_rx_ackpkts, sk,
DCCP_SKB_CB(skb)->dccpd_seq,
dcb->dccpd_seq,
DCCP_ACKPKTS_STATE_RECEIVED))
goto discard;
/*
......@@ -551,8 +552,7 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
dh->dccph_type == DCCP_PKT_REQUEST) ||
(sk->sk_state == DCCP_RESPOND &&
dh->dccph_type == DCCP_PKT_DATA)) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_PKT_SYNC);
dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNC);
goto discard;
} else if (dh->dccph_type == DCCP_PKT_CLOSEREQ) {
dccp_rcv_closereq(sk, skb);
......@@ -563,13 +563,13 @@ int dccp_rcv_state_process(struct sock *sk, struct sk_buff *skb,
}
if (unlikely(dh->dccph_type == DCCP_PKT_SYNC)) {
dccp_send_sync(sk, DCCP_SKB_CB(skb)->dccpd_seq,
DCCP_PKT_SYNCACK);
dccp_send_sync(sk, dcb->dccpd_seq, DCCP_PKT_SYNCACK);
goto discard;
}
switch (sk->sk_state) {
case DCCP_CLOSED:
dcb->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
return 1;
case DCCP_REQUESTING:
......
......@@ -246,6 +246,9 @@ static int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr,
dp->dccps_role = DCCP_ROLE_CLIENT;
if (dccp_service_not_initialized(sk))
return -EPROTO;
if (addr_len < sizeof(struct sockaddr_in))
return -EINVAL;
......@@ -661,6 +664,16 @@ static inline u64 dccp_v4_init_sequence(const struct sock *sk,
dccp_hdr(skb)->dccph_sport);
}
static inline int dccp_bad_service_code(const struct sock *sk,
const __u32 service)
{
const struct dccp_sock *dp = dccp_sk(sk);
if (dp->dccps_service == service)
return 0;
return !dccp_list_has_service(dp->dccps_service_list, service);
}
int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
{
struct inet_request_sock *ireq;
......@@ -669,13 +682,22 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
struct dccp_request_sock *dreq;
const __u32 saddr = skb->nh.iph->saddr;
const __u32 daddr = skb->nh.iph->daddr;
const __u32 service = dccp_hdr_request(skb)->dccph_req_service;
struct dccp_skb_cb *dcb = DCCP_SKB_CB(skb);
__u8 reset_code = DCCP_RESET_CODE_TOO_BUSY;
struct dst_entry *dst = NULL;
/* Never answer to DCCP_PKT_REQUESTs send to broadcast or multicast */
if (((struct rtable *)skb->dst)->rt_flags &
(RTCF_BROADCAST | RTCF_MULTICAST))
(RTCF_BROADCAST | RTCF_MULTICAST)) {
reset_code = DCCP_RESET_CODE_NO_CONNECTION;
goto drop;
}
if (dccp_bad_service_code(sk, service)) {
reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
goto drop;
}
/*
* TW buckets are converted to open requests without
* limitations, they conserve resources and peer is
......@@ -718,9 +740,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
* dccp_create_openreq_child.
*/
dreq = dccp_rsk(req);
dreq->dreq_isr = DCCP_SKB_CB(skb)->dccpd_seq;
dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
dreq->dreq_service = dccp_hdr_request(skb)->dccph_req_service;
dreq->dreq_isr = dcb->dccpd_seq;
dreq->dreq_iss = dccp_v4_init_sequence(sk, skb);
dreq->dreq_service = service;
if (dccp_v4_send_response(sk, req, dst))
goto drop_and_free;
......@@ -735,6 +757,7 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb)
__reqsk_free(req);
drop:
DCCP_INC_STATS_BH(DCCP_MIB_ATTEMPTFAILS);
dcb->dccpd_reset_code = reset_code;
return -1;
}
......@@ -1005,7 +1028,6 @@ int dccp_v4_do_rcv(struct sock *sk, struct sk_buff *skb)
return 0;
reset:
DCCP_SKB_CB(skb)->dccpd_reset_code = DCCP_RESET_CODE_NO_CONNECTION;
dccp_v4_ctl_send_reset(skb);
discard:
kfree_skb(skb);
......@@ -1280,6 +1302,7 @@ static int dccp_v4_init_sock(struct sock *sk)
sk->sk_write_space = dccp_write_space;
dp->dccps_mss_cache = 536;
dp->dccps_role = DCCP_ROLE_UNDEFINED;
dp->dccps_service = DCCP_SERVICE_INVALID_VALUE;
return 0;
}
......@@ -1301,6 +1324,11 @@ static int dccp_v4_destroy_sock(struct sock *sk)
if (inet_csk(sk)->icsk_bind_hash != NULL)
inet_put_port(&dccp_hashinfo, sk);
if (dp->dccps_service_list != NULL) {
kfree(dp->dccps_service_list);
dp->dccps_service_list = NULL;
}
ccid_hc_rx_exit(dp->dccps_hc_rx_ccid, sk);
ccid_hc_tx_exit(dp->dccps_hc_tx_ccid, sk);
dccp_ackpkts_free(dp->dccps_hc_rx_ackpkts);
......
......@@ -93,9 +93,11 @@ struct sock *dccp_create_openreq_child(struct sock *sk,
struct inet_connection_sock *newicsk = inet_csk(sk);
struct dccp_sock *newdp = dccp_sk(newsk);
newdp->dccps_role = DCCP_ROLE_SERVER;
newdp->dccps_hc_rx_ackpkts = NULL;
newdp->dccps_role = DCCP_ROLE_SERVER;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
newdp->dccps_service_list = NULL;
newdp->dccps_service = dreq->dreq_service;
newicsk->icsk_rto = DCCP_TIMEOUT_INIT;
do_gettimeofday(&newdp->dccps_epoch);
if (newdp->dccps_options.dccpo_send_ack_vector) {
......
......@@ -85,7 +85,7 @@ int dccp_transmit_skb(struct sock *sk, struct sk_buff *skb)
switch (dcb->dccpd_type) {
case DCCP_PKT_REQUEST:
dccp_hdr_request(skb)->dccph_req_service =
dcb->dccpd_service;
dp->dccps_service;
break;
case DCCP_PKT_RESET:
dccp_hdr_reset(skb)->dccph_reset_code =
......@@ -270,6 +270,7 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
struct request_sock *req)
{
struct dccp_hdr *dh;
struct dccp_request_sock *dreq;
const int dccp_header_size = sizeof(struct dccp_hdr) +
sizeof(struct dccp_hdr_ext) +
sizeof(struct dccp_hdr_response);
......@@ -285,8 +286,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
skb->dst = dst_clone(dst);
skb->csum = 0;
dreq = dccp_rsk(req);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_RESPONSE;
DCCP_SKB_CB(skb)->dccpd_seq = dccp_rsk(req)->dreq_iss;
DCCP_SKB_CB(skb)->dccpd_seq = dreq->dreq_iss;
dccp_insert_options(sk, skb);
skb->h.raw = skb_push(skb, dccp_header_size);
......@@ -300,8 +302,9 @@ struct sk_buff *dccp_make_response(struct sock *sk, struct dst_entry *dst,
DCCP_SKB_CB(skb)->dccpd_opt_len) / 4;
dh->dccph_type = DCCP_PKT_RESPONSE;
dh->dccph_x = 1;
dccp_hdr_set_seq(dh, dccp_rsk(req)->dreq_iss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dccp_rsk(req)->dreq_isr);
dccp_hdr_set_seq(dh, dreq->dreq_iss);
dccp_hdr_set_ack(dccp_hdr_ack_bits(skb), dreq->dreq_isr);
dccp_hdr_response(skb)->dccph_resp_service = dreq->dreq_service;
dh->dccph_checksum = dccp_v4_checksum(skb, inet_rsk(req)->loc_addr,
inet_rsk(req)->rmt_addr);
......@@ -397,9 +400,6 @@ int dccp_connect(struct sock *sk)
skb_reserve(skb, MAX_DCCP_HEADER);
DCCP_SKB_CB(skb)->dccpd_type = DCCP_PKT_REQUEST;
/* FIXME: set service to something meaningful, coming
* from userspace*/
DCCP_SKB_CB(skb)->dccpd_service = 0;
skb->csum = 0;
skb_set_owner_w(skb, sk);
......
......@@ -94,7 +94,15 @@ EXPORT_SYMBOL_GPL(dccp_state_name);
static inline int dccp_listen_start(struct sock *sk)
{
dccp_sk(sk)->dccps_role = DCCP_ROLE_LISTEN;
struct dccp_sock *dp = dccp_sk(sk);
dp->dccps_role = DCCP_ROLE_LISTEN;
/*
* Apps need to use setsockopt(DCCP_SOCKOPT_SERVICE)
* before calling listen()
*/
if (dccp_service_not_initialized(sk))
return -EPROTO;
return inet_csk_listen_start(sk, TCP_SYNQ_HSIZE);
}
......@@ -202,6 +210,42 @@ int dccp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return -ENOIOCTLCMD;
}
static int dccp_setsockopt_service(struct sock *sk, const u32 service,
char __user *optval, int optlen)
{
struct dccp_sock *dp = dccp_sk(sk);
struct dccp_service_list *sl = NULL;
if (service == DCCP_SERVICE_INVALID_VALUE ||
optlen > DCCP_SERVICE_LIST_MAX_LEN * sizeof(u32))
return -EINVAL;
if (optlen > sizeof(service)) {
sl = kmalloc(optlen, GFP_KERNEL);
if (sl == NULL)
return -ENOMEM;
sl->dccpsl_nr = optlen / sizeof(u32) - 1;
if (copy_from_user(sl->dccpsl_list,
optval + sizeof(service),
optlen - sizeof(service)) ||
dccp_list_has_service(sl, DCCP_SERVICE_INVALID_VALUE)) {
kfree(sl);
return -EFAULT;
}
}
lock_sock(sk);
dp->dccps_service = service;
if (dp->dccps_service_list != NULL)
kfree(dp->dccps_service_list);
dp->dccps_service_list = sl;
release_sock(sk);
return 0;
}
int dccp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
......@@ -218,8 +262,10 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
if (get_user(val, (int __user *)optval))
return -EFAULT;
lock_sock(sk);
if (optname == DCCP_SOCKOPT_SERVICE)
return dccp_setsockopt_service(sk, val, optval, optlen);
lock_sock(sk);
dp = dccp_sk(sk);
err = 0;
......@@ -236,6 +282,37 @@ int dccp_setsockopt(struct sock *sk, int level, int optname,
return err;
}
static int dccp_getsockopt_service(struct sock *sk, int len,
u32 __user *optval,
int __user *optlen)
{
const struct dccp_sock *dp = dccp_sk(sk);
const struct dccp_service_list *sl;
int err = -ENOENT, slen = 0, total_len = sizeof(u32);
lock_sock(sk);
if (dccp_service_not_initialized(sk))
goto out;
if ((sl = dp->dccps_service_list) != NULL) {
slen = sl->dccpsl_nr * sizeof(u32);
total_len += slen;
}
err = -EINVAL;
if (total_len > len)
goto out;
err = 0;
if (put_user(total_len, optlen) ||
put_user(dp->dccps_service, optval) ||
(sl != NULL && copy_to_user(optval + 1, sl->dccpsl_list, slen)))
err = -EFAULT;
out:
release_sock(sk);
return err;
}
int dccp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
......@@ -248,6 +325,10 @@ int dccp_getsockopt(struct sock *sk, int level, int optname,
if (get_user(len, optlen))
return -EFAULT;
if (optname == DCCP_SOCKOPT_SERVICE)
return dccp_getsockopt_service(sk, len,
(u32 __user *)optval, optlen);
len = min_t(unsigned int, len, sizeof(int));
if (len < 0)
return -EINVAL;
......
......@@ -8,6 +8,7 @@ menu "IP: Netfilter Configuration"
# connection tracking, helpers and protocols
config IP_NF_CONNTRACK
tristate "Connection tracking (required for masq/NAT)"
select NETFILTER_NETLINK if IP_NF_CONNTRACK_NETLINK!=n
---help---
Connection tracking keeps a record of what packets have passed
through your machine, in order to figure out how they are related
......@@ -51,6 +52,15 @@ config IP_NF_CONNTRACK_EVENTS
IF unsure, say `N'.
config IP_NF_CONNTRACK_NETLINK
tristate 'Connection tracking netlink interface'
depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
default IP_NF_CONNTRACK if NETFILTER_NETLINK=y
default m if NETFILTER_NETLINK=m
help
This option enables support for a netlink-based userspace interface
config IP_NF_CT_PROTO_SCTP
tristate 'SCTP protocol connection tracking support (EXPERIMENTAL)'
depends on IP_NF_CONNTRACK && EXPERIMENTAL
......@@ -774,11 +784,5 @@ config IP_NF_ARP_MANGLE
Allows altering the ARP packet payload: source and destination
hardware and network addresses.
config IP_NF_CONNTRACK_NETLINK
tristate 'Connection tracking netlink interface'
depends on IP_NF_CONNTRACK && NETFILTER_NETLINK
help
This option enables support for a netlink-based userspace interface
endmenu
......@@ -1143,7 +1143,10 @@ void ip_ct_refresh_acct(struct ip_conntrack *ct,
if (del_timer(&ct->timeout)) {
ct->timeout.expires = jiffies + extra_jiffies;
add_timer(&ct->timeout);
ip_conntrack_event_cache(IPCT_REFRESH, skb);
/* FIXME: We loose some REFRESH events if this function
* is called without an skb. I'll fix this later -HW */
if (skb)
ip_conntrack_event_cache(IPCT_REFRESH, skb);
}
ct_add_counters(ct, ctinfo, skb);
write_unlock_bh(&ip_conntrack_lock);
......
......@@ -13,6 +13,7 @@
#include <linux/config.h>
#include <linux/proc_fs.h>
#include <linux/jhash.h>
#include <linux/bitops.h>
#include <linux/skbuff.h>
#include <linux/ip.h>
#include <linux/tcp.h>
......@@ -30,7 +31,7 @@
#include <linux/netfilter_ipv4/ipt_CLUSTERIP.h>
#include <linux/netfilter_ipv4/ip_conntrack.h>
#define CLUSTERIP_VERSION "0.7"
#define CLUSTERIP_VERSION "0.8"
#define DEBUG_CLUSTERIP
......@@ -49,13 +50,14 @@ MODULE_DESCRIPTION("iptables target for CLUSTERIP");
struct clusterip_config {
struct list_head list; /* list of all configs */
atomic_t refcount; /* reference count */
atomic_t entries; /* number of entries/rules
* referencing us */
u_int32_t clusterip; /* the IP address */
u_int8_t clustermac[ETH_ALEN]; /* the MAC address */
struct net_device *dev; /* device */
u_int16_t num_total_nodes; /* total number of nodes */
u_int16_t num_local_nodes; /* number of local nodes */
u_int16_t local_nodes[CLUSTERIP_MAX_NODES]; /* node number array */
unsigned long local_nodes; /* node number array */
#ifdef CONFIG_PROC_FS
struct proc_dir_entry *pde; /* proc dir entry */
......@@ -66,8 +68,7 @@ struct clusterip_config {
static LIST_HEAD(clusterip_configs);
/* clusterip_lock protects the clusterip_configs list _AND_ the configurable
* data within all structurses (num_local_nodes, local_nodes[]) */
/* clusterip_lock protects the clusterip_configs list */
static DEFINE_RWLOCK(clusterip_lock);
#ifdef CONFIG_PROC_FS
......@@ -76,23 +77,48 @@ static struct proc_dir_entry *clusterip_procdir;
#endif
static inline void
clusterip_config_get(struct clusterip_config *c) {
clusterip_config_get(struct clusterip_config *c)
{
atomic_inc(&c->refcount);
}
static inline void
clusterip_config_put(struct clusterip_config *c) {
if (atomic_dec_and_test(&c->refcount)) {
clusterip_config_put(struct clusterip_config *c)
{
if (atomic_dec_and_test(&c->refcount))
kfree(c);
}
/* increase the count of entries(rules) using/referencing this config */
static inline void
clusterip_config_entry_get(struct clusterip_config *c)
{
atomic_inc(&c->entries);
}
/* decrease the count of entries using/referencing this config. If last
* entry(rule) is removed, remove the config from lists, but don't free it
* yet, since proc-files could still be holding references */
static inline void
clusterip_config_entry_put(struct clusterip_config *c)
{
if (atomic_dec_and_test(&c->entries)) {
write_lock_bh(&clusterip_lock);
list_del(&c->list);
write_unlock_bh(&clusterip_lock);
dev_mc_delete(c->dev, c->clustermac, ETH_ALEN, 0);
dev_put(c->dev);
kfree(c);
/* In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own,
* so it's safe to remove the entry even if it's in use. */
#ifdef CONFIG_PROC_FS
remove_proc_entry(c->pde->name, c->pde->parent);
#endif
}
}
static struct clusterip_config *
__clusterip_config_find(u_int32_t clusterip)
{
......@@ -111,7 +137,7 @@ __clusterip_config_find(u_int32_t clusterip)
}
static inline struct clusterip_config *
clusterip_config_find_get(u_int32_t clusterip)
clusterip_config_find_get(u_int32_t clusterip, int entry)
{
struct clusterip_config *c;
......@@ -122,11 +148,24 @@ clusterip_config_find_get(u_int32_t clusterip)
return NULL;
}
atomic_inc(&c->refcount);
if (entry)
atomic_inc(&c->entries);
read_unlock_bh(&clusterip_lock);
return c;
}
static void
clusterip_config_init_nodelist(struct clusterip_config *c,
const struct ipt_clusterip_tgt_info *i)
{
int n;
for (n = 0; n < i->num_local_nodes; n++) {
set_bit(i->local_nodes[n] - 1, &c->local_nodes);
}
}
static struct clusterip_config *
clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
struct net_device *dev)
......@@ -143,11 +182,11 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
c->clusterip = ip;
memcpy(&c->clustermac, &i->clustermac, ETH_ALEN);
c->num_total_nodes = i->num_total_nodes;
c->num_local_nodes = i->num_local_nodes;
memcpy(&c->local_nodes, &i->local_nodes, sizeof(c->local_nodes));
clusterip_config_init_nodelist(c, i);
c->hash_mode = i->hash_mode;
c->hash_initval = i->hash_initval;
atomic_set(&c->refcount, 1);
atomic_set(&c->entries, 1);
#ifdef CONFIG_PROC_FS
/* create proc dir entry */
......@@ -171,53 +210,28 @@ clusterip_config_init(struct ipt_clusterip_tgt_info *i, u_int32_t ip,
static int
clusterip_add_node(struct clusterip_config *c, u_int16_t nodenum)
{
int i;
write_lock_bh(&clusterip_lock);
if (c->num_local_nodes >= CLUSTERIP_MAX_NODES
|| nodenum > CLUSTERIP_MAX_NODES) {
write_unlock_bh(&clusterip_lock);
if (nodenum == 0 ||
nodenum > c->num_total_nodes)
return 1;
}
/* check if we alrady have this number in our array */
for (i = 0; i < c->num_local_nodes; i++) {
if (c->local_nodes[i] == nodenum) {
write_unlock_bh(&clusterip_lock);
return 1;
}
}
c->local_nodes[c->num_local_nodes++] = nodenum;
/* check if we already have this number in our bitfield */
if (test_and_set_bit(nodenum - 1, &c->local_nodes))
return 1;
write_unlock_bh(&clusterip_lock);
return 0;
}
static int
clusterip_del_node(struct clusterip_config *c, u_int16_t nodenum)
{
int i;
write_lock_bh(&clusterip_lock);
if (c->num_local_nodes <= 1 || nodenum > CLUSTERIP_MAX_NODES) {
write_unlock_bh(&clusterip_lock);
if (nodenum == 0 ||
nodenum > c->num_total_nodes)
return 1;
}
for (i = 0; i < c->num_local_nodes; i++) {
if (c->local_nodes[i] == nodenum) {
int size = sizeof(u_int16_t)*(c->num_local_nodes-(i+1));
memmove(&c->local_nodes[i], &c->local_nodes[i+1], size);
c->num_local_nodes--;
write_unlock_bh(&clusterip_lock);
return 0;
}
}
if (test_and_clear_bit(nodenum - 1, &c->local_nodes))
return 0;
write_unlock_bh(&clusterip_lock);
return 1;
}
......@@ -285,25 +299,7 @@ clusterip_hashfn(struct sk_buff *skb, struct clusterip_config *config)
static inline int
clusterip_responsible(struct clusterip_config *config, u_int32_t hash)
{
int i;
read_lock_bh(&clusterip_lock);
if (config->num_local_nodes == 0) {
read_unlock_bh(&clusterip_lock);
return 0;
}
for (i = 0; i < config->num_local_nodes; i++) {
if (config->local_nodes[i] == hash) {
read_unlock_bh(&clusterip_lock);
return 1;
}
}
read_unlock_bh(&clusterip_lock);
return 0;
return test_bit(hash - 1, &config->local_nodes);
}
/***********************************************************************
......@@ -415,8 +411,26 @@ checkentry(const char *tablename,
/* FIXME: further sanity checks */
config = clusterip_config_find_get(e->ip.dst.s_addr);
if (!config) {
config = clusterip_config_find_get(e->ip.dst.s_addr, 1);
if (config) {
if (cipinfo->config != NULL) {
/* Case A: This is an entry that gets reloaded, since
* it still has a cipinfo->config pointer. Simply
* increase the entry refcount and return */
if (cipinfo->config != config) {
printk(KERN_ERR "CLUSTERIP: Reloaded entry "
"has invalid config pointer!\n");
return 0;
}
clusterip_config_entry_get(cipinfo->config);
} else {
/* Case B: This is a new rule referring to an existing
* clusterip config. */
cipinfo->config = config;
clusterip_config_entry_get(cipinfo->config);
}
} else {
/* Case C: This is a completely new clusterip config */
if (!(cipinfo->flags & CLUSTERIP_FLAG_NEW)) {
printk(KERN_WARNING "CLUSTERIP: no config found for %u.%u.%u.%u, need 'new'\n", NIPQUAD(e->ip.dst.s_addr));
return 0;
......@@ -443,10 +457,9 @@ checkentry(const char *tablename,
}
dev_mc_add(config->dev,config->clustermac, ETH_ALEN, 0);
}
cipinfo->config = config;
}
cipinfo->config = config;
return 1;
}
......@@ -455,13 +468,10 @@ static void destroy(void *matchinfo, unsigned int matchinfosize)
{
struct ipt_clusterip_tgt_info *cipinfo = matchinfo;
/* we first remove the proc entry and then drop the reference
* count. In case anyone still accesses the file, the open/close
* functions are also incrementing the refcount on their own */
#ifdef CONFIG_PROC_FS
remove_proc_entry(cipinfo->config->pde->name,
cipinfo->config->pde->parent);
#endif
/* if no more entries are referencing the config, remove it
* from the list and destroy the proc entry */
clusterip_config_entry_put(cipinfo->config);
clusterip_config_put(cipinfo->config);
}
......@@ -533,7 +543,7 @@ arp_mangle(unsigned int hook,
/* if there is no clusterip configuration for the arp reply's
* source ip, we don't want to mangle it */
c = clusterip_config_find_get(payload->src_ip);
c = clusterip_config_find_get(payload->src_ip, 0);
if (!c)
return NF_ACCEPT;
......@@ -574,56 +584,69 @@ static struct nf_hook_ops cip_arp_ops = {
#ifdef CONFIG_PROC_FS
struct clusterip_seq_position {
unsigned int pos; /* position */
unsigned int weight; /* number of bits set == size */
unsigned int bit; /* current bit */
unsigned long val; /* current value */
};
static void *clusterip_seq_start(struct seq_file *s, loff_t *pos)
{
struct proc_dir_entry *pde = s->private;
struct clusterip_config *c = pde->data;
unsigned int *nodeidx;
read_lock_bh(&clusterip_lock);
if (*pos >= c->num_local_nodes)
unsigned int weight;
u_int32_t local_nodes;
struct clusterip_seq_position *idx;
/* FIXME: possible race */
local_nodes = c->local_nodes;
weight = hweight32(local_nodes);
if (*pos >= weight)
return NULL;
nodeidx = kmalloc(sizeof(unsigned int), GFP_KERNEL);
if (!nodeidx)
idx = kmalloc(sizeof(struct clusterip_seq_position), GFP_KERNEL);
if (!idx)
return ERR_PTR(-ENOMEM);
*nodeidx = *pos;
return nodeidx;
idx->pos = *pos;
idx->weight = weight;
idx->bit = ffs(local_nodes);
idx->val = local_nodes;
clear_bit(idx->bit - 1, &idx->val);
return idx;
}
static void *clusterip_seq_next(struct seq_file *s, void *v, loff_t *pos)
{
struct proc_dir_entry *pde = s->private;
struct clusterip_config *c = pde->data;
unsigned int *nodeidx = (unsigned int *)v;
struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
*pos = ++(*nodeidx);
if (*pos >= c->num_local_nodes) {
*pos = ++idx->pos;
if (*pos >= idx->weight) {
kfree(v);
return NULL;
}
return nodeidx;
idx->bit = ffs(idx->val);
clear_bit(idx->bit - 1, &idx->val);
return idx;
}
static void clusterip_seq_stop(struct seq_file *s, void *v)
{
kfree(v);
read_unlock_bh(&clusterip_lock);
}
static int clusterip_seq_show(struct seq_file *s, void *v)
{
struct proc_dir_entry *pde = s->private;
struct clusterip_config *c = pde->data;
unsigned int *nodeidx = (unsigned int *)v;
struct clusterip_seq_position *idx = (struct clusterip_seq_position *)v;
if (*nodeidx != 0)
if (idx->pos != 0)
seq_putc(s, ',');
seq_printf(s, "%u", c->local_nodes[*nodeidx]);
if (*nodeidx == c->num_local_nodes-1)
seq_printf(s, "%u", idx->bit);
if (idx->pos == idx->weight - 1)
seq_putc(s, '\n');
return 0;
......
......@@ -1862,7 +1862,8 @@ asmlinkage long sys_recvmsg(int fd, struct msghdr __user *msg, unsigned int flag
if (err < 0)
goto out_freeiov;
}
err = __put_user(msg_sys.msg_flags, COMPAT_FLAGS(msg));
err = __put_user((msg_sys.msg_flags & ~MSG_CMSG_COMPAT),
COMPAT_FLAGS(msg));
if (err)
goto out_freeiov;
if (MSG_CMSG_COMPAT & flags)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册