提交 db8dac20 编写于 作者: D David S. Miller

[UDP]: Revert udplite and code split.

This reverts commit db1ed684 ("[IPV6]
UDP: Rename IPv6 UDP files."), commit
8be8af8f ("[IPV4] UDP: Move
IPv4-specific bits to other file.") and commit
e898d4db ("[UDP]: Allow users to
configure UDP-Lite.").

First, udplite is of such small cost, and it is a core protocol just
like TCP and normal UDP are.

We spent enormous amounts of effort to make udplite share as much code
with core UDP as possible.  All of that work is less valuable if we're
just going to slap a config option on udplite support.

It is also causing build failures, as reported on linux-next, showing
that the changeset was not tested very well.  In fact, this is the
second build failure resulting from the udplite change.

Finally, the config options provided was a bool, instead of a modular
option.  Meaning the udplite code does not even get build tested
by allmodconfig builds, and furthermore the user is not presented
with a reasonable modular build option which is particularly needed
by distribution vendors.
Signed-off-by: NDavid S. Miller <davem@davemloft.net>
上级 ba0fa459
......@@ -70,10 +70,8 @@ struct udp_sock {
#define UDPLITE_BIT 0x1 /* set by udplite proto init function */
#define UDPLITE_SEND_CC 0x2 /* set via udplite setsockopt */
#define UDPLITE_RECV_CC 0x4 /* set via udplite setsocktopt */
#ifdef CONFIG_IP_UDPLITE
__u8 pcflag; /* marks socket as UDP-Lite if > 0 */
__u8 unused[3];
#endif
/*
* For encapsulation sockets.
*/
......@@ -85,15 +83,7 @@ static inline struct udp_sock *udp_sk(const struct sock *sk)
return (struct udp_sock *)sk;
}
#ifdef CONFIG_IP_UDPLITE
#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag)
#define IS_PROTO_UDPLITE(__proto) ((__proto) == IPPROTO_UDPLITE)
#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP || (level) == SOL_UDPLITE)
#else
#define IS_UDPLITE(__sk) 0
#define IS_PROTO_UDPLITE(__proto) 0
#define IS_SOL_UDPFAMILY(level) ((level) == SOL_UDP)
#endif
#endif
......
......@@ -599,13 +599,8 @@ extern int tcp6_proc_init(void);
extern void tcp6_proc_exit(void);
extern int udp6_proc_init(void);
extern void udp6_proc_exit(void);
#ifdef CONFIG_IP_UDPLITE
extern int udplite6_proc_init(void);
extern void udplite6_proc_exit(void);
#else
static inline int udplite6_proc_init(void) { return 0; }
static inline void udplite6_proc_exit(void) { }
#endif
extern int ipv6_misc_proc_init(void);
extern void ipv6_misc_proc_exit(void);
extern int snmp6_register_dev(struct inet6_dev *idev);
......
......@@ -27,13 +27,8 @@ extern int rawv6_init(void);
extern void rawv6_exit(void);
extern int udpv6_init(void);
extern void udpv6_exit(void);
#ifdef CONFIG_IP_UDPLITE
extern int udplitev6_init(void);
extern void udplitev6_exit(void);
#else
static inline int udplitev6_init(void) { return 0; }
static inline void udplitev6_exit(void) { }
#endif
extern int tcpv6_init(void);
extern void tcpv6_exit(void);
......
......@@ -25,9 +25,7 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset,
/* Designate sk as UDP-Lite socket */
static inline int udplite_sk_init(struct sock *sk)
{
#ifdef CONFIG_IP_UDPLITE
udp_sk(sk)->pcflag = UDPLITE_BIT;
#endif
return 0;
}
......@@ -71,7 +69,7 @@ static inline int udplite_checksum_init(struct sk_buff *skb, struct udphdr *uh)
static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
{
int cscov = up->len;
#ifdef CONFIG_IP_UDPLITE
/*
* Sender has set `partial coverage' option on UDP-Lite socket
*/
......@@ -95,15 +93,13 @@ static inline int udplite_sender_cscov(struct udp_sock *up, struct udphdr *uh)
* illegal, we fall back to the defaults here.
*/
}
#endif
return cscov;
}
static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
{
__wsum csum = 0;
#ifdef CONFIG_IP_UDPLITE
int cscov = udplite_sender_cscov(udp_sk(sk), udp_hdr(skb));
__wsum csum = 0;
skb->ip_summed = CHECKSUM_NONE; /* no HW support for checksumming */
......@@ -116,7 +112,6 @@ static inline __wsum udplite_csum_outgoing(struct sock *sk, struct sk_buff *skb)
if ((cscov -= len) <= 0)
break;
}
#endif
return csum;
}
......
......@@ -632,15 +632,5 @@ config TCP_MD5SIG
If unsure, say N.
config IP_UDPLITE
bool "IP: UDP-Lite Protocol (RFC 3828)"
default n
---help---
UDP-Lite (RFC 3828) is a UDP-like protocol with variable-length
checksum. Read <file:Documentation/networking/udplite.txt> for
details.
If unsure, say N.
source "net/ipv4/ipvs/Kconfig"
......@@ -8,7 +8,7 @@ obj-y := route.o inetpeer.o protocol.o \
inet_timewait_sock.o inet_connection_sock.o \
tcp.o tcp_input.o tcp_output.o tcp_timer.o tcp_ipv4.o \
tcp_minisocks.o tcp_cong.o \
datagram.o raw.o udp.o udp_ipv4.o \
datagram.o raw.o udp.o udplite.o \
arp.o icmp.o devinet.o af_inet.o igmp.o \
fib_frontend.o fib_semantics.o \
inet_fragment.o
......@@ -49,7 +49,6 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o
obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o
obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o
obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o
obj-$(CONFIG_IP_UDPLITE) += udplite_ipv4.o
obj-$(CONFIG_NETLABEL) += cipso_ipv4.o
obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \
......
......@@ -1317,18 +1317,15 @@ static int __init init_ipv4_mibs(void)
if (snmp_mib_init((void **)udp_statistics,
sizeof(struct udp_mib)) < 0)
goto err_udp_mib;
#ifdef CONFIG_IP_UDPLITE
if (snmp_mib_init((void **)udplite_statistics,
sizeof(struct udp_mib)) < 0)
goto err_udplite_mib;
#endif
tcp_mib_init();
return 0;
#ifdef CONFIG_IP_UDPLITE
err_udplite_mib:
#endif
snmp_mib_free((void **)udp_statistics);
err_udp_mib:
snmp_mib_free((void **)tcp_statistics);
......@@ -1426,10 +1423,8 @@ static int __init inet_init(void)
/* Setup UDP memory threshold */
udp_init();
#ifdef CONFIG_IP_UDPLITE
/* Add UDP-Lite (RFC 3828) */
udplite4_register();
#endif
/*
* Set the ICMP layer up
......
......@@ -59,9 +59,7 @@ static int sockstat_seq_show(struct seq_file *seq, void *v)
atomic_read(&tcp_memory_allocated));
seq_printf(seq, "UDP: inuse %d mem %d\n", sock_prot_inuse_get(&udp_prot),
atomic_read(&udp_memory_allocated));
#ifdef CONFIG_IP_UDPLITE
seq_printf(seq, "UDPLITE: inuse %d\n", sock_prot_inuse_get(&udplite_prot));
#endif
seq_printf(seq, "RAW: inuse %d\n", sock_prot_inuse_get(&raw_prot));
seq_printf(seq, "FRAG: inuse %d memory %d\n",
ip_frag_nqueues(&init_net), ip_frag_mem(&init_net));
......@@ -351,7 +349,6 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
snmp_fold_field((void **)udp_statistics,
snmp4_udp_list[i].entry));
#ifdef CONFIG_IP_UDPLITE
/* the UDP and UDP-Lite MIBs are the same */
seq_puts(seq, "\nUdpLite:");
for (i = 0; snmp4_udp_list[i].name != NULL; i++)
......@@ -362,7 +359,7 @@ static int snmp_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, " %lu",
snmp_fold_field((void **)udplite_statistics,
snmp4_udp_list[i].entry));
#endif
seq_putc(seq, '\n');
return 0;
}
......
......@@ -246,6 +246,553 @@ int udp_get_port(struct sock *sk, unsigned short snum,
return __udp_lib_get_port(sk, snum, udp_hash, scmp);
}
int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
return ( !ipv6_only_sock(sk2) &&
(!inet1->rcv_saddr || !inet2->rcv_saddr ||
inet1->rcv_saddr == inet2->rcv_saddr ));
}
static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport,
int dif, struct hlist_head udptable[])
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
unsigned short hnum = ntohs(dport);
int badness = -1;
read_lock(&udp_hash_lock);
sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
if (sk->sk_net == net && sk->sk_hash == hnum &&
!ipv6_only_sock(sk)) {
int score = (sk->sk_family == PF_INET ? 1 : 0);
if (inet->rcv_saddr) {
if (inet->rcv_saddr != daddr)
continue;
score+=2;
}
if (inet->daddr) {
if (inet->daddr != saddr)
continue;
score+=2;
}
if (inet->dport) {
if (inet->dport != sport)
continue;
score+=2;
}
if (sk->sk_bound_dev_if) {
if (sk->sk_bound_dev_if != dif)
continue;
score+=2;
}
if (score == 9) {
result = sk;
break;
} else if (score > badness) {
result = sk;
badness = score;
}
}
}
if (result)
sock_hold(result);
read_unlock(&udp_hash_lock);
return result;
}
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
struct hlist_node *node;
struct sock *s = sk;
unsigned short hnum = ntohs(loc_port);
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
if (s->sk_hash != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
ipv6_only_sock(s) ||
(s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
continue;
if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
continue;
goto found;
}
s = NULL;
found:
return s;
}
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
* be closed and the error returned to the user. If err > 0
* it's just the icmp type << 8 | icmp code.
* Header points to the ip header of the error packet. We move
* on past this. Then (as it used to claim before adjustment)
* header points to the first 8 bytes of the udp header. We need
* to find the appropriate port.
*/
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
{
struct inet_sock *inet;
struct iphdr *iph = (struct iphdr*)skb->data;
struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
int harderr;
int err;
sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex, udptable);
if (sk == NULL) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return; /* No socket for error */
}
err = 0;
harderr = 0;
inet = inet_sk(sk);
switch (type) {
default:
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
break;
case ICMP_SOURCE_QUENCH:
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
harderr = 1;
break;
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
if (inet->pmtudisc != IP_PMTUDISC_DONT) {
err = EMSGSIZE;
harderr = 1;
break;
}
goto out;
}
err = EHOSTUNREACH;
if (code <= NR_ICMP_UNREACH) {
harderr = icmp_err_convert[code].fatal;
err = icmp_err_convert[code].errno;
}
break;
}
/*
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
if (!inet->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
}
sk->sk_err = err;
sk->sk_error_report(sk);
out:
sock_put(sk);
}
void udp_err(struct sk_buff *skb, u32 info)
{
__udp4_lib_err(skb, info, udp_hash);
}
/*
* Throw away all pending data and cancel the corking. Socket is locked.
*/
static void udp_flush_pending_frames(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
if (up->pending) {
up->len = 0;
up->pending = 0;
ip_flush_pending_frames(sk);
}
}
/**
* udp4_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
* @skb: sk_buff containing the filled-in UDP header
* (checksum field must be zeroed out)
*/
static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, int len )
{
unsigned int offset;
struct udphdr *uh = udp_hdr(skb);
__wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) {
/*
* Only one fragment on the socket.
*/
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
} else {
/*
* HW-checksum won't work as there are two or more
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
skb->ip_summed = CHECKSUM_NONE;
skb_queue_walk(&sk->sk_write_queue, skb) {
csum = csum_add(csum, skb->csum);
}
uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
}
/*
* Push out all pending data as one UDP datagram. Socket is locked.
*/
static int udp_push_pending_frames(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi *fl = &inet->cork.fl;
struct sk_buff *skb;
struct udphdr *uh;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0;
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
/*
* Create a UDP header
*/
uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
uh->check = 0;
if (is_udplite) /* UDP-Lite */
csum = udplite_csum_outgoing(sk, skb);
else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
goto send;
} else /* `normal' UDP */
csum = udp_csum_outgoing(sk, skb);
/* add protocol-dependent pseudo-header */
uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
sk->sk_protocol, csum );
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
send:
err = ip_push_pending_frames(sk);
out:
up->len = 0;
up->pending = 0;
if (!err)
UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct udp_sock *up = udp_sk(sk);
int ulen = len;
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
int free = 0;
int connected = 0;
__be32 daddr, faddr, saddr;
__be16 dport;
u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
if (len > 0xFFFF)
return -EMSGSIZE;
/*
* Check the flags.
*/
if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
return -EOPNOTSUPP;
ipc.opt = NULL;
if (up->pending) {
/*
* There are pending frames.
* The socket lock must be held while it's corked.
*/
lock_sock(sk);
if (likely(up->pending)) {
if (unlikely(up->pending != AF_INET)) {
release_sock(sk);
return -EINVAL;
}
goto do_append_data;
}
release_sock(sk);
}
ulen += sizeof(struct udphdr);
/*
* Get and verify the address.
*/
if (msg->msg_name) {
struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET) {
if (usin->sin_family != AF_UNSPEC)
return -EAFNOSUPPORT;
}
daddr = usin->sin_addr.s_addr;
dport = usin->sin_port;
if (dport == 0)
return -EINVAL;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = inet->daddr;
dport = inet->dport;
/* Open fast path for connected socket.
Route will not be used, if at least one option is set.
*/
connected = 1;
}
ipc.addr = inet->saddr;
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
err = ip_cmsg_send(msg, &ipc);
if (err)
return err;
if (ipc.opt)
free = 1;
connected = 0;
}
if (!ipc.opt)
ipc.opt = inet->opt;
saddr = ipc.addr;
ipc.addr = faddr = daddr;
if (ipc.opt && ipc.opt->srr) {
if (!daddr)
return -EINVAL;
faddr = ipc.opt->faddr;
connected = 0;
}
tos = RT_TOS(inet->tos);
if (sock_flag(sk, SOCK_LOCALROUTE) ||
(msg->msg_flags & MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->is_strictroute)) {
tos |= RTO_ONLINK;
connected = 0;
}
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
connected = 0;
}
if (connected)
rt = (struct rtable*)sk_dst_check(sk, 0);
if (rt == NULL) {
struct flowi fl = { .oif = ipc.oif,
.nl_u = { .ip4_u =
{ .daddr = faddr,
.saddr = saddr,
.tos = tos } },
.proto = sk->sk_protocol,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
if (err) {
if (err == -ENETUNREACH)
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
goto out;
}
err = -EACCES;
if ((rt->rt_flags & RTCF_BROADCAST) &&
!sock_flag(sk, SOCK_BROADCAST))
goto out;
if (connected)
sk_dst_set(sk, dst_clone(&rt->u.dst));
}
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
saddr = rt->rt_src;
if (!ipc.addr)
daddr = ipc.addr = rt->rt_dst;
lock_sock(sk);
if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */
/* ... which is an evident application bug. --ANK */
release_sock(sk);
LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
err = -EINVAL;
goto out;
}
/*
* Now cork the socket to pend data.
*/
inet->cork.fl.fl4_dst = daddr;
inet->cork.fl.fl_ip_dport = dport;
inet->cork.fl.fl4_src = saddr;
inet->cork.fl.fl_ip_sport = inet->sport;
up->pending = AF_INET;
do_append_data:
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), &ipc, rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_flush_pending_frames(sk);
else if (!corkreq)
err = udp_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
release_sock(sk);
out:
ip_rt_put(rt);
if (free)
kfree(ipc.opt);
if (!err)
return len;
/*
* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
* ENOBUFS might not be good (it's not tunable per se), but otherwise
* we don't have a good statistic (IpOutDiscards but it can be too many
* things). We could add another new stat but at least for now that
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
do_confirm:
dst_confirm(&rt->u.dst);
if (!(msg->msg_flags&MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
goto out;
}
int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
struct udp_sock *up = udp_sk(sk);
int ret;
if (!up->pending) {
struct msghdr msg = { .msg_flags = flags|MSG_MORE };
/* Call udp_sendmsg to specify destination address which
* sendpage interface can't pass.
* This will succeed only when the socket is connected.
*/
ret = udp_sendmsg(NULL, sk, &msg, 0);
if (ret < 0)
return ret;
}
lock_sock(sk);
if (unlikely(!up->pending)) {
release_sock(sk);
LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
return -EINVAL;
}
ret = ip_append_page(sk, page, offset, size, flags);
if (ret == -EOPNOTSUPP) {
release_sock(sk);
return sock_no_sendpage(sk->sk_socket, page, offset,
size, flags);
}
if (ret < 0) {
udp_flush_pending_frames(sk);
goto out;
}
up->len += size;
if (!(up->corkflag || (flags&MSG_MORE)))
ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
out:
release_sock(sk);
return ret;
}
/*
* IOCTL requests applicable to the UDP protocol
*/
......@@ -286,6 +833,107 @@ int udp_ioctl(struct sock *sk, int cmd, unsigned long arg)
return 0;
}
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
unsigned int ulen, copied;
int peeked;
int err;
int is_udplite = IS_UDPLITE(sk);
/*
* Check any passed addresses
*/
if (addr_len)
*addr_len=sizeof(*sin);
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &err);
if (!skb)
goto out;
ulen = skb->len - sizeof(struct udphdr);
copied = len;
if (copied > ulen)
copied = ulen;
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
/*
* If checksum is needed at all, try to do it while copying the
* data. If the data is truncated, or if we only want a partial
* coverage checksum (UDP-Lite), do it before the copy.
*/
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
}
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied );
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
if (err)
goto out_free;
if (!peeked)
UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
sock_recv_timestamp(msg, sk, skb);
/* Copy the address. */
if (sin)
{
sin->sin_family = AF_INET;
sin->sin_port = udp_hdr(skb)->source;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
err = copied;
if (flags & MSG_TRUNC)
err = ulen;
out_free:
lock_sock(sk);
skb_free_datagram(sk, skb);
release_sock(sk);
out:
return err;
csum_copy_err:
lock_sock(sk);
if (!skb_kill_datagram(sk, skb, flags))
UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
release_sock(sk);
if (noblock)
return -EAGAIN;
goto try_again;
}
int udp_disconnect(struct sock *sk, int flags)
{
struct inet_sock *inet = inet_sk(sk);
......@@ -308,6 +956,319 @@ int udp_disconnect(struct sock *sk, int flags)
return 0;
}
/* returns:
* -1: error
* 0: success
* >0: "udp encap" protocol resubmission
*
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int rc;
int is_udplite = IS_UDPLITE(sk);
/*
* Charge it to the socket, dropping if the queue is full.
*/
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto drop;
nf_reset(skb);
if (up->encap_type) {
/*
* This is an encapsulation socket so pass the skb to
* the socket's udp_encap_rcv() hook. Otherwise, just
* fall through and pass this up the UDP socket.
* up->encap_rcv() returns the following value:
* =0 if skb was successfully passed to the encap
* handler or was discarded by it.
* >0 if skb should be passed on to UDP.
* <0 if skb should be resubmitted as proto -N
*/
/* if we're overly short, let UDP handle it */
if (skb->len > sizeof(struct udphdr) &&
up->encap_rcv != NULL) {
int ret;
ret = (*up->encap_rcv)(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
is_udplite);
return -ret;
}
}
/* FALLTHROUGH -- it's a UDP Packet */
}
/*
* UDP-Lite specific tests, ignored on UDP sockets
*/
if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
/*
* MIB statistics other than incrementing the error count are
* disabled for the following two types of errors: these depend
* on the application settings, not on the functioning of the
* protocol stack as such.
*
* RFC 3828 here recommends (sec 3.3): "There should also be a
* way ... to ... at least let the receiving application block
* delivery of packets with coverage values less than a value
* provided by the application."
*/
if (up->pcrlen == 0) { /* full coverage was set */
LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
"%d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
/* The next case involves violating the min. coverage requested
* by the receiver. This is subtle: if receiver wants x and x is
* greater than the buffersize/MTU then receiver will complain
* that it wants x while sender emits packets of smaller size y.
* Therefore the above ...()->partial_cov statement is essential.
*/
if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
LIMIT_NETDEBUG(KERN_WARNING
"UDPLITE: coverage %d too small, need min %d\n",
UDP_SKB_CB(skb)->cscov, up->pcrlen);
goto drop;
}
}
if (sk->sk_filter) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
}
return 0;
drop:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
/*
* Multicasts and broadcasts go to each listener.
*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
struct udphdr *uh,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
{
struct sock *sk;
int dif;
read_lock(&udp_hash_lock);
sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
struct sock *sknext = NULL;
do {
struct sk_buff *skb1 = skb;
sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
if (skb1) {
int ret = 0;
bh_lock_sock_nested(sk);
if (!sock_owned_by_user(sk))
ret = udp_queue_rcv_skb(sk, skb1);
else
sk_add_backlog(sk, skb1);
bh_unlock_sock(sk);
if (ret > 0)
/* we should probably re-process instead
* of dropping packets here. */
kfree_skb(skb1);
}
sk = sknext;
} while (sknext);
} else
kfree_skb(skb);
read_unlock(&udp_hash_lock);
return 0;
}
/* Initialize UDP checksum. If exited with zero value (success),
* CHECKSUM_UNNECESSARY means, that no more checks are required.
* Otherwise, csum completion requires chacksumming packet body,
* including udp header and folding it to skb->csum.
*/
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
int proto)
{
const struct iphdr *iph;
int err;
UDP_SKB_CB(skb)->partial_cov = 0;
UDP_SKB_CB(skb)->cscov = skb->len;
if (proto == IPPROTO_UDPLITE) {
err = udplite_checksum_init(skb, uh);
if (err)
return err;
}
iph = ip_hdr(skb);
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (!skb_csum_unnecessary(skb))
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len, proto, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
return 0;
}
/*
* All we need to do is get the socket, and then do a checksum.
*/
int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
int proto)
{
struct sock *sk;
struct udphdr *uh = udp_hdr(skb);
unsigned short ulen;
struct rtable *rt = (struct rtable*)skb->dst;
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
/*
* Validate the packet.
*/
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto drop; /* No space for header. */
ulen = ntohs(uh->len);
if (ulen > skb->len)
goto short_packet;
if (proto == IPPROTO_UDP) {
/* UDP validates ulen. */
if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
goto short_packet;
uh = udp_hdr(skb);
}
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
int ret = 0;
bh_lock_sock_nested(sk);
if (!sock_owned_by_user(sk))
ret = udp_queue_rcv_skb(sk, skb);
else
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
sock_put(sk);
/* a return value > 0 means to resubmit the input, but
* it wants the return to be -protocol, or 0
*/
if (ret > 0)
return -ret;
return 0;
}
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
nf_reset(skb);
/* No socket. Drop packet silently, if checksum is wrong */
if (udp_lib_checksum_complete(skb))
goto csum_error;
UDP_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
* Hmm. We got an UDP packet to a port to which we
* don't wanna listen. Ignore it.
*/
kfree_skb(skb);
return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
ulen,
skb->len,
NIPQUAD(daddr),
ntohs(uh->dest));
goto drop;
csum_error:
/*
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
NIPQUAD(daddr),
ntohs(uh->dest),
ulen);
drop:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
int udp_rcv(struct sk_buff *skb)
{
return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
int udp_destroy_sock(struct sock *sk)
{
lock_sock(sk);
udp_flush_pending_frames(sk);
release_sock(sk);
return 0;
}
/*
* Socket option code for UDP
*/
......@@ -318,9 +1279,7 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
struct udp_sock *up = udp_sk(sk);
int val;
int err = 0;
#ifdef CONFIG_IP_UDPLITE
int is_udplite = IS_UDPLITE(sk);
#endif
if (optlen<sizeof(int))
return -EINVAL;
......@@ -356,7 +1315,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
}
break;
#ifdef CONFIG_IP_UDPLITE
/*
* UDP-Lite's partial checksum coverage (RFC 3828).
*/
......@@ -382,7 +1340,6 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
up->pcrlen = val;
up->pcflag |= UDPLITE_RECV_CC;
break;
#endif
default:
err = -ENOPROTOOPT;
......@@ -392,6 +1349,26 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
return err;
}
int udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
udp_push_pending_frames);
return ip_setsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
udp_push_pending_frames);
return compat_ip_setsockopt(sk, level, optname, optval, optlen);
}
#endif
int udp_lib_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
......@@ -436,6 +1413,23 @@ int udp_lib_getsockopt(struct sock *sk, int level, int optname,
return 0;
}
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_getsockopt(sk, level, optname, optval, optlen);
return ip_getsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_getsockopt(sk, level, optname, optval, optlen);
return compat_ip_getsockopt(sk, level, optname, optval, optlen);
}
#endif
/**
* udp_poll - wait for a UDP event.
* @file - file struct
......@@ -480,6 +1474,36 @@ unsigned int udp_poll(struct file *file, struct socket *sock, poll_table *wait)
}
DEFINE_PROTO_INUSE(udp)
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
.close = udp_lib_close,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
.backlog_rcv = udp_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
REF_PROTO_INUSE(udp)
};
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
......@@ -612,6 +1636,62 @@ void udp_proc_unregister(struct udp_seq_afinfo *afinfo)
proc_net_remove(&init_net, afinfo->name);
memset(afinfo->seq_fops, 0, sizeof(*afinfo->seq_fops));
}
/* ------------------------------------------------------------------------ */
static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
{
struct inet_sock *inet = inet_sk(sp);
__be32 dest = inet->daddr;
__be32 src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
__u16 srcp = ntohs(inet->sport);
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
bucket, src, srcp, dest, destp, sp->sk_state,
atomic_read(&sp->sk_wmem_alloc),
atomic_read(&sp->sk_rmem_alloc),
0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp);
}
int udp4_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-127s\n",
" sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode");
else {
char tmpbuf[129];
struct udp_iter_state *state = seq->private;
udp4_format_sock(v, tmpbuf, state->bucket);
seq_printf(seq, "%-127s\n", tmpbuf);
}
return 0;
}
/* ------------------------------------------------------------------------ */
static struct file_operations udp4_seq_fops;
static struct udp_seq_afinfo udp4_seq_afinfo = {
.owner = THIS_MODULE,
.name = "udp",
.family = AF_INET,
.hashtable = udp_hash,
.seq_show = udp4_seq_show,
.seq_fops = &udp4_seq_fops,
};
int __init udp4_proc_init(void)
{
return udp_proc_register(&udp4_seq_afinfo);
}
void udp4_proc_exit(void)
{
udp_proc_unregister(&udp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
void __init udp_init(void)
......@@ -638,6 +1718,8 @@ EXPORT_SYMBOL(udp_hash);
EXPORT_SYMBOL(udp_hash_lock);
EXPORT_SYMBOL(udp_ioctl);
EXPORT_SYMBOL(udp_get_port);
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
EXPORT_SYMBOL(udp_lib_getsockopt);
EXPORT_SYMBOL(udp_lib_setsockopt);
EXPORT_SYMBOL(udp_poll);
......
/*
* INET An implementation of the TCP/IP protocol suite for the LINUX
* operating system. INET is implemented using the BSD Socket
* interface as the means of communication with the user level.
*
* UDP for IPv4.
*
* For full credits, see net/ipv4/udp.c.
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU General Public License
* as published by the Free Software Foundation; either version
* 2 of the License, or (at your option) any later version.
*/
#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/ioctls.h>
#include <linux/bootmem.h>
#include <linux/types.h>
#include <linux/fcntl.h>
#include <linux/module.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/igmp.h>
#include <linux/in.h>
#include <linux/errno.h>
#include <linux/timer.h>
#include <linux/mm.h>
#include <linux/inet.h>
#include <linux/netdevice.h>
#include <net/tcp_states.h>
#include <linux/skbuff.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include <net/net_namespace.h>
#include <net/icmp.h>
#include <net/route.h>
#include <net/checksum.h>
#include <net/xfrm.h>
#include "udp_impl.h"
int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2)
{
struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2);
return ( !ipv6_only_sock(sk2) &&
(!inet1->rcv_saddr || !inet2->rcv_saddr ||
inet1->rcv_saddr == inet2->rcv_saddr ));
}
static inline int udp_v4_get_port(struct sock *sk, unsigned short snum)
{
return udp_get_port(sk, snum, ipv4_rcv_saddr_equal);
}
/* UDP is nearly always wildcards out the wazoo, it makes no sense to try
* harder than this. -DaveM
*/
static struct sock *__udp4_lib_lookup(struct net *net, __be32 saddr,
__be16 sport, __be32 daddr, __be16 dport,
int dif, struct hlist_head udptable[])
{
struct sock *sk, *result = NULL;
struct hlist_node *node;
unsigned short hnum = ntohs(dport);
int badness = -1;
read_lock(&udp_hash_lock);
sk_for_each(sk, node, &udptable[hnum & (UDP_HTABLE_SIZE - 1)]) {
struct inet_sock *inet = inet_sk(sk);
if (sk->sk_net == net && sk->sk_hash == hnum &&
!ipv6_only_sock(sk)) {
int score = (sk->sk_family == PF_INET ? 1 : 0);
if (inet->rcv_saddr) {
if (inet->rcv_saddr != daddr)
continue;
score+=2;
}
if (inet->daddr) {
if (inet->daddr != saddr)
continue;
score+=2;
}
if (inet->dport) {
if (inet->dport != sport)
continue;
score+=2;
}
if (sk->sk_bound_dev_if) {
if (sk->sk_bound_dev_if != dif)
continue;
score+=2;
}
if (score == 9) {
result = sk;
break;
} else if (score > badness) {
result = sk;
badness = score;
}
}
}
if (result)
sock_hold(result);
read_unlock(&udp_hash_lock);
return result;
}
static inline struct sock *udp_v4_mcast_next(struct sock *sk,
__be16 loc_port, __be32 loc_addr,
__be16 rmt_port, __be32 rmt_addr,
int dif)
{
struct hlist_node *node;
struct sock *s = sk;
unsigned short hnum = ntohs(loc_port);
sk_for_each_from(s, node) {
struct inet_sock *inet = inet_sk(s);
if (s->sk_hash != hnum ||
(inet->daddr && inet->daddr != rmt_addr) ||
(inet->dport != rmt_port && inet->dport) ||
(inet->rcv_saddr && inet->rcv_saddr != loc_addr) ||
ipv6_only_sock(s) ||
(s->sk_bound_dev_if && s->sk_bound_dev_if != dif))
continue;
if (!ip_mc_sf_allow(s, loc_addr, rmt_addr, dif))
continue;
goto found;
}
s = NULL;
found:
return s;
}
/*
* This routine is called by the ICMP module when it gets some
* sort of error condition. If err < 0 then the socket should
* be closed and the error returned to the user. If err > 0
* it's just the icmp type << 8 | icmp code.
* Header points to the ip header of the error packet. We move
* on past this. Then (as it used to claim before adjustment)
* header points to the first 8 bytes of the udp header. We need
* to find the appropriate port.
*/
void __udp4_lib_err(struct sk_buff *skb, u32 info, struct hlist_head udptable[])
{
struct inet_sock *inet;
struct iphdr *iph = (struct iphdr*)skb->data;
struct udphdr *uh = (struct udphdr*)(skb->data+(iph->ihl<<2));
const int type = icmp_hdr(skb)->type;
const int code = icmp_hdr(skb)->code;
struct sock *sk;
int harderr;
int err;
sk = __udp4_lib_lookup(skb->dev->nd_net, iph->daddr, uh->dest,
iph->saddr, uh->source, skb->dev->ifindex, udptable);
if (sk == NULL) {
ICMP_INC_STATS_BH(ICMP_MIB_INERRORS);
return; /* No socket for error */
}
err = 0;
harderr = 0;
inet = inet_sk(sk);
switch (type) {
default:
case ICMP_TIME_EXCEEDED:
err = EHOSTUNREACH;
break;
case ICMP_SOURCE_QUENCH:
goto out;
case ICMP_PARAMETERPROB:
err = EPROTO;
harderr = 1;
break;
case ICMP_DEST_UNREACH:
if (code == ICMP_FRAG_NEEDED) { /* Path MTU discovery */
if (inet->pmtudisc != IP_PMTUDISC_DONT) {
err = EMSGSIZE;
harderr = 1;
break;
}
goto out;
}
err = EHOSTUNREACH;
if (code <= NR_ICMP_UNREACH) {
harderr = icmp_err_convert[code].fatal;
err = icmp_err_convert[code].errno;
}
break;
}
/*
* RFC1122: OK. Passes ICMP errors back to application, as per
* 4.1.3.3.
*/
if (!inet->recverr) {
if (!harderr || sk->sk_state != TCP_ESTABLISHED)
goto out;
} else {
ip_icmp_error(sk, skb, err, uh->dest, info, (u8*)(uh+1));
}
sk->sk_err = err;
sk->sk_error_report(sk);
out:
sock_put(sk);
}
void udp_err(struct sk_buff *skb, u32 info)
{
__udp4_lib_err(skb, info, udp_hash);
}
/*
* Throw away all pending data and cancel the corking. Socket is locked.
*/
static void udp_flush_pending_frames(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
if (up->pending) {
up->len = 0;
up->pending = 0;
ip_flush_pending_frames(sk);
}
}
/**
* udp4_hwcsum_outgoing - handle outgoing HW checksumming
* @sk: socket we are sending on
* @skb: sk_buff containing the filled-in UDP header
* (checksum field must be zeroed out)
*/
static void udp4_hwcsum_outgoing(struct sock *sk, struct sk_buff *skb,
__be32 src, __be32 dst, int len )
{
unsigned int offset;
struct udphdr *uh = udp_hdr(skb);
__wsum csum = 0;
if (skb_queue_len(&sk->sk_write_queue) == 1) {
/*
* Only one fragment on the socket.
*/
skb->csum_start = skb_transport_header(skb) - skb->head;
skb->csum_offset = offsetof(struct udphdr, check);
uh->check = ~csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, 0);
} else {
/*
* HW-checksum won't work as there are two or more
* fragments on the socket so that all csums of sk_buffs
* should be together
*/
offset = skb_transport_offset(skb);
skb->csum = skb_checksum(skb, offset, skb->len - offset, 0);
skb->ip_summed = CHECKSUM_NONE;
skb_queue_walk(&sk->sk_write_queue, skb) {
csum = csum_add(csum, skb->csum);
}
uh->check = csum_tcpudp_magic(src, dst, len, IPPROTO_UDP, csum);
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
}
}
/*
* Push out all pending data as one UDP datagram. Socket is locked.
*/
static int udp_push_pending_frames(struct sock *sk)
{
struct udp_sock *up = udp_sk(sk);
struct inet_sock *inet = inet_sk(sk);
struct flowi *fl = &inet->cork.fl;
struct sk_buff *skb;
struct udphdr *uh;
int err = 0;
int is_udplite = IS_UDPLITE(sk);
__wsum csum = 0;
/* Grab the skbuff where UDP header space exists. */
if ((skb = skb_peek(&sk->sk_write_queue)) == NULL)
goto out;
/*
* Create a UDP header
*/
uh = udp_hdr(skb);
uh->source = fl->fl_ip_sport;
uh->dest = fl->fl_ip_dport;
uh->len = htons(up->len);
uh->check = 0;
if (is_udplite) /* UDP-Lite */
csum = udplite_csum_outgoing(sk, skb);
else if (sk->sk_no_check == UDP_CSUM_NOXMIT) { /* UDP csum disabled */
skb->ip_summed = CHECKSUM_NONE;
goto send;
} else if (skb->ip_summed == CHECKSUM_PARTIAL) { /* UDP hardware csum */
udp4_hwcsum_outgoing(sk, skb, fl->fl4_src,fl->fl4_dst, up->len);
goto send;
} else /* `normal' UDP */
csum = udp_csum_outgoing(sk, skb);
/* add protocol-dependent pseudo-header */
uh->check = csum_tcpudp_magic(fl->fl4_src, fl->fl4_dst, up->len,
sk->sk_protocol, csum );
if (uh->check == 0)
uh->check = CSUM_MANGLED_0;
send:
err = ip_push_pending_frames(sk);
out:
up->len = 0;
up->pending = 0;
if (!err)
UDP_INC_STATS_USER(UDP_MIB_OUTDATAGRAMS, is_udplite);
return err;
}
int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len)
{
struct inet_sock *inet = inet_sk(sk);
struct udp_sock *up = udp_sk(sk);
int ulen = len;
struct ipcm_cookie ipc;
struct rtable *rt = NULL;
int free = 0;
int connected = 0;
__be32 daddr, faddr, saddr;
__be16 dport;
u8 tos;
int err, is_udplite = IS_UDPLITE(sk);
int corkreq = up->corkflag || msg->msg_flags&MSG_MORE;
int (*getfrag)(void *, char *, int, int, int, struct sk_buff *);
if (len > 0xFFFF)
return -EMSGSIZE;
/*
* Check the flags.
*/
if (msg->msg_flags&MSG_OOB) /* Mirror BSD error message compatibility */
return -EOPNOTSUPP;
ipc.opt = NULL;
if (up->pending) {
/*
* There are pending frames.
* The socket lock must be held while it's corked.
*/
lock_sock(sk);
if (likely(up->pending)) {
if (unlikely(up->pending != AF_INET)) {
release_sock(sk);
return -EINVAL;
}
goto do_append_data;
}
release_sock(sk);
}
ulen += sizeof(struct udphdr);
/*
* Get and verify the address.
*/
if (msg->msg_name) {
struct sockaddr_in * usin = (struct sockaddr_in*)msg->msg_name;
if (msg->msg_namelen < sizeof(*usin))
return -EINVAL;
if (usin->sin_family != AF_INET) {
if (usin->sin_family != AF_UNSPEC)
return -EAFNOSUPPORT;
}
daddr = usin->sin_addr.s_addr;
dport = usin->sin_port;
if (dport == 0)
return -EINVAL;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
daddr = inet->daddr;
dport = inet->dport;
/* Open fast path for connected socket.
Route will not be used, if at least one option is set.
*/
connected = 1;
}
ipc.addr = inet->saddr;
ipc.oif = sk->sk_bound_dev_if;
if (msg->msg_controllen) {
err = ip_cmsg_send(msg, &ipc);
if (err)
return err;
if (ipc.opt)
free = 1;
connected = 0;
}
if (!ipc.opt)
ipc.opt = inet->opt;
saddr = ipc.addr;
ipc.addr = faddr = daddr;
if (ipc.opt && ipc.opt->srr) {
if (!daddr)
return -EINVAL;
faddr = ipc.opt->faddr;
connected = 0;
}
tos = RT_TOS(inet->tos);
if (sock_flag(sk, SOCK_LOCALROUTE) ||
(msg->msg_flags & MSG_DONTROUTE) ||
(ipc.opt && ipc.opt->is_strictroute)) {
tos |= RTO_ONLINK;
connected = 0;
}
if (ipv4_is_multicast(daddr)) {
if (!ipc.oif)
ipc.oif = inet->mc_index;
if (!saddr)
saddr = inet->mc_addr;
connected = 0;
}
if (connected)
rt = (struct rtable*)sk_dst_check(sk, 0);
if (rt == NULL) {
struct flowi fl = { .oif = ipc.oif,
.nl_u = { .ip4_u =
{ .daddr = faddr,
.saddr = saddr,
.tos = tos } },
.proto = sk->sk_protocol,
.uli_u = { .ports =
{ .sport = inet->sport,
.dport = dport } } };
security_sk_classify_flow(sk, &fl);
err = ip_route_output_flow(&init_net, &rt, &fl, sk, 1);
if (err) {
if (err == -ENETUNREACH)
IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES);
goto out;
}
err = -EACCES;
if ((rt->rt_flags & RTCF_BROADCAST) &&
!sock_flag(sk, SOCK_BROADCAST))
goto out;
if (connected)
sk_dst_set(sk, dst_clone(&rt->u.dst));
}
if (msg->msg_flags&MSG_CONFIRM)
goto do_confirm;
back_from_confirm:
saddr = rt->rt_src;
if (!ipc.addr)
daddr = ipc.addr = rt->rt_dst;
lock_sock(sk);
if (unlikely(up->pending)) {
/* The socket is already corked while preparing it. */
/* ... which is an evident application bug. --ANK */
release_sock(sk);
LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 2\n");
err = -EINVAL;
goto out;
}
/*
* Now cork the socket to pend data.
*/
inet->cork.fl.fl4_dst = daddr;
inet->cork.fl.fl_ip_dport = dport;
inet->cork.fl.fl4_src = saddr;
inet->cork.fl.fl_ip_sport = inet->sport;
up->pending = AF_INET;
do_append_data:
up->len += ulen;
getfrag = is_udplite ? udplite_getfrag : ip_generic_getfrag;
err = ip_append_data(sk, getfrag, msg->msg_iov, ulen,
sizeof(struct udphdr), &ipc, rt,
corkreq ? msg->msg_flags|MSG_MORE : msg->msg_flags);
if (err)
udp_flush_pending_frames(sk);
else if (!corkreq)
err = udp_push_pending_frames(sk);
else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
up->pending = 0;
release_sock(sk);
out:
ip_rt_put(rt);
if (free)
kfree(ipc.opt);
if (!err)
return len;
/*
* ENOBUFS = no kernel mem, SOCK_NOSPACE = no sndbuf space. Reporting
* ENOBUFS might not be good (it's not tunable per se), but otherwise
* we don't have a good statistic (IpOutDiscards but it can be too many
* things). We could add another new stat but at least for now that
* seems like overkill.
*/
if (err == -ENOBUFS || test_bit(SOCK_NOSPACE, &sk->sk_socket->flags)) {
UDP_INC_STATS_USER(UDP_MIB_SNDBUFERRORS, is_udplite);
}
return err;
do_confirm:
dst_confirm(&rt->u.dst);
if (!(msg->msg_flags&MSG_PROBE) || len)
goto back_from_confirm;
err = 0;
goto out;
}
int udp_sendpage(struct sock *sk, struct page *page, int offset,
size_t size, int flags)
{
struct udp_sock *up = udp_sk(sk);
int ret;
if (!up->pending) {
struct msghdr msg = { .msg_flags = flags|MSG_MORE };
/* Call udp_sendmsg to specify destination address which
* sendpage interface can't pass.
* This will succeed only when the socket is connected.
*/
ret = udp_sendmsg(NULL, sk, &msg, 0);
if (ret < 0)
return ret;
}
lock_sock(sk);
if (unlikely(!up->pending)) {
release_sock(sk);
LIMIT_NETDEBUG(KERN_DEBUG "udp cork app bug 3\n");
return -EINVAL;
}
ret = ip_append_page(sk, page, offset, size, flags);
if (ret == -EOPNOTSUPP) {
release_sock(sk);
return sock_no_sendpage(sk->sk_socket, page, offset,
size, flags);
}
if (ret < 0) {
udp_flush_pending_frames(sk);
goto out;
}
up->len += size;
if (!(up->corkflag || (flags&MSG_MORE)))
ret = udp_push_pending_frames(sk);
if (!ret)
ret = size;
out:
release_sock(sk);
return ret;
}
/*
* This should be easy, if there is something there we
* return it, otherwise we block.
*/
int udp_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
size_t len, int noblock, int flags, int *addr_len)
{
struct inet_sock *inet = inet_sk(sk);
struct sockaddr_in *sin = (struct sockaddr_in *)msg->msg_name;
struct sk_buff *skb;
unsigned int ulen, copied;
int peeked;
int err;
int is_udplite = IS_UDPLITE(sk);
/*
* Check any passed addresses
*/
if (addr_len)
*addr_len=sizeof(*sin);
if (flags & MSG_ERRQUEUE)
return ip_recv_error(sk, msg, len);
try_again:
skb = __skb_recv_datagram(sk, flags | (noblock ? MSG_DONTWAIT : 0),
&peeked, &err);
if (!skb)
goto out;
ulen = skb->len - sizeof(struct udphdr);
copied = len;
if (copied > ulen)
copied = ulen;
else if (copied < ulen)
msg->msg_flags |= MSG_TRUNC;
/*
* If checksum is needed at all, try to do it while copying the
* data. If the data is truncated, or if we only want a partial
* coverage checksum (UDP-Lite), do it before the copy.
*/
if (copied < ulen || UDP_SKB_CB(skb)->partial_cov) {
if (udp_lib_checksum_complete(skb))
goto csum_copy_err;
}
if (skb_csum_unnecessary(skb))
err = skb_copy_datagram_iovec(skb, sizeof(struct udphdr),
msg->msg_iov, copied );
else {
err = skb_copy_and_csum_datagram_iovec(skb, sizeof(struct udphdr), msg->msg_iov);
if (err == -EINVAL)
goto csum_copy_err;
}
if (err)
goto out_free;
if (!peeked)
UDP_INC_STATS_USER(UDP_MIB_INDATAGRAMS, is_udplite);
sock_recv_timestamp(msg, sk, skb);
/* Copy the address. */
if (sin)
{
sin->sin_family = AF_INET;
sin->sin_port = udp_hdr(skb)->source;
sin->sin_addr.s_addr = ip_hdr(skb)->saddr;
memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
}
if (inet->cmsg_flags)
ip_cmsg_recv(msg, skb);
err = copied;
if (flags & MSG_TRUNC)
err = ulen;
out_free:
lock_sock(sk);
skb_free_datagram(sk, skb);
release_sock(sk);
out:
return err;
csum_copy_err:
lock_sock(sk);
if (!skb_kill_datagram(sk, skb, flags))
UDP_INC_STATS_USER(UDP_MIB_INERRORS, is_udplite);
release_sock(sk);
if (noblock)
return -EAGAIN;
goto try_again;
}
/* returns:
* -1: error
* 0: success
* >0: "udp encap" protocol resubmission
*
* Note that in the success and error cases, the skb is assumed to
* have either been requeued or freed.
*/
int udp_queue_rcv_skb(struct sock * sk, struct sk_buff *skb)
{
struct udp_sock *up = udp_sk(sk);
int rc;
int is_udplite = IS_UDPLITE(sk);
/*
* Charge it to the socket, dropping if the queue is full.
*/
if (!xfrm4_policy_check(sk, XFRM_POLICY_IN, skb))
goto drop;
nf_reset(skb);
if (up->encap_type) {
/*
* This is an encapsulation socket so pass the skb to
* the socket's udp_encap_rcv() hook. Otherwise, just
* fall through and pass this up the UDP socket.
* up->encap_rcv() returns the following value:
* =0 if skb was successfully passed to the encap
* handler or was discarded by it.
* >0 if skb should be passed on to UDP.
* <0 if skb should be resubmitted as proto -N
*/
/* if we're overly short, let UDP handle it */
if (skb->len > sizeof(struct udphdr) &&
up->encap_rcv != NULL) {
int ret;
ret = (*up->encap_rcv)(sk, skb);
if (ret <= 0) {
UDP_INC_STATS_BH(UDP_MIB_INDATAGRAMS,
is_udplite);
return -ret;
}
}
/* FALLTHROUGH -- it's a UDP Packet */
}
/*
* UDP-Lite specific tests, ignored on UDP sockets
*/
if ((is_udplite & UDPLITE_RECV_CC) && UDP_SKB_CB(skb)->partial_cov) {
/*
* MIB statistics other than incrementing the error count are
* disabled for the following two types of errors: these depend
* on the application settings, not on the functioning of the
* protocol stack as such.
*
* RFC 3828 here recommends (sec 3.3): "There should also be a
* way ... to ... at least let the receiving application block
* delivery of packets with coverage values less than a value
* provided by the application."
*/
if (up->pcrlen == 0) { /* full coverage was set */
LIMIT_NETDEBUG(KERN_WARNING "UDPLITE: partial coverage "
"%d while full coverage %d requested\n",
UDP_SKB_CB(skb)->cscov, skb->len);
goto drop;
}
/* The next case involves violating the min. coverage requested
* by the receiver. This is subtle: if receiver wants x and x is
* greater than the buffersize/MTU then receiver will complain
* that it wants x while sender emits packets of smaller size y.
* Therefore the above ...()->partial_cov statement is essential.
*/
if (UDP_SKB_CB(skb)->cscov < up->pcrlen) {
LIMIT_NETDEBUG(KERN_WARNING
"UDPLITE: coverage %d too small, need min %d\n",
UDP_SKB_CB(skb)->cscov, up->pcrlen);
goto drop;
}
}
if (sk->sk_filter) {
if (udp_lib_checksum_complete(skb))
goto drop;
}
if ((rc = sock_queue_rcv_skb(sk,skb)) < 0) {
/* Note that an ENOMEM error is charged twice */
if (rc == -ENOMEM)
UDP_INC_STATS_BH(UDP_MIB_RCVBUFERRORS, is_udplite);
goto drop;
}
return 0;
drop:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, is_udplite);
kfree_skb(skb);
return -1;
}
/*
* Multicasts and broadcasts go to each listener.
*
* Note: called only from the BH handler context,
* so we don't need to lock the hashes.
*/
static int __udp4_lib_mcast_deliver(struct sk_buff *skb,
struct udphdr *uh,
__be32 saddr, __be32 daddr,
struct hlist_head udptable[])
{
struct sock *sk;
int dif;
read_lock(&udp_hash_lock);
sk = sk_head(&udptable[ntohs(uh->dest) & (UDP_HTABLE_SIZE - 1)]);
dif = skb->dev->ifindex;
sk = udp_v4_mcast_next(sk, uh->dest, daddr, uh->source, saddr, dif);
if (sk) {
struct sock *sknext = NULL;
do {
struct sk_buff *skb1 = skb;
sknext = udp_v4_mcast_next(sk_next(sk), uh->dest, daddr,
uh->source, saddr, dif);
if (sknext)
skb1 = skb_clone(skb, GFP_ATOMIC);
if (skb1) {
int ret = 0;
bh_lock_sock_nested(sk);
if (!sock_owned_by_user(sk))
ret = udp_queue_rcv_skb(sk, skb1);
else
sk_add_backlog(sk, skb1);
bh_unlock_sock(sk);
if (ret > 0)
/* we should probably re-process instead
* of dropping packets here. */
kfree_skb(skb1);
}
sk = sknext;
} while (sknext);
} else
kfree_skb(skb);
read_unlock(&udp_hash_lock);
return 0;
}
/* Initialize UDP checksum. If exited with zero value (success),
* CHECKSUM_UNNECESSARY means, that no more checks are required.
* Otherwise, csum completion requires chacksumming packet body,
* including udp header and folding it to skb->csum.
*/
static inline int udp4_csum_init(struct sk_buff *skb, struct udphdr *uh,
int proto)
{
const struct iphdr *iph;
int err;
UDP_SKB_CB(skb)->partial_cov = 0;
UDP_SKB_CB(skb)->cscov = skb->len;
if (IS_PROTO_UDPLITE(proto)) {
err = udplite_checksum_init(skb, uh);
if (err)
return err;
}
iph = ip_hdr(skb);
if (uh->check == 0) {
skb->ip_summed = CHECKSUM_UNNECESSARY;
} else if (skb->ip_summed == CHECKSUM_COMPLETE) {
if (!csum_tcpudp_magic(iph->saddr, iph->daddr, skb->len,
proto, skb->csum))
skb->ip_summed = CHECKSUM_UNNECESSARY;
}
if (!skb_csum_unnecessary(skb))
skb->csum = csum_tcpudp_nofold(iph->saddr, iph->daddr,
skb->len, proto, 0);
/* Probably, we should checksum udp header (it should be in cache
* in any case) and data in tiny packets (< rx copybreak).
*/
return 0;
}
/*
* All we need to do is get the socket, and then do a checksum.
*/
int __udp4_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
int proto)
{
struct sock *sk;
struct udphdr *uh = udp_hdr(skb);
unsigned short ulen;
struct rtable *rt = skb->rtable;
__be32 saddr = ip_hdr(skb)->saddr;
__be32 daddr = ip_hdr(skb)->daddr;
/*
* Validate the packet.
*/
if (!pskb_may_pull(skb, sizeof(struct udphdr)))
goto drop; /* No space for header. */
ulen = ntohs(uh->len);
if (ulen > skb->len)
goto short_packet;
if (IS_PROTO_UDPLITE(proto)) {
/* UDP validates ulen. */
if (ulen < sizeof(*uh) || pskb_trim_rcsum(skb, ulen))
goto short_packet;
uh = udp_hdr(skb);
}
if (udp4_csum_init(skb, uh, proto))
goto csum_error;
if (rt->rt_flags & (RTCF_BROADCAST|RTCF_MULTICAST))
return __udp4_lib_mcast_deliver(skb, uh, saddr, daddr, udptable);
sk = __udp4_lib_lookup(skb->dev->nd_net, saddr, uh->source, daddr,
uh->dest, inet_iif(skb), udptable);
if (sk != NULL) {
int ret = 0;
bh_lock_sock_nested(sk);
if (!sock_owned_by_user(sk))
ret = udp_queue_rcv_skb(sk, skb);
else
sk_add_backlog(sk, skb);
bh_unlock_sock(sk);
sock_put(sk);
/* a return value > 0 means to resubmit the input, but
* it wants the return to be -protocol, or 0
*/
if (ret > 0)
return -ret;
return 0;
}
if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb))
goto drop;
nf_reset(skb);
/* No socket. Drop packet silently, if checksum is wrong */
if (udp_lib_checksum_complete(skb))
goto csum_error;
UDP_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto));
icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0);
/*
* Hmm. We got an UDP packet to a port to which we
* don't wanna listen. Ignore it.
*/
kfree_skb(skb);
return 0;
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: short packet: From %u.%u.%u.%u:%u %d/%d to %u.%u.%u.%u:%u\n",
IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
ulen,
skb->len,
NIPQUAD(daddr),
ntohs(uh->dest));
goto drop;
csum_error:
/*
* RFC1122: OK. Discards the bad packet silently (as far as
* the network is concerned, anyway) as per 4.1.3.4 (MUST).
*/
LIMIT_NETDEBUG(KERN_DEBUG "UDP%s: bad checksum. From %d.%d.%d.%d:%d to %d.%d.%d.%d:%d ulen %d\n",
IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
NIPQUAD(saddr),
ntohs(uh->source),
NIPQUAD(daddr),
ntohs(uh->dest),
ulen);
drop:
UDP_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto));
kfree_skb(skb);
return 0;
}
int udp_rcv(struct sk_buff *skb)
{
return __udp4_lib_rcv(skb, udp_hash, IPPROTO_UDP);
}
int udp_destroy_sock(struct sock *sk)
{
lock_sock(sk);
udp_flush_pending_frames(sk);
release_sock(sk);
return 0;
}
int udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
if (IS_SOL_UDPFAMILY(level))
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
udp_push_pending_frames);
return ip_setsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
int compat_udp_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
if (IS_SOL_UDPFAMILY(level))
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
udp_push_pending_frames);
return compat_ip_setsockopt(sk, level, optname, optval, optlen);
}
#endif
int udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
if (IS_SOL_UDPFAMILY(level))
return udp_lib_getsockopt(sk, level, optname, optval, optlen);
return ip_getsockopt(sk, level, optname, optval, optlen);
}
#ifdef CONFIG_COMPAT
int compat_udp_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
if (IS_SOL_UDPFAMILY(level))
return udp_lib_getsockopt(sk, level, optname, optval, optlen);
return compat_ip_getsockopt(sk, level, optname, optval, optlen);
}
#endif
/* ------------------------------------------------------------------------ */
DEFINE_PROTO_INUSE(udp)
struct proto udp_prot = {
.name = "UDP",
.owner = THIS_MODULE,
.close = udp_lib_close,
.connect = ip4_datagram_connect,
.disconnect = udp_disconnect,
.ioctl = udp_ioctl,
.destroy = udp_destroy_sock,
.setsockopt = udp_setsockopt,
.getsockopt = udp_getsockopt,
.sendmsg = udp_sendmsg,
.recvmsg = udp_recvmsg,
.sendpage = udp_sendpage,
.backlog_rcv = udp_queue_rcv_skb,
.hash = udp_lib_hash,
.unhash = udp_lib_unhash,
.get_port = udp_v4_get_port,
.memory_allocated = &udp_memory_allocated,
.sysctl_mem = sysctl_udp_mem,
.sysctl_wmem = &sysctl_udp_wmem_min,
.sysctl_rmem = &sysctl_udp_rmem_min,
.obj_size = sizeof(struct udp_sock),
#ifdef CONFIG_COMPAT
.compat_setsockopt = compat_udp_setsockopt,
.compat_getsockopt = compat_udp_getsockopt,
#endif
REF_PROTO_INUSE(udp)
};
/* ------------------------------------------------------------------------ */
static void udp4_format_sock(struct sock *sp, char *tmpbuf, int bucket)
{
struct inet_sock *inet = inet_sk(sp);
__be32 dest = inet->daddr;
__be32 src = inet->rcv_saddr;
__u16 destp = ntohs(inet->dport);
__u16 srcp = ntohs(inet->sport);
sprintf(tmpbuf, "%4d: %08X:%04X %08X:%04X"
" %02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p",
bucket, src, srcp, dest, destp, sp->sk_state,
atomic_read(&sp->sk_wmem_alloc),
atomic_read(&sp->sk_rmem_alloc),
0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp),
atomic_read(&sp->sk_refcnt), sp);
}
int udp4_seq_show(struct seq_file *seq, void *v)
{
if (v == SEQ_START_TOKEN)
seq_printf(seq, "%-127s\n",
" sl local_address rem_address st tx_queue "
"rx_queue tr tm->when retrnsmt uid timeout "
"inode");
else {
char tmpbuf[129];
struct udp_iter_state *state = seq->private;
udp4_format_sock(v, tmpbuf, state->bucket);
seq_printf(seq, "%-127s\n", tmpbuf);
}
return 0;
}
/* ------------------------------------------------------------------------ */
#ifdef CONFIG_PROC_FS
static struct file_operations udp4_seq_fops;
static struct udp_seq_afinfo udp4_seq_afinfo = {
.owner = THIS_MODULE,
.name = "udp",
.family = AF_INET,
.hashtable = udp_hash,
.seq_show = udp4_seq_show,
.seq_fops = &udp4_seq_fops,
};
int __init udp4_proc_init(void)
{
return udp_proc_register(&udp4_seq_afinfo);
}
void udp4_proc_exit(void)
{
udp_proc_unregister(&udp4_seq_afinfo);
}
#endif /* CONFIG_PROC_FS */
EXPORT_SYMBOL(udp_prot);
EXPORT_SYMBOL(udp_sendmsg);
......@@ -6,7 +6,7 @@ obj-$(CONFIG_IPV6) += ipv6.o
ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o \
addrlabel.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp_ipv6.o \
route.o ip6_fib.o ipv6_sockglue.o ndisc.o udp.o udplite.o \
raw.o protocol.o icmp.o mcast.o reassembly.o tcp_ipv6.o \
exthdrs.o datagram.o ip6_flowlabel.o inet6_connection_sock.o
......@@ -17,7 +17,6 @@ ipv6-$(CONFIG_NETFILTER) += netfilter.o
ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o
ipv6-$(CONFIG_PROC_FS) += proc.o
ipv6-$(CONFIG_SYN_COOKIES) += syncookies.o
ipv6-$(CONFIG_IP_UDPLITE) += udplite_ipv6.o
ipv6-objs += $(ipv6-y)
......
......@@ -813,16 +813,12 @@ static int __init init_ipv6_mibs(void)
goto err_icmpmsg_mib;
if (snmp_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0)
goto err_udp_mib;
#ifdef CONFIG_IP_UDPLITE
if (snmp_mib_init((void **)udplite_stats_in6,
sizeof (struct udp_mib)) < 0)
goto err_udplite_mib;
#endif
return 0;
#ifdef CONFIG_IP_UDPLITE
err_udplite_mib:
#endif
snmp_mib_free((void **)udp_stats_in6);
err_udp_mib:
snmp_mib_free((void **)icmpv6msg_statistics);
......@@ -841,9 +837,7 @@ static void cleanup_ipv6_mibs(void)
snmp_mib_free((void **)icmpv6_statistics);
snmp_mib_free((void **)icmpv6msg_statistics);
snmp_mib_free((void **)udp_stats_in6);
#ifdef CONFIG_IP_UDPLITE
snmp_mib_free((void **)udplite_stats_in6);
#endif
}
static int inet6_net_init(struct net *net)
......@@ -888,11 +882,9 @@ static int __init inet6_init(void)
if (err)
goto out_unregister_tcp_proto;
#ifdef CONFIG_IP_UDPLITE
err = proto_register(&udplitev6_prot, 1);
if (err)
goto out_unregister_udp_proto;
#endif
err = proto_register(&rawv6_prot, 1);
if (err)
......@@ -1063,10 +1055,8 @@ static int __init inet6_init(void)
out_unregister_raw_proto:
proto_unregister(&rawv6_prot);
out_unregister_udplite_proto:
#ifdef CONFIG_IP_UDPLITE
proto_unregister(&udplitev6_prot);
out_unregister_udp_proto:
#endif
proto_unregister(&udpv6_prot);
out_unregister_tcp_proto:
proto_unregister(&tcpv6_prot);
......@@ -1085,9 +1075,7 @@ static void __exit inet6_exit(void)
ipv6_sysctl_unregister();
#endif
udpv6_exit();
#ifdef CONFIG_IP_UDPLITE
udplitev6_exit();
#endif
tcpv6_exit();
/* Cleanup code parts. */
......@@ -1117,9 +1105,7 @@ static void __exit inet6_exit(void)
unregister_pernet_subsys(&inet6_net_ops);
cleanup_ipv6_mibs();
proto_unregister(&rawv6_prot);
#ifdef CONFIG_IP_UDPLITE
proto_unregister(&udplitev6_prot);
#endif
proto_unregister(&udpv6_prot);
proto_unregister(&tcpv6_prot);
}
......
......@@ -127,9 +127,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
struct sk_buff *pktopt;
if (sk->sk_protocol != IPPROTO_UDP &&
#ifdef CONFIG_IP_UDPLITE
sk->sk_protocol != IPPROTO_UDPLITE &&
#endif
sk->sk_protocol != IPPROTO_TCP)
break;
......@@ -169,7 +167,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
} else {
struct proto *prot = &udp_prot;
if (IS_PROTO_UDPLITE(sk->sk_protocol))
if (sk->sk_protocol == IPPROTO_UDPLITE)
prot = &udplite_prot;
local_bh_disable();
sock_prot_inuse_add(sk->sk_prot, -1);
......@@ -734,9 +732,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
switch (optname) {
case IPV6_ADDRFORM:
if (sk->sk_protocol != IPPROTO_UDP &&
#ifdef CONFIG_IP_UDPLITE
sk->sk_protocol != IPPROTO_UDPLITE &&
#endif
sk->sk_protocol != IPPROTO_TCP)
return -EINVAL;
if (sk->sk_state != TCP_ESTABLISHED)
......
......@@ -39,10 +39,8 @@ static int sockstat6_seq_show(struct seq_file *seq, void *v)
sock_prot_inuse_get(&tcpv6_prot));
seq_printf(seq, "UDP6: inuse %d\n",
sock_prot_inuse_get(&udpv6_prot));
#ifdef CONFIG_IP_UDPLITE
seq_printf(seq, "UDPLITE6: inuse %d\n",
sock_prot_inuse_get(&udplitev6_prot));
#endif
seq_printf(seq, "RAW6: inuse %d\n",
sock_prot_inuse_get(&rawv6_prot));
seq_printf(seq, "FRAG6: inuse %d memory %d\n",
......@@ -113,7 +111,6 @@ static struct snmp_mib snmp6_udp6_list[] = {
SNMP_MIB_SENTINEL
};
#ifdef CONFIG_IP_UDPLITE
static struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6InDatagrams", UDP_MIB_INDATAGRAMS),
SNMP_MIB_ITEM("UdpLite6NoPorts", UDP_MIB_NOPORTS),
......@@ -121,7 +118,6 @@ static struct snmp_mib snmp6_udplite6_list[] = {
SNMP_MIB_ITEM("UdpLite6OutDatagrams", UDP_MIB_OUTDATAGRAMS),
SNMP_MIB_SENTINEL
};
#endif
static void snmp6_seq_show_icmpv6msg(struct seq_file *seq, void **mib)
{
......@@ -180,9 +176,7 @@ static int snmp6_seq_show(struct seq_file *seq, void *v)
snmp6_seq_show_item(seq, (void **)icmpv6_statistics, snmp6_icmp6_list);
snmp6_seq_show_icmpv6msg(seq, (void **)icmpv6msg_statistics);
snmp6_seq_show_item(seq, (void **)udp_stats_in6, snmp6_udp6_list);
#ifdef CONFIG_IP_UDPLITE
snmp6_seq_show_item(seq, (void **)udplite_stats_in6, snmp6_udplite6_list);
#endif
}
return 0;
}
......
......@@ -400,7 +400,7 @@ static inline int udp6_csum_init(struct sk_buff *skb, struct udphdr *uh,
UDP_SKB_CB(skb)->partial_cov = 0;
UDP_SKB_CB(skb)->cscov = skb->len;
if (IS_PROTO_UDPLITE(proto)) {
if (proto == IPPROTO_UDPLITE) {
err = udplite_checksum_init(skb, uh);
if (err)
return err;
......@@ -489,7 +489,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
if (udp_lib_checksum_complete(skb))
goto discard;
UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, IS_PROTO_UDPLITE(proto));
UDP6_INC_STATS_BH(UDP_MIB_NOPORTS, proto == IPPROTO_UDPLITE);
icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0, dev);
......@@ -510,11 +510,11 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct hlist_head udptable[],
short_packet:
LIMIT_NETDEBUG(KERN_DEBUG "UDP%sv6: short packet: %d/%u\n",
IS_PROTO_UDPLITE(proto) ? "-Lite" : "",
proto == IPPROTO_UDPLITE ? "-Lite" : "",
ulen, skb->len);
discard:
UDP6_INC_STATS_BH(UDP_MIB_INERRORS, IS_PROTO_UDPLITE(proto));
UDP6_INC_STATS_BH(UDP_MIB_INERRORS, proto == IPPROTO_UDPLITE);
kfree_skb(skb);
return 0;
}
......@@ -890,7 +890,7 @@ int udpv6_destroy_sock(struct sock *sk)
int udpv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
if (IS_SOL_UDPFAMILY(level))
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
udp_v6_push_pending_frames);
return ipv6_setsockopt(sk, level, optname, optval, optlen);
......@@ -900,7 +900,7 @@ int udpv6_setsockopt(struct sock *sk, int level, int optname,
int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
char __user *optval, int optlen)
{
if (IS_SOL_UDPFAMILY(level))
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_setsockopt(sk, level, optname, optval, optlen,
udp_v6_push_pending_frames);
return compat_ipv6_setsockopt(sk, level, optname, optval, optlen);
......@@ -910,7 +910,7 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname,
int udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
if (IS_SOL_UDPFAMILY(level))
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_getsockopt(sk, level, optname, optval, optlen);
return ipv6_getsockopt(sk, level, optname, optval, optlen);
}
......@@ -919,7 +919,7 @@ int udpv6_getsockopt(struct sock *sk, int level, int optname,
int compat_udpv6_getsockopt(struct sock *sk, int level, int optname,
char __user *optval, int __user *optlen)
{
if (IS_SOL_UDPFAMILY(level))
if (level == SOL_UDP || level == SOL_UDPLITE)
return udp_lib_getsockopt(sk, level, optname, optval, optlen);
return compat_ipv6_getsockopt(sk, level, optname, optval, optlen);
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册