/*
 * INET		An implementation of the TCP/IP protocol suite for the LINUX
 *		operating system.  INET is implemented using the  BSD Socket
 *		interface as the means of communication with the user level.
 *
 *		Pseudo-driver for the loopback interface.
 *
 * Version:	@(#)loopback.c	1.0.4b	08/16/93
 *
 * Authors:	Ross Biro
 *		Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
 *		Donald Becker, <becker@scyld.com>
 *
 *		Alan Cox	:	Fixed oddments for NET3.014
 *		Alan Cox	:	Rejig for NET3.029 snap #3
 *		Alan Cox	: 	Fixed NET3.029 bugs and sped up
 *		Larry McVoy	:	Tiny tweak to double performance
 *		Alan Cox	:	Backed out LMV's tweak - the linux mm
 *					can't take it...
 *              Michael Griffith:       Don't bother computing the checksums
 *                                      on packets received on the loopback
 *                                      interface.
 *		Alexey Kuznetsov:	Potential hang under some extreme
 *					cases removed.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 */
#include <linux/kernel.h>
#include <linux/jiffies.h>
#include <linux/module.h>
#include <linux/interrupt.h>
#include <linux/fs.h>
#include <linux/types.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/in.h>
#include <linux/init.h>

#include <asm/system.h>
#include <asm/uaccess.h>
#include <asm/io.h>

#include <linux/inet.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/skbuff.h>
#include <linux/ethtool.h>
#include <net/sock.h>
#include <net/checksum.h>
#include <linux/if_ether.h>	/* For the statistics structure. */
#include <linux/if_arp.h>	/* For ARPHRD_ETHER */
#include <linux/ip.h>
#include <linux/tcp.h>
#include <linux/percpu.h>
60
#include <net/net_namespace.h>
L
Linus Torvalds 已提交
61

62 63 64 65
/*
 * Loopback traffic counters.  One instance is allocated per CPU
 * (alloc_percpu() in loopback_dev_init()); loopback_xmit() bumps the
 * copy belonging to the CPU it runs on and get_stats() sums all of them.
 */
struct pcpu_lstats {
	unsigned long packets;	/* number of packets looped back */
	unsigned long bytes;	/* sum of skb->len over those packets */
};
L
Linus Torvalds 已提交
66 67 68 69 70 71 72 73 74

/*
 * NOTE(review): not referenced anywhere in the visible part of this file.
 * Presumably a per-packet overhead budget for loopback buffers - confirm
 * before relying on it or removing it.
 */
#define LOOPBACK_OVERHEAD (128 + MAX_HEADER + 16 + 16)

/* KISS: just allocate small chunks and copy bits.
 *
 * In effect this code is documentation: it spells out what we expect of a
 * large-send (TSO) capable device, modulo the TCP checksum, which is
 * ignored for loopback.
 */

75
#ifdef LOOPBACK_TSO
L
Linus Torvalds 已提交
76 77
static void emulate_large_send_offload(struct sk_buff *skb)
{
78
	struct iphdr *iph = ip_hdr(skb);
79 80
	struct tcphdr *th = (struct tcphdr *)(skb_network_header(skb) +
					      (iph->ihl * 4));
L
Linus Torvalds 已提交
81
	unsigned int doffset = (iph->ihl + th->doff) * 4;
82
	unsigned int mtu = skb_shinfo(skb)->gso_size + doffset;
L
Linus Torvalds 已提交
83 84 85 86 87 88 89 90 91 92 93
	unsigned int offset = 0;
	u32 seq = ntohl(th->seq);
	u16 id  = ntohs(iph->id);

	while (offset + doffset < skb->len) {
		unsigned int frag_size = min(mtu, skb->len - offset) - doffset;
		struct sk_buff *nskb = alloc_skb(mtu + 32, GFP_ATOMIC);

		if (!nskb)
			break;
		skb_reserve(nskb, 32);
94
		skb_set_mac_header(nskb, -ETH_HLEN);
95
		skb_reset_network_header(nskb);
96
		iph = ip_hdr(nskb);
97 98
		skb_copy_to_linear_data(nskb, skb_network_header(skb),
					doffset);
L
Linus Torvalds 已提交
99 100 101 102 103 104 105 106 107 108 109 110 111 112
		if (skb_copy_bits(skb,
				  doffset + offset,
				  nskb->data + doffset,
				  frag_size))
			BUG();
		skb_put(nskb, doffset + frag_size);
		nskb->ip_summed = CHECKSUM_UNNECESSARY;
		nskb->dev = skb->dev;
		nskb->priority = skb->priority;
		nskb->protocol = skb->protocol;
		nskb->dst = dst_clone(skb->dst);
		memcpy(nskb->cb, skb->cb, sizeof(skb->cb));
		nskb->pkt_type = skb->pkt_type;

113
		th = (struct tcphdr *)(skb_network_header(nskb) + iph->ihl * 4);
L
Linus Torvalds 已提交
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128
		iph->tot_len = htons(frag_size + doffset);
		iph->id = htons(id);
		iph->check = 0;
		iph->check = ip_fast_csum((unsigned char *) iph, iph->ihl);
		th->seq = htonl(seq);
		if (offset + doffset + frag_size < skb->len)
			th->fin = th->psh = 0;
		netif_rx(nskb);
		offset += frag_size;
		seq += frag_size;
		id++;
	}

	dev_kfree_skb(skb);
}
129
#endif /* LOOPBACK_TSO */
L
Linus Torvalds 已提交
130 131 132 133 134 135 136

/*
 * The higher levels take care of making this non-reentrant (it's
 * called with bh's disabled).
 */
static int loopback_xmit(struct sk_buff *skb, struct net_device *dev)
{
137
	struct pcpu_lstats *pcpu_lstats, *lb_stats;
L
Linus Torvalds 已提交
138 139 140

	skb_orphan(skb);

141
	skb->protocol = eth_type_trans(skb,dev);
L
Linus Torvalds 已提交
142 143 144 145
#ifndef LOOPBACK_MUST_CHECKSUM
	skb->ip_summed = CHECKSUM_UNNECESSARY;
#endif

146
#ifdef LOOPBACK_TSO
H
Herbert Xu 已提交
147
	if (skb_is_gso(skb)) {
L
Linus Torvalds 已提交
148
		BUG_ON(skb->protocol != htons(ETH_P_IP));
149
		BUG_ON(ip_hdr(skb)->protocol != IPPROTO_TCP);
L
Linus Torvalds 已提交
150 151 152 153

		emulate_large_send_offload(skb);
		return 0;
	}
154
#endif
L
Linus Torvalds 已提交
155 156
	dev->last_rx = jiffies;

157
	/* it's OK to use per_cpu_ptr() because BHs are off */
158 159
	pcpu_lstats = netdev_priv(dev);
	lb_stats = per_cpu_ptr(pcpu_lstats, smp_processor_id());
160 161
	lb_stats->bytes += skb->len;
	lb_stats->packets++;
L
Linus Torvalds 已提交
162 163 164

	netif_rx(skb);

165
	return 0;
L
Linus Torvalds 已提交
166 167 168 169
}

static struct net_device_stats *get_stats(struct net_device *dev)
{
170
	const struct pcpu_lstats *pcpu_lstats;
171
	struct net_device_stats *stats = &dev->stats;
172 173
	unsigned long bytes = 0;
	unsigned long packets = 0;
L
Linus Torvalds 已提交
174 175
	int i;

176
	pcpu_lstats = netdev_priv(dev);
177
	for_each_possible_cpu(i) {
178
		const struct pcpu_lstats *lb_stats;
L
Linus Torvalds 已提交
179

180
		lb_stats = per_cpu_ptr(pcpu_lstats, i);
181 182
		bytes   += lb_stats->bytes;
		packets += lb_stats->packets;
L
Linus Torvalds 已提交
183
	}
184 185 186 187
	stats->rx_packets = packets;
	stats->tx_packets = packets;
	stats->rx_bytes = bytes;
	stats->tx_bytes = bytes;
L
Linus Torvalds 已提交
188 189 190
	return stats;
}

191
/*
 * Constant "yes" answer shared by several loopback_ethtool_ops getters;
 * @dev is ignored.
 */
static u32 always_on(struct net_device *dev)
{
	return 1;
}

196
static const struct ethtool_ops loopback_ethtool_ops = {
197
	.get_link		= always_on,
L
Linus Torvalds 已提交
198
	.set_tso		= ethtool_op_set_tso,
199 200 201
	.get_tx_csum		= always_on,
	.get_sg			= always_on,
	.get_rx_csum		= always_on,
L
Linus Torvalds 已提交
202 203
};

204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
static int loopback_dev_init(struct net_device *dev)
{
	struct pcpu_lstats *lstats;

	lstats = alloc_percpu(struct pcpu_lstats);
	if (!lstats)
		return -ENOMEM;

	dev->priv = lstats;
	return 0;
}

/*
 * dev->destructor hook: release the per-CPU counters first, then the
 * net_device itself.
 */
static void loopback_dev_free(struct net_device *dev)
{
	free_percpu(netdev_priv(dev));
	free_netdev(dev);
}

224
/*
225 226
 * The loopback device is special. There is only one instance
 * per network namespace.
227
 */
228 229 230 231 232 233 234 235 236 237 238
static void loopback_setup(struct net_device *dev)
{
	dev->get_stats		= &get_stats;
	dev->mtu		= (16 * 1024) + 20 + 20 + 12;
	dev->hard_start_xmit	= loopback_xmit;
	dev->hard_header_len	= ETH_HLEN;	/* 14	*/
	dev->addr_len		= ETH_ALEN;	/* 6	*/
	dev->tx_queue_len	= 0;
	dev->type		= ARPHRD_LOOPBACK;	/* 0x0001*/
	dev->flags		= IFF_LOOPBACK;
	dev->features 		= NETIF_F_SG | NETIF_F_FRAGLIST
239
#ifdef LOOPBACK_TSO
240
		| NETIF_F_TSO
241
#endif
242 243 244
		| NETIF_F_NO_CSUM
		| NETIF_F_HIGHDMA
		| NETIF_F_LLTX
245
		| NETIF_F_NETNS_LOCAL;
246
	dev->ethtool_ops	= &loopback_ethtool_ops;
247
	dev->header_ops		= &eth_header_ops;
248 249
	dev->init = loopback_dev_init;
	dev->destructor = loopback_dev_free;
250
}
251

252
/* Setup and register the loopback device. */
253
static __net_init int loopback_net_init(struct net *net)
L
Linus Torvalds 已提交
254
{
255 256 257 258 259 260 261
	struct net_device *dev;
	int err;

	err = -ENOMEM;
	dev = alloc_netdev(0, "lo", loopback_setup);
	if (!dev)
		goto out;
262

263
	dev->nd_net = net;
264
	err = register_netdev(dev);
265
	if (err)
266
		goto out_free_netdev;
267

268
	net->loopback_dev = dev;
269
	return 0;
270

L
Linus Torvalds 已提交
271

272 273
out_free_netdev:
	free_netdev(dev);
274 275 276 277
out:
	if (net == &init_net)
		panic("loopback: Failed to register netdevice: %d\n", err);
	return err;
278 279
}

280
static __net_exit void loopback_net_exit(struct net *net)
281 282 283 284 285 286
{
	struct net_device *dev = net->loopback_dev;

	unregister_netdev(dev);
}

287
/* Per-namespace hooks that create and remove the loopback device. */
static struct pernet_operations __net_initdata loopback_net_ops = {
	.init	= loopback_net_init,
	.exit	= loopback_net_exit,
};

/*
 * Boot-time entry point: registers loopback_net_ops as a per-namespace
 * device and returns the result of register_pernet_device().
 */
static int __init loopback_init(void)
{
	return register_pernet_device(&loopback_net_ops);
}
296

297 298 299
/*
 * Loopback is special: it should be initialized before any other network
 * device and before the rest of the network subsystem, hence the
 * fs_initcall level.
 */
fs_initcall(loopback_init);