netpoll.c 21.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
18
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
27
#include <linux/slab.h>
28
#include <linux/export.h>
L
Linus Torvalds 已提交
29 30 31
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
32
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
33 34 35 36 37 38 39 40 41

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

42
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
43 44 45

static atomic_t trapped;

S
Stephen Hemminger 已提交
46
#define USEC_PER_POLL	50
47 48
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2
L
Linus Torvalds 已提交
49 50 51 52 53

#define MAX_SKB_SIZE \
		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
				sizeof(struct iphdr) + sizeof(struct ethhdr))

54
static void zap_completion_queue(void);
55
static void arp_reply(struct sk_buff *skb);
L
Linus Torvalds 已提交
56

57 58 59
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

60 61 62 63 64 65 66
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

D
David Howells 已提交
67
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
68
{
69 70
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
71
	struct sk_buff *skb;
I
Ingo Molnar 已提交
72
	unsigned long flags;
L
Linus Torvalds 已提交
73

S
Stephen Hemminger 已提交
74 75
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
76
		const struct net_device_ops *ops = dev->netdev_ops;
77
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
78

S
Stephen Hemminger 已提交
79 80 81 82
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
83

84 85
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
86
		local_irq_save(flags);
87
		__netif_tx_lock(txq, smp_processor_id());
88
		if (netif_xmit_frozen_or_stopped(txq) ||
89
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
90
			skb_queue_head(&npinfo->txq, skb);
91
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
92
			local_irq_restore(flags);
L
Linus Torvalds 已提交
93

94
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
95 96
			return;
		}
97
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
98
		local_irq_restore(flags);
L
Linus Torvalds 已提交
99 100 101
	}
}

102 103
/*
 * Verify the UDP checksum of a received packet.
 *
 * Returns 0 when the checksum is absent (uh->check == 0), already known to
 * be good, or verifies against the pseudo-header; otherwise returns the
 * result of __skb_checksum_complete().
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum psum;

	if (uh->check == 0 || skb_csum_unnecessary(skb))
		return 0;

	/* pseudo-header sum for this source/destination pair */
	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_fold(csum_add(psum, skb->csum)))
		return 0;

	skb->csum = psum;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
137 138 139 140 141 142 143 144 145 146 147 148
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

149
	npinfo->rx_flags |= NETPOLL_RX_DROP;
150
	atomic_inc(&trapped);
151
	set_bit(NAPI_STATE_NPSVC, &napi->state);
152 153

	work = napi->poll(napi, budget);
154
	trace_napi_poll(napi);
155

156
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
157
	atomic_dec(&trapped);
158
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
159 160 161 162

	return budget - work;
}

163
static void poll_napi(struct net_device *dev)
L
Linus Torvalds 已提交
164
{
165
	struct napi_struct *napi;
L
Linus Torvalds 已提交
166 167
	int budget = 16;

168
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
169
		if (napi->poll_owner != smp_processor_id() &&
170
		    spin_trylock(&napi->poll_lock)) {
171
			budget = poll_one_napi(dev->npinfo, napi, budget);
172
			spin_unlock(&napi->poll_lock);
173 174 175

			if (!budget)
				break;
176
		}
L
Linus Torvalds 已提交
177 178 179
	}
}

180 181
static void service_arp_queue(struct netpoll_info *npi)
{
182 183
	if (npi) {
		struct sk_buff *skb;
184

185 186
		while ((skb = skb_dequeue(&npi->arp_tx)))
			arp_reply(skb);
187 188 189
	}
}

190
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
191
{
192
	const struct net_device_ops *ops;
193

194 195 196 197 198
	if (!dev || !netif_running(dev))
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller)
L
Linus Torvalds 已提交
199 200 201
		return;

	/* Process pending work on NIC */
202
	ops->ndo_poll_controller(dev);
203 204

	poll_napi(dev);
L
Linus Torvalds 已提交
205

206 207 208 209 210 211 212 213 214 215 216
	if (dev->priv_flags & IFF_SLAVE) {
		if (dev->npinfo) {
			struct net_device *bond_dev = dev->master;
			struct sk_buff *skb;
			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
				skb->dev = bond_dev;
				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
			}
		}
	}

217
	service_arp_queue(dev->npinfo);
218

219
	zap_completion_queue();
L
Linus Torvalds 已提交
220 221 222 223 224 225 226
}

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

227 228
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
229 230 231 232
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

233
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
234
	}
235
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
236 237
}

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
/*
 * Detach this CPU's softnet completion queue and release every skb on it.
 * skbs that carry a destructor get their user count bumped and are handed
 * to dev_kfree_skb_any(); the rest are freed directly with __kfree_skb().
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending, *cur;
	unsigned long flags;

	if (!sd->completion_queue)
		goto release;

	/* steal the whole list with interrupts off */
	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	while (pending) {
		cur = pending;
		pending = cur->next;

		if (!cur->destructor) {
			__kfree_skb(cur);
			continue;
		}

		atomic_inc(&cur->users);
		dev_kfree_skb_any(cur); /* put this one back */
	}

release:
	put_cpu_var(softnet_data);
}

266
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
267
{
268 269
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
270

271
	zap_completion_queue();
272
	refill_skbs();
L
Linus Torvalds 已提交
273 274 275
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
276 277
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
278 279

	if (!skb) {
280
		if (++count < 10) {
281
			netpoll_poll_dev(np->dev);
282
			goto repeat;
L
Linus Torvalds 已提交
283
		}
284
		return NULL;
L
Linus Torvalds 已提交
285 286 287 288 289 290 291
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

292 293 294 295 296 297 298 299 300 301 302
/*
 * Report whether the current CPU is the poll owner of any NAPI instance
 * of @dev.  Returns 1 if so, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner != smp_processor_id())
			continue;

		active = 1;
		break;
	}

	return active;
}

303 304
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
305
{
S
Stephen Hemminger 已提交
306 307
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
308
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
309
	/* It is up to the caller to keep npinfo alive. */
310
	struct netpoll_info *npinfo = np->dev->npinfo;
S
Stephen Hemminger 已提交
311

312 313 314 315
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
316 317

	/* don't get messages out of order, and no recursion */
318
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
319
		struct netdev_queue *txq;
320 321
		unsigned long flags;

322 323
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

324
		local_irq_save(flags);
325 326 327
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
328
			if (__netif_tx_trylock(txq)) {
329
				if (!netif_xmit_stopped(txq)) {
330
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
331 332 333
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
334
				__netif_tx_unlock(txq);
335 336 337 338 339

				if (status == NETDEV_TX_OK)
					break;

			}
340 341

			/* tickle device maybe there is some cleanup */
342
			netpoll_poll_dev(np->dev);
343 344

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
345
		}
346 347 348 349 350

		WARN_ONCE(!irqs_disabled(),
			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
			dev->name, ops->ndo_start_xmit);

351
		local_irq_restore(flags);
L
Linus Torvalds 已提交
352 353
	}

S
Stephen Hemminger 已提交
354
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
355
		skb_queue_tail(&npinfo->txq, skb);
356
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
357 358
	}
}
359
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, eth_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;

	udp_len = len + sizeof(*udph);
	ip_len = eth_len = udp_len + sizeof(*iph);
	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;

	skb = find_skb(np, total_len, total_len - len);
	if (!skb)
		return;

377
	skb_copy_to_linear_data(skb, msg, len);
L
Linus Torvalds 已提交
378 379
	skb->len += len;

380 381 382
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
383 384 385 386
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
	udph->check = 0;
387 388
	udph->check = csum_tcpudp_magic(np->local_ip,
					np->remote_ip,
389
					udp_len, IPPROTO_UDP,
390
					csum_partial(udph, udp_len, 0));
391
	if (udph->check == 0)
392
		udph->check = CSUM_MANGLED_0;
L
Linus Torvalds 已提交
393

394 395
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);
396
	iph = ip_hdr(skb);
L
Linus Torvalds 已提交
397 398 399 400 401 402 403 404 405 406

	/* iph->version = 4; iph->ihl = 5; */
	put_unaligned(0x45, (unsigned char *)iph);
	iph->tos      = 0;
	put_unaligned(htons(ip_len), &(iph->tot_len));
	iph->id       = 0;
	iph->frag_off = 0;
	iph->ttl      = 64;
	iph->protocol = IPPROTO_UDP;
	iph->check    = 0;
407 408
	put_unaligned(np->local_ip, &(iph->saddr));
	put_unaligned(np->remote_ip, &(iph->daddr));
L
Linus Torvalds 已提交
409 410 411
	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
412
	skb_reset_mac_header(skb);
413
	skb->protocol = eth->h_proto = htons(ETH_P_IP);
414 415
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
L
Linus Torvalds 已提交
416 417 418 419 420

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
421
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
422 423 424

static void arp_reply(struct sk_buff *skb)
{
425
	struct netpoll_info *npinfo = skb->dev->npinfo;
L
Linus Torvalds 已提交
426 427 428
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
A
Al Viro 已提交
429
	__be32 sip, tip;
430
	unsigned char *sha;
L
Linus Torvalds 已提交
431
	struct sk_buff *send_skb;
432 433
	struct netpoll *np, *tmp;
	unsigned long flags;
434
	int hlen, tlen;
435 436 437 438 439 440 441 442 443 444 445 446 447
	int hits = 0;

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
448

449 450
	/* No netpoll struct is using this dev */
	if (!hits)
451
		return;
L
Linus Torvalds 已提交
452 453 454 455 456

	/* No arp on this interface */
	if (skb->dev->flags & IFF_NOARP)
		return;

457
	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
L
Linus Torvalds 已提交
458 459
		return;

460
	skb_reset_network_header(skb);
461
	skb_reset_transport_header(skb);
462
	arp = arp_hdr(skb);
L
Linus Torvalds 已提交
463 464 465 466 467 468 469

	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_op != htons(ARPOP_REQUEST))
		return;

470 471 472 473
	arp_ptr = (unsigned char *)(arp+1);
	/* save the location of the src hw addr */
	sha = arp_ptr;
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
474
	memcpy(&sip, arp_ptr, 4);
475
	arp_ptr += 4;
476 477
	/* If we actually cared about dst hw addr,
	   it would get copied here */
478
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
479 480 481
	memcpy(&tip, arp_ptr, 4);

	/* Should we ignore arp? */
482
	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
L
Linus Torvalds 已提交
483 484
		return;

485
	size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
486

487 488 489 490
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (tip != np->local_ip)
			continue;
L
Linus Torvalds 已提交
491

492 493 494
		hlen = LL_RESERVED_SPACE(np->dev);
		tlen = np->dev->needed_tailroom;
		send_skb = find_skb(np, size + hlen + tlen, hlen);
495 496
		if (!send_skb)
			continue;
L
Linus Torvalds 已提交
497

498 499 500 501
		skb_reset_network_header(send_skb);
		arp = (struct arphdr *) skb_put(send_skb, size);
		send_skb->dev = skb->dev;
		send_skb->protocol = htons(ETH_P_ARP);
L
Linus Torvalds 已提交
502

503 504 505 506 507 508 509
		/* Fill the device header for the ARP frame */
		if (dev_hard_header(send_skb, skb->dev, ptype,
				    sha, np->dev->dev_addr,
				    send_skb->len) < 0) {
			kfree_skb(send_skb);
			continue;
		}
L
Linus Torvalds 已提交
510

511 512 513 514 515 516 517
		/*
		 * Fill out the arp protocol part.
		 *
		 * we only support ethernet device type,
		 * which (according to RFC 1390) should
		 * always equal 1 (Ethernet).
		 */
L
Linus Torvalds 已提交
518

519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540
		arp->ar_hrd = htons(np->dev->type);
		arp->ar_pro = htons(ETH_P_IP);
		arp->ar_hln = np->dev->addr_len;
		arp->ar_pln = 4;
		arp->ar_op = htons(type);

		arp_ptr = (unsigned char *)(arp + 1);
		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &tip, 4);
		arp_ptr += 4;
		memcpy(arp_ptr, sha, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &sip, 4);

		netpoll_send_skb(np, send_skb);

		/* If there are several rx_hooks for the same address,
		   we're fine by sending a single reply */
		break;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
541 542 543 544 545
}

int __netpoll_rx(struct sk_buff *skb)
{
	int proto, len, ulen;
546
	int hits = 0;
547
	const struct iphdr *iph;
L
Linus Torvalds 已提交
548
	struct udphdr *uh;
549 550
	struct netpoll_info *npinfo = skb->dev->npinfo;
	struct netpoll *np, *tmp;
551

552
	if (list_empty(&npinfo->rx_np))
L
Linus Torvalds 已提交
553
		goto out;
554

L
Linus Torvalds 已提交
555 556 557
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

558
	/* check if netpoll clients need ARP */
559
	if (skb->protocol == htons(ETH_P_ARP) &&
L
Linus Torvalds 已提交
560
	    atomic_read(&trapped)) {
561
		skb_queue_tail(&npinfo->arp_tx, skb);
L
Linus Torvalds 已提交
562 563 564 565 566 567 568 569 570 571 572 573 574
		return 1;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;
575
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
576 577 578 579
	if (iph->ihl < 5 || iph->version != 4)
		goto out;
	if (!pskb_may_pull(skb, iph->ihl*4))
		goto out;
580
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
581 582 583 584 585 586 587
	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
		goto out;

	len = ntohs(iph->tot_len);
	if (skb->len < len || len < iph->ihl*4)
		goto out;

588 589 590 591 592 593 594
	/*
	 * Our transport medium may have padded the buffer out.
	 * Now We trim to the true length of the frame.
	 */
	if (pskb_trim_rcsum(skb, len))
		goto out;

595
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
596 597 598 599 600 601 602 603 604
	if (iph->protocol != IPPROTO_UDP)
		goto out;

	len -= iph->ihl*4;
	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
	ulen = ntohs(uh->len);

	if (ulen != len)
		goto out;
605
	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
L
Linus Torvalds 已提交
606 607
		goto out;

608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->local_ip && np->local_ip != iph->daddr)
			continue;
		if (np->remote_ip && np->remote_ip != iph->saddr)
			continue;
		if (np->local_port && np->local_port != ntohs(uh->dest))
			continue;

		np->rx_hook(np, ntohs(uh->source),
			       (char *)(uh+1),
			       ulen - sizeof(struct udphdr));
		hits++;
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
624 625 626 627 628 629 630 631 632 633 634 635 636

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

637 638
/*
 * Log the configuration of netpoll client @np: local port and IP,
 * interface name, remote port and IP, and remote Ethernet address.
 */
void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	np_info(np, "local IP %pI4\n", &np->local_ip);
	np_info(np, "interface '%s'\n", np->dev_name);
	np_info(np, "remote port %d\n", np->remote_port);
	np_info(np, "remote IP %pI4\n", &np->remote_ip);
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);
647

L
Linus Torvalds 已提交
648 649 650 651
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;

652
	if (*cur != '@') {
L
Linus Torvalds 已提交
653 654
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
655 656 657
		*delim = 0;
		np->local_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
658 659 660
	}
	cur++;

661
	if (*cur != '/') {
L
Linus Torvalds 已提交
662 663
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
664
		*delim = 0;
665
		np->local_ip = in_aton(cur);
666
		cur = delim;
L
Linus Torvalds 已提交
667 668 669
	}
	cur++;

670
	if (*cur != ',') {
L
Linus Torvalds 已提交
671 672 673
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
674
		*delim = 0;
L
Linus Torvalds 已提交
675
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
676
		cur = delim;
L
Linus Torvalds 已提交
677 678 679
	}
	cur++;

680
	if (*cur != '@') {
L
Linus Torvalds 已提交
681 682 683
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
684
		*delim = 0;
685
		if (*cur == ' ' || *cur == '\t')
686
			np_info(np, "warning: whitespace is not allowed\n");
687 688
		np->remote_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
689 690 691 692 693 694
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
695
	*delim = 0;
696
	np->remote_ip = in_aton(cur);
697
	cur = delim + 1;
L
Linus Torvalds 已提交
698

699
	if (*cur != 0) {
L
Linus Torvalds 已提交
700
		/* MAC address */
701
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
702 703 704
			goto parse_failed;
	}

705
	netpoll_print_options(np);
L
Linus Torvalds 已提交
706 707 708 709

	return 0;

 parse_failed:
710
	np_info(np, "couldn't parse config at '%s'!\n", cur);
L
Linus Torvalds 已提交
711 712
	return -1;
}
E
Eric Dumazet 已提交
713
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
714

715
int __netpoll_setup(struct netpoll *np)
L
Linus Torvalds 已提交
716
{
717
	struct net_device *ndev = np->dev;
718
	struct netpoll_info *npinfo;
H
Herbert Xu 已提交
719
	const struct net_device_ops *ops;
720
	unsigned long flags;
S
Stephen Hemminger 已提交
721
	int err;
L
Linus Torvalds 已提交
722

723 724
	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
725 726
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->arp_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo);
			if (err)
				goto free_npinfo;
		}
	} else {
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
769
	rcu_assign_pointer(ndev->npinfo, npinfo);
770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

L
Linus Torvalds 已提交
786
	if (np->dev_name)
787
		ndev = dev_get_by_name(&init_net, np->dev_name);
L
Linus Torvalds 已提交
788
	if (!ndev) {
789
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
S
Stephen Hemminger 已提交
790
		return -ENODEV;
L
Linus Torvalds 已提交
791 792
	}

793
	if (ndev->master) {
794
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
795 796
		err = -EBUSY;
		goto put;
797 798
	}

L
Linus Torvalds 已提交
799 800 801
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

802
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
803

804
		rtnl_lock();
S
Stephen Hemminger 已提交
805 806 807 808
		err = dev_open(ndev);
		rtnl_unlock();

		if (err) {
809
			np_err(np, "failed to open %s\n", ndev->name);
810
			goto put;
L
Linus Torvalds 已提交
811 812 813
		}

		atleast = jiffies + HZ/10;
814
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
815 816
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
817
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
818 819
				break;
			}
820
			msleep(1);
L
Linus Torvalds 已提交
821 822 823 824 825 826 827 828
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
829
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
830 831 832 833 834 835
			msleep(4000);
		}
	}

	if (!np->local_ip) {
		rcu_read_lock();
836
		in_dev = __in_dev_get_rcu(ndev);
L
Linus Torvalds 已提交
837 838 839

		if (!in_dev || !in_dev->ifa_list) {
			rcu_read_unlock();
840 841
			np_err(np, "no IP address for %s, aborting\n",
			       np->dev_name);
S
Stephen Hemminger 已提交
842
			err = -EDESTADDRREQ;
843
			goto put;
L
Linus Torvalds 已提交
844 845
		}

846
		np->local_ip = in_dev->ifa_list->ifa_local;
L
Linus Torvalds 已提交
847
		rcu_read_unlock();
848
		np_info(np, "local IP %pI4\n", &np->local_ip);
L
Linus Torvalds 已提交
849 850
	}

851 852 853 854 855 856
	np->dev = ndev;

	/* fill up the skb queue */
	refill_skbs();

	rtnl_lock();
857
	err = __netpoll_setup(np);
858
	rtnl_unlock();
859

860 861 862
	if (err)
		goto put;

L
Linus Torvalds 已提交
863 864
	return 0;

865
put:
L
Linus Torvalds 已提交
866
	dev_put(ndev);
S
Stephen Hemminger 已提交
867
	return err;
L
Linus Torvalds 已提交
868
}
E
Eric Dumazet 已提交
869
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
870

871 872
/* Initialise the skb reserve pool queue head; registered as a core initcall. */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

878
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
879
{
880 881 882
	struct netpoll_info *npinfo;
	unsigned long flags;

883 884
	npinfo = np->dev->npinfo;
	if (!npinfo)
885
		return;
S
Stephen Hemminger 已提交
886

887 888 889 890 891 892 893
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
894

895 896
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
897

898 899 900
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
901

902
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
H
Herbert Xu 已提交
903

904 905
		/* avoid racing with NAPI reading npinfo */
		synchronize_rcu_bh();
S
Stephen Hemminger 已提交
906

907 908
		skb_queue_purge(&npinfo->arp_tx);
		skb_queue_purge(&npinfo->txq);
909
		cancel_delayed_work_sync(&npinfo->tx_work);
S
Stephen Hemminger 已提交
910

911 912 913
		/* clean after last, unfinished work */
		__skb_queue_purge(&npinfo->txq);
		kfree(npinfo);
914
	}
915 916
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
917

918 919 920 921
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;
922

923 924 925 926 927
	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
L
Linus Torvalds 已提交
928 929
	np->dev = NULL;
}
E
Eric Dumazet 已提交
930
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
931 932 933 934 935

/* Return the current value of the trap counter (non-zero while trapped). */
int netpoll_trap(void)
{
	int depth = atomic_read(&trapped);

	return depth;
}
E
Eric Dumazet 已提交
936
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
937 938 939 940 941 942 943 944 945

/*
 * Adjust the trap counter: a non-zero @trap increments it, zero
 * decrements it.
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);