netpoll.c 21.2 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12
#include <linux/moduleparam.h>
L
Linus Torvalds 已提交
13 14 15
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
16
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23 24
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
25
#include <linux/slab.h>
26
#include <linux/export.h>
L
Linus Torvalds 已提交
27 28 29
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
30
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
31 32 33 34 35 36 37 38 39

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

40
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
41 42 43

static atomic_t trapped;

S
Stephen Hemminger 已提交
44
#define USEC_PER_POLL	50
45 46
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2
L
Linus Torvalds 已提交
47 48 49 50 51

#define MAX_SKB_SIZE \
		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
				sizeof(struct iphdr) + sizeof(struct ethhdr))

52
static void zap_completion_queue(void);
53
static void arp_reply(struct sk_buff *skb);
L
Linus Torvalds 已提交
54

55 56 57
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

D
David Howells 已提交
58
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
59
{
60 61
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
62
	struct sk_buff *skb;
I
Ingo Molnar 已提交
63
	unsigned long flags;
L
Linus Torvalds 已提交
64

S
Stephen Hemminger 已提交
65 66
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
67
		const struct net_device_ops *ops = dev->netdev_ops;
68
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
69

S
Stephen Hemminger 已提交
70 71 72 73
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
74

75 76
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
77
		local_irq_save(flags);
78
		__netif_tx_lock(txq, smp_processor_id());
79
		if (netif_tx_queue_frozen_or_stopped(txq) ||
80
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
81
			skb_queue_head(&npinfo->txq, skb);
82
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
83
			local_irq_restore(flags);
L
Linus Torvalds 已提交
84

85
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
86 87
			return;
		}
88
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
89
		local_irq_restore(flags);
L
Linus Torvalds 已提交
90 91 92
	}
}

93 94
/*
 * Validate the UDP checksum of a received packet.
 *
 * @skb:   packet being checked
 * @uh:    UDP header inside @skb
 * @ulen:  UDP length (header + payload)
 * @saddr: IPv4 source address used for the pseudo header
 * @daddr: IPv4 destination address used for the pseudo header
 *
 * Returns 0 when no checksum is present (uh->check == 0), when
 * skb_csum_unnecessary() says no check is needed, or when a
 * CHECKSUM_COMPLETE value folds to zero together with the pseudo-header
 * sum.  Otherwise the pseudo-header sum is stored in skb->csum and the
 * result of __skb_checksum_complete() is returned.
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum psum;

	if (uh->check == 0 || skb_csum_unnecessary(skb))
		return 0;

	psum = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE &&
	    !csum_fold(csum_add(psum, skb->csum)))
		return 0;

	skb->csum = psum;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
128 129 130 131 132 133 134 135 136 137 138 139
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

140
	npinfo->rx_flags |= NETPOLL_RX_DROP;
141
	atomic_inc(&trapped);
142
	set_bit(NAPI_STATE_NPSVC, &napi->state);
143 144

	work = napi->poll(napi, budget);
145
	trace_napi_poll(napi);
146

147
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
148
	atomic_dec(&trapped);
149
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
150 151 152 153

	return budget - work;
}

154
static void poll_napi(struct net_device *dev)
L
Linus Torvalds 已提交
155
{
156
	struct napi_struct *napi;
L
Linus Torvalds 已提交
157 158
	int budget = 16;

159
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
160
		if (napi->poll_owner != smp_processor_id() &&
161
		    spin_trylock(&napi->poll_lock)) {
162
			budget = poll_one_napi(dev->npinfo, napi, budget);
163
			spin_unlock(&napi->poll_lock);
164 165 166

			if (!budget)
				break;
167
		}
L
Linus Torvalds 已提交
168 169 170
	}
}

171 172
static void service_arp_queue(struct netpoll_info *npi)
{
173 174
	if (npi) {
		struct sk_buff *skb;
175

176 177
		while ((skb = skb_dequeue(&npi->arp_tx)))
			arp_reply(skb);
178 179 180
	}
}

181
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
182
{
183
	const struct net_device_ops *ops;
184

185 186 187 188 189
	if (!dev || !netif_running(dev))
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller)
L
Linus Torvalds 已提交
190 191 192
		return;

	/* Process pending work on NIC */
193
	ops->ndo_poll_controller(dev);
194 195

	poll_napi(dev);
L
Linus Torvalds 已提交
196

197 198 199 200 201 202 203 204 205 206 207
	if (dev->priv_flags & IFF_SLAVE) {
		if (dev->npinfo) {
			struct net_device *bond_dev = dev->master;
			struct sk_buff *skb;
			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
				skb->dev = bond_dev;
				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
			}
		}
	}

208
	service_arp_queue(dev->npinfo);
209

210
	zap_completion_queue();
L
Linus Torvalds 已提交
211 212 213 214 215 216 217
}

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

218 219
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
220 221 222 223
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

224
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
225
	}
226
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
227 228
}

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
/*
 * Reap this CPU's softnet completion queue.
 *
 * The list is detached from softnet_data with local interrupts off and
 * then walked with interrupts restored.  An skb without a destructor is
 * freed directly; one with a destructor gets its user count raised and
 * is passed to dev_kfree_skb_any() ("put back", per the original note).
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending, *cur;
	unsigned long flags;

	if (!sd->completion_queue)
		goto out;

	/* steal the whole list atomically w.r.t. local interrupts */
	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	while (pending != NULL) {
		cur = pending;
		/* advance first: 'cur' may be gone after this iteration */
		pending = cur->next;

		if (!cur->destructor) {
			__kfree_skb(cur);
			continue;
		}

		atomic_inc(&cur->users);
		dev_kfree_skb_any(cur); /* put this one back */
	}

out:
	put_cpu_var(softnet_data);
}

257
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
258
{
259 260
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
261

262
	zap_completion_queue();
263
	refill_skbs();
L
Linus Torvalds 已提交
264 265 266
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
267 268
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
269 270

	if (!skb) {
271
		if (++count < 10) {
272
			netpoll_poll_dev(np->dev);
273
			goto repeat;
L
Linus Torvalds 已提交
274
		}
275
		return NULL;
L
Linus Torvalds 已提交
276 277 278 279 280 281 282
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

283 284 285 286 287 288 289 290 291 292 293
/*
 * Tell whether the current CPU is the poll owner of any NAPI context
 * attached to @dev.  Returns 1 if so, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner != smp_processor_id())
			continue;

		active = 1;
		break;
	}

	return active;
}

294 295
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
296
{
S
Stephen Hemminger 已提交
297 298
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
299
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
300
	/* It is up to the caller to keep npinfo alive. */
301
	struct netpoll_info *npinfo = np->dev->npinfo;
S
Stephen Hemminger 已提交
302

303 304 305 306
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
307 308

	/* don't get messages out of order, and no recursion */
309
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
310
		struct netdev_queue *txq;
311 312
		unsigned long flags;

313 314
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

315
		local_irq_save(flags);
316 317 318
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
319
			if (__netif_tx_trylock(txq)) {
E
Eric Dumazet 已提交
320
				if (!netif_tx_queue_stopped(txq)) {
321
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
322 323 324
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
325
				__netif_tx_unlock(txq);
326 327 328 329 330

				if (status == NETDEV_TX_OK)
					break;

			}
331 332

			/* tickle device maybe there is some cleanup */
333
			netpoll_poll_dev(np->dev);
334 335

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
336
		}
337 338 339 340 341

		WARN_ONCE(!irqs_disabled(),
			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
			dev->name, ops->ndo_start_xmit);

342
		local_irq_restore(flags);
L
Linus Torvalds 已提交
343 344
	}

S
Stephen Hemminger 已提交
345
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
346
		skb_queue_tail(&npinfo->txq, skb);
347
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
348 349
	}
}
350
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, eth_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;

	udp_len = len + sizeof(*udph);
	ip_len = eth_len = udp_len + sizeof(*iph);
	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;

	skb = find_skb(np, total_len, total_len - len);
	if (!skb)
		return;

368
	skb_copy_to_linear_data(skb, msg, len);
L
Linus Torvalds 已提交
369 370
	skb->len += len;

371 372 373
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
374 375 376 377
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
	udph->check = 0;
378 379
	udph->check = csum_tcpudp_magic(np->local_ip,
					np->remote_ip,
380
					udp_len, IPPROTO_UDP,
381
					csum_partial(udph, udp_len, 0));
382
	if (udph->check == 0)
383
		udph->check = CSUM_MANGLED_0;
L
Linus Torvalds 已提交
384

385 386
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);
387
	iph = ip_hdr(skb);
L
Linus Torvalds 已提交
388 389 390 391 392 393 394 395 396 397

	/* iph->version = 4; iph->ihl = 5; */
	put_unaligned(0x45, (unsigned char *)iph);
	iph->tos      = 0;
	put_unaligned(htons(ip_len), &(iph->tot_len));
	iph->id       = 0;
	iph->frag_off = 0;
	iph->ttl      = 64;
	iph->protocol = IPPROTO_UDP;
	iph->check    = 0;
398 399
	put_unaligned(np->local_ip, &(iph->saddr));
	put_unaligned(np->remote_ip, &(iph->daddr));
L
Linus Torvalds 已提交
400 401 402
	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
403
	skb_reset_mac_header(skb);
404
	skb->protocol = eth->h_proto = htons(ETH_P_IP);
405 406
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
L
Linus Torvalds 已提交
407 408 409 410 411

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
412
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
413 414 415

static void arp_reply(struct sk_buff *skb)
{
416
	struct netpoll_info *npinfo = skb->dev->npinfo;
L
Linus Torvalds 已提交
417 418 419
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
A
Al Viro 已提交
420
	__be32 sip, tip;
421
	unsigned char *sha;
L
Linus Torvalds 已提交
422
	struct sk_buff *send_skb;
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437
	struct netpoll *np, *tmp;
	unsigned long flags;
	int hits = 0;

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
438

439 440
	/* No netpoll struct is using this dev */
	if (!hits)
441
		return;
L
Linus Torvalds 已提交
442 443 444 445 446

	/* No arp on this interface */
	if (skb->dev->flags & IFF_NOARP)
		return;

447
	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
L
Linus Torvalds 已提交
448 449
		return;

450
	skb_reset_network_header(skb);
451
	skb_reset_transport_header(skb);
452
	arp = arp_hdr(skb);
L
Linus Torvalds 已提交
453 454 455 456 457 458 459

	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_op != htons(ARPOP_REQUEST))
		return;

460 461 462 463
	arp_ptr = (unsigned char *)(arp+1);
	/* save the location of the src hw addr */
	sha = arp_ptr;
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
464
	memcpy(&sip, arp_ptr, 4);
465
	arp_ptr += 4;
466 467
	/* If we actually cared about dst hw addr,
	   it would get copied here */
468
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
469 470 471
	memcpy(&tip, arp_ptr, 4);

	/* Should we ignore arp? */
472
	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
L
Linus Torvalds 已提交
473 474
		return;

475
	size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
476

477 478 479 480
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (tip != np->local_ip)
			continue;
L
Linus Torvalds 已提交
481

482 483 484 485
		send_skb = find_skb(np, size + LL_ALLOCATED_SPACE(np->dev),
				    LL_RESERVED_SPACE(np->dev));
		if (!send_skb)
			continue;
L
Linus Torvalds 已提交
486

487 488 489 490
		skb_reset_network_header(send_skb);
		arp = (struct arphdr *) skb_put(send_skb, size);
		send_skb->dev = skb->dev;
		send_skb->protocol = htons(ETH_P_ARP);
L
Linus Torvalds 已提交
491

492 493 494 495 496 497 498
		/* Fill the device header for the ARP frame */
		if (dev_hard_header(send_skb, skb->dev, ptype,
				    sha, np->dev->dev_addr,
				    send_skb->len) < 0) {
			kfree_skb(send_skb);
			continue;
		}
L
Linus Torvalds 已提交
499

500 501 502 503 504 505 506
		/*
		 * Fill out the arp protocol part.
		 *
		 * we only support ethernet device type,
		 * which (according to RFC 1390) should
		 * always equal 1 (Ethernet).
		 */
L
Linus Torvalds 已提交
507

508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529
		arp->ar_hrd = htons(np->dev->type);
		arp->ar_pro = htons(ETH_P_IP);
		arp->ar_hln = np->dev->addr_len;
		arp->ar_pln = 4;
		arp->ar_op = htons(type);

		arp_ptr = (unsigned char *)(arp + 1);
		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &tip, 4);
		arp_ptr += 4;
		memcpy(arp_ptr, sha, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &sip, 4);

		netpoll_send_skb(np, send_skb);

		/* If there are several rx_hooks for the same address,
		   we're fine by sending a single reply */
		break;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
530 531 532 533 534
}

int __netpoll_rx(struct sk_buff *skb)
{
	int proto, len, ulen;
535
	int hits = 0;
536
	const struct iphdr *iph;
L
Linus Torvalds 已提交
537
	struct udphdr *uh;
538 539
	struct netpoll_info *npinfo = skb->dev->npinfo;
	struct netpoll *np, *tmp;
540

541
	if (list_empty(&npinfo->rx_np))
L
Linus Torvalds 已提交
542
		goto out;
543

L
Linus Torvalds 已提交
544 545 546
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

547
	/* check if netpoll clients need ARP */
548
	if (skb->protocol == htons(ETH_P_ARP) &&
L
Linus Torvalds 已提交
549
	    atomic_read(&trapped)) {
550
		skb_queue_tail(&npinfo->arp_tx, skb);
L
Linus Torvalds 已提交
551 552 553 554 555 556 557 558 559 560 561 562 563
		return 1;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;
564
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
565 566 567 568
	if (iph->ihl < 5 || iph->version != 4)
		goto out;
	if (!pskb_may_pull(skb, iph->ihl*4))
		goto out;
569
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
570 571 572 573 574 575 576
	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
		goto out;

	len = ntohs(iph->tot_len);
	if (skb->len < len || len < iph->ihl*4)
		goto out;

577 578 579 580 581 582 583
	/*
	 * Our transport medium may have padded the buffer out.
	 * Now We trim to the true length of the frame.
	 */
	if (pskb_trim_rcsum(skb, len))
		goto out;

584
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
585 586 587 588 589 590 591 592 593
	if (iph->protocol != IPPROTO_UDP)
		goto out;

	len -= iph->ihl*4;
	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
	ulen = ntohs(uh->len);

	if (ulen != len)
		goto out;
594
	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
L
Linus Torvalds 已提交
595 596
		goto out;

597 598 599 600 601 602 603 604 605 606 607 608 609 610 611 612
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->local_ip && np->local_ip != iph->daddr)
			continue;
		if (np->remote_ip && np->remote_ip != iph->saddr)
			continue;
		if (np->local_port && np->local_port != ntohs(uh->dest))
			continue;

		np->rx_hook(np, ntohs(uh->source),
			       (char *)(uh+1),
			       ulen - sizeof(struct udphdr));
		hits++;
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
613 614 615 616 617 618 619 620 621 622 623 624 625

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

626 627 628 629
/*
 * Log the configuration held in @np (local/remote port and IPv4
 * address, interface name and remote MAC) at KERN_INFO, one item per
 * line, each prefixed with np->name.
 */
void netpoll_print_options(struct netpoll *np)
{
	printk(KERN_INFO "%s: local port %d\n",
			 np->name, np->local_port);
	printk(KERN_INFO "%s: local IP %pI4\n",
			 np->name, &np->local_ip);
	printk(KERN_INFO "%s: interface '%s'\n",
			 np->name, np->dev_name);
	printk(KERN_INFO "%s: remote port %d\n",
			 np->name, np->remote_port);
	printk(KERN_INFO "%s: remote IP %pI4\n",
			 np->name, &np->remote_ip);
	printk(KERN_INFO "%s: remote ethernet address %pM\n",
			 np->name, np->remote_mac);
}
E
Eric Dumazet 已提交
641
EXPORT_SYMBOL(netpoll_print_options);
642

L
Linus Torvalds 已提交
643 644 645 646
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;

647
	if (*cur != '@') {
L
Linus Torvalds 已提交
648 649
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
650 651 652
		*delim = 0;
		np->local_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
653 654 655
	}
	cur++;

656
	if (*cur != '/') {
L
Linus Torvalds 已提交
657 658
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
659
		*delim = 0;
660
		np->local_ip = in_aton(cur);
661
		cur = delim;
L
Linus Torvalds 已提交
662 663 664
	}
	cur++;

665
	if (*cur != ',') {
L
Linus Torvalds 已提交
666 667 668
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
669
		*delim = 0;
L
Linus Torvalds 已提交
670
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
671
		cur = delim;
L
Linus Torvalds 已提交
672 673 674
	}
	cur++;

675
	if (*cur != '@') {
L
Linus Torvalds 已提交
676 677 678
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
679
		*delim = 0;
680 681 682
		if (*cur == ' ' || *cur == '\t')
			printk(KERN_INFO "%s: warning: whitespace"
					"is not allowed\n", np->name);
683 684
		np->remote_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
685 686 687 688 689 690
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
691
	*delim = 0;
692
	np->remote_ip = in_aton(cur);
693
	cur = delim + 1;
L
Linus Torvalds 已提交
694

695
	if (*cur != 0) {
L
Linus Torvalds 已提交
696
		/* MAC address */
697
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
698 699 700
			goto parse_failed;
	}

701
	netpoll_print_options(np);
L
Linus Torvalds 已提交
702 703 704 705

	return 0;

 parse_failed:
706
	printk(KERN_INFO "%s: couldn't parse config at '%s'!\n",
L
Linus Torvalds 已提交
707 708 709
	       np->name, cur);
	return -1;
}
E
Eric Dumazet 已提交
710
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
711

712
int __netpoll_setup(struct netpoll *np)
L
Linus Torvalds 已提交
713
{
714
	struct net_device *ndev = np->dev;
715
	struct netpoll_info *npinfo;
H
Herbert Xu 已提交
716
	const struct net_device_ops *ops;
717
	unsigned long flags;
S
Stephen Hemminger 已提交
718
	int err;
L
Linus Torvalds 已提交
719

720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765
	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
		       np->name, np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->arp_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo);
			if (err)
				goto free_npinfo;
		}
	} else {
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
766
	RCU_INIT_POINTER(ndev->npinfo, npinfo);
767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

L
Linus Torvalds 已提交
783
	if (np->dev_name)
784
		ndev = dev_get_by_name(&init_net, np->dev_name);
L
Linus Torvalds 已提交
785 786 787
	if (!ndev) {
		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
		       np->name, np->dev_name);
S
Stephen Hemminger 已提交
788
		return -ENODEV;
L
Linus Torvalds 已提交
789 790
	}

791 792 793
	if (ndev->master) {
		printk(KERN_ERR "%s: %s is a slave device, aborting.\n",
		       np->name, np->dev_name);
794 795
		err = -EBUSY;
		goto put;
796 797
	}

L
Linus Torvalds 已提交
798 799 800 801 802 803
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
		       np->name, np->dev_name);

804
		rtnl_lock();
S
Stephen Hemminger 已提交
805 806 807 808
		err = dev_open(ndev);
		rtnl_unlock();

		if (err) {
L
Linus Torvalds 已提交
809
			printk(KERN_ERR "%s: failed to open %s\n",
S
Stephen Hemminger 已提交
810
			       np->name, ndev->name);
811
			goto put;
L
Linus Torvalds 已提交
812 813 814
		}

		atleast = jiffies + HZ/10;
815
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
816 817 818 819 820 821 822
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				printk(KERN_NOTICE
				       "%s: timeout waiting for carrier\n",
				       np->name);
				break;
			}
823
			msleep(1);
L
Linus Torvalds 已提交
824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
			printk(KERN_NOTICE "%s: carrier detect appears"
			       " untrustworthy, waiting 4 seconds\n",
			       np->name);
			msleep(4000);
		}
	}

	if (!np->local_ip) {
		rcu_read_lock();
841
		in_dev = __in_dev_get_rcu(ndev);
L
Linus Torvalds 已提交
842 843 844 845 846

		if (!in_dev || !in_dev->ifa_list) {
			rcu_read_unlock();
			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
			       np->name, np->dev_name);
S
Stephen Hemminger 已提交
847
			err = -EDESTADDRREQ;
848
			goto put;
L
Linus Torvalds 已提交
849 850
		}

851
		np->local_ip = in_dev->ifa_list->ifa_local;
L
Linus Torvalds 已提交
852
		rcu_read_unlock();
853
		printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
L
Linus Torvalds 已提交
854 855
	}

856 857 858 859 860 861
	np->dev = ndev;

	/* fill up the skb queue */
	refill_skbs();

	rtnl_lock();
862
	err = __netpoll_setup(np);
863
	rtnl_unlock();
864

865 866 867
	if (err)
		goto put;

L
Linus Torvalds 已提交
868 869
	return 0;

870
put:
L
Linus Torvalds 已提交
871
	dev_put(ndev);
S
Stephen Hemminger 已提交
872
	return err;
L
Linus Torvalds 已提交
873
}
E
Eric Dumazet 已提交
874
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
875

876 877
/* Initialise the emergency skb pool queue head; always returns 0. */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

883
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
884
{
885 886 887
	struct netpoll_info *npinfo;
	unsigned long flags;

888 889
	npinfo = np->dev->npinfo;
	if (!npinfo)
890
		return;
S
Stephen Hemminger 已提交
891

892 893 894 895 896 897 898
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
899

900 901
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
902

903 904 905
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
906

907
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
H
Herbert Xu 已提交
908

909 910
		/* avoid racing with NAPI reading npinfo */
		synchronize_rcu_bh();
S
Stephen Hemminger 已提交
911

912 913
		skb_queue_purge(&npinfo->arp_tx);
		skb_queue_purge(&npinfo->txq);
914
		cancel_delayed_work_sync(&npinfo->tx_work);
S
Stephen Hemminger 已提交
915

916 917 918
		/* clean after last, unfinished work */
		__skb_queue_purge(&npinfo->txq);
		kfree(npinfo);
919
	}
920 921
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
922

923 924 925 926
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;
927

928 929 930 931 932
	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
L
Linus Torvalds 已提交
933 934
	np->dev = NULL;
}
E
Eric Dumazet 已提交
935
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
936 937 938 939 940

/* Return the current value of the global 'trapped' counter. */
int netpoll_trap(void)
{
	return atomic_read(&trapped);
}
E
Eric Dumazet 已提交
941
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
942 943 944 945 946 947 948 949 950

/*
 * Adjust the global 'trapped' counter: a non-zero @trap raises it by
 * one, a zero @trap lowers it by one.
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);