netpoll.c 21.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12
#include <linux/moduleparam.h>
L
Linus Torvalds 已提交
13 14 15
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
16
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
17 18 19 20 21 22 23 24
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
25
#include <linux/slab.h>
26
#include <linux/export.h>
L
Linus Torvalds 已提交
27 28 29
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
30
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
31 32 33 34 35 36 37 38 39

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

40
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
41 42 43

static atomic_t trapped;

S
Stephen Hemminger 已提交
44
#define USEC_PER_POLL	50
45 46
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2
L
Linus Torvalds 已提交
47 48 49 50 51

#define MAX_SKB_SIZE \
		(MAX_UDP_CHUNK + sizeof(struct udphdr) + \
				sizeof(struct iphdr) + sizeof(struct ethhdr))

52
static void zap_completion_queue(void);
53
static void arp_reply(struct sk_buff *skb);
L
Linus Torvalds 已提交
54

55 56 57
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

D
David Howells 已提交
58
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
59
{
60 61
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
62
	struct sk_buff *skb;
I
Ingo Molnar 已提交
63
	unsigned long flags;
L
Linus Torvalds 已提交
64

S
Stephen Hemminger 已提交
65 66
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
67
		const struct net_device_ops *ops = dev->netdev_ops;
68
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
69

S
Stephen Hemminger 已提交
70 71 72 73
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
74

75 76
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
77
		local_irq_save(flags);
78
		__netif_tx_lock(txq, smp_processor_id());
79
		if (netif_tx_queue_frozen_or_stopped(txq) ||
80
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
81
			skb_queue_head(&npinfo->txq, skb);
82
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
83
			local_irq_restore(flags);
L
Linus Torvalds 已提交
84

85
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
86 87
			return;
		}
88
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
89
		local_irq_restore(flags);
L
Linus Torvalds 已提交
90 91 92
	}
}

93 94
/*
 * Verify the UDP checksum of a received packet.
 *
 * Returns 0 when no verification is needed (checksum field is zero or
 * the skb is marked as not requiring one) or when a CHECKSUM_COMPLETE
 * value folds to zero together with the pseudo-header sum.  Otherwise
 * the pseudo-header sum is stored in skb->csum and the result of
 * __skb_checksum_complete() is returned; the caller treats non-zero as
 * a failure.
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum pseudo;

	if (!uh->check)
		return 0;
	if (skb_csum_unnecessary(skb))
		return 0;

	pseudo = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		/* hardware already summed the packet: just add the pseudo header */
		if (!csum_fold(csum_add(pseudo, skb->csum)))
			return 0;
	}

	skb->csum = pseudo;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
128 129 130 131 132 133 134 135 136 137 138 139
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

140
	npinfo->rx_flags |= NETPOLL_RX_DROP;
141
	atomic_inc(&trapped);
142
	set_bit(NAPI_STATE_NPSVC, &napi->state);
143 144

	work = napi->poll(napi, budget);
145
	trace_napi_poll(napi);
146

147
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
148
	atomic_dec(&trapped);
149
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
150 151 152 153

	return budget - work;
}

154
static void poll_napi(struct net_device *dev)
L
Linus Torvalds 已提交
155
{
156
	struct napi_struct *napi;
L
Linus Torvalds 已提交
157 158
	int budget = 16;

159
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
160
		if (napi->poll_owner != smp_processor_id() &&
161
		    spin_trylock(&napi->poll_lock)) {
162
			budget = poll_one_napi(dev->npinfo, napi, budget);
163
			spin_unlock(&napi->poll_lock);
164 165 166

			if (!budget)
				break;
167
		}
L
Linus Torvalds 已提交
168 169 170
	}
}

171 172
static void service_arp_queue(struct netpoll_info *npi)
{
173 174
	if (npi) {
		struct sk_buff *skb;
175

176 177
		while ((skb = skb_dequeue(&npi->arp_tx)))
			arp_reply(skb);
178 179 180
	}
}

181
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
182
{
183
	const struct net_device_ops *ops;
184

185 186 187 188 189
	if (!dev || !netif_running(dev))
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller)
L
Linus Torvalds 已提交
190 191 192
		return;

	/* Process pending work on NIC */
193
	ops->ndo_poll_controller(dev);
194 195

	poll_napi(dev);
L
Linus Torvalds 已提交
196

197 198 199 200 201 202 203 204 205 206 207
	if (dev->priv_flags & IFF_SLAVE) {
		if (dev->npinfo) {
			struct net_device *bond_dev = dev->master;
			struct sk_buff *skb;
			while ((skb = skb_dequeue(&dev->npinfo->arp_tx))) {
				skb->dev = bond_dev;
				skb_queue_tail(&bond_dev->npinfo->arp_tx, skb);
			}
		}
	}

208
	service_arp_queue(dev->npinfo);
209

210
	zap_completion_queue();
L
Linus Torvalds 已提交
211 212 213 214 215 216 217
}

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

218 219
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
220 221 222 223
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

224
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
225
	}
226
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
227 228
}

229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
/*
 * Release the skbs waiting on this CPU's softnet completion queue.
 *
 * The list is detached with interrupts disabled and then walked with
 * interrupts restored.  An skb with a destructor gets its user count
 * bumped and is passed to dev_kfree_skb_any() (i.e. it is put back for
 * later freeing); one without a destructor is freed on the spot.
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending, *following;
	unsigned long flags;

	if (!sd->completion_queue)
		goto done;

	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	for (; pending != NULL; pending = following) {
		/* grab the link before the skb may go away */
		following = pending->next;

		if (pending->destructor) {
			atomic_inc(&pending->users);
			dev_kfree_skb_any(pending); /* put this one back */
		} else {
			__kfree_skb(pending);
		}
	}

done:
	put_cpu_var(softnet_data);
}

257
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
258
{
259 260
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
261

262
	zap_completion_queue();
263
	refill_skbs();
L
Linus Torvalds 已提交
264 265 266
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
267 268
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
269 270

	if (!skb) {
271
		if (++count < 10) {
272
			netpoll_poll_dev(np->dev);
273
			goto repeat;
L
Linus Torvalds 已提交
274
		}
275
		return NULL;
L
Linus Torvalds 已提交
276 277 278 279 280 281 282
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

283 284 285 286 287 288 289 290 291 292 293
/*
 * Report whether the current CPU is recorded as poll_owner of any NAPI
 * context on @dev.  Returns 1 if so, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *ctx;
	int active = 0;

	list_for_each_entry(ctx, &dev->napi_list, dev_list) {
		if (ctx->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

294 295
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
296
{
S
Stephen Hemminger 已提交
297 298
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
299
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
300
	/* It is up to the caller to keep npinfo alive. */
301
	struct netpoll_info *npinfo = np->dev->npinfo;
S
Stephen Hemminger 已提交
302

303 304 305 306
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
307 308

	/* don't get messages out of order, and no recursion */
309
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
310
		struct netdev_queue *txq;
311 312
		unsigned long flags;

313 314
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

315
		local_irq_save(flags);
316 317 318
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
319
			if (__netif_tx_trylock(txq)) {
E
Eric Dumazet 已提交
320
				if (!netif_tx_queue_stopped(txq)) {
321
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
322 323 324
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
325
				__netif_tx_unlock(txq);
326 327 328 329 330

				if (status == NETDEV_TX_OK)
					break;

			}
331 332

			/* tickle device maybe there is some cleanup */
333
			netpoll_poll_dev(np->dev);
334 335

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
336
		}
337 338 339 340 341

		WARN_ONCE(!irqs_disabled(),
			"netpoll_send_skb(): %s enabled interrupts in poll (%pF)\n",
			dev->name, ops->ndo_start_xmit);

342
		local_irq_restore(flags);
L
Linus Torvalds 已提交
343 344
	}

S
Stephen Hemminger 已提交
345
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
346
		skb_queue_tail(&npinfo->txq, skb);
347
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
348 349
	}
}
350
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, eth_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;

	udp_len = len + sizeof(*udph);
	ip_len = eth_len = udp_len + sizeof(*iph);
	total_len = eth_len + ETH_HLEN + NET_IP_ALIGN;

	skb = find_skb(np, total_len, total_len - len);
	if (!skb)
		return;

368
	skb_copy_to_linear_data(skb, msg, len);
L
Linus Torvalds 已提交
369 370
	skb->len += len;

371 372 373
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
374 375 376 377
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
	udph->check = 0;
378 379
	udph->check = csum_tcpudp_magic(np->local_ip,
					np->remote_ip,
380
					udp_len, IPPROTO_UDP,
381
					csum_partial(udph, udp_len, 0));
382
	if (udph->check == 0)
383
		udph->check = CSUM_MANGLED_0;
L
Linus Torvalds 已提交
384

385 386
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);
387
	iph = ip_hdr(skb);
L
Linus Torvalds 已提交
388 389 390 391 392 393 394 395 396 397

	/* iph->version = 4; iph->ihl = 5; */
	put_unaligned(0x45, (unsigned char *)iph);
	iph->tos      = 0;
	put_unaligned(htons(ip_len), &(iph->tot_len));
	iph->id       = 0;
	iph->frag_off = 0;
	iph->ttl      = 64;
	iph->protocol = IPPROTO_UDP;
	iph->check    = 0;
398 399
	put_unaligned(np->local_ip, &(iph->saddr));
	put_unaligned(np->remote_ip, &(iph->daddr));
L
Linus Torvalds 已提交
400 401 402
	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
403
	skb_reset_mac_header(skb);
404
	skb->protocol = eth->h_proto = htons(ETH_P_IP);
405 406
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
L
Linus Torvalds 已提交
407 408 409 410 411

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
412
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
413 414 415

static void arp_reply(struct sk_buff *skb)
{
416
	struct netpoll_info *npinfo = skb->dev->npinfo;
L
Linus Torvalds 已提交
417 418 419
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
A
Al Viro 已提交
420
	__be32 sip, tip;
421
	unsigned char *sha;
L
Linus Torvalds 已提交
422
	struct sk_buff *send_skb;
423 424
	struct netpoll *np, *tmp;
	unsigned long flags;
425
	int hlen, tlen;
426 427 428 429 430 431 432 433 434 435 436 437 438
	int hits = 0;

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
439

440 441
	/* No netpoll struct is using this dev */
	if (!hits)
442
		return;
L
Linus Torvalds 已提交
443 444 445 446 447

	/* No arp on this interface */
	if (skb->dev->flags & IFF_NOARP)
		return;

448
	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
L
Linus Torvalds 已提交
449 450
		return;

451
	skb_reset_network_header(skb);
452
	skb_reset_transport_header(skb);
453
	arp = arp_hdr(skb);
L
Linus Torvalds 已提交
454 455 456 457 458 459 460

	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_op != htons(ARPOP_REQUEST))
		return;

461 462 463 464
	arp_ptr = (unsigned char *)(arp+1);
	/* save the location of the src hw addr */
	sha = arp_ptr;
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
465
	memcpy(&sip, arp_ptr, 4);
466
	arp_ptr += 4;
467 468
	/* If we actually cared about dst hw addr,
	   it would get copied here */
469
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
470 471 472
	memcpy(&tip, arp_ptr, 4);

	/* Should we ignore arp? */
473
	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
L
Linus Torvalds 已提交
474 475
		return;

476
	size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
477

478 479 480 481
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (tip != np->local_ip)
			continue;
L
Linus Torvalds 已提交
482

483 484 485
		hlen = LL_RESERVED_SPACE(np->dev);
		tlen = np->dev->needed_tailroom;
		send_skb = find_skb(np, size + hlen + tlen, hlen);
486 487
		if (!send_skb)
			continue;
L
Linus Torvalds 已提交
488

489 490 491 492
		skb_reset_network_header(send_skb);
		arp = (struct arphdr *) skb_put(send_skb, size);
		send_skb->dev = skb->dev;
		send_skb->protocol = htons(ETH_P_ARP);
L
Linus Torvalds 已提交
493

494 495 496 497 498 499 500
		/* Fill the device header for the ARP frame */
		if (dev_hard_header(send_skb, skb->dev, ptype,
				    sha, np->dev->dev_addr,
				    send_skb->len) < 0) {
			kfree_skb(send_skb);
			continue;
		}
L
Linus Torvalds 已提交
501

502 503 504 505 506 507 508
		/*
		 * Fill out the arp protocol part.
		 *
		 * we only support ethernet device type,
		 * which (according to RFC 1390) should
		 * always equal 1 (Ethernet).
		 */
L
Linus Torvalds 已提交
509

510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531
		arp->ar_hrd = htons(np->dev->type);
		arp->ar_pro = htons(ETH_P_IP);
		arp->ar_hln = np->dev->addr_len;
		arp->ar_pln = 4;
		arp->ar_op = htons(type);

		arp_ptr = (unsigned char *)(arp + 1);
		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &tip, 4);
		arp_ptr += 4;
		memcpy(arp_ptr, sha, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &sip, 4);

		netpoll_send_skb(np, send_skb);

		/* If there are several rx_hooks for the same address,
		   we're fine by sending a single reply */
		break;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
532 533 534 535 536
}

int __netpoll_rx(struct sk_buff *skb)
{
	int proto, len, ulen;
537
	int hits = 0;
538
	const struct iphdr *iph;
L
Linus Torvalds 已提交
539
	struct udphdr *uh;
540 541
	struct netpoll_info *npinfo = skb->dev->npinfo;
	struct netpoll *np, *tmp;
542

543
	if (list_empty(&npinfo->rx_np))
L
Linus Torvalds 已提交
544
		goto out;
545

L
Linus Torvalds 已提交
546 547 548
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

549
	/* check if netpoll clients need ARP */
550
	if (skb->protocol == htons(ETH_P_ARP) &&
L
Linus Torvalds 已提交
551
	    atomic_read(&trapped)) {
552
		skb_queue_tail(&npinfo->arp_tx, skb);
L
Linus Torvalds 已提交
553 554 555 556 557 558 559 560 561 562 563 564 565
		return 1;
	}

	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;
566
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
567 568 569 570
	if (iph->ihl < 5 || iph->version != 4)
		goto out;
	if (!pskb_may_pull(skb, iph->ihl*4))
		goto out;
571
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
572 573 574 575 576 577 578
	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
		goto out;

	len = ntohs(iph->tot_len);
	if (skb->len < len || len < iph->ihl*4)
		goto out;

579 580 581 582 583 584 585
	/*
	 * Our transport medium may have padded the buffer out.
	 * Now We trim to the true length of the frame.
	 */
	if (pskb_trim_rcsum(skb, len))
		goto out;

586
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
587 588 589 590 591 592 593 594 595
	if (iph->protocol != IPPROTO_UDP)
		goto out;

	len -= iph->ihl*4;
	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
	ulen = ntohs(uh->len);

	if (ulen != len)
		goto out;
596
	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
L
Linus Torvalds 已提交
597 598
		goto out;

599 600 601 602 603 604 605 606 607 608 609 610 611 612 613 614
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->local_ip && np->local_ip != iph->daddr)
			continue;
		if (np->remote_ip && np->remote_ip != iph->saddr)
			continue;
		if (np->local_port && np->local_port != ntohs(uh->dest))
			continue;

		np->rx_hook(np, ntohs(uh->source),
			       (char *)(uh+1),
			       ulen - sizeof(struct udphdr));
		hits++;
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
615 616 617 618 619 620 621 622 623 624 625 626 627

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

628 629 630 631
void netpoll_print_options(struct netpoll *np)
{
	printk(KERN_INFO "%s: local port %d\n",
			 np->name, np->local_port);
632 633
	printk(KERN_INFO "%s: local IP %pI4\n",
			 np->name, &np->local_ip);
634
	printk(KERN_INFO "%s: interface '%s'\n",
635 636 637
			 np->name, np->dev_name);
	printk(KERN_INFO "%s: remote port %d\n",
			 np->name, np->remote_port);
638 639
	printk(KERN_INFO "%s: remote IP %pI4\n",
			 np->name, &np->remote_ip);
J
Johannes Berg 已提交
640 641
	printk(KERN_INFO "%s: remote ethernet address %pM\n",
	                 np->name, np->remote_mac);
642
}
E
Eric Dumazet 已提交
643
EXPORT_SYMBOL(netpoll_print_options);
644

L
Linus Torvalds 已提交
645 646 647 648
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;

649
	if (*cur != '@') {
L
Linus Torvalds 已提交
650 651
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
652 653 654
		*delim = 0;
		np->local_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
655 656 657
	}
	cur++;

658
	if (*cur != '/') {
L
Linus Torvalds 已提交
659 660
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
661
		*delim = 0;
662
		np->local_ip = in_aton(cur);
663
		cur = delim;
L
Linus Torvalds 已提交
664 665 666
	}
	cur++;

667
	if (*cur != ',') {
L
Linus Torvalds 已提交
668 669 670
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
671
		*delim = 0;
L
Linus Torvalds 已提交
672
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
673
		cur = delim;
L
Linus Torvalds 已提交
674 675 676
	}
	cur++;

677
	if (*cur != '@') {
L
Linus Torvalds 已提交
678 679 680
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
681
		*delim = 0;
682 683 684
		if (*cur == ' ' || *cur == '\t')
			printk(KERN_INFO "%s: warning: whitespace"
					"is not allowed\n", np->name);
685 686
		np->remote_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
687 688 689 690 691 692
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
693
	*delim = 0;
694
	np->remote_ip = in_aton(cur);
695
	cur = delim + 1;
L
Linus Torvalds 已提交
696

697
	if (*cur != 0) {
L
Linus Torvalds 已提交
698
		/* MAC address */
699
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
700 701 702
			goto parse_failed;
	}

703
	netpoll_print_options(np);
L
Linus Torvalds 已提交
704 705 706 707

	return 0;

 parse_failed:
708
	printk(KERN_INFO "%s: couldn't parse config at '%s'!\n",
L
Linus Torvalds 已提交
709 710 711
	       np->name, cur);
	return -1;
}
E
Eric Dumazet 已提交
712
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
713

714
int __netpoll_setup(struct netpoll *np)
L
Linus Torvalds 已提交
715
{
716
	struct net_device *ndev = np->dev;
717
	struct netpoll_info *npinfo;
H
Herbert Xu 已提交
718
	const struct net_device_ops *ops;
719
	unsigned long flags;
S
Stephen Hemminger 已提交
720
	int err;
L
Linus Torvalds 已提交
721

722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767
	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		printk(KERN_ERR "%s: %s doesn't support polling, aborting.\n",
		       np->name, np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->arp_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo);
			if (err)
				goto free_npinfo;
		}
	} else {
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
768
	RCU_INIT_POINTER(ndev->npinfo, npinfo);
769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

L
Linus Torvalds 已提交
785
	if (np->dev_name)
786
		ndev = dev_get_by_name(&init_net, np->dev_name);
L
Linus Torvalds 已提交
787 788 789
	if (!ndev) {
		printk(KERN_ERR "%s: %s doesn't exist, aborting.\n",
		       np->name, np->dev_name);
S
Stephen Hemminger 已提交
790
		return -ENODEV;
L
Linus Torvalds 已提交
791 792
	}

793 794 795
	if (ndev->master) {
		printk(KERN_ERR "%s: %s is a slave device, aborting.\n",
		       np->name, np->dev_name);
796 797
		err = -EBUSY;
		goto put;
798 799
	}

L
Linus Torvalds 已提交
800 801 802 803 804 805
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

		printk(KERN_INFO "%s: device %s not up yet, forcing it\n",
		       np->name, np->dev_name);

806
		rtnl_lock();
S
Stephen Hemminger 已提交
807 808 809 810
		err = dev_open(ndev);
		rtnl_unlock();

		if (err) {
L
Linus Torvalds 已提交
811
			printk(KERN_ERR "%s: failed to open %s\n",
S
Stephen Hemminger 已提交
812
			       np->name, ndev->name);
813
			goto put;
L
Linus Torvalds 已提交
814 815 816
		}

		atleast = jiffies + HZ/10;
817
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
818 819 820 821 822 823 824
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				printk(KERN_NOTICE
				       "%s: timeout waiting for carrier\n",
				       np->name);
				break;
			}
825
			msleep(1);
L
Linus Torvalds 已提交
826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
			printk(KERN_NOTICE "%s: carrier detect appears"
			       " untrustworthy, waiting 4 seconds\n",
			       np->name);
			msleep(4000);
		}
	}

	if (!np->local_ip) {
		rcu_read_lock();
843
		in_dev = __in_dev_get_rcu(ndev);
L
Linus Torvalds 已提交
844 845 846 847 848

		if (!in_dev || !in_dev->ifa_list) {
			rcu_read_unlock();
			printk(KERN_ERR "%s: no IP address for %s, aborting\n",
			       np->name, np->dev_name);
S
Stephen Hemminger 已提交
849
			err = -EDESTADDRREQ;
850
			goto put;
L
Linus Torvalds 已提交
851 852
		}

853
		np->local_ip = in_dev->ifa_list->ifa_local;
L
Linus Torvalds 已提交
854
		rcu_read_unlock();
855
		printk(KERN_INFO "%s: local IP %pI4\n", np->name, &np->local_ip);
L
Linus Torvalds 已提交
856 857
	}

858 859 860 861 862 863
	np->dev = ndev;

	/* fill up the skb queue */
	refill_skbs();

	rtnl_lock();
864
	err = __netpoll_setup(np);
865
	rtnl_unlock();
866

867 868 869
	if (err)
		goto put;

L
Linus Torvalds 已提交
870 871
	return 0;

872
put:
L
Linus Torvalds 已提交
873
	dev_put(ndev);
S
Stephen Hemminger 已提交
874
	return err;
L
Linus Torvalds 已提交
875
}
E
Eric Dumazet 已提交
876
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
877

878 879
/*
 * Initialise the (initially empty) emergency skb pool.  Registered as
 * a core initcall; always returns 0.
 */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

885
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
886
{
887 888 889
	struct netpoll_info *npinfo;
	unsigned long flags;

890 891
	npinfo = np->dev->npinfo;
	if (!npinfo)
892
		return;
S
Stephen Hemminger 已提交
893

894 895 896 897 898 899 900
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
901

902 903
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
904

905 906 907
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
908

909
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
H
Herbert Xu 已提交
910

911 912
		/* avoid racing with NAPI reading npinfo */
		synchronize_rcu_bh();
S
Stephen Hemminger 已提交
913

914 915
		skb_queue_purge(&npinfo->arp_tx);
		skb_queue_purge(&npinfo->txq);
916
		cancel_delayed_work_sync(&npinfo->tx_work);
S
Stephen Hemminger 已提交
917

918 919 920
		/* clean after last, unfinished work */
		__skb_queue_purge(&npinfo->txq);
		kfree(npinfo);
921
	}
922 923
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
924

925 926 927 928
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;
929

930 931 932 933 934
	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
L
Linus Torvalds 已提交
935 936
	np->dev = NULL;
}
E
Eric Dumazet 已提交
937
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
938 939 940 941 942

/* Return the current value of the trap counter (non-zero while trapped). */
int netpoll_trap(void)
{
	int depth = atomic_read(&trapped);

	return depth;
}
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
944 945 946 947 948 949 950 951 952

/*
 * Adjust the trap counter: a non-zero @trap increments it, zero
 * decrements it.  Calls are expected to be balanced by the caller.
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);