netpoll.c 22.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
18
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
27
#include <linux/slab.h>
28
#include <linux/export.h>
29
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
30 31 32
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
33
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
34 35 36 37 38 39 40 41 42

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

43
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
44 45 46

static atomic_t trapped;

S
Stephen Hemminger 已提交
47
#define USEC_PER_POLL	50
48 49
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2
L
Linus Torvalds 已提交
50

J
Joe Perches 已提交
51 52 53 54 55
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
56

57
static void zap_completion_queue(void);
58
static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
L
Linus Torvalds 已提交
59

60 61 62
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

63 64 65 66 67 68 69
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

D
David Howells 已提交
70
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
71
{
72 73
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
74
	struct sk_buff *skb;
I
Ingo Molnar 已提交
75
	unsigned long flags;
L
Linus Torvalds 已提交
76

S
Stephen Hemminger 已提交
77 78
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
79
		const struct net_device_ops *ops = dev->netdev_ops;
80
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
81

S
Stephen Hemminger 已提交
82 83 84 85
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
86

87 88
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
89
		local_irq_save(flags);
90
		__netif_tx_lock(txq, smp_processor_id());
91
		if (netif_xmit_frozen_or_stopped(txq) ||
92
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
93
			skb_queue_head(&npinfo->txq, skb);
94
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
95
			local_irq_restore(flags);
L
Linus Torvalds 已提交
96

97
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
98 99
			return;
		}
100
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
101
		local_irq_restore(flags);
L
Linus Torvalds 已提交
102 103 104
	}
}

105 106
/*
 * Validate the UDP checksum of @skb.
 *
 * Returns 0 when the packet is acceptable: the header's checksum field is
 * zero, the skb is flagged as not needing verification, or the
 * CHECKSUM_COMPLETE value folds to zero together with the pseudo-header sum.
 * Otherwise the pseudo-header sum is stored in skb->csum and the result of
 * __skb_checksum_complete() is returned.
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum pseudo;

	if (uh->check == 0)
		return 0;
	if (skb_csum_unnecessary(skb))
		return 0;

	pseudo = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!csum_fold(csum_add(pseudo, skb->csum)))
			return 0;
	}

	skb->csum = pseudo;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
140 141 142 143 144 145 146 147 148 149 150 151
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

152
	npinfo->rx_flags |= NETPOLL_RX_DROP;
153
	atomic_inc(&trapped);
154
	set_bit(NAPI_STATE_NPSVC, &napi->state);
155 156

	work = napi->poll(napi, budget);
157
	trace_napi_poll(napi);
158

159
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
160
	atomic_dec(&trapped);
161
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
162 163 164 165

	return budget - work;
}

166
static void poll_napi(struct net_device *dev)
L
Linus Torvalds 已提交
167
{
168
	struct napi_struct *napi;
L
Linus Torvalds 已提交
169 170
	int budget = 16;

171 172
	WARN_ON_ONCE(!irqs_disabled());

173
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
174
		local_irq_enable();
175
		if (napi->poll_owner != smp_processor_id() &&
176
		    spin_trylock(&napi->poll_lock)) {
177
			rcu_read_lock_bh();
178 179
			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
					       napi, budget);
180
			rcu_read_unlock_bh();
181
			spin_unlock(&napi->poll_lock);
182

183 184
			if (!budget) {
				local_irq_disable();
185
				break;
186
			}
187
		}
188
		local_irq_disable();
L
Linus Torvalds 已提交
189 190 191
	}
}

192 193
static void service_arp_queue(struct netpoll_info *npi)
{
194 195
	if (npi) {
		struct sk_buff *skb;
196

197
		while ((skb = skb_dequeue(&npi->arp_tx)))
198
			netpoll_arp_reply(skb, npi);
199 200 201
	}
}

202
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
203
{
204
	const struct net_device_ops *ops;
205
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
206

207 208 209 210 211
	if (!dev || !netif_running(dev))
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller)
L
Linus Torvalds 已提交
212 213 214
		return;

	/* Process pending work on NIC */
215
	ops->ndo_poll_controller(dev);
216 217

	poll_napi(dev);
L
Linus Torvalds 已提交
218

219
	if (dev->flags & IFF_SLAVE) {
220
		if (ni) {
221 222
			struct net_device *bond_dev = dev->master;
			struct sk_buff *skb;
223 224
			struct netpoll_info *bond_ni = rcu_dereference_bh(bond_dev->npinfo);
			while ((skb = skb_dequeue(&ni->arp_tx))) {
225
				skb->dev = bond_dev;
226
				skb_queue_tail(&bond_ni->arp_tx, skb);
227 228 229 230
			}
		}
	}

231
	service_arp_queue(ni);
232

233
	zap_completion_queue();
L
Linus Torvalds 已提交
234 235 236 237 238 239 240
}

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

241 242
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
243 244 245 246
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

247
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
248
	}
249
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
250 251
}

252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279
/*
 * Detach this CPU's softnet completion queue and release every skb on it.
 * skbs with a destructor get an extra reference and go through
 * dev_kfree_skb_any(); the rest are freed directly.
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending;
	unsigned long flags;

	if (!sd->completion_queue)
		goto out;

	/* take the whole list in one go with interrupts off */
	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	while (pending != NULL) {
		struct sk_buff *skb = pending;

		pending = pending->next;
		if (skb->destructor) {
			atomic_inc(&skb->users);
			dev_kfree_skb_any(skb); /* put this one back */
		} else {
			__kfree_skb(skb);
		}
	}

out:
	put_cpu_var(softnet_data);
}

280
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
281
{
282 283
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
284

285
	zap_completion_queue();
286
	refill_skbs();
L
Linus Torvalds 已提交
287 288 289
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
290 291
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
292 293

	if (!skb) {
294
		if (++count < 10) {
295
			netpoll_poll_dev(np->dev);
296
			goto repeat;
L
Linus Torvalds 已提交
297
		}
298
		return NULL;
L
Linus Torvalds 已提交
299 300 301 302 303 304 305
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

306 307 308 309 310 311 312 313 314 315 316
/*
 * Return 1 if the current CPU is the poll owner of any NAPI context on
 * @dev, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

317
/* call with IRQ disabled */
318 319
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
320
{
S
Stephen Hemminger 已提交
321 322
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
323
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
324
	/* It is up to the caller to keep npinfo alive. */
325
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
326

327 328 329
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
330 331 332 333
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
334 335

	/* don't get messages out of order, and no recursion */
336
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
337
		struct netdev_queue *txq;
338

339 340
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

341 342 343
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
344
			if (__netif_tx_trylock(txq)) {
345
				if (!netif_xmit_stopped(txq)) {
346 347 348 349 350 351 352 353
					if (vlan_tx_tag_present(skb) &&
					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
						if (unlikely(!skb))
							break;
						skb->vlan_tci = 0;
					}

354
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
355 356 357
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
358
				__netif_tx_unlock(txq);
359 360 361 362 363

				if (status == NETDEV_TX_OK)
					break;

			}
364 365

			/* tickle device maybe there is some cleanup */
366
			netpoll_poll_dev(np->dev);
367 368

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
369
		}
370 371

		WARN_ONCE(!irqs_disabled(),
372
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
373 374
			dev->name, ops->ndo_start_xmit);

L
Linus Torvalds 已提交
375 376
	}

S
Stephen Hemminger 已提交
377
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
378
		skb_queue_tail(&npinfo->txq, skb);
379
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
380 381
	}
}
382
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
383 384 385

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
386
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
387 388 389 390 391 392
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;

	udp_len = len + sizeof(*udph);
393 394
	ip_len = udp_len + sizeof(*iph);
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
395

396 397
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
398 399 400
	if (!skb)
		return;

401
	skb_copy_to_linear_data(skb, msg, len);
402
	skb_put(skb, len);
L
Linus Torvalds 已提交
403

404 405 406
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
407 408 409 410
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
	udph->check = 0;
411 412
	udph->check = csum_tcpudp_magic(np->local_ip,
					np->remote_ip,
413
					udp_len, IPPROTO_UDP,
414
					csum_partial(udph, udp_len, 0));
415
	if (udph->check == 0)
416
		udph->check = CSUM_MANGLED_0;
L
Linus Torvalds 已提交
417

418 419
	skb_push(skb, sizeof(*iph));
	skb_reset_network_header(skb);
420
	iph = ip_hdr(skb);
L
Linus Torvalds 已提交
421 422 423 424 425 426 427 428 429 430

	/* iph->version = 4; iph->ihl = 5; */
	put_unaligned(0x45, (unsigned char *)iph);
	iph->tos      = 0;
	put_unaligned(htons(ip_len), &(iph->tot_len));
	iph->id       = 0;
	iph->frag_off = 0;
	iph->ttl      = 64;
	iph->protocol = IPPROTO_UDP;
	iph->check    = 0;
431 432
	put_unaligned(np->local_ip, &(iph->saddr));
	put_unaligned(np->remote_ip, &(iph->daddr));
L
Linus Torvalds 已提交
433 434 435
	iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

	eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
436
	skb_reset_mac_header(skb);
437
	skb->protocol = eth->h_proto = htons(ETH_P_IP);
438 439
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
L
Linus Torvalds 已提交
440 441 442 443 444

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
445
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
446

447
static void netpoll_arp_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
448 449 450 451
{
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
A
Al Viro 已提交
452
	__be32 sip, tip;
453
	unsigned char *sha;
L
Linus Torvalds 已提交
454
	struct sk_buff *send_skb;
455 456
	struct netpoll *np, *tmp;
	unsigned long flags;
457
	int hlen, tlen;
458 459 460 461 462 463 464 465 466 467 468 469 470
	int hits = 0;

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
471

472 473
	/* No netpoll struct is using this dev */
	if (!hits)
474
		return;
L
Linus Torvalds 已提交
475 476 477 478 479

	/* No arp on this interface */
	if (skb->dev->flags & IFF_NOARP)
		return;

480
	if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
L
Linus Torvalds 已提交
481 482
		return;

483
	skb_reset_network_header(skb);
484
	skb_reset_transport_header(skb);
485
	arp = arp_hdr(skb);
L
Linus Torvalds 已提交
486 487 488 489 490 491 492

	if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
	     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
	    arp->ar_pro != htons(ETH_P_IP) ||
	    arp->ar_op != htons(ARPOP_REQUEST))
		return;

493 494 495 496
	arp_ptr = (unsigned char *)(arp+1);
	/* save the location of the src hw addr */
	sha = arp_ptr;
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
497
	memcpy(&sip, arp_ptr, 4);
498
	arp_ptr += 4;
499 500
	/* If we actually cared about dst hw addr,
	   it would get copied here */
501
	arp_ptr += skb->dev->addr_len;
L
Linus Torvalds 已提交
502 503 504
	memcpy(&tip, arp_ptr, 4);

	/* Should we ignore arp? */
505
	if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
L
Linus Torvalds 已提交
506 507
		return;

508
	size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
509

510 511 512 513
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (tip != np->local_ip)
			continue;
L
Linus Torvalds 已提交
514

515 516 517
		hlen = LL_RESERVED_SPACE(np->dev);
		tlen = np->dev->needed_tailroom;
		send_skb = find_skb(np, size + hlen + tlen, hlen);
518 519
		if (!send_skb)
			continue;
L
Linus Torvalds 已提交
520

521 522 523 524
		skb_reset_network_header(send_skb);
		arp = (struct arphdr *) skb_put(send_skb, size);
		send_skb->dev = skb->dev;
		send_skb->protocol = htons(ETH_P_ARP);
L
Linus Torvalds 已提交
525

526 527 528 529 530 531 532
		/* Fill the device header for the ARP frame */
		if (dev_hard_header(send_skb, skb->dev, ptype,
				    sha, np->dev->dev_addr,
				    send_skb->len) < 0) {
			kfree_skb(send_skb);
			continue;
		}
L
Linus Torvalds 已提交
533

534 535 536 537 538 539 540
		/*
		 * Fill out the arp protocol part.
		 *
		 * we only support ethernet device type,
		 * which (according to RFC 1390) should
		 * always equal 1 (Ethernet).
		 */
L
Linus Torvalds 已提交
541

542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563
		arp->ar_hrd = htons(np->dev->type);
		arp->ar_pro = htons(ETH_P_IP);
		arp->ar_hln = np->dev->addr_len;
		arp->ar_pln = 4;
		arp->ar_op = htons(type);

		arp_ptr = (unsigned char *)(arp + 1);
		memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &tip, 4);
		arp_ptr += 4;
		memcpy(arp_ptr, sha, np->dev->addr_len);
		arp_ptr += np->dev->addr_len;
		memcpy(arp_ptr, &sip, 4);

		netpoll_send_skb(np, send_skb);

		/* If there are several rx_hooks for the same address,
		   we're fine by sending a single reply */
		break;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
564 565
}

566
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
567 568
{
	int proto, len, ulen;
569
	int hits = 0;
570
	const struct iphdr *iph;
L
Linus Torvalds 已提交
571
	struct udphdr *uh;
572
	struct netpoll *np, *tmp;
573

574
	if (list_empty(&npinfo->rx_np))
L
Linus Torvalds 已提交
575
		goto out;
576

L
Linus Torvalds 已提交
577 578 579
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

580
	/* check if netpoll clients need ARP */
581
	if (skb->protocol == htons(ETH_P_ARP) &&
L
Linus Torvalds 已提交
582
	    atomic_read(&trapped)) {
583
		skb_queue_tail(&npinfo->arp_tx, skb);
L
Linus Torvalds 已提交
584 585 586
		return 1;
	}

587 588 589 590 591 592
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

L
Linus Torvalds 已提交
593 594 595 596 597 598 599 600 601 602
	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto != ETH_P_IP)
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

	if (!pskb_may_pull(skb, sizeof(struct iphdr)))
		goto out;
603
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
604 605 606 607
	if (iph->ihl < 5 || iph->version != 4)
		goto out;
	if (!pskb_may_pull(skb, iph->ihl*4))
		goto out;
608
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
609 610 611 612 613 614 615
	if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
		goto out;

	len = ntohs(iph->tot_len);
	if (skb->len < len || len < iph->ihl*4)
		goto out;

616 617 618 619 620 621 622
	/*
	 * Our transport medium may have padded the buffer out.
	 * Now We trim to the true length of the frame.
	 */
	if (pskb_trim_rcsum(skb, len))
		goto out;

623
	iph = (struct iphdr *)skb->data;
L
Linus Torvalds 已提交
624 625 626 627 628 629 630 631 632
	if (iph->protocol != IPPROTO_UDP)
		goto out;

	len -= iph->ihl*4;
	uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
	ulen = ntohs(uh->len);

	if (ulen != len)
		goto out;
633
	if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
L
Linus Torvalds 已提交
634 635
		goto out;

636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->local_ip && np->local_ip != iph->daddr)
			continue;
		if (np->remote_ip && np->remote_ip != iph->saddr)
			continue;
		if (np->local_port && np->local_port != ntohs(uh->dest))
			continue;

		np->rx_hook(np, ntohs(uh->source),
			       (char *)(uh+1),
			       ulen - sizeof(struct udphdr));
		hits++;
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
652 653 654 655 656 657 658 659 660 661 662 663 664

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

665 666
/*
 * netpoll_print_options - log the configuration held in @np
 *
 * Emits one info-level line each for the local port and IP, the interface
 * name, and the remote port, IP and Ethernet address.
 */
void netpoll_print_options(struct netpoll *np)
{
	/* local endpoint */
	np_info(np, "local port %d\n", np->local_port);
	np_info(np, "local IP %pI4\n", &np->local_ip);

	np_info(np, "interface '%s'\n", np->dev_name);

	/* remote endpoint */
	np_info(np, "remote port %d\n", np->remote_port);
	np_info(np, "remote IP %pI4\n", &np->remote_ip);
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);
675

L
Linus Torvalds 已提交
676 677 678 679
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;

680
	if (*cur != '@') {
L
Linus Torvalds 已提交
681 682
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
683 684 685
		*delim = 0;
		np->local_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
686 687 688
	}
	cur++;

689
	if (*cur != '/') {
L
Linus Torvalds 已提交
690 691
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
692
		*delim = 0;
693
		np->local_ip = in_aton(cur);
694
		cur = delim;
L
Linus Torvalds 已提交
695 696 697
	}
	cur++;

698
	if (*cur != ',') {
L
Linus Torvalds 已提交
699 700 701
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
702
		*delim = 0;
L
Linus Torvalds 已提交
703
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
704
		cur = delim;
L
Linus Torvalds 已提交
705 706 707
	}
	cur++;

708
	if (*cur != '@') {
L
Linus Torvalds 已提交
709 710 711
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
712
		*delim = 0;
713
		if (*cur == ' ' || *cur == '\t')
714
			np_info(np, "warning: whitespace is not allowed\n");
715 716
		np->remote_port = simple_strtol(cur, NULL, 10);
		cur = delim;
L
Linus Torvalds 已提交
717 718 719 720 721 722
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
723
	*delim = 0;
724
	np->remote_ip = in_aton(cur);
725
	cur = delim + 1;
L
Linus Torvalds 已提交
726

727
	if (*cur != 0) {
L
Linus Torvalds 已提交
728
		/* MAC address */
729
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
730 731 732
			goto parse_failed;
	}

733
	netpoll_print_options(np);
L
Linus Torvalds 已提交
734 735 736 737

	return 0;

 parse_failed:
738
	np_info(np, "couldn't parse config at '%s'!\n", cur);
L
Linus Torvalds 已提交
739 740
	return -1;
}
E
Eric Dumazet 已提交
741
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
742

743
/*
 * __netpoll_setup - attach @np to @ndev
 * @np:   netpoll instance being set up
 * @ndev: device to attach to
 * @gfp:  allocation flags for a new netpoll_info
 *
 * Reuses the device's existing netpoll_info (taking a reference) or
 * allocates and initialises a new one, registers @np as an rx client when
 * it has an rx_hook, and finally publishes the info through ndev->npinfo.
 *
 * Returns 0 on success, -ENOTSUPP if the device cannot be polled, -ENOMEM
 * on allocation failure, or the error from the driver's ndo_netpoll_setup.
 */
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (ndev->npinfo) {
		/* device already has netpoll state: share it */
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	} else {
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->arp_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		/* give the driver a chance to prepare (or veto) */
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err)
				goto free_npinfo;
		}
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

L
Linus Torvalds 已提交
816
	if (np->dev_name)
817
		ndev = dev_get_by_name(&init_net, np->dev_name);
L
Linus Torvalds 已提交
818
	if (!ndev) {
819
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
S
Stephen Hemminger 已提交
820
		return -ENODEV;
L
Linus Torvalds 已提交
821 822
	}

823
	if (ndev->master) {
824
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
825 826
		err = -EBUSY;
		goto put;
827 828
	}

L
Linus Torvalds 已提交
829 830 831
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

832
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
833

834
		rtnl_lock();
S
Stephen Hemminger 已提交
835 836 837 838
		err = dev_open(ndev);
		rtnl_unlock();

		if (err) {
839
			np_err(np, "failed to open %s\n", ndev->name);
840
			goto put;
L
Linus Torvalds 已提交
841 842 843
		}

		atleast = jiffies + HZ/10;
844
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
845 846
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
847
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
848 849
				break;
			}
850
			msleep(1);
L
Linus Torvalds 已提交
851 852 853 854 855 856 857 858
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
859
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
860 861 862 863 864 865
			msleep(4000);
		}
	}

	if (!np->local_ip) {
		rcu_read_lock();
866
		in_dev = __in_dev_get_rcu(ndev);
L
Linus Torvalds 已提交
867 868 869

		if (!in_dev || !in_dev->ifa_list) {
			rcu_read_unlock();
870 871
			np_err(np, "no IP address for %s, aborting\n",
			       np->dev_name);
S
Stephen Hemminger 已提交
872
			err = -EDESTADDRREQ;
873
			goto put;
L
Linus Torvalds 已提交
874 875
		}

876
		np->local_ip = in_dev->ifa_list->ifa_local;
L
Linus Torvalds 已提交
877
		rcu_read_unlock();
878
		np_info(np, "local IP %pI4\n", &np->local_ip);
L
Linus Torvalds 已提交
879 880
	}

881 882 883 884
	/* fill up the skb queue */
	refill_skbs();

	rtnl_lock();
885
	err = __netpoll_setup(np, ndev, GFP_KERNEL);
886
	rtnl_unlock();
887

888 889 890
	if (err)
		goto put;

L
Linus Torvalds 已提交
891 892
	return 0;

893
put:
L
Linus Torvalds 已提交
894
	dev_put(ndev);
S
Stephen Hemminger 已提交
895
	return err;
L
Linus Torvalds 已提交
896
}
E
Eric Dumazet 已提交
897
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
898

899 900
/*
 * Core initcall: prepare the (initially empty) reserve skb queue.
 * Always returns 0.
 */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);

	return 0;
}
core_initcall(netpoll_init);

906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923
/*
 * RCU callback that tears down a netpoll_info: purge its queues, cancel the
 * tx work, purge and cancel once more, then free the structure.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *info;

	info = container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&info->arp_tx);
	skb_queue_purge(&info->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&info->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&info->txq);
	/* now cancel it again */
	cancel_delayed_work(&info->tx_work);

	kfree(info);
}

924
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
925
{
926 927 928
	struct netpoll_info *npinfo;
	unsigned long flags;

929 930
	npinfo = np->dev->npinfo;
	if (!npinfo)
931
		return;
S
Stephen Hemminger 已提交
932

933 934 935 936 937 938 939
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
940

941 942
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
943

944 945 946
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
947

948
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
949 950 951 952
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
953

954 955 956
/*
 * RCU callback: clean up the netpoll instance embedding @rcu_head and
 * free it.
 */
static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
{
	struct netpoll *np;

	np = container_of(rcu_head, struct netpoll, rcu);

	__netpoll_cleanup(np);
	kfree(np);
}
S
Stephen Hemminger 已提交
961

962 963 964
/*
 * __netpoll_free_rcu - schedule cleanup and freeing of @np through
 * call_rcu_bh(); the work itself is done by rcu_cleanup_netpoll().
 */
void __netpoll_free_rcu(struct netpoll *np)
{
	struct rcu_head *head = &np->rcu;

	call_rcu_bh(head, rcu_cleanup_netpoll);
}
EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
967

968 969 970 971
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;
972

973 974 975 976 977
	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
L
Linus Torvalds 已提交
978 979
	np->dev = NULL;
}
E
Eric Dumazet 已提交
980
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
981 982 983 984 985

/*
 * netpoll_trap - return the current value of the trapped counter.
 */
int netpoll_trap(void)
{
	int depth = atomic_read(&trapped);

	return depth;
}
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
987 988 989 990 991 992 993 994 995

/*
 * netpoll_set_trap - adjust the trapped counter
 * @trap: non-zero increments the counter, zero decrements it
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);
EXPORT_SYMBOL(netpoll_set_trap);