netpoll.c 23.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
18
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
27
#include <linux/slab.h>
28
#include <linux/export.h>
29
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
30 31 32
#include <net/tcp.h>
#include <net/udp.h>
#include <asm/unaligned.h>
33
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
34 35 36 37 38 39 40 41 42

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

43
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
44 45 46

static atomic_t trapped;

S
Stephen Hemminger 已提交
47
#define USEC_PER_POLL	50
48 49
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2
L
Linus Torvalds 已提交
50

J
Joe Perches 已提交
51 52 53 54 55
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
56

57
static void zap_completion_queue(void);
C
Cong Wang 已提交
58
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
L
Linus Torvalds 已提交
59

60 61 62
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

63 64 65 66 67 68 69
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

D
David Howells 已提交
70
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
71
{
72 73
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
74
	struct sk_buff *skb;
I
Ingo Molnar 已提交
75
	unsigned long flags;
L
Linus Torvalds 已提交
76

S
Stephen Hemminger 已提交
77 78
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
79
		const struct net_device_ops *ops = dev->netdev_ops;
80
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
81

S
Stephen Hemminger 已提交
82 83 84 85
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
86

87 88
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
89
		local_irq_save(flags);
90
		__netif_tx_lock(txq, smp_processor_id());
91
		if (netif_xmit_frozen_or_stopped(txq) ||
92
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
93
			skb_queue_head(&npinfo->txq, skb);
94
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
95
			local_irq_restore(flags);
L
Linus Torvalds 已提交
96

97
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
98 99
			return;
		}
100
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
101
		local_irq_restore(flags);
L
Linus Torvalds 已提交
102 103 104
	}
}

105 106
/*
 * Verify the UDP checksum of @skb.
 *
 * Returns 0 when the checksum is absent (uh->check == 0), already
 * known good (skb_csum_unnecessary()), or validates against a
 * CHECKSUM_COMPLETE value; otherwise returns the result of
 * __skb_checksum_complete() after seeding skb->csum with the
 * pseudo-header sum.
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum pseudo;

	if (uh->check == 0)
		return 0;
	if (skb_csum_unnecessary(skb))
		return 0;

	pseudo = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!csum_fold(csum_add(pseudo, skb->csum)))
			return 0;
	}

	skb->csum = pseudo;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
140 141 142 143 144 145 146 147 148 149 150 151
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

152
	npinfo->rx_flags |= NETPOLL_RX_DROP;
153
	atomic_inc(&trapped);
154
	set_bit(NAPI_STATE_NPSVC, &napi->state);
155 156

	work = napi->poll(napi, budget);
157
	trace_napi_poll(napi);
158

159
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
160
	atomic_dec(&trapped);
161
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
162 163 164 165

	return budget - work;
}

166
static void poll_napi(struct net_device *dev)
L
Linus Torvalds 已提交
167
{
168
	struct napi_struct *napi;
L
Linus Torvalds 已提交
169 170
	int budget = 16;

171
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
172
		if (napi->poll_owner != smp_processor_id() &&
173
		    spin_trylock(&napi->poll_lock)) {
174 175
			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
					       napi, budget);
176
			spin_unlock(&napi->poll_lock);
177

178
			if (!budget)
179
				break;
180
		}
L
Linus Torvalds 已提交
181 182 183
	}
}

C
Cong Wang 已提交
184
static void service_neigh_queue(struct netpoll_info *npi)
185
{
186 187
	if (npi) {
		struct sk_buff *skb;
188

C
Cong Wang 已提交
189 190
		while ((skb = skb_dequeue(&npi->neigh_tx)))
			netpoll_neigh_reply(skb, npi);
191 192 193
	}
}

194
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
195
{
196
	const struct net_device_ops *ops;
197
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
198

199 200 201 202 203
	if (!dev || !netif_running(dev))
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller)
L
Linus Torvalds 已提交
204 205 206
		return;

	/* Process pending work on NIC */
207
	ops->ndo_poll_controller(dev);
208 209

	poll_napi(dev);
L
Linus Torvalds 已提交
210

211
	if (dev->flags & IFF_SLAVE) {
212
		if (ni) {
213
			struct net_device *bond_dev;
214
			struct sk_buff *skb;
215 216 217 218
			struct netpoll_info *bond_ni;

			bond_dev = netdev_master_upper_dev_get_rcu(dev);
			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
C
Cong Wang 已提交
219
			while ((skb = skb_dequeue(&ni->neigh_tx))) {
220
				skb->dev = bond_dev;
C
Cong Wang 已提交
221
				skb_queue_tail(&bond_ni->neigh_tx, skb);
222 223 224 225
			}
		}
	}

C
Cong Wang 已提交
226
	service_neigh_queue(ni);
227

228
	zap_completion_queue();
L
Linus Torvalds 已提交
229 230 231 232 233 234 235
}

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

236 237
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
238 239 240 241
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

242
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
243
	}
244
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
245 246
}

247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
/*
 * Detach this CPU's softnet completion queue (with interrupts off)
 * and release every skb on it.  skbs without a destructor are freed
 * directly; skbs with one get their user count raised and are passed
 * to dev_kfree_skb_any() instead.
 */
static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *pending;
		struct sk_buff *skb;

		/* steal the whole list atomically w.r.t. local interrupts */
		local_irq_save(flags);
		pending = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		while (pending != NULL) {
			skb = pending;
			pending = skb->next;

			if (!skb->destructor) {
				__kfree_skb(skb);
				continue;
			}

			/* put this one back */
			atomic_inc(&skb->users);
			dev_kfree_skb_any(skb);
		}
	}

	put_cpu_var(softnet_data);
}

275
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
276
{
277 278
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
279

280
	zap_completion_queue();
281
	refill_skbs();
L
Linus Torvalds 已提交
282 283 284
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
285 286
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
287 288

	if (!skb) {
289
		if (++count < 10) {
290
			netpoll_poll_dev(np->dev);
291
			goto repeat;
L
Linus Torvalds 已提交
292
		}
293
		return NULL;
L
Linus Torvalds 已提交
294 295 296 297 298 299 300
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

301 302 303 304 305 306 307 308 309 310 311
/*
 * Returns 1 if the current CPU is the poll_owner of any NAPI context
 * on @dev, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

312
/* call with IRQ disabled */
313 314
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
315
{
S
Stephen Hemminger 已提交
316 317
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
318
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
319
	/* It is up to the caller to keep npinfo alive. */
320
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
321

322 323 324
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
325 326 327 328
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
329 330

	/* don't get messages out of order, and no recursion */
331
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
332
		struct netdev_queue *txq;
333

334
		txq = netdev_pick_tx(dev, skb);
335

336 337 338
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
339
			if (__netif_tx_trylock(txq)) {
340
				if (!netif_xmit_stopped(txq)) {
341 342 343 344 345 346 347 348
					if (vlan_tx_tag_present(skb) &&
					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
						if (unlikely(!skb))
							break;
						skb->vlan_tci = 0;
					}

349
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
350 351 352
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
353
				__netif_tx_unlock(txq);
354 355 356 357 358

				if (status == NETDEV_TX_OK)
					break;

			}
359 360

			/* tickle device maybe there is some cleanup */
361
			netpoll_poll_dev(np->dev);
362 363

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
364
		}
365 366

		WARN_ONCE(!irqs_disabled(),
367
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
368 369
			dev->name, ops->ndo_start_xmit);

L
Linus Torvalds 已提交
370 371
	}

S
Stephen Hemminger 已提交
372
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
373
		skb_queue_tail(&npinfo->txq, skb);
374
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
375 376
	}
}
377
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
378 379 380

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
381
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
382 383 384 385
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
386
	static atomic_t ip_ident;
L
Linus Torvalds 已提交
387 388

	udp_len = len + sizeof(*udph);
C
Cong Wang 已提交
389 390 391
	if (!np->ipv6)
		ip_len = udp_len + sizeof(*iph);

392
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
393

394 395
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
396 397 398
	if (!skb)
		return;

399
	skb_copy_to_linear_data(skb, msg, len);
400
	skb_put(skb, len);
L
Linus Torvalds 已提交
401

402 403 404
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
405 406 407
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
C
Cong Wang 已提交
408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439

	if (!np->ipv6) {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

440 441
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
L
Linus Torvalds 已提交
442 443 444 445 446

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
447
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
448

C
Cong Wang 已提交
449
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
450 451 452 453
{
	struct arphdr *arp;
	unsigned char *arp_ptr;
	int size, type = ARPOP_REPLY, ptype = ETH_P_ARP;
A
Al Viro 已提交
454
	__be32 sip, tip;
455
	unsigned char *sha;
L
Linus Torvalds 已提交
456
	struct sk_buff *send_skb;
457 458
	struct netpoll *np, *tmp;
	unsigned long flags;
459
	int hlen, tlen;
C
Cong Wang 已提交
460
	int hits = 0, proto;
461 462 463 464 465 466 467 468 469 470 471 472

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
473

474 475
	/* No netpoll struct is using this dev */
	if (!hits)
476
		return;
L
Linus Torvalds 已提交
477

C
Cong Wang 已提交
478 479 480 481 482
	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto == ETH_P_IP) {
		/* No arp on this interface */
		if (skb->dev->flags & IFF_NOARP)
			return;
L
Linus Torvalds 已提交
483

C
Cong Wang 已提交
484 485
		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
			return;
L
Linus Torvalds 已提交
486

C
Cong Wang 已提交
487 488 489
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		arp = arp_hdr(skb);
L
Linus Torvalds 已提交
490

C
Cong Wang 已提交
491 492 493 494 495
		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
		    arp->ar_pro != htons(ETH_P_IP) ||
		    arp->ar_op != htons(ARPOP_REQUEST))
			return;
L
Linus Torvalds 已提交
496

C
Cong Wang 已提交
497 498 499 500 501 502 503 504 505 506
		arp_ptr = (unsigned char *)(arp+1);
		/* save the location of the src hw addr */
		sha = arp_ptr;
		arp_ptr += skb->dev->addr_len;
		memcpy(&sip, arp_ptr, 4);
		arp_ptr += 4;
		/* If we actually cared about dst hw addr,
		   it would get copied here */
		arp_ptr += skb->dev->addr_len;
		memcpy(&tip, arp_ptr, 4);
L
Linus Torvalds 已提交
507

C
Cong Wang 已提交
508 509 510
		/* Should we ignore arp? */
		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
			return;
L
Linus Torvalds 已提交
511

C
Cong Wang 已提交
512
		size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
513

C
Cong Wang 已提交
514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (tip != np->local_ip.ip)
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			skb_reset_network_header(send_skb);
			arp = (struct arphdr *) skb_put(send_skb, size);
			send_skb->dev = skb->dev;
			send_skb->protocol = htons(ETH_P_ARP);

			/* Fill the device header for the ARP frame */
			if (dev_hard_header(send_skb, skb->dev, ptype,
					    sha, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}
L
Linus Torvalds 已提交
537

C
Cong Wang 已提交
538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565
			/*
			 * Fill out the arp protocol part.
			 *
			 * we only support ethernet device type,
			 * which (according to RFC 1390) should
			 * always equal 1 (Ethernet).
			 */

			arp->ar_hrd = htons(np->dev->type);
			arp->ar_pro = htons(ETH_P_IP);
			arp->ar_hln = np->dev->addr_len;
			arp->ar_pln = 4;
			arp->ar_op = htons(type);

			arp_ptr = (unsigned char *)(arp + 1);
			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &tip, 4);
			arp_ptr += 4;
			memcpy(arp_ptr, sha, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &sip, 4);

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_hooks for the same address,
			   we're fine by sending a single reply */
			break;
566
		}
C
Cong Wang 已提交
567
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
568
	}
L
Linus Torvalds 已提交
569 570
}

571
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
572 573
{
	int proto, len, ulen;
574
	int hits = 0;
575
	const struct iphdr *iph;
L
Linus Torvalds 已提交
576
	struct udphdr *uh;
577
	struct netpoll *np, *tmp;
578

579
	if (list_empty(&npinfo->rx_np))
L
Linus Torvalds 已提交
580
		goto out;
581

L
Linus Torvalds 已提交
582 583 584
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

585
	/* check if netpoll clients need ARP */
586
	if (skb->protocol == htons(ETH_P_ARP) &&
L
Linus Torvalds 已提交
587
	    atomic_read(&trapped)) {
C
Cong Wang 已提交
588
		skb_queue_tail(&npinfo->neigh_tx, skb);
L
Linus Torvalds 已提交
589 590 591
		return 1;
	}

592 593 594 595 596 597
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

L
Linus Torvalds 已提交
598
	proto = ntohs(eth_hdr(skb)->h_proto);
C
Cong Wang 已提交
599
	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
L
Linus Torvalds 已提交
600 601 602 603 604 605
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

C
Cong Wang 已提交
606 607 608 609 610 611 612 613 614 615 616
	if (proto == ETH_P_IP) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (iph->ihl < 5 || iph->version != 4)
			goto out;
		if (!pskb_may_pull(skb, iph->ihl*4))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
			goto out;
617

C
Cong Wang 已提交
618 619 620
		len = ntohs(iph->tot_len);
		if (skb->len < len || len < iph->ihl*4)
			goto out;
L
Linus Torvalds 已提交
621

C
Cong Wang 已提交
622 623 624 625 626 627
		/*
		 * Our transport medium may have padded the buffer out.
		 * Now We trim to the true length of the frame.
		 */
		if (pskb_trim_rcsum(skb, len))
			goto out;
L
Linus Torvalds 已提交
628

C
Cong Wang 已提交
629 630 631
		iph = (struct iphdr *)skb->data;
		if (iph->protocol != IPPROTO_UDP)
			goto out;
L
Linus Torvalds 已提交
632

C
Cong Wang 已提交
633 634 635
		len -= iph->ihl*4;
		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
		ulen = ntohs(uh->len);
636

C
Cong Wang 已提交
637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653
		if (ulen != len)
			goto out;
		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
				continue;
			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				       (char *)(uh+1),
				       ulen - sizeof(struct udphdr));
			hits++;
		}
654 655 656 657
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
658 659 660 661 662 663 664 665 666 667 668 669 670

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

671 672
void netpoll_print_options(struct netpoll *np)
{
673
	np_info(np, "local port %d\n", np->local_port);
C
Cong Wang 已提交
674 675
	if (!np->ipv6)
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
676 677
	np_info(np, "interface '%s'\n", np->dev_name);
	np_info(np, "remote port %d\n", np->remote_port);
C
Cong Wang 已提交
678 679
	if (!np->ipv6)
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
680
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
681
}
E
Eric Dumazet 已提交
682
EXPORT_SYMBOL(netpoll_print_options);
683

C
Cong Wang 已提交
684 685 686 687 688 689 690 691 692 693 694 695 696 697 698 699 700 701 702 703
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

L
Linus Torvalds 已提交
704 705 706
/*
 * netpoll_parse_options - parse a netpoll configuration string.
 *
 * The string is consumed field by field, in this order:
 *   [local port] '@' [local ip] '/' [dev name] ','
 *   [remote port] '@' <remote ip> '/' [remote mac]
 * Fields in brackets may be empty.  @opt is modified in place: each
 * delimiter that ends a non-empty field is overwritten with NUL.
 *
 * Returns 0 on success (and logs the resulting options), -1 on a
 * parse error (and logs the position where parsing stopped).
 */
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur = opt;
	char *delim;
	int ipv6;

	/* local port */
	if (*cur != '@') {
		delim = strchr(cur, '@');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* local ip; also decides the address family of the instance */
	if (*cur != '/') {
		delim = strchr(cur, '/');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	/* dev name */
	if (*cur != ',') {
		delim = strchr(cur, ',');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	/* dst port */
	if (*cur != '@') {
		delim = strchr(cur, '@');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip; must be of the same family as recorded in np->ipv6 */
	delim = strchr(cur, '/');
	if (!delim)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0 || np->ipv6 != (bool)ipv6)
		goto parse_failed;
	np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	/* MAC address */
	if (*cur != 0 && !mac_pton(cur, np->remote_mac))
		goto parse_failed;

	netpoll_print_options(np);

	return 0;

 parse_failed:
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
783

784
/*
 * __netpoll_setup - attach @np to @ndev.
 *
 * Reuses the device's existing netpoll_info (taking a reference) or
 * allocates and initialises a new one with @gfp, calling the driver's
 * ndo_netpoll_setup() if it has one.  An instance with an rx_hook is
 * added to the info's rx_np list.  The info is published on
 * ndev->npinfo as the very last step.
 *
 * Returns 0 on success, -ENOTSUPP if the device cannot be polled,
 * -ENOMEM on allocation failure, or the driver's setup error.
 */
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		return -ENOTSUPP;
	}

	if (ndev->npinfo) {
		/* device already has netpoll state: share it */
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	} else {
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo)
			return -ENOMEM;

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->neigh_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err) {
				kfree(npinfo);
				return err;
			}
		}
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

L
Linus Torvalds 已提交
857
	if (np->dev_name)
858
		ndev = dev_get_by_name(&init_net, np->dev_name);
L
Linus Torvalds 已提交
859
	if (!ndev) {
860
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
S
Stephen Hemminger 已提交
861
		return -ENODEV;
L
Linus Torvalds 已提交
862 863
	}

864
	if (netdev_master_upper_dev_get(ndev)) {
865
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
866 867
		err = -EBUSY;
		goto put;
868 869
	}

L
Linus Torvalds 已提交
870 871 872
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

873
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
874

875
		rtnl_lock();
S
Stephen Hemminger 已提交
876 877 878 879
		err = dev_open(ndev);
		rtnl_unlock();

		if (err) {
880
			np_err(np, "failed to open %s\n", ndev->name);
881
			goto put;
L
Linus Torvalds 已提交
882 883 884
		}

		atleast = jiffies + HZ/10;
885
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
886 887
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
888
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
889 890
				break;
			}
891
			msleep(1);
L
Linus Torvalds 已提交
892 893 894 895 896 897 898 899
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
900
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
901 902 903 904
			msleep(4000);
		}
	}

C
Cong Wang 已提交
905 906 907 908
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
			rcu_read_lock();
			in_dev = __in_dev_get_rcu(ndev);
L
Linus Torvalds 已提交
909

C
Cong Wang 已提交
910 911 912 913 914 915 916 917 918 919

			if (!in_dev || !in_dev->ifa_list) {
				rcu_read_unlock();
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
L
Linus Torvalds 已提交
920
			rcu_read_unlock();
C
Cong Wang 已提交
921
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
L
Linus Torvalds 已提交
922 923 924
		}
	}

925 926 927 928
	/* fill up the skb queue */
	refill_skbs();

	rtnl_lock();
929
	err = __netpoll_setup(np, ndev, GFP_KERNEL);
930
	rtnl_unlock();
931

932 933 934
	if (err)
		goto put;

L
Linus Torvalds 已提交
935 936
	return 0;

937
put:
L
Linus Torvalds 已提交
938
	dev_put(ndev);
S
Stephen Hemminger 已提交
939
	return err;
L
Linus Torvalds 已提交
940
}
E
Eric Dumazet 已提交
941
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
942

943 944
/*
 * Core initcall: set up the (initially empty) emergency skb pool
 * head before anything can queue onto it.  Always returns 0.
 */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);

	return 0;
}
core_initcall(netpoll_init);

950 951 952 953 954
/*
 * RCU callback that tears down a netpoll_info: purge its queues,
 * cancel its tx work and free it.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *info =
		container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&info->neigh_tx);
	skb_queue_purge(&info->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&info->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&info->txq);

	/* now cancel it again */
	cancel_delayed_work(&info->tx_work);

	kfree(info);
}

968
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
969
{
970 971 972
	struct netpoll_info *npinfo;
	unsigned long flags;

973 974
	npinfo = np->dev->npinfo;
	if (!npinfo)
975
		return;
S
Stephen Hemminger 已提交
976

977 978 979 980 981 982 983
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
984

985 986
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
987

988 989 990
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
991

992
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
993 994 995 996
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
997

998 999 1000
/*
 * RCU callback queued by __netpoll_free_rcu(): clean up the netpoll
 * instance embedding @rcu_head and then free it.
 */
static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
{
	struct netpoll *instance;

	instance = container_of(rcu_head, struct netpoll, rcu);

	__netpoll_cleanup(instance);
	kfree(instance);
}
S
Stephen Hemminger 已提交
1005

1006 1007 1008
/*
 * __netpoll_free_rcu - schedule @np for cleanup and kfree() through
 * call_rcu_bh(); the work itself is done by rcu_cleanup_netpoll().
 */
void __netpoll_free_rcu(struct netpoll *np)
{
	struct rcu_head *head = &np->rcu;

	call_rcu_bh(head, rcu_cleanup_netpoll);
}
EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
1011

1012 1013 1014 1015
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;
1016

1017 1018 1019 1020 1021
	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
L
Linus Torvalds 已提交
1022 1023
	np->dev = NULL;
}
E
Eric Dumazet 已提交
1024
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
1025 1026 1027 1028 1029

/*
 * netpoll_trap - return the current value of the trap counter
 * (non-zero while trapped).
 */
int netpoll_trap(void)
{
	int level = atomic_read(&trapped);

	return level;
}
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
1031 1032 1033 1034 1035 1036 1037 1038 1039

/*
 * netpoll_set_trap - adjust the trap counter: a non-zero @trap
 * increments it, zero decrements it.
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);