netpoll.c 29.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
L
Linus Torvalds 已提交
15 16 17
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
18
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
19 20 21 22 23 24 25 26
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
27
#include <linux/slab.h>
28
#include <linux/export.h>
29
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
30 31
#include <net/tcp.h>
#include <net/udp.h>
C
Cong Wang 已提交
32 33 34
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
L
Linus Torvalds 已提交
35
#include <asm/unaligned.h>
36
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
37 38 39 40 41 42 43 44 45

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

46
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
47 48 49

static atomic_t trapped;

S
Stephen Hemminger 已提交
50
#define USEC_PER_POLL	50
51 52
#define NETPOLL_RX_ENABLED  1
#define NETPOLL_RX_DROP     2
L
Linus Torvalds 已提交
53

J
Joe Perches 已提交
54 55 56 57 58
/*
 * Allocation size used for every skb placed in the emergency pool:
 * Ethernet, IPv4 and UDP headers plus MAX_UDP_CHUNK bytes of payload.
 */
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
59

60
static void zap_completion_queue(void);
C
Cong Wang 已提交
61
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
L
Linus Torvalds 已提交
62

63 64 65
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

66 67 68 69 70 71 72
/*
 * Logging helpers that prefix each message with the netpoll client's
 * name.  The @np argument is parenthesized so that any pointer-valued
 * expression (not just a plain identifier) expands correctly.
 */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, (np)->name, ##__VA_ARGS__)

D
David Howells 已提交
73
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
74
{
75 76
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
77
	struct sk_buff *skb;
I
Ingo Molnar 已提交
78
	unsigned long flags;
L
Linus Torvalds 已提交
79

S
Stephen Hemminger 已提交
80 81
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
82
		const struct net_device_ops *ops = dev->netdev_ops;
83
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
84

S
Stephen Hemminger 已提交
85 86 87 88
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
89

90 91
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
92
		local_irq_save(flags);
93
		__netif_tx_lock(txq, smp_processor_id());
94
		if (netif_xmit_frozen_or_stopped(txq) ||
95
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
96
			skb_queue_head(&npinfo->txq, skb);
97
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
98
			local_irq_restore(flags);
L
Linus Torvalds 已提交
99

100
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
101 102
			return;
		}
103
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
104
		local_irq_restore(flags);
L
Linus Torvalds 已提交
105 106 107
	}
}

108 109
/*
 * Verify the UDP checksum of a received packet.
 *
 * Returns 0 when the checksum is absent (uh->check == 0), already known
 * to be good, or validates against a CHECKSUM_COMPLETE value; otherwise
 * returns the result of a full software checksum of the skb.
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum pseudo;

	/* sender did not checksum the datagram */
	if (!uh->check)
		return 0;

	if (skb_csum_unnecessary(skb))
		return 0;

	/* pseudo-header sum, seeded into the full check below */
	pseudo = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!csum_fold(csum_add(pseudo, skb->csum)))
			return 0;
	}

	skb->csum = pseudo;
	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
143 144 145 146 147 148 149 150 151 152 153 154
static int poll_one_napi(struct netpoll_info *npinfo,
			 struct napi_struct *napi, int budget)
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

155
	npinfo->rx_flags |= NETPOLL_RX_DROP;
156
	atomic_inc(&trapped);
157
	set_bit(NAPI_STATE_NPSVC, &napi->state);
158 159

	work = napi->poll(napi, budget);
160
	trace_napi_poll(napi);
161

162
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
163
	atomic_dec(&trapped);
164
	npinfo->rx_flags &= ~NETPOLL_RX_DROP;
165 166 167 168

	return budget - work;
}

169
static void poll_napi(struct net_device *dev)
L
Linus Torvalds 已提交
170
{
171
	struct napi_struct *napi;
L
Linus Torvalds 已提交
172 173
	int budget = 16;

174
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
175
		if (napi->poll_owner != smp_processor_id() &&
176
		    spin_trylock(&napi->poll_lock)) {
177 178
			budget = poll_one_napi(rcu_dereference_bh(dev->npinfo),
					       napi, budget);
179
			spin_unlock(&napi->poll_lock);
180

181
			if (!budget)
182
				break;
183
		}
L
Linus Torvalds 已提交
184 185 186
	}
}

C
Cong Wang 已提交
187
static void service_neigh_queue(struct netpoll_info *npi)
188
{
189 190
	if (npi) {
		struct sk_buff *skb;
191

C
Cong Wang 已提交
192 193
		while ((skb = skb_dequeue(&npi->neigh_tx)))
			netpoll_neigh_reply(skb, npi);
194 195 196
	}
}

197
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
198
{
199
	const struct net_device_ops *ops;
200
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
201

202 203 204 205 206
	if (!dev || !netif_running(dev))
		return;

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller)
L
Linus Torvalds 已提交
207 208 209
		return;

	/* Process pending work on NIC */
210
	ops->ndo_poll_controller(dev);
211 212

	poll_napi(dev);
L
Linus Torvalds 已提交
213

214
	if (dev->flags & IFF_SLAVE) {
215
		if (ni) {
216
			struct net_device *bond_dev;
217
			struct sk_buff *skb;
218 219 220 221
			struct netpoll_info *bond_ni;

			bond_dev = netdev_master_upper_dev_get_rcu(dev);
			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
C
Cong Wang 已提交
222
			while ((skb = skb_dequeue(&ni->neigh_tx))) {
223
				skb->dev = bond_dev;
C
Cong Wang 已提交
224
				skb_queue_tail(&bond_ni->neigh_tx, skb);
225 226 227 228
			}
		}
	}

C
Cong Wang 已提交
229
	service_neigh_queue(ni);
230

231
	zap_completion_queue();
L
Linus Torvalds 已提交
232 233 234 235 236 237 238
}

static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

239 240
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
241 242 243 244
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

245
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
246
	}
247
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
248 249
}

250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277
/*
 * Detach this CPU's softnet completion queue and release every skb on
 * it.  Skbs with a destructor are given back via dev_kfree_skb_any()
 * (after re-taking a reference); the rest are freed directly.
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending = NULL;
	unsigned long flags;

	if (sd->completion_queue) {
		/* steal the whole list with interrupts off */
		local_irq_save(flags);
		pending = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);
	}

	while (pending != NULL) {
		struct sk_buff *skb = pending;

		pending = pending->next;
		if (!skb->destructor) {
			__kfree_skb(skb);
			continue;
		}

		atomic_inc(&skb->users);
		dev_kfree_skb_any(skb); /* put this one back */
	}

	put_cpu_var(softnet_data);
}

278
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
279
{
280 281
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
282

283
	zap_completion_queue();
284
	refill_skbs();
L
Linus Torvalds 已提交
285 286 287
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
288 289
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
290 291

	if (!skb) {
292
		if (++count < 10) {
293
			netpoll_poll_dev(np->dev);
294
			goto repeat;
L
Linus Torvalds 已提交
295
		}
296
		return NULL;
L
Linus Torvalds 已提交
297 298 299 300 301 302 303
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

304 305 306 307 308 309 310 311 312 313 314
/*
 * Return 1 if the current CPU is the poll owner of any NAPI instance
 * of @dev, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner != smp_processor_id())
			continue;

		active = 1;
		break;
	}

	return active;
}

315
/* call with IRQ disabled */
316 317
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
318
{
S
Stephen Hemminger 已提交
319 320
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
321
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
322
	/* It is up to the caller to keep npinfo alive. */
323
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
324

325 326 327
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
328 329 330 331
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
332 333

	/* don't get messages out of order, and no recursion */
334
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
335
		struct netdev_queue *txq;
336

337
		txq = netdev_pick_tx(dev, skb);
338

339 340 341
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
342
			if (__netif_tx_trylock(txq)) {
343
				if (!netif_xmit_stopped(txq)) {
344 345 346 347 348 349 350 351
					if (vlan_tx_tag_present(skb) &&
					    !(netif_skb_features(skb) & NETIF_F_HW_VLAN_TX)) {
						skb = __vlan_put_tag(skb, vlan_tx_tag_get(skb));
						if (unlikely(!skb))
							break;
						skb->vlan_tci = 0;
					}

352
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
353 354 355
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
356
				__netif_tx_unlock(txq);
357 358 359 360 361

				if (status == NETDEV_TX_OK)
					break;

			}
362 363

			/* tickle device maybe there is some cleanup */
364
			netpoll_poll_dev(np->dev);
365 366

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
367
		}
368 369

		WARN_ONCE(!irqs_disabled(),
370
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
371 372
			dev->name, ops->ndo_start_xmit);

L
Linus Torvalds 已提交
373 374
	}

S
Stephen Hemminger 已提交
375
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
376
		skb_queue_tail(&npinfo->txq, skb);
377
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
378 379
	}
}
380
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
381 382 383

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
384
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
385 386 387 388
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
389
	static atomic_t ip_ident;
C
Cong Wang 已提交
390
	struct ipv6hdr *ip6h;
L
Linus Torvalds 已提交
391 392

	udp_len = len + sizeof(*udph);
C
Cong Wang 已提交
393 394 395
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
C
Cong Wang 已提交
396 397
		ip_len = udp_len + sizeof(*iph);

398
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
399

400 401
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
402 403 404
	if (!skb)
		return;

405
	skb_copy_to_linear_data(skb, msg, len);
406
	skb_put(skb, len);
L
Linus Torvalds 已提交
407

408 409 410
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
411 412 413
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
C
Cong Wang 已提交
414

C
Cong Wang 已提交
415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443
	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
C
Cong Wang 已提交
444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

474 475
	memcpy(eth->h_source, np->dev->dev_addr, ETH_ALEN);
	memcpy(eth->h_dest, np->remote_mac, ETH_ALEN);
L
Linus Torvalds 已提交
476 477 478 479 480

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
481
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
482

C
Cong Wang 已提交
483
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
484
{
C
Cong Wang 已提交
485
	int size, type = ARPOP_REPLY;
A
Al Viro 已提交
486
	__be32 sip, tip;
487
	unsigned char *sha;
L
Linus Torvalds 已提交
488
	struct sk_buff *send_skb;
489 490
	struct netpoll *np, *tmp;
	unsigned long flags;
491
	int hlen, tlen;
C
Cong Wang 已提交
492
	int hits = 0, proto;
493 494 495 496 497 498 499 500 501 502 503 504

	if (list_empty(&npinfo->rx_np))
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
505

506 507
	/* No netpoll struct is using this dev */
	if (!hits)
508
		return;
L
Linus Torvalds 已提交
509

C
Cong Wang 已提交
510 511
	proto = ntohs(eth_hdr(skb)->h_proto);
	if (proto == ETH_P_IP) {
C
Cong Wang 已提交
512 513
		struct arphdr *arp;
		unsigned char *arp_ptr;
C
Cong Wang 已提交
514 515 516
		/* No arp on this interface */
		if (skb->dev->flags & IFF_NOARP)
			return;
L
Linus Torvalds 已提交
517

C
Cong Wang 已提交
518 519
		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
			return;
L
Linus Torvalds 已提交
520

C
Cong Wang 已提交
521 522 523
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		arp = arp_hdr(skb);
L
Linus Torvalds 已提交
524

C
Cong Wang 已提交
525 526 527 528 529
		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
		    arp->ar_pro != htons(ETH_P_IP) ||
		    arp->ar_op != htons(ARPOP_REQUEST))
			return;
L
Linus Torvalds 已提交
530

C
Cong Wang 已提交
531 532 533 534 535 536 537 538 539 540
		arp_ptr = (unsigned char *)(arp+1);
		/* save the location of the src hw addr */
		sha = arp_ptr;
		arp_ptr += skb->dev->addr_len;
		memcpy(&sip, arp_ptr, 4);
		arp_ptr += 4;
		/* If we actually cared about dst hw addr,
		   it would get copied here */
		arp_ptr += skb->dev->addr_len;
		memcpy(&tip, arp_ptr, 4);
L
Linus Torvalds 已提交
541

C
Cong Wang 已提交
542 543 544
		/* Should we ignore arp? */
		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
			return;
L
Linus Torvalds 已提交
545

C
Cong Wang 已提交
546
		size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
547

C
Cong Wang 已提交
548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (tip != np->local_ip.ip)
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			skb_reset_network_header(send_skb);
			arp = (struct arphdr *) skb_put(send_skb, size);
			send_skb->dev = skb->dev;
			send_skb->protocol = htons(ETH_P_ARP);

			/* Fill the device header for the ARP frame */
C
Cong Wang 已提交
565
			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
C
Cong Wang 已提交
566 567 568 569 570
					    sha, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}
L
Linus Torvalds 已提交
571

C
Cong Wang 已提交
572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599
			/*
			 * Fill out the arp protocol part.
			 *
			 * we only support ethernet device type,
			 * which (according to RFC 1390) should
			 * always equal 1 (Ethernet).
			 */

			arp->ar_hrd = htons(np->dev->type);
			arp->ar_pro = htons(ETH_P_IP);
			arp->ar_hln = np->dev->addr_len;
			arp->ar_pln = 4;
			arp->ar_op = htons(type);

			arp_ptr = (unsigned char *)(arp + 1);
			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &tip, 4);
			arp_ptr += 4;
			memcpy(arp_ptr, sha, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &sip, 4);

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_hooks for the same address,
			   we're fine by sending a single reply */
			break;
600
		}
C
Cong Wang 已提交
601
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
C
Cong Wang 已提交
602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690
	} else if( proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
		struct nd_msg *msg;
		u8 *lladdr = NULL;
		struct ipv6hdr *hdr;
		struct icmp6hdr *icmp6h;
		const struct in6_addr *saddr;
		const struct in6_addr *daddr;
		struct inet6_dev *in6_dev = NULL;
		struct in6_addr *target;

		in6_dev = in6_dev_get(skb->dev);
		if (!in6_dev || !in6_dev->cnf.accept_ra)
			return;

		if (!pskb_may_pull(skb, skb->len))
			return;

		msg = (struct nd_msg *)skb_transport_header(skb);

		__skb_push(skb, skb->data - skb_transport_header(skb));

		if (ipv6_hdr(skb)->hop_limit != 255)
			return;
		if (msg->icmph.icmp6_code != 0)
			return;
		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
			return;

		saddr = &ipv6_hdr(skb)->saddr;
		daddr = &ipv6_hdr(skb)->daddr;

		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (memcmp(daddr, &np->local_ip, sizeof(*daddr)))
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			send_skb->protocol = htons(ETH_P_IPV6);
			send_skb->dev = skb->dev;

			skb_reset_network_header(send_skb);
			skb_put(send_skb, sizeof(struct ipv6hdr));
			hdr = ipv6_hdr(send_skb);

			*(__be32*)hdr = htonl(0x60000000);

			hdr->payload_len = htons(size);
			hdr->nexthdr = IPPROTO_ICMPV6;
			hdr->hop_limit = 255;
			hdr->saddr = *saddr;
			hdr->daddr = *daddr;

			send_skb->transport_header = send_skb->tail;
			skb_put(send_skb, size);

			icmp6h = (struct icmp6hdr *)skb_transport_header(skb);
			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
			icmp6h->icmp6_router = 0;
			icmp6h->icmp6_solicited = 1;
			target = (struct in6_addr *)skb_transport_header(send_skb) + sizeof(struct icmp6hdr);
			*target = msg->target;
			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
							      IPPROTO_ICMPV6,
							      csum_partial(icmp6h,
									   size, 0));

			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
					    lladdr, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}

			netpoll_send_skb(np, send_skb);

			/* If there are several rx_hooks for the same address,
			   we're fine by sending a single reply */
			break;
		}
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
691
	}
L
Linus Torvalds 已提交
692 693
}

C
Cong Wang 已提交
694 695 696 697 698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718 719
static bool pkt_is_ns(struct sk_buff *skb)
{
	struct nd_msg *msg;
	struct ipv6hdr *hdr;

	if (skb->protocol != htons(ETH_P_ARP))
		return false;
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
		return false;

	msg = (struct nd_msg *)skb_transport_header(skb);
	__skb_push(skb, skb->data - skb_transport_header(skb));
	hdr = ipv6_hdr(skb);

	if (hdr->nexthdr != IPPROTO_ICMPV6)
		return false;
	if (hdr->hop_limit != 255)
		return false;
	if (msg->icmph.icmp6_code != 0)
		return false;
	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
		return false;

	return true;
}

720
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
721 722
{
	int proto, len, ulen;
723
	int hits = 0;
724
	const struct iphdr *iph;
L
Linus Torvalds 已提交
725
	struct udphdr *uh;
726
	struct netpoll *np, *tmp;
727

728
	if (list_empty(&npinfo->rx_np))
L
Linus Torvalds 已提交
729
		goto out;
730

L
Linus Torvalds 已提交
731 732 733
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

734
	/* check if netpoll clients need ARP */
C
Cong Wang 已提交
735 736 737 738
	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
C
Cong Wang 已提交
739
		skb_queue_tail(&npinfo->neigh_tx, skb);
L
Linus Torvalds 已提交
740 741 742
		return 1;
	}

743 744 745 746 747 748
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

L
Linus Torvalds 已提交
749
	proto = ntohs(eth_hdr(skb)->h_proto);
C
Cong Wang 已提交
750
	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
L
Linus Torvalds 已提交
751 752 753 754 755 756
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

C
Cong Wang 已提交
757 758 759 760 761 762 763 764 765 766 767
	if (proto == ETH_P_IP) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (iph->ihl < 5 || iph->version != 4)
			goto out;
		if (!pskb_may_pull(skb, iph->ihl*4))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
			goto out;
768

C
Cong Wang 已提交
769 770 771
		len = ntohs(iph->tot_len);
		if (skb->len < len || len < iph->ihl*4)
			goto out;
L
Linus Torvalds 已提交
772

C
Cong Wang 已提交
773 774 775 776 777 778
		/*
		 * Our transport medium may have padded the buffer out.
		 * Now We trim to the true length of the frame.
		 */
		if (pskb_trim_rcsum(skb, len))
			goto out;
L
Linus Torvalds 已提交
779

C
Cong Wang 已提交
780 781 782
		iph = (struct iphdr *)skb->data;
		if (iph->protocol != IPPROTO_UDP)
			goto out;
L
Linus Torvalds 已提交
783

C
Cong Wang 已提交
784 785 786
		len -= iph->ihl*4;
		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
		ulen = ntohs(uh->len);
787

C
Cong Wang 已提交
788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
		if (ulen != len)
			goto out;
		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
				continue;
			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				       (char *)(uh+1),
				       ulen - sizeof(struct udphdr));
			hits++;
		}
C
Cong Wang 已提交
805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		const struct ipv6hdr *ip6h;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto out;
		ip6h = (struct ipv6hdr *)skb->data;
		if (ip6h->version != 6)
			goto out;
		len = ntohs(ip6h->payload_len);
		if (!len)
			goto out;
		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto out;
		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
			goto out;
		ip6h = ipv6_hdr(skb);
		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
			goto out;
		uh = udp_hdr(skb);
		ulen = ntohs(uh->len);
		if (ulen != skb->len)
			goto out;
		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (memcmp(&np->local_ip.in6, &ip6h->daddr, sizeof(struct in6_addr)) != 0)
				continue;
			if (memcmp(&np->remote_ip.in6, &ip6h->saddr, sizeof(struct in6_addr)) != 0)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

			np->rx_hook(np, ntohs(uh->source),
				       (char *)(uh+1),
				       ulen - sizeof(struct udphdr));
			hits++;
		}
#endif
844 845 846 847
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
848 849 850 851 852 853 854 855 856 857 858 859 860

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

861 862
/*
 * Log the configuration of netpoll client @np: local port and address,
 * interface name, remote port and address, and remote MAC.
 */
void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (!np->ipv6)
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	else
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);

	np_info(np, "interface '%s'\n", np->dev_name);

	np_info(np, "remote port %d\n", np->remote_port);
	if (!np->ipv6)
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	else
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);

	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
E
Eric Dumazet 已提交
876
EXPORT_SYMBOL(netpoll_print_options);
877

C
Cong Wang 已提交
878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

L
Linus Torvalds 已提交
898 899 900
/*
 * Parse a configuration string of the form
 *
 *   [local-port]@[local-ip]/[dev],[remote-port]@<remote-ip>/[remote-mac]
 *
 * into @np.  @opt is modified in place (delimiters are overwritten
 * with NUL).  Bracketed fields may be empty, in which case the
 * corresponding member of @np is left untouched.
 *
 * Returns 0 on success (after logging the resulting options) and -1 on
 * a parse error.
 */
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur = opt;
	char *delim;
	int ipv6;

	/* local port */
	if (*cur != '@') {
		delim = strchr(cur, '@');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* local ip; also decides the address family */
	if (*cur != '/') {
		delim = strchr(cur, '/');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	/* parse out dev name */
	if (*cur != ',') {
		delim = strchr(cur, ',');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	/* dst port */
	if (*cur != '@') {
		delim = strchr(cur, '@');
		if (!delim)
			goto parse_failed;
		*delim = 0;
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip; must be of the same family as the local address */
	delim = strchr(cur, '/');
	if (!delim)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0 || np->ipv6 != (bool)ipv6)
		goto parse_failed;
	np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	/* MAC address */
	if (*cur != 0 && !mac_pton(cur, np->remote_mac))
		goto parse_failed;

	netpoll_print_options(np);

	return 0;

 parse_failed:
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}
E
Eric Dumazet 已提交
976
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
977

978
/*
 * Attach netpoll client @np to @ndev.
 *
 * The device's netpoll_info is created (and the driver's
 * ndo_netpoll_setup called, if present) for the first client;
 * additional clients only take another reference on it.  A client with
 * an rx_hook is also added to the rx_np list.
 *
 * Returns 0 on success, -ENOTSUPP if the device cannot be polled,
 * -ENOMEM on allocation failure, or the error returned by
 * ndo_netpoll_setup.
 */
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		return -ENOTSUPP;
	}

	if (ndev->npinfo) {
		/* device already has netpoll state: share it */
		npinfo = ndev->npinfo;
		atomic_inc(&npinfo->refcnt);
	} else {
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo)
			return -ENOMEM;

		npinfo->rx_flags = 0;
		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		skb_queue_head_init(&npinfo->neigh_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err) {
				kfree(npinfo);
				return err;
			}
		}
	}

	npinfo->netpoll = np;

	if (np->rx_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		npinfo->rx_flags |= NETPOLL_RX_ENABLED;
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

1051
	rtnl_lock();
L
Linus Torvalds 已提交
1052
	if (np->dev_name)
1053
		ndev = __dev_get_by_name(&init_net, np->dev_name);
L
Linus Torvalds 已提交
1054
	if (!ndev) {
1055
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
1056 1057
		err = -ENODEV;
		goto unlock;
L
Linus Torvalds 已提交
1058
	}
1059
	dev_hold(ndev);
L
Linus Torvalds 已提交
1060

1061
	if (netdev_master_upper_dev_get(ndev)) {
1062
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
1063 1064
		err = -EBUSY;
		goto put;
1065 1066
	}

L
Linus Torvalds 已提交
1067 1068 1069
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

1070
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
1071

S
Stephen Hemminger 已提交
1072 1073 1074
		err = dev_open(ndev);

		if (err) {
1075
			np_err(np, "failed to open %s\n", ndev->name);
1076
			goto put;
L
Linus Torvalds 已提交
1077 1078
		}

1079
		rtnl_unlock();
L
Linus Torvalds 已提交
1080
		atleast = jiffies + HZ/10;
1081
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
1082 1083
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
1084
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
1085 1086
				break;
			}
1087
			msleep(1);
L
Linus Torvalds 已提交
1088 1089 1090 1091 1092 1093 1094 1095
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
1096
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
1097 1098
			msleep(4000);
		}
1099
		rtnl_lock();
L
Linus Torvalds 已提交
1100 1101
	}

C
Cong Wang 已提交
1102 1103
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
1104
			in_dev = __in_dev_get_rtnl(ndev);
C
Cong Wang 已提交
1105 1106 1107 1108 1109 1110 1111 1112 1113 1114

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
C
Cong Wang 已提交
1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1125 1126 1127 1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138 1139 1140 1141 1142
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
1143
			err = -EINVAL;
C
Cong Wang 已提交
1144 1145
			goto put;
#endif
L
Linus Torvalds 已提交
1146 1147 1148
		}
	}

1149 1150 1151
	/* fill up the skb queue */
	refill_skbs();

1152
	err = __netpoll_setup(np, ndev, GFP_KERNEL);
1153 1154 1155
	if (err)
		goto put;

1156
	rtnl_unlock();
L
Linus Torvalds 已提交
1157 1158
	return 0;

1159
put:
L
Linus Torvalds 已提交
1160
	dev_put(ndev);
1161 1162
unlock:
	rtnl_unlock();
S
Stephen Hemminger 已提交
1163
	return err;
L
Linus Torvalds 已提交
1164
}
E
Eric Dumazet 已提交
1165
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
1166

1167 1168
/*
 * Boot-time initialisation, registered as a core initcall: set up the
 * list head of the file-scope skb_pool queue.  Always returns 0.
 */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);

	return 0;
}
core_initcall(netpoll_init);

1174 1175 1176 1177 1178
/*
 * RCU callback that tears down and frees a netpoll_info.
 *
 * @rcu_head is the rcu member embedded in the netpoll_info being released.
 * Both skb queues are drained and the delayed tx work is cancelled before
 * the structure is freed.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *info;

	info = container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&info->neigh_tx);
	skb_queue_purge(&info->txq);

	/*
	 * cancel_delayed_work_sync() is not usable from here (softirq),
	 * so use the non-waiting variant.
	 */
	cancel_delayed_work(&info->tx_work);

	/* drop whatever a last, unfinished work run left on the queue ... */
	__skb_queue_purge(&info->txq);
	/* ... and cancel the work once more */
	cancel_delayed_work(&info->tx_work);

	kfree(info);
}

1192
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
1193
{
1194 1195 1196
	struct netpoll_info *npinfo;
	unsigned long flags;

1197 1198
	npinfo = np->dev->npinfo;
	if (!npinfo)
1199
		return;
S
Stephen Hemminger 已提交
1200

1201 1202 1203 1204 1205 1206 1207
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		if (list_empty(&npinfo->rx_np))
			npinfo->rx_flags &= ~NETPOLL_RX_ENABLED;
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
1208

1209 1210
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
1211

1212 1213 1214
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
1215

1216
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
1217 1218 1219 1220
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
1221

1222 1223 1224
/*
 * RCU callback for __netpoll_free_rcu(): @rcu_head is the rcu member of a
 * struct netpoll, which is detached via __netpoll_cleanup() and then freed.
 */
static void rcu_cleanup_netpoll(struct rcu_head *rcu_head)
{
	struct netpoll *victim;

	victim = container_of(rcu_head, struct netpoll, rcu);
	__netpoll_cleanup(victim);
	kfree(victim);
}
S
Stephen Hemminger 已提交
1229

1230 1231 1232
/**
 * __netpoll_free_rcu - schedule deferred cleanup and freeing of a netpoll
 * @np: netpoll instance to release
 *
 * Queues rcu_cleanup_netpoll() via call_rcu_bh(); that callback runs
 * __netpoll_cleanup() on @np and then kfree()s it, so @np must not be
 * used by the caller afterwards.
 */
void __netpoll_free_rcu(struct netpoll *np)
{
	struct rcu_head *head = &np->rcu;

	call_rcu_bh(head, rcu_cleanup_netpoll);
}
EXPORT_SYMBOL_GPL(__netpoll_free_rcu);
1235

1236 1237 1238 1239
void netpoll_cleanup(struct netpoll *np)
{
	if (!np->dev)
		return;
1240

1241 1242 1243 1244 1245
	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();

	dev_put(np->dev);
L
Linus Torvalds 已提交
1246 1247
	np->dev = NULL;
}
E
Eric Dumazet 已提交
1248
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
1249 1250 1251 1252 1253

/*
 * Report the current value of the file-scope 'trapped' counter
 * (non-zero while at least one netpoll_set_trap(1) is outstanding).
 */
int netpoll_trap(void)
{
	int depth = atomic_read(&trapped);

	return depth;
}
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
1255 1256 1257 1258 1259 1260 1261 1262 1263

/*
 * Adjust the file-scope 'trapped' counter: a non-zero @trap increments it,
 * zero decrements it.  Calls are expected to be balanced by the caller.
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);