netpoll.c 30.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
15
#include <linux/kernel.h>
L
Linus Torvalds 已提交
16 17 18
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
19
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
28
#include <linux/slab.h>
29
#include <linux/export.h>
30
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
31 32
#include <net/tcp.h>
#include <net/udp.h>
C
Cong Wang 已提交
33 34 35
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
L
Linus Torvalds 已提交
36
#include <asm/unaligned.h>
37
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
38 39 40 41 42 43 44 45 46

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

47
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
48 49 50

static atomic_t trapped;

51
DEFINE_STATIC_SRCU(netpoll_srcu);
52

S
Stephen Hemminger 已提交
53
#define USEC_PER_POLL	50
L
Linus Torvalds 已提交
54

J
Joe Perches 已提交
55 56 57 58 59
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
60

61
static void zap_completion_queue(void);
C
Cong Wang 已提交
62
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo);
63
static void netpoll_async_cleanup(struct work_struct *work);
L
Linus Torvalds 已提交
64

65 66 67
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

68 69 70 71 72 73 74
/*
 * Logging helpers that prefix every message with the netpoll instance name.
 * The np argument is parenthesized so that any pointer-valued expression
 * (for example "list + 1") expands correctly.
 */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, (np)->name, ##__VA_ARGS__)

D
David Howells 已提交
75
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
76
{
77 78
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
79
	struct sk_buff *skb;
I
Ingo Molnar 已提交
80
	unsigned long flags;
L
Linus Torvalds 已提交
81

S
Stephen Hemminger 已提交
82 83
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
84
		const struct net_device_ops *ops = dev->netdev_ops;
85
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
86

S
Stephen Hemminger 已提交
87 88 89 90
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
91

92 93
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
94
		local_irq_save(flags);
95
		__netif_tx_lock(txq, smp_processor_id());
96
		if (netif_xmit_frozen_or_stopped(txq) ||
97
		    ops->ndo_start_xmit(skb, dev) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
98
			skb_queue_head(&npinfo->txq, skb);
99
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
100
			local_irq_restore(flags);
L
Linus Torvalds 已提交
101

102
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
103 104
			return;
		}
105
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
106
		local_irq_restore(flags);
L
Linus Torvalds 已提交
107 108 109
	}
}

110 111
/*
 * Validate the UDP checksum of a received IPv4 datagram.
 *
 * Returns 0 when the checksum is absent (uh->check == 0), already known to
 * be good, or verifies against a hardware-provided CHECKSUM_COMPLETE sum.
 * Otherwise seeds skb->csum with the pseudo-header sum and returns the
 * result of __skb_checksum_complete().
 */
static __sum16 checksum_udp(struct sk_buff *skb, struct udphdr *uh,
			    unsigned short ulen, __be32 saddr, __be32 daddr)
{
	__wsum pseudo;

	if (uh->check == 0)
		return 0;
	if (skb_csum_unnecessary(skb))
		return 0;

	pseudo = csum_tcpudp_nofold(saddr, daddr, ulen, IPPROTO_UDP, 0);

	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!csum_fold(csum_add(pseudo, skb->csum)))
			return 0;
	}

	skb->csum = pseudo;

	return __skb_checksum_complete(skb);
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 *
 * In cases where there is bi-directional communications, reading only
 * one message at a time can lead to packets being dropped by the
 * network adapter, forcing superfluous retries and possibly timeouts.
 * Thus, we set our budget to greater than 1.
 */
145
static int poll_one_napi(struct napi_struct *napi, int budget)
146 147 148 149 150 151 152 153 154 155
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

156
	set_bit(NAPI_STATE_NPSVC, &napi->state);
157 158

	work = napi->poll(napi, budget);
159
	WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
160
	trace_napi_poll(napi);
161

162
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
163 164 165 166

	return budget - work;
}

167
static void poll_napi(struct net_device *dev, int budget)
L
Linus Torvalds 已提交
168
{
169
	struct napi_struct *napi;
L
Linus Torvalds 已提交
170

171
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
172
		if (napi->poll_owner != smp_processor_id() &&
173
		    spin_trylock(&napi->poll_lock)) {
174
			budget = poll_one_napi(napi, budget);
175 176
			spin_unlock(&napi->poll_lock);
		}
L
Linus Torvalds 已提交
177 178 179
	}
}

C
Cong Wang 已提交
180
static void service_neigh_queue(struct netpoll_info *npi)
181
{
182 183
	if (npi) {
		struct sk_buff *skb;
184

C
Cong Wang 已提交
185 186
		while ((skb = skb_dequeue(&npi->neigh_tx)))
			netpoll_neigh_reply(skb, npi);
187 188 189
	}
}

190
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
191
{
192
	const struct net_device_ops *ops;
193
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
194 195
	bool rx_processing = netpoll_rx_processing(ni);
	int budget = rx_processing? 16 : 0;
196

197 198 199 200
	/* Don't do any rx activity if the dev_lock mutex is held
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state
	 */
201
	if (down_trylock(&ni->dev_lock))
202 203
		return;

204
	if (!netif_running(dev)) {
205
		up(&ni->dev_lock);
206
		return;
207
	}
208

209 210
	if (rx_processing)
		atomic_inc(&trapped);
211

212
	ops = dev->netdev_ops;
213
	if (!ops->ndo_poll_controller) {
214
		up(&ni->dev_lock);
L
Linus Torvalds 已提交
215
		return;
216
	}
L
Linus Torvalds 已提交
217 218

	/* Process pending work on NIC */
219
	ops->ndo_poll_controller(dev);
220

221
	poll_napi(dev, budget);
L
Linus Torvalds 已提交
222

223 224
	if (rx_processing)
		atomic_dec(&trapped);
225

226
	up(&ni->dev_lock);
227

228
	if (dev->flags & IFF_SLAVE) {
229
		if (ni) {
230
			struct net_device *bond_dev;
231
			struct sk_buff *skb;
232 233 234 235
			struct netpoll_info *bond_ni;

			bond_dev = netdev_master_upper_dev_get_rcu(dev);
			bond_ni = rcu_dereference_bh(bond_dev->npinfo);
C
Cong Wang 已提交
236
			while ((skb = skb_dequeue(&ni->neigh_tx))) {
237
				skb->dev = bond_dev;
C
Cong Wang 已提交
238
				skb_queue_tail(&bond_ni->neigh_tx, skb);
239 240 241 242
			}
		}
	}

C
Cong Wang 已提交
243
	service_neigh_queue(ni);
244

245
	zap_completion_queue();
L
Linus Torvalds 已提交
246 247
}

248
void netpoll_rx_disable(struct net_device *dev)
249 250 251 252 253 254 255
{
	struct netpoll_info *ni;
	int idx;
	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
256
		down(&ni->dev_lock);
257 258 259 260 261 262 263 264 265 266
	srcu_read_unlock(&netpoll_srcu, idx);
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
	struct netpoll_info *ni;
	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
267
		up(&ni->dev_lock);
268 269 270 271
	rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

L
Linus Torvalds 已提交
272 273 274 275 276
static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

277 278
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
279 280 281 282
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

283
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
284
	}
285
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
286 287
}

288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
/*
 * Reap this CPU's softnet completion queue.  The list is detached with
 * interrupts disabled and then walked: skbs without a destructor are freed
 * directly, the others get an extra reference and are handed back through
 * dev_kfree_skb_any().
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending;
	unsigned long flags;

	if (!sd->completion_queue)
		goto done;

	/* Steal the whole list atomically with respect to local IRQs. */
	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	while (pending != NULL) {
		struct sk_buff *skb = pending;

		/* Advance first: skb may be gone after the calls below. */
		pending = skb->next;

		if (!skb->destructor) {
			__kfree_skb(skb);
			continue;
		}

		atomic_inc(&skb->users);
		dev_kfree_skb_any(skb); /* put this one back */
	}

done:
	put_cpu_var(softnet_data);
}

316
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
317
{
318 319
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
320

321
	zap_completion_queue();
322
	refill_skbs();
L
Linus Torvalds 已提交
323 324 325
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
326 327
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
328 329

	if (!skb) {
330
		if (++count < 10) {
331
			netpoll_poll_dev(np->dev);
332
			goto repeat;
L
Linus Torvalds 已提交
333
		}
334
		return NULL;
L
Linus Torvalds 已提交
335 336 337 338 339 340 341
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

342 343 344 345 346 347 348 349 350 351 352
/*
 * Return 1 if any NAPI instance of dev has the current CPU recorded as its
 * poll owner, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

353
/* call with IRQ disabled */
354 355
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
356
{
S
Stephen Hemminger 已提交
357 358
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
359
	const struct net_device_ops *ops = dev->netdev_ops;
H
Herbert Xu 已提交
360
	/* It is up to the caller to keep npinfo alive. */
361
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
362

363 364 365
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
366 367 368 369
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
370 371

	/* don't get messages out of order, and no recursion */
372
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
373
		struct netdev_queue *txq;
374

375
		txq = netdev_pick_tx(dev, skb, NULL);
376

377 378 379
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
380
			if (__netif_tx_trylock(txq)) {
381
				if (!netif_xmit_stopped(txq)) {
382
					if (vlan_tx_tag_present(skb) &&
383 384 385
					    !vlan_hw_offload_capable(netif_skb_features(skb),
								     skb->vlan_proto)) {
						skb = __vlan_put_tag(skb, skb->vlan_proto, vlan_tx_tag_get(skb));
386 387 388 389 390 391 392 393
						if (unlikely(!skb)) {
							/* This is actually a packet drop, but we
							 * don't want the code at the end of this
							 * function to try and re-queue a NULL skb.
							 */
							status = NETDEV_TX_OK;
							goto unlock_txq;
						}
394 395 396
						skb->vlan_tci = 0;
					}

397
					status = ops->ndo_start_xmit(skb, dev);
E
Eric Dumazet 已提交
398 399 400
					if (status == NETDEV_TX_OK)
						txq_trans_update(txq);
				}
401
			unlock_txq:
402
				__netif_tx_unlock(txq);
403 404 405 406 407

				if (status == NETDEV_TX_OK)
					break;

			}
408 409

			/* tickle device maybe there is some cleanup */
410
			netpoll_poll_dev(np->dev);
411 412

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
413
		}
414 415

		WARN_ONCE(!irqs_disabled(),
416
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
417 418
			dev->name, ops->ndo_start_xmit);

L
Linus Torvalds 已提交
419 420
	}

S
Stephen Hemminger 已提交
421
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
422
		skb_queue_tail(&npinfo->txq, skb);
423
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
424 425
	}
}
426
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
427 428 429

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
430
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
431 432 433 434
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
435
	static atomic_t ip_ident;
C
Cong Wang 已提交
436
	struct ipv6hdr *ip6h;
L
Linus Torvalds 已提交
437 438

	udp_len = len + sizeof(*udph);
C
Cong Wang 已提交
439 440 441
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
C
Cong Wang 已提交
442 443
		ip_len = udp_len + sizeof(*iph);

444
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
445

446 447
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
448 449 450
	if (!skb)
		return;

451
	skb_copy_to_linear_data(skb, msg, len);
452
	skb_put(skb, len);
L
Linus Torvalds 已提交
453

454 455 456
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
457 458 459
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
C
Cong Wang 已提交
460

C
Cong Wang 已提交
461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489
	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
C
Cong Wang 已提交
490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

J
Joe Perches 已提交
520 521
	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);
L
Linus Torvalds 已提交
522 523 524 525 526

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
527
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
528

C
Cong Wang 已提交
529
static void netpoll_neigh_reply(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
530
{
C
Cong Wang 已提交
531
	int size, type = ARPOP_REPLY;
A
Al Viro 已提交
532
	__be32 sip, tip;
533
	unsigned char *sha;
L
Linus Torvalds 已提交
534
	struct sk_buff *send_skb;
535 536
	struct netpoll *np, *tmp;
	unsigned long flags;
537
	int hlen, tlen;
C
Cong Wang 已提交
538
	int hits = 0, proto;
539

540
	if (!netpoll_rx_processing(npinfo))
541 542 543 544 545 546 547 548 549 550
		return;

	/* Before checking the packet, we do some early
	   inspection whether this is interesting at all */
	spin_lock_irqsave(&npinfo->rx_lock, flags);
	list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
		if (np->dev == skb->dev)
			hits++;
	}
	spin_unlock_irqrestore(&npinfo->rx_lock, flags);
L
Linus Torvalds 已提交
551

552 553
	/* No netpoll struct is using this dev */
	if (!hits)
554
		return;
L
Linus Torvalds 已提交
555

C
Cong Wang 已提交
556
	proto = ntohs(eth_hdr(skb)->h_proto);
557
	if (proto == ETH_P_ARP) {
C
Cong Wang 已提交
558 559
		struct arphdr *arp;
		unsigned char *arp_ptr;
C
Cong Wang 已提交
560 561 562
		/* No arp on this interface */
		if (skb->dev->flags & IFF_NOARP)
			return;
L
Linus Torvalds 已提交
563

C
Cong Wang 已提交
564 565
		if (!pskb_may_pull(skb, arp_hdr_len(skb->dev)))
			return;
L
Linus Torvalds 已提交
566

C
Cong Wang 已提交
567 568 569
		skb_reset_network_header(skb);
		skb_reset_transport_header(skb);
		arp = arp_hdr(skb);
L
Linus Torvalds 已提交
570

C
Cong Wang 已提交
571 572 573 574 575
		if ((arp->ar_hrd != htons(ARPHRD_ETHER) &&
		     arp->ar_hrd != htons(ARPHRD_IEEE802)) ||
		    arp->ar_pro != htons(ETH_P_IP) ||
		    arp->ar_op != htons(ARPOP_REQUEST))
			return;
L
Linus Torvalds 已提交
576

C
Cong Wang 已提交
577 578 579 580 581 582 583 584 585 586
		arp_ptr = (unsigned char *)(arp+1);
		/* save the location of the src hw addr */
		sha = arp_ptr;
		arp_ptr += skb->dev->addr_len;
		memcpy(&sip, arp_ptr, 4);
		arp_ptr += 4;
		/* If we actually cared about dst hw addr,
		   it would get copied here */
		arp_ptr += skb->dev->addr_len;
		memcpy(&tip, arp_ptr, 4);
L
Linus Torvalds 已提交
587

C
Cong Wang 已提交
588 589 590
		/* Should we ignore arp? */
		if (ipv4_is_loopback(tip) || ipv4_is_multicast(tip))
			return;
L
Linus Torvalds 已提交
591

C
Cong Wang 已提交
592
		size = arp_hdr_len(skb->dev);
L
Linus Torvalds 已提交
593

C
Cong Wang 已提交
594 595 596 597 598 599 600 601 602 603 604 605 606 607 608 609 610
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (tip != np->local_ip.ip)
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			skb_reset_network_header(send_skb);
			arp = (struct arphdr *) skb_put(send_skb, size);
			send_skb->dev = skb->dev;
			send_skb->protocol = htons(ETH_P_ARP);

			/* Fill the device header for the ARP frame */
C
Cong Wang 已提交
611
			if (dev_hard_header(send_skb, skb->dev, ETH_P_ARP,
C
Cong Wang 已提交
612 613 614 615 616
					    sha, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}
L
Linus Torvalds 已提交
617

C
Cong Wang 已提交
618 619 620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642
			/*
			 * Fill out the arp protocol part.
			 *
			 * we only support ethernet device type,
			 * which (according to RFC 1390) should
			 * always equal 1 (Ethernet).
			 */

			arp->ar_hrd = htons(np->dev->type);
			arp->ar_pro = htons(ETH_P_IP);
			arp->ar_hln = np->dev->addr_len;
			arp->ar_pln = 4;
			arp->ar_op = htons(type);

			arp_ptr = (unsigned char *)(arp + 1);
			memcpy(arp_ptr, np->dev->dev_addr, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &tip, 4);
			arp_ptr += 4;
			memcpy(arp_ptr, sha, np->dev->addr_len);
			arp_ptr += np->dev->addr_len;
			memcpy(arp_ptr, &sip, 4);

			netpoll_send_skb(np, send_skb);

643 644 645
			/* If there are several rx_skb_hooks for the same
			 * address we're fine by sending a single reply
			 */
C
Cong Wang 已提交
646
			break;
647
		}
C
Cong Wang 已提交
648
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
C
Cong Wang 已提交
649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684
	} else if( proto == ETH_P_IPV6) {
#if IS_ENABLED(CONFIG_IPV6)
		struct nd_msg *msg;
		u8 *lladdr = NULL;
		struct ipv6hdr *hdr;
		struct icmp6hdr *icmp6h;
		const struct in6_addr *saddr;
		const struct in6_addr *daddr;
		struct inet6_dev *in6_dev = NULL;
		struct in6_addr *target;

		in6_dev = in6_dev_get(skb->dev);
		if (!in6_dev || !in6_dev->cnf.accept_ra)
			return;

		if (!pskb_may_pull(skb, skb->len))
			return;

		msg = (struct nd_msg *)skb_transport_header(skb);

		__skb_push(skb, skb->data - skb_transport_header(skb));

		if (ipv6_hdr(skb)->hop_limit != 255)
			return;
		if (msg->icmph.icmp6_code != 0)
			return;
		if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
			return;

		saddr = &ipv6_hdr(skb)->saddr;
		daddr = &ipv6_hdr(skb)->daddr;

		size = sizeof(struct icmp6hdr) + sizeof(struct in6_addr);

		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
685
			if (!ipv6_addr_equal(daddr, &np->local_ip.in6))
C
Cong Wang 已提交
686 687 688 689 690 691 692 693 694 695 696 697
				continue;

			hlen = LL_RESERVED_SPACE(np->dev);
			tlen = np->dev->needed_tailroom;
			send_skb = find_skb(np, size + hlen + tlen, hlen);
			if (!send_skb)
				continue;

			send_skb->protocol = htons(ETH_P_IPV6);
			send_skb->dev = skb->dev;

			skb_reset_network_header(send_skb);
698
			hdr = (struct ipv6hdr *) skb_put(send_skb, sizeof(struct ipv6hdr));
C
Cong Wang 已提交
699 700 701 702 703 704 705
			*(__be32*)hdr = htonl(0x60000000);
			hdr->payload_len = htons(size);
			hdr->nexthdr = IPPROTO_ICMPV6;
			hdr->hop_limit = 255;
			hdr->saddr = *saddr;
			hdr->daddr = *daddr;

706
			icmp6h = (struct icmp6hdr *) skb_put(send_skb, sizeof(struct icmp6hdr));
C
Cong Wang 已提交
707 708 709
			icmp6h->icmp6_type = NDISC_NEIGHBOUR_ADVERTISEMENT;
			icmp6h->icmp6_router = 0;
			icmp6h->icmp6_solicited = 1;
710 711

			target = (struct in6_addr *) skb_put(send_skb, sizeof(struct in6_addr));
C
Cong Wang 已提交
712 713 714 715 716 717 718 719 720 721 722 723 724 725 726
			*target = msg->target;
			icmp6h->icmp6_cksum = csum_ipv6_magic(saddr, daddr, size,
							      IPPROTO_ICMPV6,
							      csum_partial(icmp6h,
									   size, 0));

			if (dev_hard_header(send_skb, skb->dev, ETH_P_IPV6,
					    lladdr, np->dev->dev_addr,
					    send_skb->len) < 0) {
				kfree_skb(send_skb);
				continue;
			}

			netpoll_send_skb(np, send_skb);

727 728 729
			/* If there are several rx_skb_hooks for the same
			 * address, we're fine by sending a single reply
			 */
C
Cong Wang 已提交
730 731 732 733
			break;
		}
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
#endif
734
	}
L
Linus Torvalds 已提交
735 736
}

C
Cong Wang 已提交
737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762
static bool pkt_is_ns(struct sk_buff *skb)
{
	struct nd_msg *msg;
	struct ipv6hdr *hdr;

	if (skb->protocol != htons(ETH_P_ARP))
		return false;
	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr) + sizeof(struct nd_msg)))
		return false;

	msg = (struct nd_msg *)skb_transport_header(skb);
	__skb_push(skb, skb->data - skb_transport_header(skb));
	hdr = ipv6_hdr(skb);

	if (hdr->nexthdr != IPPROTO_ICMPV6)
		return false;
	if (hdr->hop_limit != 255)
		return false;
	if (msg->icmph.icmp6_code != 0)
		return false;
	if (msg->icmph.icmp6_type != NDISC_NEIGHBOUR_SOLICITATION)
		return false;

	return true;
}

763
int __netpoll_rx(struct sk_buff *skb, struct netpoll_info *npinfo)
L
Linus Torvalds 已提交
764
{
765 766
	int proto, len, ulen, data_len;
	int hits = 0, offset;
767
	const struct iphdr *iph;
L
Linus Torvalds 已提交
768
	struct udphdr *uh;
769
	struct netpoll *np, *tmp;
770
	uint16_t source;
771

772
	if (!netpoll_rx_processing(npinfo))
L
Linus Torvalds 已提交
773
		goto out;
774

L
Linus Torvalds 已提交
775 776 777
	if (skb->dev->type != ARPHRD_ETHER)
		goto out;

778
	/* check if netpoll clients need ARP */
C
Cong Wang 已提交
779 780 781 782
	if (skb->protocol == htons(ETH_P_ARP) && atomic_read(&trapped)) {
		skb_queue_tail(&npinfo->neigh_tx, skb);
		return 1;
	} else if (pkt_is_ns(skb) && atomic_read(&trapped)) {
C
Cong Wang 已提交
783
		skb_queue_tail(&npinfo->neigh_tx, skb);
L
Linus Torvalds 已提交
784 785 786
		return 1;
	}

787 788 789 790 791 792
	if (skb->protocol == cpu_to_be16(ETH_P_8021Q)) {
		skb = vlan_untag(skb);
		if (unlikely(!skb))
			goto out;
	}

L
Linus Torvalds 已提交
793
	proto = ntohs(eth_hdr(skb)->h_proto);
C
Cong Wang 已提交
794
	if (proto != ETH_P_IP && proto != ETH_P_IPV6)
L
Linus Torvalds 已提交
795 796 797 798 799 800
		goto out;
	if (skb->pkt_type == PACKET_OTHERHOST)
		goto out;
	if (skb_shared(skb))
		goto out;

C
Cong Wang 已提交
801 802 803 804 805 806 807 808 809 810 811
	if (proto == ETH_P_IP) {
		if (!pskb_may_pull(skb, sizeof(struct iphdr)))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (iph->ihl < 5 || iph->version != 4)
			goto out;
		if (!pskb_may_pull(skb, iph->ihl*4))
			goto out;
		iph = (struct iphdr *)skb->data;
		if (ip_fast_csum((u8 *)iph, iph->ihl) != 0)
			goto out;
812

C
Cong Wang 已提交
813 814 815
		len = ntohs(iph->tot_len);
		if (skb->len < len || len < iph->ihl*4)
			goto out;
L
Linus Torvalds 已提交
816

C
Cong Wang 已提交
817 818 819 820 821 822
		/*
		 * Our transport medium may have padded the buffer out.
		 * Now We trim to the true length of the frame.
		 */
		if (pskb_trim_rcsum(skb, len))
			goto out;
L
Linus Torvalds 已提交
823

C
Cong Wang 已提交
824 825 826
		iph = (struct iphdr *)skb->data;
		if (iph->protocol != IPPROTO_UDP)
			goto out;
L
Linus Torvalds 已提交
827

C
Cong Wang 已提交
828 829
		len -= iph->ihl*4;
		uh = (struct udphdr *)(((char *)iph) + iph->ihl*4);
830
		offset = (unsigned char *)(uh + 1) - skb->data;
C
Cong Wang 已提交
831
		ulen = ntohs(uh->len);
832 833
		data_len = skb->len - offset;
		source = ntohs(uh->source);
834

C
Cong Wang 已提交
835 836 837 838 839 840 841 842 843 844 845 846
		if (ulen != len)
			goto out;
		if (checksum_udp(skb, uh, ulen, iph->saddr, iph->daddr))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
			if (np->local_ip.ip && np->local_ip.ip != iph->daddr)
				continue;
			if (np->remote_ip.ip && np->remote_ip.ip != iph->saddr)
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

847
			np->rx_skb_hook(np, source, skb, offset, data_len);
C
Cong Wang 已提交
848 849
			hits++;
		}
C
Cong Wang 已提交
850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869
	} else {
#if IS_ENABLED(CONFIG_IPV6)
		const struct ipv6hdr *ip6h;

		if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
			goto out;
		ip6h = (struct ipv6hdr *)skb->data;
		if (ip6h->version != 6)
			goto out;
		len = ntohs(ip6h->payload_len);
		if (!len)
			goto out;
		if (len + sizeof(struct ipv6hdr) > skb->len)
			goto out;
		if (pskb_trim_rcsum(skb, len + sizeof(struct ipv6hdr)))
			goto out;
		ip6h = ipv6_hdr(skb);
		if (!pskb_may_pull(skb, sizeof(struct udphdr)))
			goto out;
		uh = udp_hdr(skb);
870
		offset = (unsigned char *)(uh + 1) - skb->data;
C
Cong Wang 已提交
871
		ulen = ntohs(uh->len);
872 873
		data_len = skb->len - offset;
		source = ntohs(uh->source);
C
Cong Wang 已提交
874 875 876 877 878
		if (ulen != skb->len)
			goto out;
		if (udp6_csum_init(skb, uh, IPPROTO_UDP))
			goto out;
		list_for_each_entry_safe(np, tmp, &npinfo->rx_np, rx) {
879
			if (!ipv6_addr_equal(&np->local_ip.in6, &ip6h->daddr))
C
Cong Wang 已提交
880
				continue;
881
			if (!ipv6_addr_equal(&np->remote_ip.in6, &ip6h->saddr))
C
Cong Wang 已提交
882 883 884 885
				continue;
			if (np->local_port && np->local_port != ntohs(uh->dest))
				continue;

886
			np->rx_skb_hook(np, source, skb, offset, data_len);
C
Cong Wang 已提交
887 888 889
			hits++;
		}
#endif
890 891 892 893
	}

	if (!hits)
		goto out;
L
Linus Torvalds 已提交
894 895 896 897 898 899 900 901 902 903 904 905 906

	kfree_skb(skb);
	return 1;

out:
	if (atomic_read(&trapped)) {
		kfree_skb(skb);
		return 1;
	}

	return 0;
}

907 908
/*
 * Log the complete configuration of a netpoll instance: local port and
 * address, interface name, remote port and address, remote MAC.
 * Addresses are printed as IPv6 or IPv4 depending on np->ipv6.
 */
void netpoll_print_options(struct netpoll *np)
{
	/* Local endpoint */
	np_info(np, "local port %d\n", np->local_port);
	if (!np->ipv6) {
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	} else {
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	}

	np_info(np, "interface '%s'\n", np->dev_name);

	/* Remote endpoint */
	np_info(np, "remote port %d\n", np->remote_port);
	if (!np->ipv6) {
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	} else {
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	}
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);
923

C
Cong Wang 已提交
924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

L
Linus Torvalds 已提交
944 945 946
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
C
Cong Wang 已提交
947
	int ipv6;
948
	bool ipversion_set = false;
L
Linus Torvalds 已提交
949

950
	if (*cur != '@') {
L
Linus Torvalds 已提交
951 952
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
953
		*delim = 0;
954 955
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
956
		cur = delim;
L
Linus Torvalds 已提交
957 958 959
	}
	cur++;

960
	if (*cur != '/') {
961
		ipversion_set = true;
L
Linus Torvalds 已提交
962 963
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
964
		*delim = 0;
C
Cong Wang 已提交
965 966 967 968 969
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
970
		cur = delim;
L
Linus Torvalds 已提交
971 972 973
	}
	cur++;

974
	if (*cur != ',') {
L
Linus Torvalds 已提交
975 976 977
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
978
		*delim = 0;
L
Linus Torvalds 已提交
979
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
980
		cur = delim;
L
Linus Torvalds 已提交
981 982 983
	}
	cur++;

984
	if (*cur != '@') {
L
Linus Torvalds 已提交
985 986 987
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
988
		*delim = 0;
989
		if (*cur == ' ' || *cur == '\t')
990
			np_info(np, "warning: whitespace is not allowed\n");
991 992
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
993
		cur = delim;
L
Linus Torvalds 已提交
994 995 996 997 998 999
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
1000
	*delim = 0;
C
Cong Wang 已提交
1001 1002 1003
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
1004
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
C
Cong Wang 已提交
1005 1006 1007
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
1008
	cur = delim + 1;
L
Linus Torvalds 已提交
1009

1010
	if (*cur != 0) {
L
Linus Torvalds 已提交
1011
		/* MAC address */
1012
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
1013 1014 1015
			goto parse_failed;
	}

1016
	netpoll_print_options(np);
L
Linus Torvalds 已提交
1017 1018 1019 1020

	return 0;

 parse_failed:
1021
	np_info(np, "couldn't parse config at '%s'!\n", cur);
L
Linus Torvalds 已提交
1022 1023
	return -1;
}
E
Eric Dumazet 已提交
1024
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
1025

1026
/**
 * __netpoll_setup - attach a netpoll instance to a network device
 * @np: netpoll instance to attach
 * @ndev: device to attach it to
 * @gfp: allocation flags, used for the netpoll_info allocation and passed
 *       on to the driver's ndo_netpoll_setup() hook
 *
 * If @ndev has no netpoll_info yet, one is allocated, initialised with a
 * reference count of one and offered to the driver's ndo_netpoll_setup()
 * hook; otherwise the existing netpoll_info gains a reference.  The existing
 * structure is fetched with rtnl_dereference(), so the caller is expected to
 * hold the RTNL.
 *
 * Returns 0 on success, -ENOTSUPP if the device cannot be polled, -ENOMEM if
 * the netpoll_info allocation fails, or the error returned by the driver
 * hook.  On failure @np->dev is reset to NULL so that @np does not keep
 * pointing at a device it was never attached to.
 */
int __netpoll_setup(struct netpoll *np, struct net_device *ndev, gfp_t gfp)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	unsigned long flags;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		/* first netpoll user on this device: build its netpoll_info */
		npinfo = kmalloc(sizeof(*npinfo), gfp);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		INIT_LIST_HEAD(&npinfo->rx_np);

		spin_lock_init(&npinfo->rx_lock);
		sema_init(&npinfo->dev_lock, 1);
		skb_queue_head_init(&npinfo->neigh_tx);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo, gfp);
			if (err)
				goto free_npinfo;
		}
	} else {
		/* device already has a netpoll_info: share it */
		npinfo = rtnl_dereference(ndev->npinfo);
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	/* only instances with an rx hook are linked on the rx list */
	if (np->rx_skb_hook) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_add_tail(&np->rx, &npinfo->rx_np);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	/* nothing was attached: don't leave a stale device pointer in @np */
	np->dev = NULL;
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

1099
	rtnl_lock();
1100 1101 1102 1103
	if (np->dev_name) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
L
Linus Torvalds 已提交
1104
	if (!ndev) {
1105
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
1106 1107
		err = -ENODEV;
		goto unlock;
L
Linus Torvalds 已提交
1108
	}
1109
	dev_hold(ndev);
L
Linus Torvalds 已提交
1110

1111
	if (netdev_master_upper_dev_get(ndev)) {
1112
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
1113 1114
		err = -EBUSY;
		goto put;
1115 1116
	}

L
Linus Torvalds 已提交
1117 1118 1119
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

1120
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
1121

S
Stephen Hemminger 已提交
1122 1123 1124
		err = dev_open(ndev);

		if (err) {
1125
			np_err(np, "failed to open %s\n", ndev->name);
1126
			goto put;
L
Linus Torvalds 已提交
1127 1128
		}

1129
		rtnl_unlock();
L
Linus Torvalds 已提交
1130
		atleast = jiffies + HZ/10;
1131
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
1132 1133
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
1134
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
1135 1136
				break;
			}
1137
			msleep(1);
L
Linus Torvalds 已提交
1138 1139 1140 1141 1142 1143 1144 1145
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
1146
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
1147 1148
			msleep(4000);
		}
1149
		rtnl_lock();
L
Linus Torvalds 已提交
1150 1151
	}

C
Cong Wang 已提交
1152 1153
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
1154
			in_dev = __in_dev_get_rtnl(ndev);
C
Cong Wang 已提交
1155 1156 1157 1158 1159 1160 1161 1162 1163 1164

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
C
Cong Wang 已提交
1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
1193
			err = -EINVAL;
C
Cong Wang 已提交
1194 1195
			goto put;
#endif
L
Linus Torvalds 已提交
1196 1197 1198
		}
	}

1199 1200 1201
	/* fill up the skb queue */
	refill_skbs();

1202
	err = __netpoll_setup(np, ndev, GFP_KERNEL);
1203 1204 1205
	if (err)
		goto put;

1206
	rtnl_unlock();
L
Linus Torvalds 已提交
1207 1208
	return 0;

1209
put:
L
Linus Torvalds 已提交
1210
	dev_put(ndev);
1211 1212
unlock:
	rtnl_unlock();
S
Stephen Hemminger 已提交
1213
	return err;
L
Linus Torvalds 已提交
1214
}
E
Eric Dumazet 已提交
1215
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
1216

1217 1218
/* Initcall: set up the queue head of the file-wide skb pool. */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);

	return 0;
}
core_initcall(netpoll_init);

1224 1225 1226 1227 1228
/*
 * RCU callback queued by __netpoll_cleanup(): drop every skb still queued
 * on the netpoll_info, stop its delayed transmit work and free it.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *info;

	info = container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&info->neigh_tx);
	skb_queue_purge(&info->txq);

	/* running in softirq, so cancel_delayed_work_sync is not an option */
	cancel_delayed_work(&info->tx_work);

	/* drop whatever the last, unfinished work item left behind */
	__skb_queue_purge(&info->txq);
	/* and cancel a second time */
	cancel_delayed_work(&info->tx_work);

	kfree(info);
}

1242
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
1243
{
1244 1245 1246
	struct netpoll_info *npinfo;
	unsigned long flags;

N
Neil Horman 已提交
1247 1248 1249 1250 1251
	/* rtnl_dereference would be preferable here but
	 * rcu_cleanup_netpoll path can put us in here safely without
	 * holding the rtnl, so plain rcu_dereference it is
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
1252
	if (!npinfo)
1253
		return;
S
Stephen Hemminger 已提交
1254

1255 1256 1257 1258 1259
	if (!list_empty(&npinfo->rx_np)) {
		spin_lock_irqsave(&npinfo->rx_lock, flags);
		list_del(&np->rx);
		spin_unlock_irqrestore(&npinfo->rx_lock, flags);
	}
H
Herbert Xu 已提交
1260

1261 1262
	synchronize_srcu(&netpoll_srcu);

1263 1264
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
1265

1266 1267 1268
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
1269

1270
		rcu_assign_pointer(np->dev->npinfo, NULL);
1271 1272 1273 1274
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
1275

1276
static void netpoll_async_cleanup(struct work_struct *work)
1277
{
1278
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
S
Stephen Hemminger 已提交
1279

1280
	rtnl_lock();
1281
	__netpoll_cleanup(np);
1282
	rtnl_unlock();
1283 1284
	kfree(np);
}
S
Stephen Hemminger 已提交
1285

1286
void __netpoll_free_async(struct netpoll *np)
1287
{
1288
	schedule_work(&np->cleanup_work);
1289
}
1290
EXPORT_SYMBOL_GPL(__netpoll_free_async);
1291

1292 1293 1294
void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
1295 1296
	if (!np->dev)
		goto out;
1297 1298
	__netpoll_cleanup(np);
	dev_put(np->dev);
L
Linus Torvalds 已提交
1299
	np->dev = NULL;
1300 1301
out:
	rtnl_unlock();
L
Linus Torvalds 已提交
1302
}
E
Eric Dumazet 已提交
1303
EXPORT_SYMBOL(netpoll_cleanup);
L
Linus Torvalds 已提交
1304 1305 1306 1307 1308

/* Return the current value of the file-wide 'trapped' counter. */
int netpoll_trap(void)
{
	int count = atomic_read(&trapped);

	return count;
}
E
Eric Dumazet 已提交
1309
EXPORT_SYMBOL(netpoll_trap);
L
Linus Torvalds 已提交
1310 1311 1312 1313 1314 1315 1316 1317 1318

/*
 * Adjust the 'trapped' counter: a non-zero @trap increments it, zero
 * decrements it.
 */
void netpoll_set_trap(int trap)
{
	if (!trap) {
		atomic_dec(&trapped);
		return;
	}

	atomic_inc(&trapped);
}
EXPORT_SYMBOL(netpoll_set_trap);