/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

#include <linux/moduleparam.h>
#include <linux/kernel.h>
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
#include <linux/if_arp.h>
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
#include <linux/slab.h>
#include <linux/export.h>
#include <linux/if_vlan.h>
#include <net/tcp.h>
#include <net/udp.h>
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
#include <asm/unaligned.h>
#include <trace/events/napi.h>

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

static struct sk_buff_head skb_pool;

DEFINE_STATIC_SRCU(netpoll_srcu);

#define USEC_PER_POLL	50

#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)

static void zap_completion_queue(void);
static void netpoll_async_cleanup(struct work_struct *work);

static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

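/* Transmit one skb directly through the driver, bypassing the qdisc
 * layer.  If the device cannot offload the VLAN tag, push it into the
 * frame data first.
 */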
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
			      struct netdev_queue *txq)
{
	int status = NETDEV_TX_OK;
	netdev_features_t features;

	features = netif_skb_features(skb);

	if (skb_vlan_tag_present(skb) &&
	    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
		skb = __vlan_hwaccel_push_inside(skb);
		if (unlikely(!skb)) {
			/* This is actually a packet drop, but we
			 * don't want the code that calls this
			 * function to try and operate on a NULL skb.
			 */
			goto out;
		}
	}

	status = netdev_start_xmit(skb, dev, txq, false);

out:
	return status;
}

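/* Work item that drains npinfo->txq: packets that could not be sent
 * directly are retried here under the tx queue lock, and the work is
 * rescheduled if the queue is still frozen or stopped.
 */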
static void queue_process(struct work_struct *work)
{
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
	struct sk_buff *skb;
	unsigned long flags;

	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
		struct netdev_queue *txq;

		if (!netif_device_present(dev) || !netif_running(dev)) {
			kfree_skb(skb);
			continue;
		}

		txq = skb_get_tx_queue(dev, skb);

		local_irq_save(flags);
		HARD_TX_LOCK(dev, txq, smp_processor_id());
		if (netif_xmit_frozen_or_stopped(txq) ||
		    netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
			skb_queue_head(&npinfo->txq, skb);
			HARD_TX_UNLOCK(dev, txq);
			local_irq_restore(flags);

			schedule_delayed_work(&npinfo->tx_work, HZ/10);
			return;
		}
		HARD_TX_UNLOCK(dev, txq);
		local_irq_restore(flags);
	}
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 */
static void poll_one_napi(struct napi_struct *napi)
{
	int work = 0;

	/* net_rx_action's ->poll() invocations and ours are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return;

	/* If we set this bit but see that it has already been set,
	 * that indicates that napi has been disabled and we need
	 * to abort this operation
	 */
	if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
		return;

	/* We explicitly pass the polling call a budget of 0 to
	 * indicate that we are clearing the Tx path only.
	 */
	work = napi->poll(napi, 0);
	WARN_ONCE(work, "%pF exceeded budget in poll\n", napi->poll);
	trace_napi_poll(napi, work, 0);

	clear_bit(NAPI_STATE_NPSVC, &napi->state);
}

static void poll_napi(struct net_device *dev)
{
	struct napi_struct *napi;
	int cpu = smp_processor_id();

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (cmpxchg(&napi->poll_owner, -1, cpu) == -1) {
			poll_one_napi(napi);
			smp_store_release(&napi->poll_owner, -1);
		}
	}
}

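/* Pump the device without relying on its interrupt: run the driver's
 * poll controller, service whatever NAPI contexts we can claim, then
 * flush this CPU's completion queue.
 */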
static void netpoll_poll_dev(struct net_device *dev)
{
	const struct net_device_ops *ops;
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);

	/* Don't do any rx activity if the dev_lock mutex is held;
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state
	 */
	if (down_trylock(&ni->dev_lock))
		return;

	if (!netif_running(dev)) {
		up(&ni->dev_lock);
		return;
	}

	ops = dev->netdev_ops;
	if (!ops->ndo_poll_controller) {
		up(&ni->dev_lock);
		return;
	}

	/* Process pending work on NIC */
	ops->ndo_poll_controller(dev);

	poll_napi(dev);

	up(&ni->dev_lock);

	zap_completion_queue();
}

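/* netpoll_poll_disable/enable bracket device state changes such as
 * dev_open/dev_close; holding ni->dev_lock makes netpoll_poll_dev()
 * back off, since it only trylocks it.
 */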
void netpoll_poll_disable(struct net_device *dev)
{
	struct netpoll_info *ni;
	int idx;
	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
		down(&ni->dev_lock);
	srcu_read_unlock(&netpoll_srcu, idx);
}
EXPORT_SYMBOL(netpoll_poll_disable);

void netpoll_poll_enable(struct net_device *dev)
{
	struct netpoll_info *ni;
	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
		up(&ni->dev_lock);
	rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_poll_enable);

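/* Top the emergency pool back up to MAX_SKBS so messages can still go
 * out when regular atomic allocations fail (see find_skb()).
 */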
static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

		__skb_queue_tail(&skb_pool, skb);
	}
	spin_unlock_irqrestore(&skb_pool.lock, flags);
}

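/* Free the skbs parked on this CPU's completion queue so they can be
 * reused; anything not safe to free from hard-IRQ context is handed
 * back through dev_kfree_skb_any().
 */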
static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_save(flags);
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;
			if (!skb_irq_freeable(skb)) {
				atomic_inc(&skb->users);
				dev_kfree_skb_any(skb); /* put this one back */
			} else {
				__kfree_skb(skb);
			}
		}
	}

	put_cpu_var(softnet_data);
}

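/* Allocate an skb for a netpoll message, falling back to the
 * preallocated pool; if both fail, poll the device a few times to push
 * completions through before giving up.
 */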
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
{
	int count = 0;
	struct sk_buff *skb;

	zap_completion_queue();
	refill_skbs();
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
	if (!skb)
		skb = skb_dequeue(&skb_pool);

	if (!skb) {
		if (++count < 10) {
			netpoll_poll_dev(np->dev);
			goto repeat;
		}
		return NULL;
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id())
			return 1;
	}
	return 0;
}

/* call with IRQ disabled */
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
{
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
	/* It is up to the caller to keep npinfo alive. */
	struct netpoll_info *npinfo;

	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		dev_kfree_skb_irq(skb);
		return;
	}

	/* don't get messages out of order, and no recursion */
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
		struct netdev_queue *txq;

		txq = netdev_pick_tx(dev, skb, NULL);

		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
			if (HARD_TX_TRYLOCK(dev, txq)) {
				if (!netif_xmit_stopped(txq))
					status = netpoll_start_xmit(skb, dev, txq);

				HARD_TX_UNLOCK(dev, txq);

				if (status == NETDEV_TX_OK)
					break;

			}

			/* tickle device maybe there is some cleanup */
			netpoll_poll_dev(np->dev);

			udelay(USEC_PER_POLL);
		}

		WARN_ONCE(!irqs_disabled(),
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
			dev->name, dev->netdev_ops->ndo_start_xmit);

	}

	if (status != NETDEV_TX_OK) {
		skb_queue_tail(&npinfo->txq, skb);
		schedule_delayed_work(&npinfo->tx_work, 0);
	}
}
EXPORT_SYMBOL(netpoll_send_skb_on_dev);

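/* Build the UDP, IPv4/IPv6 and Ethernet headers around @msg by hand
 * (no socket or routing state is consulted) and transmit the frame
 * through netpoll_send_skb().
 */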
void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
	int total_len, ip_len, udp_len;
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
	static atomic_t ip_ident;
	struct ipv6hdr *ip6h;

	WARN_ON_ONCE(!irqs_disabled());

	udp_len = len + sizeof(*udph);
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
		ip_len = udp_len + sizeof(*iph);

	total_len = ip_len + LL_RESERVED_SPACE(np->dev);

	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
	if (!skb)
		return;

	skb_copy_to_linear_data(skb, msg, len);
	skb_put(skb, len);

	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);

	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
EXPORT_SYMBOL(netpoll_send_udp);

void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (np->ipv6)
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	else
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	np_info(np, "interface '%s'\n", np->dev_name);
	np_info(np, "remote port %d\n", np->remote_port);
	if (np->ipv6)
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	else
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);

static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

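/* Parse a configuration string of the form
 *	[local_port]@[local_ip]/[device],[remote_port]@<remote_ip>/[remote_mac]
 * where the remote IP address is the only mandatory field.
 */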
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur = opt, *delim;
	int ipv6;
	bool ipversion_set = false;

	if (*cur != '@') {
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	if (*cur != '/') {
		ipversion_set = true;
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
		*delim = 0;
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
		cur = delim;
	}
	cur++;

	if (*cur != ',') {
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
		*delim = 0;
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
		cur = delim;
	}
	cur++;

	if (*cur != '@') {
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
		*delim = 0;
		if (*cur == ' ' || *cur == '\t')
			np_info(np, "warning: whitespace is not allowed\n");
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
		cur = delim;
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
	*delim = 0;
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
	cur = delim + 1;

	if (*cur != 0) {
		/* MAC address */
		if (!mac_pton(cur, np->remote_mac))
			goto parse_failed;
	}

	netpoll_print_options(np);

	return 0;

 parse_failed:
	np_info(np, "couldn't parse config at '%s'!\n", cur);
	return -1;
}
EXPORT_SYMBOL(netpoll_parse_options);

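/* Attach @np to @ndev.  Called under RTNL with the device already
 * looked up; the first netpoll user of a device allocates ndev->npinfo,
 * later users only take a reference on it.
 */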
int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
{
	struct netpoll_info *npinfo;
	const struct net_device_ops *ops;
	int err;

	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);

	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

		sema_init(&npinfo->dev_lock, 1);
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
			err = ops->ndo_netpoll_setup(ndev, npinfo);
			if (err)
				goto free_npinfo;
		}
	} else {
		npinfo = rtnl_dereference(ndev->npinfo);
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	/* last thing to do is link it to the net device structure */
	rcu_assign_pointer(ndev->npinfo, npinfo);

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

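/* Resolve np->dev_name, bring the interface up and wait for carrier if
 * necessary, pick a local IP address when none was given, then attach
 * through __netpoll_setup().
 */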
int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

	rtnl_lock();
	if (np->dev_name) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
	if (!ndev) {
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
		err = -ENODEV;
		goto unlock;
	}
	dev_hold(ndev);

	if (netdev_master_upper_dev_get(ndev)) {
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
		err = -EBUSY;
		goto put;
	}

	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);

		err = dev_open(ndev);

		if (err) {
			np_err(np, "failed to open %s\n", ndev->name);
			goto put;
		}

		rtnl_unlock();
		atleast = jiffies + HZ/10;
		atmost = jiffies + carrier_timeout * HZ;
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
				np_notice(np, "timeout waiting for carrier\n");
				break;
			}
			msleep(1);
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
			msleep(4000);
		}
		rtnl_lock();
	}

	if (!np->local_ip.ip) {
		if (!np->ipv6) {
			in_dev = __in_dev_get_rtnl(ndev);

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
			err = -EINVAL;
			goto put;
#endif
		}
	}

	/* fill up the skb queue */
	refill_skbs();

	err = __netpoll_setup(np, ndev);
	if (err)
		goto put;

	rtnl_unlock();
	return 0;

put:
	dev_put(ndev);
unlock:
	rtnl_unlock();
	return err;
}
EXPORT_SYMBOL(netpoll_setup);

static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}

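/* Detach netpoll state from np->dev.  Dropping the last reference
 * defers the real teardown to rcu_cleanup_netpoll_info() via
 * call_rcu_bh(), so softirq users still dereferencing npinfo can
 * finish first.
 */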
void __netpoll_cleanup(struct netpoll *np)
{
	struct netpoll_info *npinfo;

	/* rtnl_dereference would be preferable here but
	 * rcu_cleanup_netpoll path can put us in here safely without
	 * holding the rtnl, so plain rcu_dereference it is
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
	if (!npinfo)
		return;

	synchronize_srcu(&netpoll_srcu);

	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);

		RCU_INIT_POINTER(np->dev->npinfo, NULL);
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	} else
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);

static void netpoll_async_cleanup(struct work_struct *work)
{
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);

	rtnl_lock();
	__netpoll_cleanup(np);
	rtnl_unlock();
	kfree(np);
}

void __netpoll_free_async(struct netpoll *np)
{
	schedule_work(&np->cleanup_work);
}
EXPORT_SYMBOL_GPL(__netpoll_free_async);

void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
	if (!np->dev)
		goto out;
	__netpoll_cleanup(np);
	dev_put(np->dev);
	np->dev = NULL;
out:
	rtnl_unlock();
}
EXPORT_SYMBOL(netpoll_cleanup);