netpoll.c 19.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
15
#include <linux/kernel.h>
L
Linus Torvalds 已提交
16 17 18
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
19
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
28
#include <linux/slab.h>
29
#include <linux/export.h>
30
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
31 32
#include <net/tcp.h>
#include <net/udp.h>
C
Cong Wang 已提交
33 34 35
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
L
Linus Torvalds 已提交
36
#include <asm/unaligned.h>
37
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
38 39 40 41 42 43 44 45 46

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

#define MAX_UDP_CHUNK 1460
#define MAX_SKBS 32

47
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
48

49
DEFINE_STATIC_SRCU(netpoll_srcu);
50

S
Stephen Hemminger 已提交
51
#define USEC_PER_POLL	50
L
Linus Torvalds 已提交
52

J
Joe Perches 已提交
53 54 55 56 57
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
58

59
static void zap_completion_queue(void);
60
static void netpoll_async_cleanup(struct work_struct *work);
L
Linus Torvalds 已提交
61

62 63 64
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

65 66 67 68 69 70 71
/*
 * Logging helpers that prefix every message with the netpoll instance name.
 * The @np argument is parenthesized so that any pointer-valued expression
 * (not just a plain identifier) can be passed safely.
 */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, (np)->name, ##__VA_ARGS__)

72 73 74 75 76 77 78 79
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
			      struct netdev_queue *txq)
{
	int status = NETDEV_TX_OK;
	netdev_features_t features;

	features = netif_skb_features(skb);

80
	if (skb_vlan_tag_present(skb) &&
81
	    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
82
		skb = __vlan_hwaccel_push_inside(skb);
83 84 85 86 87 88 89 90 91
		if (unlikely(!skb)) {
			/* This is actually a packet drop, but we
			 * don't want the code that calls this
			 * function to try and operate on a NULL skb.
			 */
			goto out;
		}
	}

92
	status = netdev_start_xmit(skb, dev, txq, false);
93 94 95 96 97

out:
	return status;
}

D
David Howells 已提交
98
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
99
{
100 101
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
102
	struct sk_buff *skb;
I
Ingo Molnar 已提交
103
	unsigned long flags;
L
Linus Torvalds 已提交
104

S
Stephen Hemminger 已提交
105 106
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
107
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
108

S
Stephen Hemminger 已提交
109
		if (!netif_device_present(dev) || !netif_running(dev)) {
110
			kfree_skb(skb);
S
Stephen Hemminger 已提交
111 112
			continue;
		}
L
Linus Torvalds 已提交
113

114
		txq = skb_get_tx_queue(dev, skb);
115

I
Ingo Molnar 已提交
116
		local_irq_save(flags);
117
		HARD_TX_LOCK(dev, txq, smp_processor_id());
118
		if (netif_xmit_frozen_or_stopped(txq) ||
119
		    netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
120
			skb_queue_head(&npinfo->txq, skb);
121
			HARD_TX_UNLOCK(dev, txq);
I
Ingo Molnar 已提交
122
			local_irq_restore(flags);
L
Linus Torvalds 已提交
123

124
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
125 126
			return;
		}
127
		HARD_TX_UNLOCK(dev, txq);
I
Ingo Molnar 已提交
128
		local_irq_restore(flags);
L
Linus Torvalds 已提交
129 130 131 132 133 134 135 136 137 138 139 140 141 142
	}
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 */
143
static int poll_one_napi(struct napi_struct *napi, int budget)
144
{
145
	int work = 0;
146 147 148 149 150 151 152 153

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

154 155 156 157 158 159
	/* If we set this bit but see that it has already been set,
	 * that indicates that napi has been disabled and we need
	 * to abort this operation
	 */
	if (test_and_set_bit(NAPI_STATE_NPSVC, &napi->state))
		goto out;
160 161

	work = napi->poll(napi, budget);
162
	WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
163
	trace_napi_poll(napi);
164

165
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
166

167
out:
168 169 170
	return budget - work;
}

171
static void poll_napi(struct net_device *dev, int budget)
L
Linus Torvalds 已提交
172
{
173
	struct napi_struct *napi;
L
Linus Torvalds 已提交
174

175
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
176
		if (napi->poll_owner != smp_processor_id() &&
177
		    spin_trylock(&napi->poll_lock)) {
178
			budget = poll_one_napi(napi, budget);
179 180
			spin_unlock(&napi->poll_lock);
		}
L
Linus Torvalds 已提交
181 182 183
	}
}

184
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
185
{
186
	const struct net_device_ops *ops;
187
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
188
	int budget = 0;
189

190 191 192 193
	/* Don't do any rx activity if the dev_lock mutex is held
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state
	 */
194
	if (down_trylock(&ni->dev_lock))
195 196
		return;

197
	if (!netif_running(dev)) {
198
		up(&ni->dev_lock);
199
		return;
200
	}
201 202

	ops = dev->netdev_ops;
203
	if (!ops->ndo_poll_controller) {
204
		up(&ni->dev_lock);
L
Linus Torvalds 已提交
205
		return;
206
	}
L
Linus Torvalds 已提交
207 208

	/* Process pending work on NIC */
209
	ops->ndo_poll_controller(dev);
210

211
	poll_napi(dev, budget);
L
Linus Torvalds 已提交
212

213
	up(&ni->dev_lock);
214

215
	zap_completion_queue();
L
Linus Torvalds 已提交
216 217
}

218
void netpoll_poll_disable(struct net_device *dev)
219 220 221 222 223 224 225
{
	struct netpoll_info *ni;
	int idx;
	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
226
		down(&ni->dev_lock);
227 228
	srcu_read_unlock(&netpoll_srcu, idx);
}
229
EXPORT_SYMBOL(netpoll_poll_disable);
230

231
void netpoll_poll_enable(struct net_device *dev)
232 233 234 235 236
{
	struct netpoll_info *ni;
	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
237
		up(&ni->dev_lock);
238 239
	rcu_read_unlock();
}
240
EXPORT_SYMBOL(netpoll_poll_enable);
241

L
Linus Torvalds 已提交
242 243 244 245 246
static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

247 248
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
249 250 251 252
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

253
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
254
	}
255
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
256 257
}

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
/*
 * Detach this CPU's softnet completion queue and release every skb on it.
 *
 * The list head is taken over with local interrupts disabled; the skbs are
 * then processed with interrupts restored.  Skbs that skb_irq_freeable()
 * accepts are freed directly; the others get their reference re-taken and
 * are handed to dev_kfree_skb_any().
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending, *next;
	unsigned long flags;

	if (sd->completion_queue) {
		local_irq_save(flags);
		pending = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		for (; pending != NULL; pending = next) {
			/* grab the link before the skb may go away */
			next = pending->next;

			if (skb_irq_freeable(pending)) {
				__kfree_skb(pending);
			} else {
				atomic_inc(&pending->users);
				dev_kfree_skb_any(pending); /* put this one back */
			}
		}
	}

	put_cpu_var(softnet_data);
}

286
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
287
{
288 289
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
290

291
	zap_completion_queue();
292
	refill_skbs();
L
Linus Torvalds 已提交
293 294 295
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
296 297
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
298 299

	if (!skb) {
300
		if (++count < 10) {
301
			netpoll_poll_dev(np->dev);
302
			goto repeat;
L
Linus Torvalds 已提交
303
		}
304
		return NULL;
L
Linus Torvalds 已提交
305 306 307 308 309 310 311
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

312 313 314 315 316 317 318 319 320 321 322
/*
 * Report whether the current CPU is recorded as poll_owner of any NAPI
 * instance on @dev.  Returns 1 if so, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *cur;
	int active = 0;

	list_for_each_entry(cur, &dev->napi_list, dev_list) {
		if (cur->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

323
/* call with IRQ disabled */
324 325
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
326
{
S
Stephen Hemminger 已提交
327 328
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
H
Herbert Xu 已提交
329
	/* It is up to the caller to keep npinfo alive. */
330
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
331

332 333 334
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
335
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
336
		dev_kfree_skb_irq(skb);
337 338
		return;
	}
S
Stephen Hemminger 已提交
339 340

	/* don't get messages out of order, and no recursion */
341
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
342
		struct netdev_queue *txq;
343

344
		txq = netdev_pick_tx(dev, skb, NULL);
345

346 347 348
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
349
			if (HARD_TX_TRYLOCK(dev, txq)) {
350 351 352
				if (!netif_xmit_stopped(txq))
					status = netpoll_start_xmit(skb, dev, txq);

353
				HARD_TX_UNLOCK(dev, txq);
354 355 356 357 358

				if (status == NETDEV_TX_OK)
					break;

			}
359 360

			/* tickle device maybe there is some cleanup */
361
			netpoll_poll_dev(np->dev);
362 363

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
364
		}
365 366

		WARN_ONCE(!irqs_disabled(),
367
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
368
			dev->name, dev->netdev_ops->ndo_start_xmit);
369

L
Linus Torvalds 已提交
370 371
	}

S
Stephen Hemminger 已提交
372
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
373
		skb_queue_tail(&npinfo->txq, skb);
374
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
375 376
	}
}
377
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
378 379 380

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
381
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
382 383 384 385
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
386
	static atomic_t ip_ident;
C
Cong Wang 已提交
387
	struct ipv6hdr *ip6h;
L
Linus Torvalds 已提交
388

389 390
	WARN_ON_ONCE(!irqs_disabled());

L
Linus Torvalds 已提交
391
	udp_len = len + sizeof(*udph);
C
Cong Wang 已提交
392 393 394
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
C
Cong Wang 已提交
395 396
		ip_len = udp_len + sizeof(*iph);

397
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
398

399 400
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
401 402 403
	if (!skb)
		return;

404
	skb_copy_to_linear_data(skb, msg, len);
405
	skb_put(skb, len);
L
Linus Torvalds 已提交
406

407 408 409
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
410 411 412
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
C
Cong Wang 已提交
413

C
Cong Wang 已提交
414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442
	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
C
Cong Wang 已提交
443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

J
Joe Perches 已提交
473 474
	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);
L
Linus Torvalds 已提交
475 476 477 478 479

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
480
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
481

482 483
/*
 * Log the configuration held in @np at info level: local port and address,
 * interface name, remote port and address, and remote MAC address.
 * Addresses are printed as IPv6 or IPv4 depending on np->ipv6.
 */
void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (np->ipv6) {
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	} else {
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	}

	np_info(np, "interface '%s'\n", np->dev_name);

	np_info(np, "remote port %d\n", np->remote_port);
	if (np->ipv6) {
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	} else {
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	}

	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
E
Eric Dumazet 已提交
497
EXPORT_SYMBOL(netpoll_print_options);
498

C
Cong Wang 已提交
499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518
/*
 * Parse the NUL-terminated string @str as an IP address into @addr.
 *
 * Returns 0 when the whole string is a valid IPv4 address, 1 when it is a
 * valid IPv6 address (only if CONFIG_IPV6 is enabled), and -1 otherwise.
 */
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	/* a ':' can only appear in an IPv6 literal, so skip the IPv4 attempt */
	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		/* accept only if the parser consumed the entire string */
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		/* syntactically IPv6, but IPv6 support is not built in */
		return -1;
#endif
	}
	return -1;
}

L
Linus Torvalds 已提交
519 520 521
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
C
Cong Wang 已提交
522
	int ipv6;
523
	bool ipversion_set = false;
L
Linus Torvalds 已提交
524

525
	if (*cur != '@') {
L
Linus Torvalds 已提交
526 527
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
528
		*delim = 0;
529 530
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
531
		cur = delim;
L
Linus Torvalds 已提交
532 533 534
	}
	cur++;

535
	if (*cur != '/') {
536
		ipversion_set = true;
L
Linus Torvalds 已提交
537 538
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
539
		*delim = 0;
C
Cong Wang 已提交
540 541 542 543 544
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
545
		cur = delim;
L
Linus Torvalds 已提交
546 547 548
	}
	cur++;

549
	if (*cur != ',') {
L
Linus Torvalds 已提交
550 551 552
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
553
		*delim = 0;
L
Linus Torvalds 已提交
554
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
555
		cur = delim;
L
Linus Torvalds 已提交
556 557 558
	}
	cur++;

559
	if (*cur != '@') {
L
Linus Torvalds 已提交
560 561 562
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
563
		*delim = 0;
564
		if (*cur == ' ' || *cur == '\t')
565
			np_info(np, "warning: whitespace is not allowed\n");
566 567
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
568
		cur = delim;
L
Linus Torvalds 已提交
569 570 571 572 573 574
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
575
	*delim = 0;
C
Cong Wang 已提交
576 577 578
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
579
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
C
Cong Wang 已提交
580 581 582
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
583
	cur = delim + 1;
L
Linus Torvalds 已提交
584

585
	if (*cur != 0) {
L
Linus Torvalds 已提交
586
		/* MAC address */
587
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
588 589 590
			goto parse_failed;
	}

591
	netpoll_print_options(np);
L
Linus Torvalds 已提交
592 593 594 595

	return 0;

 parse_failed:
596
	np_info(np, "couldn't parse config at '%s'!\n", cur);
L
Linus Torvalds 已提交
597 598
	return -1;
}
E
Eric Dumazet 已提交
599
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
600

601
int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
L
Linus Torvalds 已提交
602
{
603
	struct netpoll_info *npinfo;
H
Herbert Xu 已提交
604
	const struct net_device_ops *ops;
S
Stephen Hemminger 已提交
605
	int err;
L
Linus Torvalds 已提交
606

607 608
	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
609
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
610

611 612
	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
613 614
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
615 616 617 618 619
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
620
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
621 622 623 624 625
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

626
		sema_init(&npinfo->dev_lock, 1);
627 628 629 630 631 632 633
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
634
			err = ops->ndo_netpoll_setup(ndev, npinfo);
635 636 637 638
			if (err)
				goto free_npinfo;
		}
	} else {
N
Neil Horman 已提交
639
		npinfo = rtnl_dereference(ndev->npinfo);
640 641 642 643 644 645
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	/* last thing to do is link it to the net device structure */
646
	rcu_assign_pointer(ndev->npinfo, npinfo);
647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

663
	rtnl_lock();
664 665 666 667
	if (np->dev_name) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
L
Linus Torvalds 已提交
668
	if (!ndev) {
669
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
670 671
		err = -ENODEV;
		goto unlock;
L
Linus Torvalds 已提交
672
	}
673
	dev_hold(ndev);
L
Linus Torvalds 已提交
674

675
	if (netdev_master_upper_dev_get(ndev)) {
676
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
677 678
		err = -EBUSY;
		goto put;
679 680
	}

L
Linus Torvalds 已提交
681 682 683
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

684
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
685

S
Stephen Hemminger 已提交
686 687 688
		err = dev_open(ndev);

		if (err) {
689
			np_err(np, "failed to open %s\n", ndev->name);
690
			goto put;
L
Linus Torvalds 已提交
691 692
		}

693
		rtnl_unlock();
L
Linus Torvalds 已提交
694
		atleast = jiffies + HZ/10;
695
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
696 697
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
698
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
699 700
				break;
			}
701
			msleep(1);
L
Linus Torvalds 已提交
702 703 704 705 706 707 708 709
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
710
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
711 712
			msleep(4000);
		}
713
		rtnl_lock();
L
Linus Torvalds 已提交
714 715
	}

C
Cong Wang 已提交
716 717
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
718
			in_dev = __in_dev_get_rtnl(ndev);
C
Cong Wang 已提交
719 720 721 722 723 724 725 726 727 728

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
C
Cong Wang 已提交
729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
757
			err = -EINVAL;
C
Cong Wang 已提交
758 759
			goto put;
#endif
L
Linus Torvalds 已提交
760 761 762
		}
	}

763 764 765
	/* fill up the skb queue */
	refill_skbs();

766
	err = __netpoll_setup(np, ndev);
767 768 769
	if (err)
		goto put;

770
	rtnl_unlock();
L
Linus Torvalds 已提交
771 772
	return 0;

773
put:
L
Linus Torvalds 已提交
774
	dev_put(ndev);
775 776
unlock:
	rtnl_unlock();
S
Stephen Hemminger 已提交
777
	return err;
L
Linus Torvalds 已提交
778
}
E
Eric Dumazet 已提交
779
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
780

781 782
/*
 * Boot-time initialisation: set up the (initially empty) emergency
 * skb_pool queue.  Always returns 0.
 */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);

	return 0;
}
core_initcall(netpoll_init);

788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803 804
/*
 * RCU callback (queued by __netpoll_cleanup() via call_rcu_bh()) that
 * releases a netpoll_info: drops every skb still sitting on its txq,
 * cancels the pending tx_work and frees the structure.
 *
 * The purge/cancel pair is performed twice; see the inline comments.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}

805
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
806
{
807 808
	struct netpoll_info *npinfo;

N
Neil Horman 已提交
809 810 811 812 813
	/* rtnl_dereference would be preferable here but
	 * rcu_cleanup_netpoll path can put us in here safely without
	 * holding the rtnl, so plain rcu_dereference it is
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
814
	if (!npinfo)
815
		return;
S
Stephen Hemminger 已提交
816

817 818
	synchronize_srcu(&netpoll_srcu);

819 820
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
821

822 823 824
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
825

826
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
827
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
D
david decotigny 已提交
828 829
	} else
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
830 831
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
832

833
static void netpoll_async_cleanup(struct work_struct *work)
834
{
835
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
S
Stephen Hemminger 已提交
836

837
	rtnl_lock();
838
	__netpoll_cleanup(np);
839
	rtnl_unlock();
840 841
	kfree(np);
}
S
Stephen Hemminger 已提交
842

843
void __netpoll_free_async(struct netpoll *np)
844
{
845
	schedule_work(&np->cleanup_work);
846
}
847
EXPORT_SYMBOL_GPL(__netpoll_free_async);
848

849 850 851
void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
852 853
	if (!np->dev)
		goto out;
854 855
	__netpoll_cleanup(np);
	dev_put(np->dev);
L
Linus Torvalds 已提交
856
	np->dev = NULL;
857 858
out:
	rtnl_unlock();
L
Linus Torvalds 已提交
859
}
E
Eric Dumazet 已提交
860
EXPORT_SYMBOL(netpoll_cleanup);