netpoll.c 19.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
15
#include <linux/kernel.h>
L
Linus Torvalds 已提交
16 17 18
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
19
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
28
#include <linux/slab.h>
29
#include <linux/export.h>
30
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
31 32
#include <net/tcp.h>
#include <net/udp.h>
C
Cong Wang 已提交
33 34 35
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
L
Linus Torvalds 已提交
36
#include <asm/unaligned.h>
37
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
38 39 40 41 42 43 44 45 46

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

/* Payload bytes budgeted per pooled skb on top of the headers (see MAX_SKB_SIZE). */
#define MAX_UDP_CHUNK 1460
/* Queue length that refill_skbs() tops skb_pool up to. */
#define MAX_SKBS 32

47
/* Reserve of preallocated skbs: filled by refill_skbs(), drained by find_skb(). */
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
48

49
/*
 * SRCU domain: read side taken in netpoll_poll_disable(), waited on with
 * synchronize_srcu() in __netpoll_cleanup().
 */
DEFINE_STATIC_SRCU(netpoll_srcu);
50

S
Stephen Hemminger 已提交
51
/* Microseconds netpoll_send_skb_on_dev() busy-waits between transmit attempts. */
#define USEC_PER_POLL	50
L
Linus Torvalds 已提交
52

J
Joe Perches 已提交
53 54 55 56 57
/*
 * Allocation size used for each skb in skb_pool: Ethernet, IPv4 and UDP
 * header sizes plus MAX_UDP_CHUNK bytes.
 */
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
58

59
static void zap_completion_queue(void);
60
static void netpoll_async_cleanup(struct work_struct *work);
L
Linus Torvalds 已提交
61

62 63 64
/*
 * Upper bound, in seconds, on how long netpoll_setup() waits for carrier
 * after it has opened the device itself.  Exposed as a module parameter.
 */
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

65 66 67 68 69 70 71
/*
 * Logging helpers that prefix every message with the netpoll instance name.
 * The @np argument is parenthesized so that expressions such as "&x" or a
 * conditional expand correctly.
 */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, (np)->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, (np)->name, ##__VA_ARGS__)

72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
/*
 * Hand @skb to the driver with netdev_start_xmit() and, when the driver
 * returns NETDEV_TX_OK, call txq_trans_update() on @txq.
 *
 * If the skb has a VLAN tag that the device features cannot offload, the
 * tag is first written into the packet with __vlan_put_tag().
 *
 * Returns the driver's status.  If __vlan_put_tag() returns NULL nothing is
 * transmitted and NETDEV_TX_OK is returned.
 */
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
			      struct netdev_queue *txq)
{
	int status = NETDEV_TX_OK;
	netdev_features_t features;

	features = netif_skb_features(skb);

	if (vlan_tx_tag_present(skb) &&
	    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
		skb = __vlan_put_tag(skb, skb->vlan_proto,
				     vlan_tx_tag_get(skb));
		if (unlikely(!skb)) {
			/* This is actually a packet drop, but we
			 * don't want the code that calls this
			 * function to try and operate on a NULL skb.
			 */
			goto out;
		}
		skb->vlan_tci = 0;
	}

	status = netdev_start_xmit(skb, dev);
	if (status == NETDEV_TX_OK)
		txq_trans_update(txq);

out:
	return status;
}

D
David Howells 已提交
102
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
103
{
104 105
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
106
	struct sk_buff *skb;
I
Ingo Molnar 已提交
107
	unsigned long flags;
L
Linus Torvalds 已提交
108

S
Stephen Hemminger 已提交
109 110
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
111
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
112

S
Stephen Hemminger 已提交
113
		if (!netif_device_present(dev) || !netif_running(dev)) {
114
			kfree_skb(skb);
S
Stephen Hemminger 已提交
115 116
			continue;
		}
L
Linus Torvalds 已提交
117

118
		txq = skb_get_tx_queue(dev, skb);
119

I
Ingo Molnar 已提交
120
		local_irq_save(flags);
121
		HARD_TX_LOCK(dev, txq, smp_processor_id());
122
		if (netif_xmit_frozen_or_stopped(txq) ||
123
		    netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
124
			skb_queue_head(&npinfo->txq, skb);
125
			HARD_TX_UNLOCK(dev, txq);
I
Ingo Molnar 已提交
126
			local_irq_restore(flags);
L
Linus Torvalds 已提交
127

128
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
129 130
			return;
		}
131
		HARD_TX_UNLOCK(dev, txq);
I
Ingo Molnar 已提交
132
		local_irq_restore(flags);
L
Linus Torvalds 已提交
133 134 135 136 137 138 139 140 141 142 143 144 145 146
	}
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 */
147
static int poll_one_napi(struct napi_struct *napi, int budget)
148 149 150 151 152 153 154 155 156 157
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

158
	set_bit(NAPI_STATE_NPSVC, &napi->state);
159 160

	work = napi->poll(napi, budget);
161
	WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
162
	trace_napi_poll(napi);
163

164
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
165 166 167 168

	return budget - work;
}

169
static void poll_napi(struct net_device *dev, int budget)
L
Linus Torvalds 已提交
170
{
171
	struct napi_struct *napi;
L
Linus Torvalds 已提交
172

173
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
174
		if (napi->poll_owner != smp_processor_id() &&
175
		    spin_trylock(&napi->poll_lock)) {
176
			budget = poll_one_napi(napi, budget);
177 178
			spin_unlock(&napi->poll_lock);
		}
L
Linus Torvalds 已提交
179 180 181
	}
}

182
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
183
{
184
	const struct net_device_ops *ops;
185
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
186
	int budget = 0;
187

188 189 190 191
	/* Don't do any rx activity if the dev_lock mutex is held
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state
	 */
192
	if (down_trylock(&ni->dev_lock))
193 194
		return;

195
	if (!netif_running(dev)) {
196
		up(&ni->dev_lock);
197
		return;
198
	}
199 200

	ops = dev->netdev_ops;
201
	if (!ops->ndo_poll_controller) {
202
		up(&ni->dev_lock);
L
Linus Torvalds 已提交
203
		return;
204
	}
L
Linus Torvalds 已提交
205 206

	/* Process pending work on NIC */
207
	ops->ndo_poll_controller(dev);
208

209
	poll_napi(dev, budget);
L
Linus Torvalds 已提交
210

211
	up(&ni->dev_lock);
212

213
	zap_completion_queue();
L
Linus Torvalds 已提交
214 215
}

216
void netpoll_poll_disable(struct net_device *dev)
217 218 219 220 221 222 223
{
	struct netpoll_info *ni;
	int idx;
	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
224
		down(&ni->dev_lock);
225 226
	srcu_read_unlock(&netpoll_srcu, idx);
}
227
EXPORT_SYMBOL(netpoll_poll_disable);
228

229
void netpoll_poll_enable(struct net_device *dev)
230 231 232 233 234
{
	struct netpoll_info *ni;
	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
235
		up(&ni->dev_lock);
236 237
	rcu_read_unlock();
}
238
EXPORT_SYMBOL(netpoll_poll_enable);
239

L
Linus Torvalds 已提交
240 241 242 243 244
static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

245 246
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
247 248 249 250
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

251
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
252
	}
253
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
254 255
}

256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271
/*
 * Empty this CPU's softnet completion queue.
 *
 * The list is detached with local interrupts disabled and then walked:
 * skbs for which skb_irq_freeable() is true are freed with __kfree_skb();
 * the others get their user count bumped and are handed to
 * dev_kfree_skb_any().
 */
static void zap_completion_queue(void)
{
	unsigned long flags;
	struct softnet_data *sd = &get_cpu_var(softnet_data);

	if (sd->completion_queue) {
		struct sk_buff *clist;

		local_irq_save(flags);
		clist = sd->completion_queue;
		sd->completion_queue = NULL;
		local_irq_restore(flags);

		while (clist != NULL) {
			struct sk_buff *skb = clist;
			clist = clist->next;
			if (!skb_irq_freeable(skb)) {
				atomic_inc(&skb->users);
				dev_kfree_skb_any(skb); /* put this one back */
			} else {
				__kfree_skb(skb);
			}
		}
	}

	put_cpu_var(softnet_data);
}

284
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
285
{
286 287
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
288

289
	zap_completion_queue();
290
	refill_skbs();
L
Linus Torvalds 已提交
291 292 293
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
294 295
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
296 297

	if (!skb) {
298
		if (++count < 10) {
299
			netpoll_poll_dev(np->dev);
300
			goto repeat;
L
Linus Torvalds 已提交
301
		}
302
		return NULL;
L
Linus Torvalds 已提交
303 304 305 306 307 308 309
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

310 311 312 313 314 315 316 317 318 319 320
/*
 * Report whether any NAPI instance on @dev lists the current CPU as its
 * poll_owner.  Returns 1 if so, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *cur;
	int active = 0;

	list_for_each_entry(cur, &dev->napi_list, dev_list) {
		if (cur->poll_owner != smp_processor_id())
			continue;
		active = 1;
		break;
	}

	return active;
}

321
/* call with IRQ disabled */
322 323
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
324
{
S
Stephen Hemminger 已提交
325 326
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
H
Herbert Xu 已提交
327
	/* It is up to the caller to keep npinfo alive. */
328
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
329

330 331 332
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
333
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
334
		dev_kfree_skb_irq(skb);
335 336
		return;
	}
S
Stephen Hemminger 已提交
337 338

	/* don't get messages out of order, and no recursion */
339
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
340
		struct netdev_queue *txq;
341

342
		txq = netdev_pick_tx(dev, skb, NULL);
343

344 345 346
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
347
			if (HARD_TX_TRYLOCK(dev, txq)) {
348 349 350
				if (!netif_xmit_stopped(txq))
					status = netpoll_start_xmit(skb, dev, txq);

351
				HARD_TX_UNLOCK(dev, txq);
352 353 354 355 356

				if (status == NETDEV_TX_OK)
					break;

			}
357 358

			/* tickle device maybe there is some cleanup */
359
			netpoll_poll_dev(np->dev);
360 361

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
362
		}
363 364

		WARN_ONCE(!irqs_disabled(),
365
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
366
			dev->name, dev->netdev_ops->ndo_start_xmit);
367

L
Linus Torvalds 已提交
368 369
	}

S
Stephen Hemminger 已提交
370
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
371
		skb_queue_tail(&npinfo->txq, skb);
372
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
373 374
	}
}
375
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
376 377 378

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
379
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
380 381 382 383
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
384
	static atomic_t ip_ident;
C
Cong Wang 已提交
385
	struct ipv6hdr *ip6h;
L
Linus Torvalds 已提交
386 387

	udp_len = len + sizeof(*udph);
C
Cong Wang 已提交
388 389 390
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
C
Cong Wang 已提交
391 392
		ip_len = udp_len + sizeof(*iph);

393
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
394

395 396
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
397 398 399
	if (!skb)
		return;

400
	skb_copy_to_linear_data(skb, msg, len);
401
	skb_put(skb, len);
L
Linus Torvalds 已提交
402

403 404 405
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
406 407 408
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
C
Cong Wang 已提交
409

C
Cong Wang 已提交
410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
C
Cong Wang 已提交
439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

J
Joe Perches 已提交
469 470
	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);
L
Linus Torvalds 已提交
471 472 473 474 475

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
476
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
477

478 479
/*
 * Log the configuration held in @np at info level: local port and address,
 * interface name, remote port and address, and remote Ethernet address.
 * Addresses are printed as IPv6 or IPv4 according to np->ipv6.
 */
void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (np->ipv6)
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);
	else
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	np_info(np, "interface '%s'\n", np->dev_name);
	np_info(np, "remote port %d\n", np->remote_port);
	if (np->ipv6)
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);
	else
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
E
Eric Dumazet 已提交
493
EXPORT_SYMBOL(netpoll_print_options);
494

C
Cong Wang 已提交
495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

L
Linus Torvalds 已提交
515 516 517
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
C
Cong Wang 已提交
518
	int ipv6;
519
	bool ipversion_set = false;
L
Linus Torvalds 已提交
520

521
	if (*cur != '@') {
L
Linus Torvalds 已提交
522 523
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
524
		*delim = 0;
525 526
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
527
		cur = delim;
L
Linus Torvalds 已提交
528 529 530
	}
	cur++;

531
	if (*cur != '/') {
532
		ipversion_set = true;
L
Linus Torvalds 已提交
533 534
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
535
		*delim = 0;
C
Cong Wang 已提交
536 537 538 539 540
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
541
		cur = delim;
L
Linus Torvalds 已提交
542 543 544
	}
	cur++;

545
	if (*cur != ',') {
L
Linus Torvalds 已提交
546 547 548
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
549
		*delim = 0;
L
Linus Torvalds 已提交
550
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
551
		cur = delim;
L
Linus Torvalds 已提交
552 553 554
	}
	cur++;

555
	if (*cur != '@') {
L
Linus Torvalds 已提交
556 557 558
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
559
		*delim = 0;
560
		if (*cur == ' ' || *cur == '\t')
561
			np_info(np, "warning: whitespace is not allowed\n");
562 563
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
564
		cur = delim;
L
Linus Torvalds 已提交
565 566 567 568 569 570
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
571
	*delim = 0;
C
Cong Wang 已提交
572 573 574
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
575
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
C
Cong Wang 已提交
576 577 578
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
579
	cur = delim + 1;
L
Linus Torvalds 已提交
580

581
	if (*cur != 0) {
L
Linus Torvalds 已提交
582
		/* MAC address */
583
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
584 585 586
			goto parse_failed;
	}

587
	netpoll_print_options(np);
L
Linus Torvalds 已提交
588 589 590 591

	return 0;

 parse_failed:
592
	np_info(np, "couldn't parse config at '%s'!\n", cur);
L
Linus Torvalds 已提交
593 594
	return -1;
}
E
Eric Dumazet 已提交
595
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
596

597
int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
L
Linus Torvalds 已提交
598
{
599
	struct netpoll_info *npinfo;
H
Herbert Xu 已提交
600
	const struct net_device_ops *ops;
S
Stephen Hemminger 已提交
601
	int err;
L
Linus Torvalds 已提交
602

603 604
	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
605
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
606

607 608
	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
609 610
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
611 612 613 614 615
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
616
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
617 618 619 620 621
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

622
		sema_init(&npinfo->dev_lock, 1);
623 624 625 626 627 628 629
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
630
			err = ops->ndo_netpoll_setup(ndev, npinfo);
631 632 633 634
			if (err)
				goto free_npinfo;
		}
	} else {
N
Neil Horman 已提交
635
		npinfo = rtnl_dereference(ndev->npinfo);
636 637 638 639 640 641
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	/* last thing to do is link it to the net device structure */
642
	rcu_assign_pointer(ndev->npinfo, npinfo);
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

659
	rtnl_lock();
660 661 662 663
	if (np->dev_name) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
L
Linus Torvalds 已提交
664
	if (!ndev) {
665
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
666 667
		err = -ENODEV;
		goto unlock;
L
Linus Torvalds 已提交
668
	}
669
	dev_hold(ndev);
L
Linus Torvalds 已提交
670

671
	if (netdev_master_upper_dev_get(ndev)) {
672
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
673 674
		err = -EBUSY;
		goto put;
675 676
	}

L
Linus Torvalds 已提交
677 678 679
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

680
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
681

S
Stephen Hemminger 已提交
682 683 684
		err = dev_open(ndev);

		if (err) {
685
			np_err(np, "failed to open %s\n", ndev->name);
686
			goto put;
L
Linus Torvalds 已提交
687 688
		}

689
		rtnl_unlock();
L
Linus Torvalds 已提交
690
		atleast = jiffies + HZ/10;
691
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
692 693
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
694
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
695 696
				break;
			}
697
			msleep(1);
L
Linus Torvalds 已提交
698 699 700 701 702 703 704 705
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
706
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
707 708
			msleep(4000);
		}
709
		rtnl_lock();
L
Linus Torvalds 已提交
710 711
	}

C
Cong Wang 已提交
712 713
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
714
			in_dev = __in_dev_get_rtnl(ndev);
C
Cong Wang 已提交
715 716 717 718 719 720 721 722 723 724

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
C
Cong Wang 已提交
725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
753
			err = -EINVAL;
C
Cong Wang 已提交
754 755
			goto put;
#endif
L
Linus Torvalds 已提交
756 757 758
		}
	}

759 760 761
	/* fill up the skb queue */
	refill_skbs();

762
	err = __netpoll_setup(np, ndev);
763 764 765
	if (err)
		goto put;

766
	rtnl_unlock();
L
Linus Torvalds 已提交
767 768
	return 0;

769
put:
L
Linus Torvalds 已提交
770
	dev_put(ndev);
771 772
unlock:
	rtnl_unlock();
S
Stephen Hemminger 已提交
773
	return err;
L
Linus Torvalds 已提交
774
}
E
Eric Dumazet 已提交
775
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
776

777 778
/* Initialise the queue head of the shared skb pool; registered as a core initcall. */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);
	return 0;
}
core_initcall(netpoll_init);

784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800
/*
 * RCU callback that tears down a netpoll_info: purges the deferred transmit
 * queue, cancels the tx work (twice, purging again in between because the
 * work cannot be cancelled synchronously here) and frees the structure.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}

801
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
802
{
803 804
	struct netpoll_info *npinfo;

N
Neil Horman 已提交
805 806 807 808 809
	/* rtnl_dereference would be preferable here but
	 * rcu_cleanup_netpoll path can put us in here safely without
	 * holding the rtnl, so plain rcu_dereference it is
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
810
	if (!npinfo)
811
		return;
S
Stephen Hemminger 已提交
812

813 814
	synchronize_srcu(&netpoll_srcu);

815 816
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
817

818 819 820
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
821

822
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
823
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
D
david decotigny 已提交
824 825
	} else
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
826 827
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
828

829
static void netpoll_async_cleanup(struct work_struct *work)
830
{
831
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
S
Stephen Hemminger 已提交
832

833
	rtnl_lock();
834
	__netpoll_cleanup(np);
835
	rtnl_unlock();
836 837
	kfree(np);
}
S
Stephen Hemminger 已提交
838

839
void __netpoll_free_async(struct netpoll *np)
840
{
841
	schedule_work(&np->cleanup_work);
842
}
843
EXPORT_SYMBOL_GPL(__netpoll_free_async);
844

845 846 847
void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
848 849
	if (!np->dev)
		goto out;
850 851
	__netpoll_cleanup(np);
	dev_put(np->dev);
L
Linus Torvalds 已提交
852
	np->dev = NULL;
853 854
out:
	rtnl_unlock();
L
Linus Torvalds 已提交
855
}
E
Eric Dumazet 已提交
856
EXPORT_SYMBOL(netpoll_cleanup);