netpoll.c 19.3 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11
/*
 * Common framework for low-level network console, dump, and debugger code
 *
 * Sep 8 2003  Matt Mackall <mpm@selenic.com>
 *
 * based on the netconsole code from:
 *
 * Copyright (C) 2001  Ingo Molnar <mingo@redhat.com>
 * Copyright (C) 2002  Red Hat, Inc.
 */

12 13
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt

14
#include <linux/moduleparam.h>
15
#include <linux/kernel.h>
L
Linus Torvalds 已提交
16 17 18
#include <linux/netdevice.h>
#include <linux/etherdevice.h>
#include <linux/string.h>
19
#include <linux/if_arp.h>
L
Linus Torvalds 已提交
20 21 22 23 24 25 26 27
#include <linux/inetdevice.h>
#include <linux/inet.h>
#include <linux/interrupt.h>
#include <linux/netpoll.h>
#include <linux/sched.h>
#include <linux/delay.h>
#include <linux/rcupdate.h>
#include <linux/workqueue.h>
28
#include <linux/slab.h>
29
#include <linux/export.h>
30
#include <linux/if_vlan.h>
L
Linus Torvalds 已提交
31 32
#include <net/tcp.h>
#include <net/udp.h>
C
Cong Wang 已提交
33 34 35
#include <net/addrconf.h>
#include <net/ndisc.h>
#include <net/ip6_checksum.h>
L
Linus Torvalds 已提交
36
#include <asm/unaligned.h>
37
#include <trace/events/napi.h>
L
Linus Torvalds 已提交
38 39 40 41 42 43 44 45 46

/*
 * We maintain a small pool of fully-sized skbs, to make sure the
 * message gets out even in extreme OOM situations.
 */

/* Payload bytes budgeted for each pool skb; feeds MAX_SKB_SIZE. */
#define MAX_UDP_CHUNK 1460
/* refill_skbs() tops the emergency pool up to this many skbs. */
#define MAX_SKBS 32

47
/* Emergency skb reserve: filled by refill_skbs(), drained by find_skb(). */
static struct sk_buff_head skb_pool;
L
Linus Torvalds 已提交
48

49
/*
 * SRCU domain: netpoll_rx_disable() is the read side, and
 * __netpoll_cleanup() waits on it with synchronize_srcu().
 */
DEFINE_STATIC_SRCU(netpoll_srcu);
50

S
Stephen Hemminger 已提交
51
/* Microseconds netpoll_send_skb_on_dev() delays between transmit attempts. */
#define USEC_PER_POLL	50
L
Linus Torvalds 已提交
52

J
Joe Perches 已提交
53 54 55 56 57
/*
 * Allocation size used by refill_skbs() for each pool skb: Ethernet, IPv4
 * and UDP headers plus MAX_UDP_CHUNK bytes of payload.
 */
#define MAX_SKB_SIZE							\
	(sizeof(struct ethhdr) +					\
	 sizeof(struct iphdr) +						\
	 sizeof(struct udphdr) +					\
	 MAX_UDP_CHUNK)
L
Linus Torvalds 已提交
58

59
/* Forward declaration: netpoll_poll_dev() calls this before its definition. */
static void zap_completion_queue(void);
60
/* Forward declaration: __netpoll_setup() installs this as the cleanup work handler. */
static void netpoll_async_cleanup(struct work_struct *work);
L
Linus Torvalds 已提交
61

62 63 64
/*
 * Upper bound, in seconds, that netpoll_setup() waits for carrier after
 * forcing a device up (it is multiplied by HZ there).
 */
static unsigned int carrier_timeout = 4;
module_param(carrier_timeout, uint, 0644);

65 66 67 68 69 70 71
/*
 * Logging helpers that prefix every message with the netpoll instance
 * name (np->name) before handing it to the matching pr_*() macro.
 */
#define np_info(np, fmt, ...)				\
	pr_info("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_err(np, fmt, ...)				\
	pr_err("%s: " fmt, np->name, ##__VA_ARGS__)
#define np_notice(np, fmt, ...)				\
	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)

72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102
/*
 * Hand one skb to the driver's ndo_start_xmit() on behalf of netpoll.
 *
 * If the skb carries a VLAN tag that the device cannot offload for this
 * VLAN protocol, the tag is inserted into the packet first. The caller
 * supplies @txq; its trans timestamp is refreshed on success.
 *
 * Returns the driver's status, or NETDEV_TX_OK when tag insertion left us
 * without an skb (the packet is then considered dropped).
 */
static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
			      struct netdev_queue *txq)
{
	const struct net_device_ops *ops = dev->netdev_ops;
	netdev_features_t features = netif_skb_features(skb);
	int rc;

	if (vlan_tx_tag_present(skb) &&
	    !vlan_hw_offload_capable(features, skb->vlan_proto)) {
		skb = __vlan_put_tag(skb, skb->vlan_proto,
				     vlan_tx_tag_get(skb));
		/* A NULL skb is really a packet drop, but reporting
		 * NETDEV_TX_OK keeps callers from touching a NULL skb.
		 */
		if (unlikely(!skb))
			return NETDEV_TX_OK;
		skb->vlan_tci = 0;
	}

	rc = ops->ndo_start_xmit(skb, dev);
	if (rc == NETDEV_TX_OK)
		txq_trans_update(txq);

	return rc;
}

D
David Howells 已提交
103
static void queue_process(struct work_struct *work)
L
Linus Torvalds 已提交
104
{
105 106
	struct netpoll_info *npinfo =
		container_of(work, struct netpoll_info, tx_work.work);
L
Linus Torvalds 已提交
107
	struct sk_buff *skb;
I
Ingo Molnar 已提交
108
	unsigned long flags;
L
Linus Torvalds 已提交
109

S
Stephen Hemminger 已提交
110 111
	while ((skb = skb_dequeue(&npinfo->txq))) {
		struct net_device *dev = skb->dev;
112
		struct netdev_queue *txq;
L
Linus Torvalds 已提交
113

S
Stephen Hemminger 已提交
114 115 116 117
		if (!netif_device_present(dev) || !netif_running(dev)) {
			__kfree_skb(skb);
			continue;
		}
L
Linus Torvalds 已提交
118

119 120
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

I
Ingo Molnar 已提交
121
		local_irq_save(flags);
122
		__netif_tx_lock(txq, smp_processor_id());
123
		if (netif_xmit_frozen_or_stopped(txq) ||
124
		    netpoll_start_xmit(skb, dev, txq) != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
125
			skb_queue_head(&npinfo->txq, skb);
126
			__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
127
			local_irq_restore(flags);
L
Linus Torvalds 已提交
128

129
			schedule_delayed_work(&npinfo->tx_work, HZ/10);
S
Stephen Hemminger 已提交
130 131
			return;
		}
132
		__netif_tx_unlock(txq);
I
Ingo Molnar 已提交
133
		local_irq_restore(flags);
L
Linus Torvalds 已提交
134 135 136 137 138 139 140 141 142 143 144 145 146 147
	}
}

/*
 * Check whether delayed processing was scheduled for our NIC. If so,
 * we attempt to grab the poll lock and use ->poll() to pump the card.
 * If this fails, either we've recursed in ->poll() or it's already
 * running on another CPU.
 *
 * Note: we don't mask interrupts with this lock because we're using
 * trylock here and interrupts are already disabled in the softirq
 * case. Further, we test the poll_owner to avoid recursion on UP
 * systems where the lock doesn't exist.
 */
148
static int poll_one_napi(struct napi_struct *napi, int budget)
149 150 151 152 153 154 155 156 157 158
{
	int work;

	/* net_rx_action's ->poll() invocations and our's are
	 * synchronized by this test which is only made while
	 * holding the napi->poll_lock.
	 */
	if (!test_bit(NAPI_STATE_SCHED, &napi->state))
		return budget;

159
	set_bit(NAPI_STATE_NPSVC, &napi->state);
160 161

	work = napi->poll(napi, budget);
162
	WARN_ONCE(work > budget, "%pF exceeded budget in poll\n", napi->poll);
163
	trace_napi_poll(napi);
164

165
	clear_bit(NAPI_STATE_NPSVC, &napi->state);
166 167 168 169

	return budget - work;
}

170
static void poll_napi(struct net_device *dev, int budget)
L
Linus Torvalds 已提交
171
{
172
	struct napi_struct *napi;
L
Linus Torvalds 已提交
173

174
	list_for_each_entry(napi, &dev->napi_list, dev_list) {
175
		if (napi->poll_owner != smp_processor_id() &&
176
		    spin_trylock(&napi->poll_lock)) {
177
			budget = poll_one_napi(napi, budget);
178 179
			spin_unlock(&napi->poll_lock);
		}
L
Linus Torvalds 已提交
180 181 182
	}
}

183
static void netpoll_poll_dev(struct net_device *dev)
L
Linus Torvalds 已提交
184
{
185
	const struct net_device_ops *ops;
186
	struct netpoll_info *ni = rcu_dereference_bh(dev->npinfo);
187
	int budget = 0;
188

189 190 191 192
	/* Don't do any rx activity if the dev_lock mutex is held
	 * the dev_open/close paths use this to block netpoll activity
	 * while changing device state
	 */
193
	if (down_trylock(&ni->dev_lock))
194 195
		return;

196
	if (!netif_running(dev)) {
197
		up(&ni->dev_lock);
198
		return;
199
	}
200 201

	ops = dev->netdev_ops;
202
	if (!ops->ndo_poll_controller) {
203
		up(&ni->dev_lock);
L
Linus Torvalds 已提交
204
		return;
205
	}
L
Linus Torvalds 已提交
206 207

	/* Process pending work on NIC */
208
	ops->ndo_poll_controller(dev);
209

210
	poll_napi(dev, budget);
L
Linus Torvalds 已提交
211

212
	up(&ni->dev_lock);
213

214
	zap_completion_queue();
L
Linus Torvalds 已提交
215 216
}

217
void netpoll_rx_disable(struct net_device *dev)
218 219 220 221 222 223 224
{
	struct netpoll_info *ni;
	int idx;
	might_sleep();
	idx = srcu_read_lock(&netpoll_srcu);
	ni = srcu_dereference(dev->npinfo, &netpoll_srcu);
	if (ni)
225
		down(&ni->dev_lock);
226 227 228 229 230 231 232 233 234 235
	srcu_read_unlock(&netpoll_srcu, idx);
}
EXPORT_SYMBOL(netpoll_rx_disable);

void netpoll_rx_enable(struct net_device *dev)
{
	struct netpoll_info *ni;
	rcu_read_lock();
	ni = rcu_dereference(dev->npinfo);
	if (ni)
236
		up(&ni->dev_lock);
237 238 239 240
	rcu_read_unlock();
}
EXPORT_SYMBOL(netpoll_rx_enable);

L
Linus Torvalds 已提交
241 242 243 244 245
static void refill_skbs(void)
{
	struct sk_buff *skb;
	unsigned long flags;

246 247
	spin_lock_irqsave(&skb_pool.lock, flags);
	while (skb_pool.qlen < MAX_SKBS) {
L
Linus Torvalds 已提交
248 249 250 251
		skb = alloc_skb(MAX_SKB_SIZE, GFP_ATOMIC);
		if (!skb)
			break;

252
		__skb_queue_tail(&skb_pool, skb);
L
Linus Torvalds 已提交
253
	}
254
	spin_unlock_irqrestore(&skb_pool.lock, flags);
L
Linus Torvalds 已提交
255 256
}

257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284
/*
 * Reap this CPU's softnet completion queue.
 *
 * The list is detached with interrupts off and then walked: skbs without
 * a destructor are freed directly, while skbs that have one get their
 * user count bumped and are handed to dev_kfree_skb_any().
 */
static void zap_completion_queue(void)
{
	struct softnet_data *sd = &get_cpu_var(softnet_data);
	struct sk_buff *pending, *skb;
	unsigned long flags;

	if (!sd->completion_queue)
		goto out;

	local_irq_save(flags);
	pending = sd->completion_queue;
	sd->completion_queue = NULL;
	local_irq_restore(flags);

	while (pending != NULL) {
		skb = pending;
		pending = pending->next;

		if (skb->destructor) {
			atomic_inc(&skb->users);
			dev_kfree_skb_any(skb); /* put this one back */
		} else {
			__kfree_skb(skb);
		}
	}

out:
	put_cpu_var(softnet_data);
}

285
static struct sk_buff *find_skb(struct netpoll *np, int len, int reserve)
L
Linus Torvalds 已提交
286
{
287 288
	int count = 0;
	struct sk_buff *skb;
L
Linus Torvalds 已提交
289

290
	zap_completion_queue();
291
	refill_skbs();
L
Linus Torvalds 已提交
292 293 294
repeat:

	skb = alloc_skb(len, GFP_ATOMIC);
295 296
	if (!skb)
		skb = skb_dequeue(&skb_pool);
L
Linus Torvalds 已提交
297 298

	if (!skb) {
299
		if (++count < 10) {
300
			netpoll_poll_dev(np->dev);
301
			goto repeat;
L
Linus Torvalds 已提交
302
		}
303
		return NULL;
L
Linus Torvalds 已提交
304 305 306 307 308 309 310
	}

	atomic_set(&skb->users, 1);
	skb_reserve(skb, reserve);
	return skb;
}

311 312 313 314 315 316 317 318 319 320 321
/*
 * Return 1 when the current CPU is recorded as poll_owner of any NAPI
 * instance of @dev, 0 otherwise.
 */
static int netpoll_owner_active(struct net_device *dev)
{
	struct napi_struct *napi;
	int active = 0;

	list_for_each_entry(napi, &dev->napi_list, dev_list) {
		if (napi->poll_owner == smp_processor_id()) {
			active = 1;
			break;
		}
	}

	return active;
}

322
/* call with IRQ disabled */
323 324
void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
			     struct net_device *dev)
L
Linus Torvalds 已提交
325
{
S
Stephen Hemminger 已提交
326 327
	int status = NETDEV_TX_BUSY;
	unsigned long tries;
H
Herbert Xu 已提交
328
	/* It is up to the caller to keep npinfo alive. */
329
	struct netpoll_info *npinfo;
S
Stephen Hemminger 已提交
330

331 332 333
	WARN_ON_ONCE(!irqs_disabled());

	npinfo = rcu_dereference_bh(np->dev->npinfo);
334 335 336 337
	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
		__kfree_skb(skb);
		return;
	}
S
Stephen Hemminger 已提交
338 339

	/* don't get messages out of order, and no recursion */
340
	if (skb_queue_len(&npinfo->txq) == 0 && !netpoll_owner_active(dev)) {
341
		struct netdev_queue *txq;
342

343
		txq = netdev_pick_tx(dev, skb, NULL);
344

345 346 347
		/* try until next clock tick */
		for (tries = jiffies_to_usecs(1)/USEC_PER_POLL;
		     tries > 0; --tries) {
348
			if (__netif_tx_trylock(txq)) {
349 350 351
				if (!netif_xmit_stopped(txq))
					status = netpoll_start_xmit(skb, dev, txq);

352
				__netif_tx_unlock(txq);
353 354 355 356 357

				if (status == NETDEV_TX_OK)
					break;

			}
358 359

			/* tickle device maybe there is some cleanup */
360
			netpoll_poll_dev(np->dev);
361 362

			udelay(USEC_PER_POLL);
M
Matt Mackall 已提交
363
		}
364 365

		WARN_ONCE(!irqs_disabled(),
366
			"netpoll_send_skb_on_dev(): %s enabled interrupts in poll (%pF)\n",
367
			dev->name, dev->netdev_ops->ndo_start_xmit);
368

L
Linus Torvalds 已提交
369 370
	}

S
Stephen Hemminger 已提交
371
	if (status != NETDEV_TX_OK) {
S
Stephen Hemminger 已提交
372
		skb_queue_tail(&npinfo->txq, skb);
373
		schedule_delayed_work(&npinfo->tx_work,0);
L
Linus Torvalds 已提交
374 375
	}
}
376
EXPORT_SYMBOL(netpoll_send_skb_on_dev);
L
Linus Torvalds 已提交
377 378 379

void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
{
380
	int total_len, ip_len, udp_len;
L
Linus Torvalds 已提交
381 382 383 384
	struct sk_buff *skb;
	struct udphdr *udph;
	struct iphdr *iph;
	struct ethhdr *eth;
385
	static atomic_t ip_ident;
C
Cong Wang 已提交
386
	struct ipv6hdr *ip6h;
L
Linus Torvalds 已提交
387 388

	udp_len = len + sizeof(*udph);
C
Cong Wang 已提交
389 390 391
	if (np->ipv6)
		ip_len = udp_len + sizeof(*ip6h);
	else
C
Cong Wang 已提交
392 393
		ip_len = udp_len + sizeof(*iph);

394
	total_len = ip_len + LL_RESERVED_SPACE(np->dev);
L
Linus Torvalds 已提交
395

396 397
	skb = find_skb(np, total_len + np->dev->needed_tailroom,
		       total_len - len);
L
Linus Torvalds 已提交
398 399 400
	if (!skb)
		return;

401
	skb_copy_to_linear_data(skb, msg, len);
402
	skb_put(skb, len);
L
Linus Torvalds 已提交
403

404 405 406
	skb_push(skb, sizeof(*udph));
	skb_reset_transport_header(skb);
	udph = udp_hdr(skb);
L
Linus Torvalds 已提交
407 408 409
	udph->source = htons(np->local_port);
	udph->dest = htons(np->remote_port);
	udph->len = htons(udp_len);
C
Cong Wang 已提交
410

C
Cong Wang 已提交
411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439
	if (np->ipv6) {
		udph->check = 0;
		udph->check = csum_ipv6_magic(&np->local_ip.in6,
					      &np->remote_ip.in6,
					      udp_len, IPPROTO_UDP,
					      csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*ip6h));
		skb_reset_network_header(skb);
		ip6h = ipv6_hdr(skb);

		/* ip6h->version = 6; ip6h->priority = 0; */
		put_unaligned(0x60, (unsigned char *)ip6h);
		ip6h->flow_lbl[0] = 0;
		ip6h->flow_lbl[1] = 0;
		ip6h->flow_lbl[2] = 0;

		ip6h->payload_len = htons(sizeof(struct udphdr) + len);
		ip6h->nexthdr = IPPROTO_UDP;
		ip6h->hop_limit = 32;
		ip6h->saddr = np->local_ip.in6;
		ip6h->daddr = np->remote_ip.in6;

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IPV6);
	} else {
C
Cong Wang 已提交
440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469
		udph->check = 0;
		udph->check = csum_tcpudp_magic(np->local_ip.ip,
						np->remote_ip.ip,
						udp_len, IPPROTO_UDP,
						csum_partial(udph, udp_len, 0));
		if (udph->check == 0)
			udph->check = CSUM_MANGLED_0;

		skb_push(skb, sizeof(*iph));
		skb_reset_network_header(skb);
		iph = ip_hdr(skb);

		/* iph->version = 4; iph->ihl = 5; */
		put_unaligned(0x45, (unsigned char *)iph);
		iph->tos      = 0;
		put_unaligned(htons(ip_len), &(iph->tot_len));
		iph->id       = htons(atomic_inc_return(&ip_ident));
		iph->frag_off = 0;
		iph->ttl      = 64;
		iph->protocol = IPPROTO_UDP;
		iph->check    = 0;
		put_unaligned(np->local_ip.ip, &(iph->saddr));
		put_unaligned(np->remote_ip.ip, &(iph->daddr));
		iph->check    = ip_fast_csum((unsigned char *)iph, iph->ihl);

		eth = (struct ethhdr *) skb_push(skb, ETH_HLEN);
		skb_reset_mac_header(skb);
		skb->protocol = eth->h_proto = htons(ETH_P_IP);
	}

J
Joe Perches 已提交
470 471
	ether_addr_copy(eth->h_source, np->dev->dev_addr);
	ether_addr_copy(eth->h_dest, np->remote_mac);
L
Linus Torvalds 已提交
472 473 474 475 476

	skb->dev = np->dev;

	netpoll_send_skb(np, skb);
}
E
Eric Dumazet 已提交
477
EXPORT_SYMBOL(netpoll_send_udp);
L
Linus Torvalds 已提交
478

479 480
/*
 * Log the configuration of @np: local port and address, interface name,
 * remote port and address, and the remote Ethernet address. Addresses
 * are printed as IPv4 or IPv6 according to np->ipv6.
 */
void netpoll_print_options(struct netpoll *np)
{
	np_info(np, "local port %d\n", np->local_port);
	if (!np->ipv6)
		np_info(np, "local IPv4 address %pI4\n", &np->local_ip.ip);
	else
		np_info(np, "local IPv6 address %pI6c\n", &np->local_ip.in6);

	np_info(np, "interface '%s'\n", np->dev_name);

	np_info(np, "remote port %d\n", np->remote_port);
	if (!np->ipv6)
		np_info(np, "remote IPv4 address %pI4\n", &np->remote_ip.ip);
	else
		np_info(np, "remote IPv6 address %pI6c\n", &np->remote_ip.in6);

	np_info(np, "remote ethernet address %pM\n", np->remote_mac);
}
EXPORT_SYMBOL(netpoll_print_options);
495

C
Cong Wang 已提交
496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515
static int netpoll_parse_ip_addr(const char *str, union inet_addr *addr)
{
	const char *end;

	if (!strchr(str, ':') &&
	    in4_pton(str, -1, (void *)addr, -1, &end) > 0) {
		if (!*end)
			return 0;
	}
	if (in6_pton(str, -1, addr->in6.s6_addr, -1, &end) > 0) {
#if IS_ENABLED(CONFIG_IPV6)
		if (!*end)
			return 1;
#else
		return -1;
#endif
	}
	return -1;
}

L
Linus Torvalds 已提交
516 517 518
int netpoll_parse_options(struct netpoll *np, char *opt)
{
	char *cur=opt, *delim;
C
Cong Wang 已提交
519
	int ipv6;
520
	bool ipversion_set = false;
L
Linus Torvalds 已提交
521

522
	if (*cur != '@') {
L
Linus Torvalds 已提交
523 524
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
525
		*delim = 0;
526 527
		if (kstrtou16(cur, 10, &np->local_port))
			goto parse_failed;
528
		cur = delim;
L
Linus Torvalds 已提交
529 530 531
	}
	cur++;

532
	if (*cur != '/') {
533
		ipversion_set = true;
L
Linus Torvalds 已提交
534 535
		if ((delim = strchr(cur, '/')) == NULL)
			goto parse_failed;
536
		*delim = 0;
C
Cong Wang 已提交
537 538 539 540 541
		ipv6 = netpoll_parse_ip_addr(cur, &np->local_ip);
		if (ipv6 < 0)
			goto parse_failed;
		else
			np->ipv6 = (bool)ipv6;
542
		cur = delim;
L
Linus Torvalds 已提交
543 544 545
	}
	cur++;

546
	if (*cur != ',') {
L
Linus Torvalds 已提交
547 548 549
		/* parse out dev name */
		if ((delim = strchr(cur, ',')) == NULL)
			goto parse_failed;
550
		*delim = 0;
L
Linus Torvalds 已提交
551
		strlcpy(np->dev_name, cur, sizeof(np->dev_name));
552
		cur = delim;
L
Linus Torvalds 已提交
553 554 555
	}
	cur++;

556
	if (*cur != '@') {
L
Linus Torvalds 已提交
557 558 559
		/* dst port */
		if ((delim = strchr(cur, '@')) == NULL)
			goto parse_failed;
560
		*delim = 0;
561
		if (*cur == ' ' || *cur == '\t')
562
			np_info(np, "warning: whitespace is not allowed\n");
563 564
		if (kstrtou16(cur, 10, &np->remote_port))
			goto parse_failed;
565
		cur = delim;
L
Linus Torvalds 已提交
566 567 568 569 570 571
	}
	cur++;

	/* dst ip */
	if ((delim = strchr(cur, '/')) == NULL)
		goto parse_failed;
572
	*delim = 0;
C
Cong Wang 已提交
573 574 575
	ipv6 = netpoll_parse_ip_addr(cur, &np->remote_ip);
	if (ipv6 < 0)
		goto parse_failed;
576
	else if (ipversion_set && np->ipv6 != (bool)ipv6)
C
Cong Wang 已提交
577 578 579
		goto parse_failed;
	else
		np->ipv6 = (bool)ipv6;
580
	cur = delim + 1;
L
Linus Torvalds 已提交
581

582
	if (*cur != 0) {
L
Linus Torvalds 已提交
583
		/* MAC address */
584
		if (!mac_pton(cur, np->remote_mac))
L
Linus Torvalds 已提交
585 586 587
			goto parse_failed;
	}

588
	netpoll_print_options(np);
L
Linus Torvalds 已提交
589 590 591 592

	return 0;

 parse_failed:
593
	np_info(np, "couldn't parse config at '%s'!\n", cur);
L
Linus Torvalds 已提交
594 595
	return -1;
}
E
Eric Dumazet 已提交
596
EXPORT_SYMBOL(netpoll_parse_options);
L
Linus Torvalds 已提交
597

598
int __netpoll_setup(struct netpoll *np, struct net_device *ndev)
L
Linus Torvalds 已提交
599
{
600
	struct netpoll_info *npinfo;
H
Herbert Xu 已提交
601
	const struct net_device_ops *ops;
S
Stephen Hemminger 已提交
602
	int err;
L
Linus Torvalds 已提交
603

604 605
	np->dev = ndev;
	strlcpy(np->dev_name, ndev->name, IFNAMSIZ);
606
	INIT_WORK(&np->cleanup_work, netpoll_async_cleanup);
607

608 609
	if ((ndev->priv_flags & IFF_DISABLE_NETPOLL) ||
	    !ndev->netdev_ops->ndo_poll_controller) {
610 611
		np_err(np, "%s doesn't support polling, aborting\n",
		       np->dev_name);
612 613 614 615 616
		err = -ENOTSUPP;
		goto out;
	}

	if (!ndev->npinfo) {
617
		npinfo = kmalloc(sizeof(*npinfo), GFP_KERNEL);
618 619 620 621 622
		if (!npinfo) {
			err = -ENOMEM;
			goto out;
		}

623
		sema_init(&npinfo->dev_lock, 1);
624 625 626 627 628 629 630
		skb_queue_head_init(&npinfo->txq);
		INIT_DELAYED_WORK(&npinfo->tx_work, queue_process);

		atomic_set(&npinfo->refcnt, 1);

		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_setup) {
631
			err = ops->ndo_netpoll_setup(ndev, npinfo);
632 633 634 635
			if (err)
				goto free_npinfo;
		}
	} else {
N
Neil Horman 已提交
636
		npinfo = rtnl_dereference(ndev->npinfo);
637 638 639 640 641 642
		atomic_inc(&npinfo->refcnt);
	}

	npinfo->netpoll = np;

	/* last thing to do is link it to the net device structure */
643
	rcu_assign_pointer(ndev->npinfo, npinfo);
644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659

	return 0;

free_npinfo:
	kfree(npinfo);
out:
	return err;
}
EXPORT_SYMBOL_GPL(__netpoll_setup);

int netpoll_setup(struct netpoll *np)
{
	struct net_device *ndev = NULL;
	struct in_device *in_dev;
	int err;

660
	rtnl_lock();
661 662 663 664
	if (np->dev_name) {
		struct net *net = current->nsproxy->net_ns;
		ndev = __dev_get_by_name(net, np->dev_name);
	}
L
Linus Torvalds 已提交
665
	if (!ndev) {
666
		np_err(np, "%s doesn't exist, aborting\n", np->dev_name);
667 668
		err = -ENODEV;
		goto unlock;
L
Linus Torvalds 已提交
669
	}
670
	dev_hold(ndev);
L
Linus Torvalds 已提交
671

672
	if (netdev_master_upper_dev_get(ndev)) {
673
		np_err(np, "%s is a slave device, aborting\n", np->dev_name);
674 675
		err = -EBUSY;
		goto put;
676 677
	}

L
Linus Torvalds 已提交
678 679 680
	if (!netif_running(ndev)) {
		unsigned long atmost, atleast;

681
		np_info(np, "device %s not up yet, forcing it\n", np->dev_name);
L
Linus Torvalds 已提交
682

S
Stephen Hemminger 已提交
683 684 685
		err = dev_open(ndev);

		if (err) {
686
			np_err(np, "failed to open %s\n", ndev->name);
687
			goto put;
L
Linus Torvalds 已提交
688 689
		}

690
		rtnl_unlock();
L
Linus Torvalds 已提交
691
		atleast = jiffies + HZ/10;
692
		atmost = jiffies + carrier_timeout * HZ;
L
Linus Torvalds 已提交
693 694
		while (!netif_carrier_ok(ndev)) {
			if (time_after(jiffies, atmost)) {
695
				np_notice(np, "timeout waiting for carrier\n");
L
Linus Torvalds 已提交
696 697
				break;
			}
698
			msleep(1);
L
Linus Torvalds 已提交
699 700 701 702 703 704 705 706
		}

		/* If carrier appears to come up instantly, we don't
		 * trust it and pause so that we don't pump all our
		 * queued console messages into the bitbucket.
		 */

		if (time_before(jiffies, atleast)) {
707
			np_notice(np, "carrier detect appears untrustworthy, waiting 4 seconds\n");
L
Linus Torvalds 已提交
708 709
			msleep(4000);
		}
710
		rtnl_lock();
L
Linus Torvalds 已提交
711 712
	}

C
Cong Wang 已提交
713 714
	if (!np->local_ip.ip) {
		if (!np->ipv6) {
715
			in_dev = __in_dev_get_rtnl(ndev);
C
Cong Wang 已提交
716 717 718 719 720 721 722 723 724 725

			if (!in_dev || !in_dev->ifa_list) {
				np_err(np, "no IP address for %s, aborting\n",
				       np->dev_name);
				err = -EDESTADDRREQ;
				goto put;
			}

			np->local_ip.ip = in_dev->ifa_list->ifa_local;
			np_info(np, "local IP %pI4\n", &np->local_ip.ip);
C
Cong Wang 已提交
726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753
		} else {
#if IS_ENABLED(CONFIG_IPV6)
			struct inet6_dev *idev;

			err = -EDESTADDRREQ;
			idev = __in6_dev_get(ndev);
			if (idev) {
				struct inet6_ifaddr *ifp;

				read_lock_bh(&idev->lock);
				list_for_each_entry(ifp, &idev->addr_list, if_list) {
					if (ipv6_addr_type(&ifp->addr) & IPV6_ADDR_LINKLOCAL)
						continue;
					np->local_ip.in6 = ifp->addr;
					err = 0;
					break;
				}
				read_unlock_bh(&idev->lock);
			}
			if (err) {
				np_err(np, "no IPv6 address for %s, aborting\n",
				       np->dev_name);
				goto put;
			} else
				np_info(np, "local IPv6 %pI6c\n", &np->local_ip.in6);
#else
			np_err(np, "IPv6 is not supported %s, aborting\n",
			       np->dev_name);
754
			err = -EINVAL;
C
Cong Wang 已提交
755 756
			goto put;
#endif
L
Linus Torvalds 已提交
757 758 759
		}
	}

760 761 762
	/* fill up the skb queue */
	refill_skbs();

763
	err = __netpoll_setup(np, ndev);
764 765 766
	if (err)
		goto put;

767
	rtnl_unlock();
L
Linus Torvalds 已提交
768 769
	return 0;

770
put:
L
Linus Torvalds 已提交
771
	dev_put(ndev);
772 773
unlock:
	rtnl_unlock();
S
Stephen Hemminger 已提交
774
	return err;
L
Linus Torvalds 已提交
775
}
E
Eric Dumazet 已提交
776
EXPORT_SYMBOL(netpoll_setup);
L
Linus Torvalds 已提交
777

778 779
/*
 * Core initcall: initialise the emergency skb pool's queue head so that
 * refill_skbs()/find_skb() can use it. Always returns 0.
 */
static int __init netpoll_init(void)
{
	skb_queue_head_init(&skb_pool);

	return 0;
}
core_initcall(netpoll_init);

785 786 787 788 789 790 791 792 793 794 795 796 797 798 799 800 801
/*
 * RCU callback that tears down a netpoll_info; __netpoll_cleanup()
 * registers it with call_rcu_bh() once the last reference is dropped.
 *
 * The transmit queue is purged and the tx work cancelled twice over: the
 * second purge/cancel pair is there for a work item that was still in
 * flight during the first pass (see the inline comments). The structure
 * itself is freed last.
 */
static void rcu_cleanup_netpoll_info(struct rcu_head *rcu_head)
{
	struct netpoll_info *npinfo =
			container_of(rcu_head, struct netpoll_info, rcu);

	skb_queue_purge(&npinfo->txq);

	/* we can't call cancel_delayed_work_sync here, as we are in softirq */
	cancel_delayed_work(&npinfo->tx_work);

	/* clean after last, unfinished work */
	__skb_queue_purge(&npinfo->txq);
	/* now cancel it again */
	cancel_delayed_work(&npinfo->tx_work);
	kfree(npinfo);
}

802
void __netpoll_cleanup(struct netpoll *np)
L
Linus Torvalds 已提交
803
{
804 805
	struct netpoll_info *npinfo;

N
Neil Horman 已提交
806 807 808 809 810
	/* rtnl_dereference would be preferable here but
	 * rcu_cleanup_netpoll path can put us in here safely without
	 * holding the rtnl, so plain rcu_dereference it is
	 */
	npinfo = rtnl_dereference(np->dev->npinfo);
811
	if (!npinfo)
812
		return;
S
Stephen Hemminger 已提交
813

814 815
	synchronize_srcu(&netpoll_srcu);

816 817
	if (atomic_dec_and_test(&npinfo->refcnt)) {
		const struct net_device_ops *ops;
H
Herbert Xu 已提交
818

819 820 821
		ops = np->dev->netdev_ops;
		if (ops->ndo_netpoll_cleanup)
			ops->ndo_netpoll_cleanup(np->dev);
H
Herbert Xu 已提交
822

823
		RCU_INIT_POINTER(np->dev->npinfo, NULL);
824 825 826 827
		call_rcu_bh(&npinfo->rcu, rcu_cleanup_netpoll_info);
	}
}
EXPORT_SYMBOL_GPL(__netpoll_cleanup);
H
Herbert Xu 已提交
828

829
static void netpoll_async_cleanup(struct work_struct *work)
830
{
831
	struct netpoll *np = container_of(work, struct netpoll, cleanup_work);
S
Stephen Hemminger 已提交
832

833
	rtnl_lock();
834
	__netpoll_cleanup(np);
835
	rtnl_unlock();
836 837
	kfree(np);
}
S
Stephen Hemminger 已提交
838

839
void __netpoll_free_async(struct netpoll *np)
840
{
841
	schedule_work(&np->cleanup_work);
842
}
843
EXPORT_SYMBOL_GPL(__netpoll_free_async);
844

845 846 847
void netpoll_cleanup(struct netpoll *np)
{
	rtnl_lock();
848 849
	if (!np->dev)
		goto out;
850 851
	__netpoll_cleanup(np);
	dev_put(np->dev);
L
Linus Torvalds 已提交
852
	np->dev = NULL;
853 854
out:
	rtnl_unlock();
L
Linus Torvalds 已提交
855
}
E
Eric Dumazet 已提交
856
EXPORT_SYMBOL(netpoll_cleanup);