/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_root_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

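/* Put the skb back into the qdisc: a partially sent (GSO) segment list
 * is stashed in q->gso_skb so its segments are not reordered; anything
 * else goes through the qdisc's own ->requeue op.  The qdisc is then
 * rescheduled so the packet gets another transmission attempt.
 */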
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	if (unlikely(skb->next))
		q->gso_skb = skb;
	else
		q->ops->requeue(skb, q);

	__netif_schedule(q);
	return 0;
}

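/* Fetch the next packet to transmit: a previously stashed GSO skb
 * takes precedence over the qdisc's own ->dequeue.
 */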
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb;

	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
	else
		skb = q->dequeue(q);

	return skb;
}

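/* Called when the driver's trylock failed (NETDEV_TX_LOCKED): either
 * drop a dead-looped packet or requeue it for a later attempt.
 */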
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * The same CPU is already holding the lock.  This can only
		 * happen when hard_start_xmit() recurses, which is a
		 * transient configuration error.  We detect the dead loop
		 * by checking the xmit owner and drop the packet.  Return
		 * OK so the caller tries the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another CPU is holding the lock; requeue the skb and
		 * delay further xmits for some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_root_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_subqueue_stopped(dev, skb))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver's trylock failed: another CPU holds the xmit lock */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	if (ret && netif_tx_queue_stopped(txq))
		ret = 0;

	return ret;
}

void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}

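/* Per-device transmit watchdog: if a TX queue is stopped and nothing
 * has been transmitted since trans_start for longer than
 * dev->watchdog_timeo, invoke the driver's tx_timeout() handler.  The
 * timer re-arms itself while the device stays active.
 */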
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				char drivername[64];
				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
				       dev->name, netdev_drivername(dev, drivername, 64));
				dev->tx_timeout(dev);
				WARN_ON_ONCE(1);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

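/* (Re)arm the watchdog timer for drivers that provide tx_timeout(),
 * using a 5 second default timeout if the driver did not set one; a
 * reference on the device is held while the timer is pending.
 */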
void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

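/* Append the skb to the band selected by its priority, or drop it once
 * that band already holds tx_queue_len packets.
 */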
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

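/* Allocate a zeroed Qdisc together with its private data, aligned to
 * QDISC_ALIGNTO, and take a reference on the underlying device.
 */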
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct net_device *dev,
				struct netdev_queue *dev_queue,
				struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_root_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

/* this is the rcu callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	const struct Qdisc_ops *ops = qdisc->ops;

#ifdef CONFIG_NET_SCHED
	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);

	kfree((char *) qdisc - qdisc->padded);
}

/* Under qdisc_root_lock(qdisc) and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	if (qdisc->parent)
		list_del(&qdisc->list);

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
EXPORT_SYMBOL(qdisc_destroy);

static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

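/* Publish the sleeping qdisc as the active one for this TX queue; any
 * queue not using the built-in noqueue_qdisc needs the TX watchdog.
 */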
static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to the device yet; create a
	   default one, i.e. pfifo_fast for devices which need queueing
	   and noqueue_qdisc for virtual interfaces.
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

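/* Report whether any TX queue's qdisc still has __QDISC_STATE_RUNNING
 * set.  With 'lock' non-zero the test is done under the qdisc root
 * lock, synchronizing the result against concurrent qdisc_run calls.
 */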
static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc;
		root_lock = qdisc_root_lock(q);

		if (lock)
			spin_lock_bh(root_lock);

		val = test_bit(__QDISC_STATE_RUNNING, &q->state);

		if (lock)
			spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

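/* Swap every TX queue's qdisc for noop_qdisc, stop the watchdog, and
 * wait until no CPU is still running any of the old qdiscs.
 */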
void dev_deactivate(struct net_device *dev)
{
	bool running;

	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	do {
		while (some_qdisc_is_running(dev, 0))
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
		running = some_qdisc_is_running(dev, 1);

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		spinlock_t *root_lock = qdisc_root_lock(qdisc);

		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;

		spin_lock_bh(root_lock);
		qdisc_destroy(qdisc);
		spin_unlock_bh(root_lock);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
}