sch_generic.c 15.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

31
/* Modifications to data participating in scheduling must be protected with
32
 * qdisc_root_lock(qdisc) spinlock.
33 34
 *
 * The idea is the following:
35 36
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
37
 * - updates to tree and tree walking are only done under the rtnl mutex.
L
Linus Torvalds 已提交
38 39
 */

40 41 42 43 44
static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

45
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
46 47
{
	if (unlikely(skb->next))
48
		q->gso_skb = skb;
49 50
	else
		q->ops->requeue(skb, q);
51

52
	__netif_schedule(q);
53 54 55
	return 0;
}

56
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
57
{
58
	struct sk_buff *skb;
59

60 61
	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
62 63 64 65 66 67
	else
		skb = q->dequeue(q);

	return skb;
}

68
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
69
					   struct netdev_queue *dev_queue,
70
					   struct Qdisc *q)
71
{
72
	int ret;
73

74
	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
75 76 77 78 79 80
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
81
		kfree_skb(skb);
82 83
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
84
			       "fix it urgently!\n", dev_queue->dev->name);
85 86 87 88 89 90 91
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
92
		ret = dev_requeue_skb(skb, q);
93 94
	}

95
	return ret;
96 97
}

98
/*
99
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
100
 *
101
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
102 103
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
104 105 106
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
107
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
108 109 110 111 112 113 114 115 116
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
117
static inline int qdisc_restart(struct Qdisc *q)
L
Linus Torvalds 已提交
118
{
119
	struct netdev_queue *txq;
120
	int ret = NETDEV_TX_BUSY;
121
	struct net_device *dev;
122
	spinlock_t *root_lock;
123
	struct sk_buff *skb;
L
Linus Torvalds 已提交
124

125
	/* Dequeue packet */
126
	if (unlikely((skb = dequeue_skb(q)) == NULL))
127
		return 0;
128

129 130 131 132
	root_lock = qdisc_root_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);
133

134 135
	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
136

137
	HARD_TX_LOCK(dev, txq, smp_processor_id());
138
	if (!netif_subqueue_stopped(dev, skb))
139
		ret = dev_hard_start_xmit(skb, dev, txq);
140
	HARD_TX_UNLOCK(dev, txq);
141

142
	spin_lock(root_lock);
143

144 145 146 147 148 149 150 151
	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
152
		ret = handle_dev_cpu_collision(skb, txq, q);
153 154 155 156 157 158 159 160
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

161
		ret = dev_requeue_skb(skb, q);
162 163
		break;
	}
164

165 166 167
	if (ret && netif_tx_queue_stopped(txq))
		ret = 0;

168
	return ret;
L
Linus Torvalds 已提交
169 170
}

171
void __qdisc_run(struct Qdisc *q)
H
Herbert Xu 已提交
172
{
173 174
	unsigned long start_time = jiffies;

175
	while (qdisc_restart(q)) {
176 177 178 179 180 181
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
182
			__netif_schedule(q);
183
			break;
184 185
		}
	}
H
Herbert Xu 已提交
186

187
	clear_bit(__QDISC_STATE_RUNNING, &q->state);
H
Herbert Xu 已提交
188 189
}

L
Linus Torvalds 已提交
190 191 192 193
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

H
Herbert Xu 已提交
194
	netif_tx_lock(dev);
195
	if (!qdisc_tx_is_noop(dev)) {
L
Linus Torvalds 已提交
196 197 198
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
199 200 201 202 203 204 205 206 207 208 209 210
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}
211

212 213 214 215 216
			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
217
				       dev->name);
L
Linus Torvalds 已提交
218
				dev->tx_timeout(dev);
219
				WARN_ON_ONCE(1);
L
Linus Torvalds 已提交
220
			}
221 222 223
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
L
Linus Torvalds 已提交
224 225 226
				dev_hold(dev);
		}
	}
H
Herbert Xu 已提交
227
	netif_tx_unlock(dev);
L
Linus Torvalds 已提交
228 229 230 231 232 233 234 235 236

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
237 238
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
L
Linus Torvalds 已提交
239 240 241 242 243 244 245 246 247 248 249
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
H
Herbert Xu 已提交
250
	netif_tx_lock_bh(dev);
L
Linus Torvalds 已提交
251
	if (del_timer(&dev->watchdog_timer))
252
		dev_put(dev);
H
Herbert Xu 已提交
253
	netif_tx_unlock_bh(dev);
L
Linus Torvalds 已提交
254 255
}

256 257 258 259 260 261
/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected that carrier.
 */
262 263
void netif_carrier_on(struct net_device *dev)
{
J
Jeff Garzik 已提交
264
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
265
		linkwatch_fire_event(dev);
J
Jeff Garzik 已提交
266 267 268
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
269
}
270
EXPORT_SYMBOL(netif_carrier_on);
271

272 273 274 275 276 277
/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
278 279 280 281 282
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
283
EXPORT_SYMBOL(netif_carrier_off);
284

L
Linus Torvalds 已提交
285 286 287 288 289
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

290
static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
L
Linus Torvalds 已提交
291 292 293 294 295
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

296
static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
L
Linus Torvalds 已提交
297 298 299 300
{
	return NULL;
}

301
static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
302 303
{
	if (net_ratelimit())
304 305
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
L
Linus Torvalds 已提交
306 307 308 309
	kfree_skb(skb);
	return NET_XMIT_CN;
}

310
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
L
Linus Torvalds 已提交
311 312 313 314 315 316 317 318
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

319 320 321 322
static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
};

L
Linus Torvalds 已提交
323 324 325 326
struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
327
	.ops		=	&noop_qdisc_ops,
L
Linus Torvalds 已提交
328
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
329
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
330
	.dev_queue	=	&noop_netdev_queue,
L
Linus Torvalds 已提交
331
};
332
EXPORT_SYMBOL(noop_qdisc);
L
Linus Torvalds 已提交
333

334
static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
L
Linus Torvalds 已提交
335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};


352
static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
353
{
354
	struct sk_buff_head *list = &qdisc->q;
L
Linus Torvalds 已提交
355

356
	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len)
357 358 359
		return __qdisc_enqueue_tail(skb, qdisc, list);

	return qdisc_drop(skb, qdisc);
L
Linus Torvalds 已提交
360 361
}

362
static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
363
{
364
	struct sk_buff_head *list = &qdisc->q;
L
Linus Torvalds 已提交
365

366 367
	if (!skb_queue_empty(list))
		return __qdisc_dequeue_head(qdisc, list);
368

L
Linus Torvalds 已提交
369 370 371
	return NULL;
}

372
static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
373
{
374
	return __qdisc_requeue(skb, qdisc, &qdisc->q);
L
Linus Torvalds 已提交
375 376
}

377
static void fifo_fast_reset(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
378
{
379
	__qdisc_reset_queue(qdisc, &qdisc->q);
380
	qdisc->qstats.backlog = 0;
L
Linus Torvalds 已提交
381 382
}

383 384 385 386 387 388 389
static struct Qdisc_ops fifo_fast_ops __read_mostly = {
	.id		=	"fifo_fast",
	.priv_size	=	0,
	.enqueue	=	fifo_fast_enqueue,
	.dequeue	=	fifo_fast_dequeue,
	.requeue	=	fifo_fast_requeue,
	.reset		=	fifo_fast_reset,
L
Linus Torvalds 已提交
390 391 392
	.owner		=	THIS_MODULE,
};

393
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
394
			  struct Qdisc_ops *ops)
L
Linus Torvalds 已提交
395 396 397
{
	void *p;
	struct Qdisc *sch;
398 399
	unsigned int size;
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
400 401

	/* ensure that the Qdisc and the private data are 32-byte aligned */
402 403
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);
L
Linus Torvalds 已提交
404

405
	p = kzalloc(size, GFP_KERNEL);
L
Linus Torvalds 已提交
406
	if (!p)
407 408 409
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;
L
Linus Torvalds 已提交
410 411 412 413 414 415

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
416
	sch->dev_queue = dev_queue;
417
	dev_hold(qdisc_dev(sch));
L
Linus Torvalds 已提交
418
	atomic_set(&sch->refcnt, 1);
419 420 421

	return sch;
errout:
422
	return ERR_PTR(err);
423 424
}

425 426 427
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
428
				 unsigned int parentid)
429 430
{
	struct Qdisc *sch;
431

432
	sch = qdisc_alloc(dev_queue, ops);
433 434
	if (IS_ERR(sch))
		goto errout;
435
	sch->parent = parentid;
436

L
Linus Torvalds 已提交
437 438 439
	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

440
	qdisc_destroy(sch);
441
errout:
L
Linus Torvalds 已提交
442 443
	return NULL;
}
444
EXPORT_SYMBOL(qdisc_create_dflt);
L
Linus Torvalds 已提交
445

446
/* Under qdisc_root_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
447 448 449

void qdisc_reset(struct Qdisc *qdisc)
{
450
	const struct Qdisc_ops *ops = qdisc->ops;
L
Linus Torvalds 已提交
451 452 453 454

	if (ops->reset)
		ops->reset(qdisc);
}
455
EXPORT_SYMBOL(qdisc_reset);
L
Linus Torvalds 已提交
456

457
/* this is the rcu callback function to clean up a qdisc when there
L
Linus Torvalds 已提交
458 459 460 461 462
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
463 464 465 466 467 468 469 470 471 472 473
	const struct Qdisc_ops  *ops = qdisc->ops;

	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

474 475
	kfree_skb(qdisc->gso_skb);

L
Linus Torvalds 已提交
476 477 478
	kfree((char *) qdisc - qdisc->padded);
}

479
/* Under qdisc_root_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
480 481 482

void qdisc_destroy(struct Qdisc *qdisc)
{
483 484
	struct net_device *dev = qdisc_dev(qdisc);

L
Linus Torvalds 已提交
485
	if (qdisc->flags & TCQ_F_BUILTIN ||
486
	    !atomic_dec_and_test(&qdisc->refcnt))
L
Linus Torvalds 已提交
487 488
		return;

489
	spin_lock_bh(&dev->qdisc_list_lock);
490
	list_del(&qdisc->list);
491
	spin_unlock_bh(&dev->qdisc_list_lock);
L
Linus Torvalds 已提交
492 493 494

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
495
EXPORT_SYMBOL(qdisc_destroy);
L
Linus Torvalds 已提交
496

497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
518
					  &fifo_fast_ops, TC_H_ROOT);
519 520 521 522
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
523 524 525
		spin_lock_bh(&dev->qdisc_list_lock);
		list_add_tail(&qdisc->list, &dev->qdisc_list);
		spin_unlock_bh(&dev->qdisc_list_lock);
526 527 528 529 530 531 532 533 534 535
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
536
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
537 538
	int *need_watchdog_p = _need_watchdog;

539 540
	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (new_qdisc != &noqueue_qdisc)
541 542 543
		*need_watchdog_p = 1;
}

L
Linus Torvalds 已提交
544 545
void dev_activate(struct net_device *dev)
{
546
	int need_watchdog;
547

L
Linus Torvalds 已提交
548
	/* No queueing discipline is attached to device;
549 550 551
	 * create default one i.e. fifo_fast for devices,
	 * which need queueing and noqueue_qdisc for
	 * virtual interfaces.
L
Linus Torvalds 已提交
552 553
	 */

554 555
	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
L
Linus Torvalds 已提交
556

557 558 559 560
	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

561 562 563 564
	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
L
Linus Torvalds 已提交
565 566 567
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
568 569
}

570 571 572
static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
573
{
574
	struct Qdisc *qdisc_default = _qdisc_default;
575
	struct sk_buff *skb = NULL;
576 577 578
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
579
	if (qdisc) {
580 581
		spin_lock_bh(qdisc_lock(qdisc));

582 583
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);
584

585
		spin_unlock_bh(qdisc_lock(qdisc));
586
	}
587 588

	kfree_skb(skb);
L
Linus Torvalds 已提交
589 590
}

591 592 593 594 595 596
static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
597
		spinlock_t *root_lock;
598
		struct Qdisc *q;
599 600 601
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
602
		q = dev_queue->qdisc;
603
		root_lock = qdisc_root_lock(q);
604 605

		if (lock)
606
			spin_lock_bh(root_lock);
607

608
		val = test_bit(__QDISC_STATE_RUNNING, &q->state);
609 610

		if (lock)
611
			spin_unlock_bh(root_lock);
612 613 614 615 616 617 618

		if (val)
			return true;
	}
	return false;
}

L
Linus Torvalds 已提交
619 620
void dev_deactivate(struct net_device *dev)
{
621
	bool running;
L
Linus Torvalds 已提交
622

623
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
624

L
Linus Torvalds 已提交
625 626
	dev_watchdog_down(dev);

627
	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
628
	synchronize_rcu();
L
Linus Torvalds 已提交
629

630
	/* Wait for outstanding qdisc_run calls. */
631
	do {
632
		while (some_qdisc_is_running(dev, 0))
633 634 635 636 637 638
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
639
		running = some_qdisc_is_running(dev, 1);
640 641 642 643 644 645 646 647 648 649

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
L
Linus Torvalds 已提交
650 651
}

652 653
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
654
				     void *_qdisc)
655
{
656 657
	struct Qdisc *qdisc = _qdisc;

658 659 660 661
	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

L
Linus Torvalds 已提交
662 663
void dev_init_scheduler(struct net_device *dev)
{
664
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
665
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
L
Linus Torvalds 已提交
666

667
	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
L
Linus Torvalds 已提交
668 669
}

670 671 672
static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
L
Linus Torvalds 已提交
673
{
674
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
675
	struct Qdisc *qdisc_default = _qdisc_default;
676 677

	if (qdisc) {
678 679
		spinlock_t *root_lock = qdisc_root_lock(qdisc);

680 681
		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;
L
Linus Torvalds 已提交
682

683
		spin_lock(root_lock);
L
Linus Torvalds 已提交
684
		qdisc_destroy(qdisc);
685
		spin_unlock(root_lock);
686
	}
687 688 689 690
}

void dev_shutdown(struct net_device *dev)
{
691 692
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
L
Linus Torvalds 已提交
693 694
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
}