sch_generic.c 15.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

31
/* Modifications to data participating in scheduling must be protected with
 * the qdisc_root_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue and dequeue are serialized via the qdisc root lock
 * - ingress filtering is also serialized via the qdisc root lock
 * - updates to the tree and tree walking are only done under the rtnl mutex.
 */

40 41 42 43 44
static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

45
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
46 47
{
	if (unlikely(skb->next))
48
		q->gso_skb = skb;
49 50
	else
		q->ops->requeue(skb, q);
51

52
	__netif_schedule(q);
53 54 55
	return 0;
}

56
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
57
{
58
	struct sk_buff *skb;
59

60 61
	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
62 63 64 65 66 67
	else
		skb = q->dequeue(q);

	return skb;
}

68
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
69
					   struct netdev_queue *dev_queue,
70
					   struct Qdisc *q)
71
{
72
	int ret;
73

74
	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
75 76 77 78 79 80
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
81
		kfree_skb(skb);
82 83
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
84
			       "fix it urgently!\n", dev_queue->dev->name);
85 86 87 88 89 90 91
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
92
		ret = dev_requeue_skb(skb, q);
93 94
	}

95
	return ret;
96 97
}

98
/*
99
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
100
 *
101
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
102 103
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
104 105 106
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
107
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
108 109 110 111 112 113 114 115 116
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
117
static inline int qdisc_restart(struct Qdisc *q)
L
Linus Torvalds 已提交
118
{
119
	struct netdev_queue *txq;
120
	int ret = NETDEV_TX_BUSY;
121
	struct net_device *dev;
122
	spinlock_t *root_lock;
123
	struct sk_buff *skb;
L
Linus Torvalds 已提交
124

125
	/* Dequeue packet */
126
	if (unlikely((skb = dequeue_skb(q)) == NULL))
127
		return 0;
128

129 130 131 132
	root_lock = qdisc_root_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);
133

134 135
	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
136

137
	HARD_TX_LOCK(dev, txq, smp_processor_id());
138
	if (!netif_subqueue_stopped(dev, skb))
139
		ret = dev_hard_start_xmit(skb, dev, txq);
140
	HARD_TX_UNLOCK(dev, txq);
141

142
	spin_lock(root_lock);
143

144 145 146 147 148 149 150 151
	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
152
		ret = handle_dev_cpu_collision(skb, txq, q);
153 154 155 156 157 158 159 160
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

161
		ret = dev_requeue_skb(skb, q);
162 163
		break;
	}
164

165 166 167
	if (ret && netif_tx_queue_stopped(txq))
		ret = 0;

168
	return ret;
L
Linus Torvalds 已提交
169 170
}

171
void __qdisc_run(struct Qdisc *q)
H
Herbert Xu 已提交
172
{
173 174
	unsigned long start_time = jiffies;

175
	while (qdisc_restart(q)) {
176 177 178 179 180 181
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
182
			__netif_schedule(q);
183
			break;
184 185
		}
	}
H
Herbert Xu 已提交
186

187
	clear_bit(__QDISC_STATE_RUNNING, &q->state);
H
Herbert Xu 已提交
188 189
}

L
Linus Torvalds 已提交
190 191 192 193
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

H
Herbert Xu 已提交
194
	netif_tx_lock(dev);
195
	if (!qdisc_tx_is_noop(dev)) {
L
Linus Torvalds 已提交
196 197 198
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
199 200 201 202 203 204 205 206 207 208 209 210
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}
211

212 213 214 215 216
			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
217
				       dev->name);
L
Linus Torvalds 已提交
218
				dev->tx_timeout(dev);
219
				WARN_ON_ONCE(1);
L
Linus Torvalds 已提交
220
			}
221 222 223
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
L
Linus Torvalds 已提交
224 225 226
				dev_hold(dev);
		}
	}
H
Herbert Xu 已提交
227
	netif_tx_unlock(dev);
L
Linus Torvalds 已提交
228 229 230 231 232 233 234 235 236

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
237 238
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
L
Linus Torvalds 已提交
239 240 241 242 243 244 245 246 247 248 249
			dev_hold(dev);
	}
}

/* File-local alias: start the tx watchdog via __netdev_watchdog_up(). */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
H
Herbert Xu 已提交
250
	netif_tx_lock_bh(dev);
L
Linus Torvalds 已提交
251
	if (del_timer(&dev->watchdog_timer))
252
		dev_put(dev);
H
Herbert Xu 已提交
253
	netif_tx_unlock_bh(dev);
L
Linus Torvalds 已提交
254 255
}

256 257 258 259 260 261
/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected that carrier.
 */
262 263
void netif_carrier_on(struct net_device *dev)
{
J
Jeff Garzik 已提交
264
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
265
		linkwatch_fire_event(dev);
J
Jeff Garzik 已提交
266 267 268
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
269
}
270
EXPORT_SYMBOL(netif_carrier_on);
271

272 273 274 275 276 277
/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
278 279 280 281 282
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
283
EXPORT_SYMBOL(netif_carrier_off);
284

L
Linus Torvalds 已提交
285 286 287 288 289
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

290
static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
L
Linus Torvalds 已提交
291 292 293 294 295
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

296
static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
L
Linus Torvalds 已提交
297 298 299 300
{
	return NULL;
}

301
static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
302 303
{
	if (net_ratelimit())
304 305
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
L
Linus Torvalds 已提交
306 307 308 309
	kfree_skb(skb);
	return NET_XMIT_CN;
}

310
/* Operations table of the "noop" qdisc; no private data. */
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

319 320 321 322
/* Placeholder netdev_queue that the built-in noop_qdisc points back to. */
static struct netdev_queue noop_netdev_queue = {
	.qdisc = &noop_qdisc,
};

L
Linus Torvalds 已提交
323 324 325 326
struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
327
	.ops		=	&noop_qdisc_ops,
L
Linus Torvalds 已提交
328
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
329
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
330
	.dev_queue	=	&noop_netdev_queue,
L
Linus Torvalds 已提交
331
};
332
EXPORT_SYMBOL(noop_qdisc);
L
Linus Torvalds 已提交
333

334
/* Operations table of the "noqueue" qdisc; reuses the noop hooks. */
static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

343 344 345 346 347
/* Tentative definition so the queue below can take the qdisc's address. */
static struct Qdisc noqueue_qdisc;

/* Placeholder netdev_queue that the built-in noqueue_qdisc points back to. */
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc = &noqueue_qdisc,
};

L
Linus Torvalds 已提交
348 349 350 351 352 353
static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
354 355
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
L
Linus Torvalds 已提交
356 357 358
};


359
static int fifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
360
{
361
	struct sk_buff_head *list = &qdisc->q;
L
Linus Torvalds 已提交
362

363
	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len)
364 365 366
		return __qdisc_enqueue_tail(skb, qdisc, list);

	return qdisc_drop(skb, qdisc);
L
Linus Torvalds 已提交
367 368
}

369
static struct sk_buff *fifo_fast_dequeue(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
370
{
371
	struct sk_buff_head *list = &qdisc->q;
L
Linus Torvalds 已提交
372

373 374
	if (!skb_queue_empty(list))
		return __qdisc_dequeue_head(qdisc, list);
375

L
Linus Torvalds 已提交
376 377 378
	return NULL;
}

379
static int fifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
380
{
381
	return __qdisc_requeue(skb, qdisc, &qdisc->q);
L
Linus Torvalds 已提交
382 383
}

384
static void fifo_fast_reset(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
385
{
386
	__qdisc_reset_queue(qdisc, &qdisc->q);
387
	qdisc->qstats.backlog = 0;
L
Linus Torvalds 已提交
388 389
}

390 391 392 393 394 395 396
/*
 * Operations table of the "fifo_fast" qdisc, the default attached by
 * attach_one_default_qdisc() to devices with a non-zero tx_queue_len.
 */
static struct Qdisc_ops fifo_fast_ops __read_mostly = {
	.id		=	"fifo_fast",
	.priv_size	=	0,
	.enqueue	=	fifo_fast_enqueue,
	.dequeue	=	fifo_fast_dequeue,
	.requeue	=	fifo_fast_requeue,
	.reset		=	fifo_fast_reset,
	.owner		=	THIS_MODULE,
};

400
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
401
			  struct Qdisc_ops *ops)
L
Linus Torvalds 已提交
402 403 404
{
	void *p;
	struct Qdisc *sch;
405 406
	unsigned int size;
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
407 408

	/* ensure that the Qdisc and the private data are 32-byte aligned */
409 410
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);
L
Linus Torvalds 已提交
411

412
	p = kzalloc(size, GFP_KERNEL);
L
Linus Torvalds 已提交
413
	if (!p)
414 415 416
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;
L
Linus Torvalds 已提交
417 418 419 420 421 422

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
423
	sch->dev_queue = dev_queue;
424
	dev_hold(qdisc_dev(sch));
L
Linus Torvalds 已提交
425
	atomic_set(&sch->refcnt, 1);
426 427 428

	return sch;
errout:
429
	return ERR_PTR(err);
430 431
}

432 433 434
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
435
				 unsigned int parentid)
436 437
{
	struct Qdisc *sch;
438

439
	sch = qdisc_alloc(dev_queue, ops);
440 441
	if (IS_ERR(sch))
		goto errout;
442
	sch->parent = parentid;
443

L
Linus Torvalds 已提交
444 445 446
	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

447
	qdisc_destroy(sch);
448
errout:
L
Linus Torvalds 已提交
449 450
	return NULL;
}
451
EXPORT_SYMBOL(qdisc_create_dflt);
L
Linus Torvalds 已提交
452

453
/* Under qdisc_root_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
454 455 456

void qdisc_reset(struct Qdisc *qdisc)
{
457
	const struct Qdisc_ops *ops = qdisc->ops;
L
Linus Torvalds 已提交
458 459 460 461

	if (ops->reset)
		ops->reset(qdisc);
}
462
EXPORT_SYMBOL(qdisc_reset);
L
Linus Torvalds 已提交
463

464
/* this is the rcu callback function to clean up a qdisc when there
L
Linus Torvalds 已提交
465 466 467 468 469
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
470 471
	const struct Qdisc_ops  *ops = qdisc->ops;

472
#ifdef CONFIG_NET_SCHED
473
	qdisc_put_stab(qdisc->stab);
474
#endif
475 476 477 478 479 480 481 482 483
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

484 485
	kfree_skb(qdisc->gso_skb);

L
Linus Torvalds 已提交
486 487 488
	kfree((char *) qdisc - qdisc->padded);
}

489
/* Under qdisc_root_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
490 491 492 493

void qdisc_destroy(struct Qdisc *qdisc)
{
	if (qdisc->flags & TCQ_F_BUILTIN ||
494
	    !atomic_dec_and_test(&qdisc->refcnt))
L
Linus Torvalds 已提交
495 496
		return;

497 498
	if (qdisc->parent)
		list_del(&qdisc->list);
L
Linus Torvalds 已提交
499 500 501

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
502
EXPORT_SYMBOL(qdisc_destroy);
L
Linus Torvalds 已提交
503

504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
525
					  &fifo_fast_ops, TC_H_ROOT);
526 527 528 529 530 531 532 533 534 535 536 537 538 539
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
540
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
541 542
	int *need_watchdog_p = _need_watchdog;

543 544
	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (new_qdisc != &noqueue_qdisc)
545 546 547
		*need_watchdog_p = 1;
}

L
Linus Torvalds 已提交
548 549
void dev_activate(struct net_device *dev)
{
550
	int need_watchdog;
551

L
Linus Torvalds 已提交
552
	/* No queueing discipline is attached to device;
553 554 555
	 * create default one i.e. fifo_fast for devices,
	 * which need queueing and noqueue_qdisc for
	 * virtual interfaces.
L
Linus Torvalds 已提交
556 557
	 */

558 559
	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
L
Linus Torvalds 已提交
560

561 562 563 564
	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

565 566 567 568
	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
L
Linus Torvalds 已提交
569 570 571
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
572 573
}

574 575 576
static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
577
{
578
	struct Qdisc *qdisc_default = _qdisc_default;
579
	struct sk_buff *skb = NULL;
580 581 582
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
583
	if (qdisc) {
584 585
		spin_lock_bh(qdisc_lock(qdisc));

586 587
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);
588

589
		spin_unlock_bh(qdisc_lock(qdisc));
590
	}
591 592

	kfree_skb(skb);
L
Linus Torvalds 已提交
593 594
}

595 596 597 598 599 600
static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
601
		spinlock_t *root_lock;
602
		struct Qdisc *q;
603 604 605
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
606
		q = dev_queue->qdisc;
607
		root_lock = qdisc_root_lock(q);
608 609

		if (lock)
610
			spin_lock_bh(root_lock);
611

612
		val = test_bit(__QDISC_STATE_RUNNING, &q->state);
613 614

		if (lock)
615
			spin_unlock_bh(root_lock);
616 617 618 619 620 621 622

		if (val)
			return true;
	}
	return false;
}

L
Linus Torvalds 已提交
623 624
void dev_deactivate(struct net_device *dev)
{
625
	bool running;
L
Linus Torvalds 已提交
626

627
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
628

L
Linus Torvalds 已提交
629 630
	dev_watchdog_down(dev);

631
	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
632
	synchronize_rcu();
L
Linus Torvalds 已提交
633

634
	/* Wait for outstanding qdisc_run calls. */
635
	do {
636
		while (some_qdisc_is_running(dev, 0))
637 638 639 640 641 642
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
643
		running = some_qdisc_is_running(dev, 1);
644 645 646 647 648 649 650 651 652 653

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
L
Linus Torvalds 已提交
654 655
}

656 657
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
658
				     void *_qdisc)
659
{
660 661
	struct Qdisc *qdisc = _qdisc;

662 663 664 665
	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

L
Linus Torvalds 已提交
666 667
void dev_init_scheduler(struct net_device *dev)
{
668
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
669
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
L
Linus Torvalds 已提交
670

671
	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
L
Linus Torvalds 已提交
672 673
}

674 675 676
static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
L
Linus Torvalds 已提交
677
{
678
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
679
	struct Qdisc *qdisc_default = _qdisc_default;
680 681

	if (qdisc) {
682 683
		spinlock_t *root_lock = qdisc_root_lock(qdisc);

684 685
		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;
L
Linus Torvalds 已提交
686

687
		spin_lock(root_lock);
L
Linus Torvalds 已提交
688
		qdisc_destroy(qdisc);
689
		spin_unlock(root_lock);
690
	}
691 692 693 694
}

void dev_shutdown(struct net_device *dev)
{
695 696
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
L
Linus Torvalds 已提交
697 698
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
}