/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_root_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

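/* For orientation, the transmit path in net/core/dev.c takes this lock
 * around the enqueue and the first run of the queue, roughly like the
 * following (a simplified sketch, not the verbatim core code):
 *
 *	spin_lock(root_lock);
 *	rc = q->enqueue(skb, q);
 *	qdisc_run(q);
 *	spin_unlock(root_lock);
 */
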
static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	if (unlikely(skb->next))
		q->gso_skb = skb;
	else
		q->ops->requeue(skb, q);

	__netif_schedule(q);
	return 0;
}

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb;

	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
	else
		skb = q->dequeue(q);

	return skb;
}
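
/* Note how the two helpers above pair up: when a partially transmitted GSO
 * segment list comes back (skb->next is set), dev_requeue_skb() parks it in
 * q->gso_skb instead of pushing it through ->requeue(), and dequeue_skb()
 * hands it back before asking the qdisc for new work, so segment order is
 * preserved across a requeue.
 */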

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_root_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_subqueue_stopped(dev, skb))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	if (ret && netif_tx_queue_stopped(txq))
		ret = 0;

	return ret;
}

void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}
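
/* Callers are expected to guard entry with __QDISC_STATE_RUNNING so that
 * only one CPU runs a given qdisc at a time; the qdisc_run() wrapper in
 * include/net/pkt_sched.h looks roughly like this (a sketch from memory,
 * check the header for the authoritative version):
 *
 *	static inline void qdisc_run(struct Qdisc *q)
 *	{
 *		if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
 *			__qdisc_run(q);
 *	}
 */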

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
				       dev->name);
				dev->tx_timeout(dev);
				WARN_ON_ONCE(1);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}
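
/* A driver opts in to this watchdog simply by providing a tx_timeout
 * handler in its setup path, e.g. (hypothetical driver names, shown for
 * illustration only):
 *
 *	dev->tx_timeout = mydrv_tx_timeout;
 *	dev->watchdog_timeo = 2 * HZ;
 *
 * If the driver leaves watchdog_timeo unset, the 5*HZ default above is
 * used.
 */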

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */
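
/* Read the prio2band table above with skb->priority & TC_PRIO_MAX as the
 * index; band 0 is drained first, band 2 last. Worked examples, using the
 * TC_PRIO_* constants from linux/pkt_sched.h:
 *	TC_PRIO_CONTROL     (7) -> band 0
 *	TC_PRIO_INTERACTIVE (6) -> band 0
 *	TC_PRIO_BESTEFFORT  (0) -> band 1	(the common case)
 *	TC_PRIO_BULK        (2) -> band 2
 */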

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}
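
/* The padding arithmetic above in a worked example, assuming QDISC_ALIGNTO
 * is 32: if kzalloc() happens to return p ending in 0x08, QDISC_ALIGN
 * rounds the Qdisc pointer up to the next 0x20 boundary, so sch->padded is
 * 0x18 (24 bytes). Allocating priv_size + (QDISC_ALIGNTO - 1) extra bytes
 * guarantees the private area still fits after the worst-case shift of 31
 * bytes, and __qdisc_destroy() below rewinds by sch->padded before kfree()
 * to free the original pointer.
 */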

struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
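
/* Classful qdiscs use this helper to hang a default child off one of their
 * classes; sch_prio, for instance, does roughly (a sketch, not verbatim):
 *
 *	child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue,
 *				  &pfifo_qdisc_ops,
 *				  TC_H_MAKE(sch->handle, i + 1));
 *
 * A NULL return means allocation or ->init() failed and the caller must
 * fall back, typically to &noop_qdisc.
 */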

/* Under qdisc_root_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

/* this is the rcu callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	const struct Qdisc_ops  *ops = qdisc->ops;

	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);

	kfree((char *) qdisc - qdisc->padded);
}

/* Under qdisc_root_lock(qdisc) and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	struct net_device *dev = qdisc_dev(qdisc);

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	spin_lock_bh(&dev->qdisc_list_lock);
	list_del(&qdisc->list);
	spin_unlock_bh(&dev->qdisc_list_lock);

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
EXPORT_SYMBOL(qdisc_destroy);

static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
		spin_lock_bh(&dev->qdisc_list_lock);
		list_add_tail(&qdisc->list, &dev->qdisc_list);
		spin_unlock_bh(&dev->qdisc_list_lock);
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}
void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct sk_buff *skb = NULL;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}

	kfree_skb(skb);
}

static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc;
		root_lock = qdisc_root_lock(q);

		if (lock)
			spin_lock_bh(root_lock);

		val = test_bit(__QDISC_STATE_RUNNING, &q->state);

		if (lock)
			spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	bool running;

	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	do {
		while (some_qdisc_is_running(dev, 0))
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
		running = some_qdisc_is_running(dev, 1);

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		spinlock_t *root_lock = qdisc_root_lock(qdisc);

		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;

		spin_lock(root_lock);
		qdisc_destroy(qdisc);
		spin_unlock(root_lock);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
}