sch_generic.c 16.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

31
/* Modifications to data participating in scheduling must be protected with
32
 * qdisc_lock(qdisc) spinlock.
33 34
 *
 * The idea is the following:
35 36
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
37
 * - updates to tree and tree walking are only done under the rtnl mutex.
L
Linus Torvalds 已提交
38 39
 */

40 41 42 43 44
/* Return the number of packets currently queued on @q. */
static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

45
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
46
{
47
	q->gso_skb = skb;
48
	q->qstats.requeues++;
49
	__netif_schedule(q);
50

51 52 53
	return 0;
}

54
/*
 * Fetch the next packet to transmit from @q.
 *
 * A previously requeued packet (q->gso_skb) takes priority over the
 * qdisc's own queue, but is only released if its tx queue is able to
 * accept it again; otherwise NULL is returned and the skb stays parked.
 */
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb = q->gso_skb;

	if (unlikely(skb)) {
		struct net_device *dev = qdisc_dev(q);
		struct netdev_queue *txq;

		/* check the reason of requeuing without tx lock first */
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
			q->gso_skb = NULL;
		else
			skb = NULL;
	} else {
		skb = q->dequeue(q);
	}

	return skb;
}

75
/*
 * Handle NETDEV_TX_LOCKED from the driver: another context holds the
 * device xmit lock.  Returns qdisc_qlen(q) (try next packet) when the
 * collision is a same-CPU deadloop, otherwise requeues @skb and
 * returns 0 via dev_requeue_skb().
 */
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

105
/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_lock(q);

	/* And release qdisc: the driver must be called without the
	 * qdisc lock held (see the lock-ordering note above). */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	/* Reacquire the qdisc lock before touching the queue again. */
	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	/* Report "empty/throttled" if the tx queue got stopped meanwhile. */
	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}

180
/*
 * Drain @q by repeatedly calling qdisc_restart() until it is empty,
 * throttled, or this CPU has held it for too long.  Clears
 * __QDISC_STATE_RUNNING on exit so another CPU may take over.
 */
void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}

L
Linus Torvalds 已提交
199 200 201 202
/*
 * Transmit-watchdog timer callback: if any tx queue has been stopped
 * longer than dev->watchdog_timeo, report a timeout to the driver via
 * dev->tx_timeout().  Re-arms itself while the device stays active.
 */
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			/* One stopped queue is enough to consider a timeout. */
			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				char drivername[64];
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
				       dev->name, netdev_drivername(dev, drivername, 64));
				dev->tx_timeout(dev);
			}
			/* mod_timer() == 0 means the timer was not pending:
			 * take an extra device reference for the new timer. */
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	/* Drop the reference held for this timer invocation. */
	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
245 246
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
L
Linus Torvalds 已提交
247 248 249 250 251 252 253 254 255 256 257
			dev_hold(dev);
	}
}

/* Static wrapper so dev_activate() can start the watchdog. */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

/*
 * Stop the transmit watchdog.  If the timer was pending, drop the
 * device reference that was taken when it was armed.
 */
static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

264 265 266 267 268 269
/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected that carrier.
 */
270 271
void netif_carrier_on(struct net_device *dev)
{
J
Jeff Garzik 已提交
272
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
273
		linkwatch_fire_event(dev);
J
Jeff Garzik 已提交
274 275 276
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
277
}
278
EXPORT_SYMBOL(netif_carrier_on);
279

280 281 282 283 284 285
/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
286 287 288 289 290
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
291
EXPORT_SYMBOL(netif_carrier_off);
292

L
Linus Torvalds 已提交
293 294 295 296 297
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

298
/* The noop discipline accepts nothing: drop and report congestion. */
static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

304
/* The noop discipline never has anything to dequeue. */
static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

309
/* Ops for the built-in "noop" discipline: drop everything. */
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

318 319
/* Dummy tx queue backing noop_qdisc. */
static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

L
Linus Torvalds 已提交
323 324 325 326
/* Shared built-in qdisc attached to inactive/shut-down devices. */
struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);
L
Linus Torvalds 已提交
333

334
/* Ops for the "noqueue" discipline used by queueless (virtual) devices. */
static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

343 344 345
/* Forward declaration: noqueue_qdisc and its queue reference each other. */
static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
	.qdisc_sleeping	=	&noqueue_qdisc,
};

L
Linus Torvalds 已提交
349 350 351 352 353 354
/* Built-in qdisc for devices with tx_queue_len == 0; the NULL enqueue
 * makes dev_queue_xmit() bypass queueing entirely. */
static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};


360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376
/* Map TC_PRIO_* values to one of the three pfifo_fast bands
 * (0 = highest priority, drained first). */
static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

/* Select the pfifo_fast band list for @skb based on skb->priority. */
static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *band_lists = qdisc_priv(qdisc);

	return &band_lists[prio2band[skb->priority & TC_PRIO_MAX]];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
377
{
378
	struct sk_buff_head *list = prio2list(skb, qdisc);
L
Linus Torvalds 已提交
379

380 381
	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
382
		return __qdisc_enqueue_tail(skb, qdisc, list);
383
	}
384 385

	return qdisc_drop(skb, qdisc);
L
Linus Torvalds 已提交
386 387
}

388
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
389
{
390 391
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);
L
Linus Torvalds 已提交
392

393 394 395 396 397 398
	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}
399

L
Linus Torvalds 已提交
400 401 402
	return NULL;
}

403 404 405 406 407 408 409 410 411 412 413 414 415
static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio))
			return skb_peek(list + prio);
	}

	return NULL;
}

416
static void pfifo_fast_reset(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
417
{
418 419 420 421 422 423
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

424
	qdisc->qstats.backlog = 0;
425
	qdisc->q.qlen = 0;
L
Linus Torvalds 已提交
426 427
}

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
/*
 * Dump the band count and priority map as a TCA_OPTIONS netlink
 * attribute.  Returns the new message length, or -1 if the skb ran
 * out of room (NLA_PUT jumps to nla_put_failure in that case).
 */
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

/* Initialize the three band lists; @opt is ignored. Always succeeds. */
static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	struct sk_buff_head *band = qdisc_priv(qdisc);
	int i;

	for (i = 0; i < PFIFO_FAST_BANDS; i++)
		skb_queue_head_init(band + i);

	return 0;
}

/* Default qdisc for real devices (see attach_one_default_qdisc()). */
static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

463
/*
 * Allocate and minimally initialize a Qdisc bound to @dev_queue.
 * Returns the new qdisc (refcount 1, holding a device reference) or
 * ERR_PTR(-ENOBUFS) on allocation failure.  sch->padded records the
 * alignment offset so qdisc_destroy() can free the original pointer.
 */
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

495 496 497
/*
 * Allocate a qdisc of type @ops under parent @parentid and run its
 * init hook.  Returns the ready qdisc, or NULL on allocation or init
 * failure (the partially built qdisc is destroyed on init failure).
 */
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);
L
Linus Torvalds 已提交
515

516
/* Under qdisc_lock(qdisc) and BH! */

/*
 * Flush all queued packets from @qdisc via its reset hook and discard
 * any packet parked in gso_skb by dev_requeue_skb().
 */
void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);

	kfree_skb(qdisc->gso_skb);
	qdisc->gso_skb = NULL;
}
EXPORT_SYMBOL(qdisc_reset);
L
Linus Torvalds 已提交
529

530
/*
 * Drop a reference to @qdisc and tear it down once the last reference
 * is gone.  Built-in qdiscs (noop/noqueue) are never destroyed.
 */
void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops  *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_list_del(qdisc);

	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	/* Reset (flush packets) before the type-specific destroy hook. */
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);
	/* Free the original (pre-alignment) allocation; see qdisc_alloc(). */
	kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);
L
Linus Torvalds 已提交
556

557 558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
578
					  &pfifo_fast_ops, TC_H_ROOT);
579 580 581 582 583 584 585 586 587 588 589 590 591 592
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

/*
 * Publish the sleeping qdisc as the active one for @dev_queue and,
 * via *_need_watchdog, tell dev_activate() whether this queue needs
 * the transmit watchdog running.
 */
static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	/* Built-in qdiscs are shared and never carry the DEACTIVATED bit. */
	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

L
Linus Torvalds 已提交
604 605
/*
 * Bring @dev's packet scheduling online: attach default qdiscs where
 * none are configured, publish the sleeping qdiscs, and start the
 * transmit watchdog if any real queueing qdisc went active.
 */
void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	/* rx_queue's qdisc (ingress) never needs the tx watchdog. */
	transition_one_qdisc(dev, &dev->rx_queue, NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

631 632 633
/*
 * Swap @dev_queue's active qdisc for @_qdisc_default (normally
 * &noop_qdisc), mark the old one deactivated, and flush its packets.
 * The qdisc lock is held across the swap so in-flight users see a
 * consistent state.
 */
static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

652
static bool some_qdisc_is_busy(struct net_device *dev)
653 654 655 656 657
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
658
		spinlock_t *root_lock;
659
		struct Qdisc *q;
660 661 662
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
663
		q = dev_queue->qdisc_sleeping;
664
		root_lock = qdisc_lock(q);
665

666
		spin_lock_bh(root_lock);
667

668 669
		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));
670

671
		spin_unlock_bh(root_lock);
672 673 674 675 676 677 678

		if (val)
			return true;
	}
	return false;
}

L
Linus Torvalds 已提交
679 680
/*
 * Take @dev's packet scheduling offline: point every queue at
 * noop_qdisc, stop the watchdog, then wait until no CPU is still
 * inside the old qdiscs.
 */
void dev_deactivate(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (some_qdisc_is_busy(dev))
		yield();
}

694 695
/* Point both the active and sleeping qdisc of @dev_queue at @_qdisc
 * (the shared noop_qdisc at device-init time). */
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

L
Linus Torvalds 已提交
704 705
/*
 * One-time scheduler setup for a new device: every queue starts on
 * noop_qdisc and the watchdog timer is initialized (but not armed).
 */
void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

712 713 714
/*
 * Detach @dev_queue's sleeping qdisc, fall back to @_qdisc_default,
 * and destroy the old qdisc.
 */
static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
729
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
730
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
731
	WARN_ON(timer_pending(&dev->watchdog_timer));
L
Linus Torvalds 已提交
732
}