/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
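
/* A minimal sketch of the rule above with a hypothetical caller (q,
 * skb and ret are assumed to be in scope): any path that touches the
 * queues takes the per-qdisc root lock first.
 *
 *	spinlock_t *root_lock = qdisc_lock(q);
 *
 *	spin_lock_bh(root_lock);
 *	ret = q->enqueue(skb, q);
 *	spin_unlock_bh(root_lock);
 */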

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}
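
/* Park one skb for a later retry: q->gso_skb holds at most one requeued
 * packet, and __netif_schedule() arranges for the qdisc to be run again
 * to transmit it.
 */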

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	q->gso_skb = skb;
	q->qstats.requeues++;
	__netif_schedule(q);

	return 0;
}

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb = q->gso_skb;

	if (unlikely(skb)) {
		struct net_device *dev = qdisc_dev(q);
		struct netdev_queue *txq;

		/* check the reason for requeuing without tx lock first */
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
			q->gso_skb = NULL;
		else
			skb = NULL;
	} else {
		skb = q->dequeue(q);
	}

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU is holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking the xmit owner and drop the packet
		 * when a dead loop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another CPU is holding the lock; requeue the skb and
		 * delay transmissions for some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees that only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to the device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive:
 * if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}

void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}
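
/* For reference, the __QDISC_STATE_RUNNING bit cleared above is taken
 * by the caller, qdisc_run() in include/net/pkt_sched.h, roughly as in
 * this sketch, so at most one CPU services a given qdisc at a time:
 *
 *	if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
 *		__qdisc_run(q);
 */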

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				char drivername[64];
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
					  dev->name, netdev_drivername(dev, drivername, 64));
				dev->tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}
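
/* A driver opts into this watchdog by installing a handler and,
 * optionally, a timeout; my_tx_timeout() below is a hypothetical
 * example, and the 5*HZ fallback matches __netdev_watchdog_up():
 *
 *	dev->tx_timeout = my_tx_timeout;
 *	dev->watchdog_timeo = 5 * HZ;
 */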

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);
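
/* Drivers typically toggle carrier from their link-state interrupt or
 * polling path; a hypothetical sketch, where my_phy_link_up() stands in
 * for the driver's own link test:
 *
 *	if (my_phy_link_up(priv))
 *		netif_carrier_on(dev);
 *	else
 *		netif_carrier_off(dev);
 */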

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.requeue.lock),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
	.qdisc_sleeping	=	&noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.requeue.lock),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3
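
/* Worked example of the prio2band map above: skb->priority & TC_PRIO_MAX
 * selects the index, so TC_PRIO_INTERACTIVE (6) lands in band 0 (served
 * first), TC_PRIO_BESTEFFORT (0) in band 1, and TC_PRIO_BULK (2) in
 * band 2 (served last by pfifo_fast_dequeue() below).
 */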

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}
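
/* NLA_PUT() above jumps to the nla_put_failure label when the skb has
 * no tailroom left for the attribute; the -1 return then propagates the
 * failure to the netlink dump machinery.
 */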

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->requeue);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}
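
/* Example of the alignment arithmetic above, assuming QDISC_ALIGNTO is
 * 32: if kzalloc() returns p ending in 0x08, QDISC_ALIGN() rounds sch
 * up to the next 0x20 boundary and sch->padded records the 0x18-byte
 * offset, which qdisc_destroy() later uses to recover p via
 * (char *)qdisc - qdisc->padded.
 */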

struct Qdisc *qdisc_create_dflt(struct net_device *dev,
				struct netdev_queue *dev_queue,
				struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_list_del(qdisc);

	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);
	__skb_queue_purge(&qdisc->requeue);

	kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);

static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to the device;
	 * create a default one, i.e. pfifo_fast for devices
	 * that need queueing and noqueue_qdisc for virtual
	 * interfaces.
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	transition_one_qdisc(dev, &dev->rx_queue, NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;
		root_lock = qdisc_lock(q);

		spin_lock_bh(root_lock);

		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (some_qdisc_is_busy(dev))
		yield();
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
	WARN_ON(timer_pending(&dev->watchdog_timer));
}