/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

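/* Transmission failed: park the skb in the qdisc's single gso_skb requeue
 * slot, account the requeue and reschedule the qdisc so the packet is
 * retried on a later qdisc run.
 */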
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	q->gso_skb = skb;
	q->qstats.requeues++;
	__netif_schedule(q);

	return 0;
}

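/* Pick the next packet to send: retry a previously requeued gso_skb first,
 * but only once its tx queue is no longer stopped or frozen; otherwise ask
 * the qdisc for a fresh packet.
 */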
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb = q->gso_skb;

	if (unlikely(skb)) {
		struct net_device *dev = qdisc_dev(q);
		struct netdev_queue *txq;

		/* check the reason of requeuing without tx lock first */
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
			q->gso_skb = NULL;
		else
			skb = NULL;
	} else {
		skb = q->dequeue(q);
	}

	return skb;
}

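/* The driver returned NETDEV_TX_LOCKED: either we recursed on our own tx
 * lock (dead loop, drop the packet) or another CPU holds the driver lock,
 * in which case the packet is requeued and retried later.
 */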
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}

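/* Run the qdisc until its queue drains; if another task needs the CPU or
 * the jiffies counter has moved on, reschedule the qdisc via
 * __netif_schedule() and bail out. __QDISC_STATE_RUNNING is released on
 * exit.
 */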
void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}

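/* Per-device transmit watchdog timer: if any tx queue has been stopped
 * longer than watchdog_timeo, call the driver's ndo_tx_timeout(); the
 * timer is re-armed as long as the device is present, running and has
 * carrier.
 */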
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				char drivername[64];
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
				       dev->name, netdev_drivername(dev, drivername, 64));
				dev->netdev_ops->ndo_tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
	.qdisc_sleeping	=	&noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

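/* Enqueue at the tail of the band selected by skb->priority; the packet is
 * dropped once that band already holds tx_queue_len packets.
 */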
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

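/* Dequeue from the highest-priority (lowest-numbered) non-empty band. */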
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio))
			return skb_peek(list + prio);
	}

	return NULL;
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

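/* Allocate the Qdisc and its private data in one QDISC_ALIGNTO-aligned
 * block and bind it to the given tx queue; the caller is responsible for
 * running ops->init().
 */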
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

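/* Convenience wrapper around qdisc_alloc(): set the parent, run the
 * ops->init() hook and return NULL (rather than an ERR_PTR) on failure.
 */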
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);

	kfree_skb(qdisc->gso_skb);
	qdisc->gso_skb = NULL;
}
EXPORT_SYMBOL(qdisc_reset);

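/* Drop a reference to the qdisc; it is only torn down once the last
 * reference is gone and it is not one of the built-in qdiscs.
 */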
void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops  *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_list_del(qdisc);

	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);
	kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);

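/* Helpers for dev_activate(): check whether every tx queue still has the
 * noop qdisc as its sleeping qdisc (i.e. no default qdisc was attached
 * yet), attach pfifo_fast or noqueue as the default, and switch a queue
 * from its sleeping qdisc to the active one.
 */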
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	transition_one_qdisc(dev, &dev->rx_queue, NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

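/* Point the queue at qdisc_default (normally &noop_qdisc), mark the old
 * qdisc as deactivated and purge it under qdisc_lock().
 */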
static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

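/* True if any tx queue's sleeping qdisc is still running or scheduled for
 * a softirq run; used to wait out in-flight qdisc_run() calls while
 * deactivating the device.
 */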
static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;
		root_lock = qdisc_lock(q);

		spin_lock_bh(root_lock);

		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

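/* Switch every queue back to the noop qdisc, stop the watchdog and wait
 * until no CPU is still inside a qdisc-less dev_queue_xmit or an
 * outstanding qdisc_run for this device.
 */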
void dev_deactivate(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (some_qdisc_is_busy(dev))
		yield();
}

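/* At register time every tx queue (and the dummy rx_queue) starts out with
 * the noop qdisc as both its active and sleeping qdisc; the tx watchdog
 * timer is set up here as well.
 */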
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

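/* On unregister, detach whatever sleeping qdisc is left on each queue,
 * fall back to the noop qdisc and destroy the old one.
 */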
static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
	WARN_ON(timer_pending(&dev->watchdog_timer));
}