/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
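
/* Editor's sketch (not part of the original file): the pattern a
 * hypothetical configuration path would follow under the rules above;
 * tree changes run under RTNL, scheduling-data changes under the
 * per-qdisc lock:
 *
 *	rtnl_lock();			(tree walking and updates)
 *	spin_lock_bh(qdisc_lock(q));	(data used by enqueue/dequeue)
 *	...modify scheduling data of q...
 *	spin_unlock_bh(qdisc_lock(q));
 *	rtnl_unlock();
 */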

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	q->gso_skb = skb;
	q->qstats.requeues++;
	__netif_schedule(q);

	return 0;
}

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb = q->gso_skb;

	if (unlikely(skb)) {
		struct net_device *dev = qdisc_dev(q);
		struct netdev_queue *txq;

		/* check the reason of requeuing without tx lock first */
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
			q->gso_skb = NULL;
		else
			skb = NULL;
	} else {
		skb = q->dequeue(q);
	}

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}
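
/* Editor's note: the dead-loop branch above catches a driver whose
 * hard_start_xmit() recursively transmits on the same device. A
 * hypothetical buggy driver illustrating the case:
 *
 *	static int bad_start_xmit(struct sk_buff *skb, struct net_device *dev)
 *	{
 *		dev_queue_xmit(skb);	(re-enters this same device's txq
 *					 while xmit_lock_owner == this CPU)
 *		return NETDEV_TX_OK;
 *	}
 */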

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * qdisc_lock(q) and netif_tx_lock are mutually exclusive;
 * if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}

void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}
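
/* Editor's note: __qdisc_run() is reached through a wrapper that uses
 * __QDISC_STATE_RUNNING to keep a single runner per qdisc. In kernels
 * of this vintage the wrapper in include/net/pkt_sched.h looks roughly
 * like this sketch:
 *
 *	static inline void qdisc_run(struct Qdisc *q)
 *	{
 *		if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
 *			__qdisc_run(q);
 *	}
 */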

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				char drivername[64];
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
					  dev->name, netdev_drivername(dev, drivername, 64));
				dev->tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
	.qdisc_sleeping	=	&noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};


static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}
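
/* Editor's worked example for prio2band above: skb->priority is masked
 * with TC_PRIO_MAX (15), so the low four bits select a table slot.
 * For instance:
 *
 *	TC_PRIO_BESTEFFORT  (0) -> band 1 (normal)
 *	TC_PRIO_BULK        (2) -> band 2 (low)
 *	TC_PRIO_INTERACTIVE (6) -> band 0 (high)
 *
 * Lower-numbered bands are emptied first by pfifo_fast_dequeue(), so
 * interactive traffic jumps ahead of bulk traffic.
 */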

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio))
			return skb_peek(list + prio);
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->requeue);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}
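
/* Editor's worked example of the sizing above, assuming QDISC_ALIGNTO
 * is 32 and QDISC_ALIGN(x) rounds x up to a multiple of 32: with
 * sizeof(*sch) == 200 and ops->priv_size == 100,
 *
 *	size = QDISC_ALIGN(200) + 100 + 31 = 224 + 131 = 355
 *
 * so wherever kzalloc() places the block there is room to round the
 * Qdisc pointer up to a 32-byte boundary; sch->padded remembers the
 * offset so the destroy path can kfree() the original pointer.
 */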

struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);

	kfree_skb(qdisc->gso_skb);
	qdisc->gso_skb = NULL;
}
EXPORT_SYMBOL(qdisc_reset);

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_list_del(qdisc);

	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree_skb(qdisc->gso_skb);
	__skb_queue_purge(&qdisc->requeue);

	kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);

static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* If no queueing discipline is attached to the device,
	 * create a default one: pfifo_fast for devices which need
	 * queueing, and noqueue_qdisc for virtual interfaces.
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	transition_one_qdisc(dev, &dev->rx_queue, NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}
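
/* Editor's note: the choice of default qdisc above is driven entirely
 * by tx_queue_len. Roughly:
 *
 *	if (dev->tx_queue_len)		(e.g. Ethernet, default 1000)
 *		-> pfifo_fast
 *	else				(e.g. loopback, virtual devices)
 *		-> noqueue_qdisc
 */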

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;
		root_lock = qdisc_lock(q);

		spin_lock_bh(root_lock);

		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (some_qdisc_is_busy(dev))
		yield();
}
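
/* Editor's note on the two waits above: synchronize_rcu() ensures no
 * sender still uses a qdisc pointer fetched before the
 * rcu_assign_pointer() switch in dev_deactivate_queue(), and the
 * some_qdisc_is_busy() loop then lets any __qdisc_run() already in
 * flight (RUNNING or SCHED bit set) drain before the qdisc is torn
 * down.
 */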

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
	WARN_ON(timer_pending(&dev->watchdog_timer));
}