/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * queue->lock spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via top level device
 *   spinlock queue->lock.
 * - ingress filtering is serialized via top level device
 *   spinlock dev->rx_queue.lock.
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
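
/* qdisc_lock_tree()/qdisc_unlock_tree() below take every TX queue lock
 * plus the RX queue lock, bracketing whole-tree modifications; see
 * dev_init_scheduler() and dev_shutdown() at the end of this file for
 * the typical call pattern.
 */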

void qdisc_lock_tree(struct net_device *dev)
	__acquires(dev->rx_queue.lock)
{
	unsigned int i;

	local_bh_disable();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_lock(&txq->lock);
	}
	spin_lock(&dev->rx_queue.lock);
}
EXPORT_SYMBOL(qdisc_lock_tree);

void qdisc_unlock_tree(struct net_device *dev)
	__releases(dev->rx_queue.lock)
{
	unsigned int i;

	spin_unlock(&dev->rx_queue.lock);
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_unlock(&txq->lock);
	}
	local_bh_enable();
}
EXPORT_SYMBOL(qdisc_unlock_tree);

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}
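
/* When a partially-sent GSO segment list comes back from the driver,
 * skb->next is still set; dev_requeue_skb() parks such a chain in
 * q->gso_skb rather than feeding it through ->requeue(), so the
 * already-segmented list is retried as-is on the next run.
 */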

static inline int dev_requeue_skb(struct sk_buff *skb,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *q)
{
	if (unlikely(skb->next))
		q->gso_skb = skb;
	else
		q->ops->requeue(skb, q);

	netif_schedule_queue(dev_queue);
	return 0;
}
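
/* Counterpart of dev_requeue_skb(): a deferred gso_skb is handed back
 * first, before anything new is pulled from the qdisc via ->dequeue().
 */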

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb;

	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
	else
		skb = q->dequeue(q);

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, dev_queue, q);
	}

	return ret;
}

/*
 * NOTE: Called under queue->lock with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. queue->lock serializes queue accesses for
 * this queue AND txq->qdisc pointer itself.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * queue->lock and netif_tx_lock are mutually exclusive,
 * if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 */
static inline int qdisc_restart(struct netdev_queue *txq,
				struct Qdisc *q)
{
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_root_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = txq->dev;

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_subqueue_stopped(dev, skb))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, txq, q);
		break;
	}

	return ret;
}
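
/* __qdisc_run() drains the qdisc by calling qdisc_restart() until the
 * queue is empty or throttled, bailing out early when the hardware
 * queue stops or when it is time to yield the CPU; in the latter case
 * the queue is rescheduled for a later softirq instead of being
 * serviced to exhaustion here.
 */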

void __qdisc_run(struct netdev_queue *txq)
{
	unsigned long start_time = jiffies;
	struct Qdisc *q = txq->qdisc;

	while (qdisc_restart(txq, q)) {
		if (netif_tx_queue_stopped(txq))
			break;

		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			netif_schedule_queue(txq);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}
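
/* dev_watchdog() runs from dev->watchdog_timer: if some TX queue has
 * been stopped for longer than dev->watchdog_timeo, it reports the
 * stall, invokes the driver's ->tx_timeout() handler, and re-arms the
 * timer, all under netif_tx_lock.
 */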

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
				       dev->name);
				dev->tx_timeout(dev);
				WARN_ON_ONCE(1);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.lock		=	__SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock),
	.qdisc		=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};

static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */
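
/* Lower-numbered bands are dequeued first; prio2band[] above maps
 * skb->priority to a band, e.g. TC_PRIO_CONTROL and TC_PRIO_INTERACTIVE
 * land in band 0 and are therefore serviced ahead of best-effort
 * traffic (see pfifo_fast_dequeue() below).
 */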

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};
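
/* qdisc_alloc() lays the qdisc's private area out directly behind
 * struct Qdisc, both aligned to QDISC_ALIGNTO; sch->padded records the
 * offset from the raw allocation so that __qdisc_destroy() can recover
 * the original pointer for kfree().
 */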

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}
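
/* Convenience wrapper: allocate a qdisc, set its parent, run ->init()
 * with default (NULL) attributes, and clean up on failure.
 * attach_one_default_qdisc() below uses this to install pfifo_fast.
 */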

struct Qdisc *qdisc_create_dflt(struct net_device *dev,
				struct netdev_queue *dev_queue,
				struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under queue->lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

/* this is the rcu callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	kfree((char *) qdisc - qdisc->padded);
}

/* Under queue->lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	list_del(&qdisc->list);
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
EXPORT_SYMBOL(qdisc_destroy);

static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
		list_add_tail(&qdisc->list, &dev_queue->qdisc_list);
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}
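
/* transition_one_qdisc() publishes the sleeping qdisc to the transmit
 * path under the queue lock; the rcu_assign_pointer() pairs with the
 * synchronize_rcu() in dev_deactivate() below.
 */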

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	int *need_watchdog_p = _need_watchdog;

	spin_lock_bh(&dev_queue->lock);
	rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping);
	if (dev_queue->qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
	spin_unlock_bh(&dev_queue->lock);
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	 * create a default one, i.e. pfifo_fast for devices
	 * which need queueing and noqueue_qdisc for
	 * virtual interfaces.
	 */
	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct sk_buff *skb = NULL;
	struct Qdisc *qdisc;

	spin_lock_bh(&dev_queue->lock);

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);

		skb = qdisc->gso_skb;
		qdisc->gso_skb = NULL;
	}

	spin_unlock_bh(&dev_queue->lock);

	kfree_skb(skb);
}
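
/* Poll helper for dev_deactivate(): true while any TX queue's qdisc
 * still has __QDISC_STATE_RUNNING set. With lock != 0 the test is made
 * under the qdisc root lock, which is what makes the final re-check in
 * dev_deactivate() authoritative.
 */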

static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc;
		root_lock = qdisc_root_lock(q);

		if (lock)
			spin_lock_bh(root_lock);

		val = test_bit(__QDISC_STATE_RUNNING, &q->state);

		if (lock)
			spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	bool running;

	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	do {
		while (some_qdisc_is_running(dev, 0))
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
		running = some_qdisc_is_running(dev, 1);

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	INIT_LIST_HEAD(&dev_queue->qdisc_list);
}

void dev_init_scheduler(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
	qdisc_unlock_tree(dev);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
	qdisc_unlock_tree(dev);
}