/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * queue->lock spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via top level device
 *   spinlock queue->lock.
 * - ingress filtering is serialized via top level device
 *   spinlock dev->rx_queue.lock.
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
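
/*
 * A minimal sketch of the third rule above (rtnl_lock()/rtnl_unlock() are
 * the standard rtnl helpers; the tree operation shown is illustrative):
 *
 *	rtnl_lock();
 *	... graft or walk the qdisc tree ...
 *	rtnl_unlock();
 */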

void qdisc_lock_tree(struct net_device *dev)
	__acquires(dev->rx_queue.lock)
{
	unsigned int i;

	local_bh_disable();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_lock(&txq->lock);
	}
	spin_lock(&dev->rx_queue.lock);
}
EXPORT_SYMBOL(qdisc_lock_tree);

void qdisc_unlock_tree(struct net_device *dev)
	__releases(dev->rx_queue.lock)
{
	unsigned int i;

	spin_unlock(&dev->rx_queue.lock);
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_unlock(&txq->lock);
	}
	local_bh_enable();
}
EXPORT_SYMBOL(qdisc_unlock_tree);

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int dev_requeue_skb(struct sk_buff *skb,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *q)
{
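	/*
	 * A non-NULL skb->next here means dev_hard_start_xmit() stopped
	 * partway through a GSO segment list; park the remainder in the
	 * queue's gso_skb slot instead of pushing it back through the
	 * qdisc, so dequeue_skb() can resume it first.
	 */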
	if (unlikely(skb->next))
		dev_queue->gso_skb = skb;
	else
		q->ops->requeue(skb, q);

	netif_schedule_queue(dev_queue);
	return 0;
}

static inline struct sk_buff *dequeue_skb(struct netdev_queue *dev_queue,
					  struct Qdisc *q)
{
	struct sk_buff *skb;

	if ((skb = dev_queue->gso_skb))
		dev_queue->gso_skb = NULL;
	else
		skb = q->dequeue(q);

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, dev_queue, q);
	}

	return ret;
}

/*
 * NOTE: Called under queue->lock with locally disabled BH.
 *
 * __QUEUE_STATE_QDISC_RUNNING guarantees only one CPU can process
 * this queue at a time. queue->lock serializes queue accesses for
 * this queue AND txq->qdisc pointer itself.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  queue->lock and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct netdev_queue *txq)
{
	struct Qdisc *q = txq->qdisc;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(txq, q)) == NULL))
		return 0;

	/* And release queue */
	spin_unlock(&txq->lock);

	dev = txq->dev;

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_subqueue_stopped(dev, skb))
		ret = dev_hard_start_xmit(skb, dev);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(&txq->lock);
	q = txq->qdisc;

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, txq, q);
		break;
	}

	return ret;
}

void __qdisc_run(struct netdev_queue *txq)
{
	struct net_device *dev = txq->dev;
	unsigned long start_time = jiffies;

	while (qdisc_restart(txq)) {
		if (netif_queue_stopped(dev))
			break;

		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			netif_schedule_queue(txq);
			break;
		}
	}

	clear_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state);
}
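
/*
 * For context: callers do not invoke __qdisc_run() directly. A sketch of
 * the qdisc_run() wrapper (it lives in include/net/pkt_sched.h in this
 * era; see that header for the authoritative version) shows how a
 * test_and_set_bit() on the RUNNING flag pairs with the clear_bit()
 * above to keep at most one CPU inside the loop per queue:
 *
 *	static inline void qdisc_run(struct netdev_queue *txq)
 *	{
 *		if (!test_and_set_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state))
 *			__qdisc_run(txq);
 *	}
 */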

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
				       dev->name);
				dev->tx_timeout(dev);
				WARN_ON_ONCE(1);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};

static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */
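
/*
 * A worked reading of prio2band above (not new policy): skb->priority
 * TC_PRIO_BESTEFFORT (0) maps to band 1, TC_PRIO_BULK (2) to band 2,
 * and TC_PRIO_INTERACTIVE (6) to band 0.  pfifo_fast_dequeue() scans
 * band 0 first, so lower band numbers drain first.
 */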

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;
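	/*
	 * Worked example of the padding math (QDISC_ALIGNTO is 32 here):
	 * if kzalloc() returns p ending in 0x08, QDISC_ALIGN() rounds sch
	 * up to the next 0x20 boundary and padded records the 0x18-byte
	 * offset, which __qdisc_destroy() uses to recover p for kfree().
	 */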

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under queue->lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

/* this is the rcu callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	kfree((char *) qdisc - qdisc->padded);
}

/* Under queue->lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	list_del(&qdisc->list);
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
EXPORT_SYMBOL(qdisc_destroy);
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
		list_add_tail(&qdisc->list, &dev_queue->qdisc_list);
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	int *need_watchdog_p = _need_watchdog;

	spin_lock_bh(&dev_queue->lock);
	rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping);
	if (dev_queue->qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
	spin_unlock_bh(&dev_queue->lock);
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create a default one, i.e. pfifo_fast for devices
	   which need queueing and noqueue_qdisc for
	   virtual interfaces.
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;
	struct sk_buff *skb;

	spin_lock_bh(&dev_queue->lock);

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);
	}
	skb = dev_queue->gso_skb;
	dev_queue->gso_skb = NULL;

	spin_unlock_bh(&dev_queue->lock);

	kfree_skb(skb);
}

static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);

		if (lock)
			spin_lock_bh(&dev_queue->lock);

		val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state);

		if (lock)
			spin_unlock_bh(&dev_queue->lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	bool running;

	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	do {
		while (some_qdisc_is_running(dev, 0))
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
		running = some_qdisc_is_running(dev, 1);

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	INIT_LIST_HEAD(&dev_queue->qdisc_list);
}

void dev_init_scheduler(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
	qdisc_unlock_tree(dev);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
	qdisc_unlock_tree(dev);
}