/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * queue->lock spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via top level device
 *   spinlock queue->lock.
 * - ingress filtering is serialized via top level device
 *   spinlock dev->rx_queue.lock.
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

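/* Take every TX queue lock plus the RX queue lock, with BH disabled,
 * so nothing in the qdisc tree can change underneath the caller.
 */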
void qdisc_lock_tree(struct net_device *dev)
	__acquires(dev->rx_queue.lock)
{
	unsigned int i;

	local_bh_disable();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_lock(&txq->lock);
	}
	spin_lock(&dev->rx_queue.lock);
}
EXPORT_SYMBOL(qdisc_lock_tree);

void qdisc_unlock_tree(struct net_device *dev)
	__releases(dev->rx_queue.lock)
{
	unsigned int i;

	spin_unlock(&dev->rx_queue.lock);
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_unlock(&txq->lock);
	}
	local_bh_enable();
}
EXPORT_SYMBOL(qdisc_unlock_tree);

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

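/* Put back a packet the driver could not take.  A partially-sent GSO
 * fragment chain (skb->next still set) is stashed in
 * dev_queue->gso_skb instead of going back through the qdisc; either
 * way the queue is rescheduled so the transmission is retried.
 */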
static inline int dev_requeue_skb(struct sk_buff *skb,
				  struct netdev_queue *dev_queue,
				  struct Qdisc *q)
{
	if (unlikely(skb->next))
		dev_queue->gso_skb = skb;
	else
		q->ops->requeue(skb, q);

	netif_schedule_queue(dev_queue);
	return 0;
}

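/* A stashed GSO chain takes precedence over the qdisc's own queue,
 * presumably so transmit order is preserved across a requeue.
 */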
static inline struct sk_buff *dequeue_skb(struct netdev_queue *dev_queue,
					  struct Qdisc *q)
{
	struct sk_buff *skb;

	if ((skb = dev_queue->gso_skb))
		dev_queue->gso_skb = NULL;
	else
		skb = q->dequeue(q);

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, dev_queue, q);
	}

	return ret;
}

/*
 * NOTE: Called under queue->lock with locally disabled BH.
 *
 * __QUEUE_STATE_QDISC_RUNNING guarantees only one CPU can process
 * this queue at a time. queue->lock serializes queue accesses for
 * this queue AND txq->qdisc pointer itself.
 *
 * netif_tx_lock serializes accesses to device driver.
 *
 * queue->lock and netif_tx_lock are mutually exclusive,
 * if one is grabbed, another must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 */
static inline int qdisc_restart(struct netdev_queue *txq)
{
	struct Qdisc *q = txq->qdisc;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(txq, q)) == NULL))
		return 0;

	/* And release queue */
	spin_unlock(&txq->lock);

	dev = txq->dev;

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_subqueue_stopped(dev, skb))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(&txq->lock);
	q = txq->qdisc;

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, txq, q);
		break;
	}

	return ret;
}

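/* Drain the queue until it is empty, the driver pushes back, or this
 * CPU's jiffy slice expires; in the last case the queue is
 * rescheduled instead of monopolizing the CPU.
 */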
void __qdisc_run(struct netdev_queue *txq)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(txq)) {
		if (netif_tx_queue_stopped(txq))
			break;

		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			netif_schedule_queue(txq);
			break;
		}
	}

	clear_bit(__QUEUE_STATE_QDISC_RUNNING, &txq->state);
}

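/* Watchdog timer: if any TX queue has been stopped for longer than
 * dev->watchdog_timeo since the last transmission, log the stall and
 * invoke the driver's tx_timeout() handler.
 */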
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
				       dev->name);
				dev->tx_timeout(dev);
				WARN_ON_ONCE(1);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};

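/* prio2band[] maps the 16 TC_PRIO_* values (indexed by
 * skb->priority & TC_PRIO_MAX) onto the three pfifo_fast bands;
 * band 0 is dequeued first.
 */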
static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

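/* Enqueue at the tail of the band selected by the packet's priority.
 * Each band may hold at most tx_queue_len packets; beyond that the
 * packet is dropped and accounted via qdisc_drop().
 */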
static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}
static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

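/* qdisc_alloc() over-allocates by QDISC_ALIGNTO - 1 bytes and rounds
 * the Qdisc pointer up, so the Qdisc and its private data are both
 * 32-byte aligned; sch->padded records the offset so __qdisc_destroy()
 * can recover the original allocation for kfree().
 */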
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct net_device *dev,
				struct netdev_queue *dev_queue,
				struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under queue->lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

/* This is the RCU callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	kfree((char *) qdisc - qdisc->padded);
}

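/* qdisc_destroy() defers the final kfree() to an RCU grace period via
 * __qdisc_destroy() above, so readers that picked up the old qdisc
 * pointer without a lock can finish with it safely.
 */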
/* Under queue->lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	list_del(&qdisc->list);
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));
	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
EXPORT_SYMBOL(qdisc_destroy);

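/* Returns true iff every TX queue still has the noop qdisc parked in
 * qdisc_sleeping, i.e. no real qdisc has been attached yet.
 */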
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

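/* Give one queue its default qdisc: pfifo_fast when the device has a
 * tx_queue_len, otherwise the shared noqueue_qdisc used for virtual
 * interfaces.  Only qdisc_sleeping is set here; transition_one_qdisc()
 * makes it the active qdisc.
 */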
static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
		list_add_tail(&qdisc->list, &dev_queue->qdisc_list);
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

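/* Promote qdisc_sleeping to the active per-queue qdisc under the
 * queue lock, and note whether the watchdog timer is needed (it is
 * for anything other than the noqueue qdisc).
 */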
static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	int *need_watchdog_p = _need_watchdog;

	spin_lock_bh(&dev_queue->lock);
	rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping);
	if (dev_queue->qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
	spin_unlock_bh(&dev_queue->lock);
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to the device;
	 * create a default one, i.e. pfifo_fast for devices which
	 * need queueing, and noqueue_qdisc for virtual interfaces.
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;
	struct sk_buff *skb;

	spin_lock_bh(&dev_queue->lock);

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);
	}
	skb = dev_queue->gso_skb;
	dev_queue->gso_skb = NULL;

	spin_unlock_bh(&dev_queue->lock);

	kfree_skb(skb);
}

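/* Poll helper for dev_deactivate(): reports whether any TX queue
 * still has __QUEUE_STATE_QDISC_RUNNING set.  With 'lock' true the
 * bit is sampled under the queue lock, ordering the check against a
 * concurrent qdisc_run().
 */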
static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);

		if (lock)
			spin_lock_bh(&dev_queue->lock);

		val = test_bit(__QUEUE_STATE_QDISC_RUNNING, &dev_queue->state);

		if (lock)
			spin_unlock_bh(&dev_queue->lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	bool running;

	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	do {
		while (some_qdisc_is_running(dev, 0))
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
		running = some_qdisc_is_running(dev, 1);

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	INIT_LIST_HEAD(&dev_queue->qdisc_list);
}

void dev_init_scheduler(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);
	qdisc_unlock_tree(dev);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	qdisc_lock_tree(dev);
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
	qdisc_unlock_tree(dev);
}