/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * queue->lock spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via top level device
 *   spinlock queue->lock.
 * - ingress filtering is serialized via top level device
 *   spinlock dev->rx_queue.lock.
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

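/* Take every TX queue lock plus the RX queue lock, with BHs disabled,
 * so the whole qdisc tree of a device can be modified atomically from
 * process context. qdisc_unlock_tree() releases them again.
 */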
void qdisc_lock_tree(struct net_device *dev)
	__acquires(dev->rx_queue.lock)
{
	unsigned int i;

	local_bh_disable();
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_lock(&txq->lock);
	}
	spin_lock(&dev->rx_queue.lock);
}
EXPORT_SYMBOL(qdisc_lock_tree);

void qdisc_unlock_tree(struct net_device *dev)
	__releases(dev->rx_queue.lock)
{
	unsigned int i;

	spin_unlock(&dev->rx_queue.lock);
	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);
		spin_unlock(&txq->lock);
	}
	local_bh_enable();
}
EXPORT_SYMBOL(qdisc_unlock_tree);

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

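/* Put a packet back after a failed transmit. A partially sent GSO
 * segment list (skb->next set) is stashed in q->gso_skb rather than
 * handed back to the qdisc; either way the qdisc gets rescheduled.
 */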
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	if (unlikely(skb->next))
		q->gso_skb = skb;
	else
		q->ops->requeue(skb, q);

	__netif_schedule(q);
	return 0;
}

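/* A previously stashed GSO skb takes priority over a fresh dequeue. */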
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb;

	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
	else
		skb = q->dequeue(q);

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * NOTE: Called under queue->lock with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. queue->lock serializes queue accesses for
 * this queue AND txq->qdisc pointer itself.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  queue->lock and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_root_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_subqueue_stopped(dev, skb))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}
	if (ret && netif_tx_queue_stopped(txq))
		ret = 0;

	return ret;
}

void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}
	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}
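
/* For reference, the qdisc_run() wrapper that guards entry to
 * __qdisc_run() lives in include/net/pkt_sched.h and, at this point in
 * the tree's history, looks roughly like this (sketch, not verbatim):
 *
 *	static inline void qdisc_run(struct Qdisc *q)
 *	{
 *		if (!test_and_set_bit(__QDISC_STATE_RUNNING, &q->state))
 *			__qdisc_run(q);
 *	}
 */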

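/* Transmit watchdog: if some TX queue has been stopped for more than
 * dev->watchdog_timeo jiffies since the last transmission, invoke the
 * driver's tx_timeout() handler so it can recover the hardware.
 */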
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}
			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				printk(KERN_INFO "NETDEV WATCHDOG: %s: "
				       "transmit timed out\n",
				       dev->name);
				dev->tx_timeout(dev);
				WARN_ON_ONCE(1);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.lock		=	__SPIN_LOCK_UNLOCKED(noop_netdev_queue.lock),
	.qdisc		=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
};


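/* Map TC_PRIO_* values (0..TC_PRIO_MAX) to one of three bands; band 0
 * is scanned first on dequeue, so a lower band means higher priority.
 */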
static const u8 prio2band[TC_PRIO_MAX+1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

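/* Allocate a Qdisc with ops->priv_size bytes of private data placed
 * directly behind it; both are aligned to QDISC_ALIGNTO, and
 * sch->padded records the offset back to the original allocation.
 */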
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

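/* Convenience wrapper around qdisc_alloc(): set the parent handle and
 * run ops->init(); unlike qdisc_alloc() it returns NULL on failure.
 */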
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
				 unsigned int parentid)
{
	struct Qdisc *sch;
	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;
	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under queue->lock and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

/* this is the rcu callback function to clean up a qdisc when there
 * are no further references to it */

static void __qdisc_destroy(struct rcu_head *head)
{
	struct Qdisc *qdisc = container_of(head, struct Qdisc, q_rcu);
	const struct Qdisc_ops  *ops = qdisc->ops;

	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	kfree((char *) qdisc - qdisc->padded);
}

/* Under queue->lock and BH! */

void qdisc_destroy(struct Qdisc *qdisc)
{
	struct net_device *dev = qdisc_dev(qdisc);

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	spin_lock_bh(&dev->qdisc_list_lock);
	list_del(&qdisc->list);
	spin_unlock_bh(&dev->qdisc_list_lock);

	call_rcu(&qdisc->q_rcu, __qdisc_destroy);
}
EXPORT_SYMBOL(qdisc_destroy);

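/* Helpers for dev_activate() below: walk all TX queues, attach a
 * default pfifo_fast (or noqueue) qdisc where none is configured, and
 * swap each queue's sleeping qdisc into its active slot.
 */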
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
		spin_lock_bh(&dev->qdisc_list_lock);
		list_add_tail(&qdisc->list, &dev->qdisc_list);
		spin_unlock_bh(&dev->qdisc_list_lock);
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	int *need_watchdog_p = _need_watchdog;

	spin_lock_bh(&dev_queue->lock);
	rcu_assign_pointer(dev_queue->qdisc, dev_queue->qdisc_sleeping);
	if (dev_queue->qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
	spin_unlock_bh(&dev_queue->lock);
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct sk_buff *skb = NULL;
	struct Qdisc *qdisc;

	spin_lock_bh(&dev_queue->lock);

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);

		skb = qdisc->gso_skb;
		qdisc->gso_skb = NULL;
	}

	spin_unlock_bh(&dev_queue->lock);

	kfree_skb(skb);
}

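/* Return true if any TX queue's qdisc still has __QDISC_STATE_RUNNING
 * set. With 'lock' non-zero the bit is tested under the qdisc root
 * lock, ordering the result against a concurrent qdisc_run().
 */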
static bool some_qdisc_is_running(struct net_device *dev, int lock)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc;
		root_lock = qdisc_root_lock(q);

		if (lock)
			spin_lock_bh(root_lock);

		val = test_bit(__QDISC_STATE_RUNNING, &q->state);

		if (lock)
			spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
	bool running;

	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	do {
		while (some_qdisc_is_running(dev, 0))
			yield();

		/*
		 * Double-check inside queue lock to ensure that all effects
		 * of the queue run are visible when we return.
		 */
		running = some_qdisc_is_running(dev, 1);

		/*
		 * The running flag should never be set at this point because
		 * we've already set dev->qdisc to noop_qdisc *inside* the same
		 * pair of spin locks.  That is, if any qdisc_run starts after
		 * our initial test it should see the noop_qdisc and then
		 * clear the RUNNING bit before dropping the queue lock.  So
		 * if it is set here then we've found a bug.
		 */
	} while (WARN_ON_ONCE(running));
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, NULL);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		spinlock_t *root_lock = qdisc_root_lock(qdisc);

		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;

		spin_lock(root_lock);
		qdisc_destroy(qdisc);
		spin_unlock(root_lock);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, NULL);
	BUG_TRAP(!timer_pending(&dev->watchdog_timer));
}