/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
46 47
{
	if (unlikely(skb->next))
48
		q->gso_skb = skb;
49 50
	else
		q->ops->requeue(skb, q);
51

52
	__netif_schedule(q);
53 54 55
	return 0;
}

static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
57
{
58
	struct sk_buff *skb;
59

60 61
	if ((skb = q->gso_skb))
		q->gso_skb = NULL;
62 63 64 65 66 67
	else
		skb = q->dequeue(q);

	return skb;
}

static inline int handle_dev_cpu_collision(struct sk_buff *skb,
69
					   struct netdev_queue *dev_queue,
70
					   struct Qdisc *q)
71
{
72
	int ret;
73

74
	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
75 76 77 78 79 80
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
81
		kfree_skb(skb);
82 83
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
84
			       "fix it urgently!\n", dev_queue->dev->name);
85 86 87 88 89 90 91
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
92
		ret = dev_requeue_skb(skb, q);
93 94
	}

95
	return ret;
96 97
}

/*
99
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
100
 *
101
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
102 103
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
104 105 106
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
107
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
108 109 110 111 112 113 114 115 116
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
117
static inline int qdisc_restart(struct Qdisc *q)
L
Linus Torvalds 已提交
118
{
119
	struct netdev_queue *txq;
120
	int ret = NETDEV_TX_BUSY;
121
	struct net_device *dev;
122
	spinlock_t *root_lock;
123
	struct sk_buff *skb;
L
Linus Torvalds 已提交
124

125
	/* Dequeue packet */
126
	if (unlikely((skb = dequeue_skb(q)) == NULL))
127
		return 0;
128

129
	root_lock = qdisc_lock(q);
130 131 132

	/* And release qdisc */
	spin_unlock(root_lock);
133

134 135
	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
136

137
	HARD_TX_LOCK(dev, txq, smp_processor_id());
138 139
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
140
		ret = dev_hard_start_xmit(skb, dev, txq);
141
	HARD_TX_UNLOCK(dev, txq);
142

143
	spin_lock(root_lock);
144

145 146 147 148 149 150 151 152
	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
153
		ret = handle_dev_cpu_collision(skb, txq, q);
154 155 156 157 158 159 160 161
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

162
		ret = dev_requeue_skb(skb, q);
163 164
		break;
	}
165

166 167
	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
168 169
		ret = 0;

170
	return ret;
L
Linus Torvalds 已提交
171 172
}

void __qdisc_run(struct Qdisc *q)
H
Herbert Xu 已提交
174
{
175 176
	unsigned long start_time = jiffies;

177
	while (qdisc_restart(q)) {
178 179 180 181 182 183
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
184
			__netif_schedule(q);
185
			break;
186 187
		}
	}
H
Herbert Xu 已提交
188

189
	clear_bit(__QDISC_STATE_RUNNING, &q->state);
H
Herbert Xu 已提交
190 191
}

static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

H
Herbert Xu 已提交
196
	netif_tx_lock(dev);
197
	if (!qdisc_tx_is_noop(dev)) {
L
Linus Torvalds 已提交
198 199 200
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
201 202 203 204 205 206 207 208 209 210 211 212
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}
213

214 215 216
			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
217 218 219
				char drivername[64];
				printk(KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
				       dev->name, netdev_drivername(dev, drivername, 64));
L
Linus Torvalds 已提交
220
				dev->tx_timeout(dev);
221
				WARN_ON_ONCE(1);
L
Linus Torvalds 已提交
222
			}
223 224 225
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
L
Linus Torvalds 已提交
226 227 228
				dev_hold(dev);
		}
	}
H
Herbert Xu 已提交
229
	netif_tx_unlock(dev);
L
Linus Torvalds 已提交
230 231 232 233 234 235 236 237 238

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
239 240
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
L
Linus Torvalds 已提交
241 242 243 244 245 246 247 248 249 250 251
			dev_hold(dev);
	}
}

/* File-local wrapper: starts the watchdog of @dev via __netdev_watchdog_up(). */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
H
Herbert Xu 已提交
252
	netif_tx_lock_bh(dev);
L
Linus Torvalds 已提交
253
	if (del_timer(&dev->watchdog_timer))
254
		dev_put(dev);
H
Herbert Xu 已提交
255
	netif_tx_unlock_bh(dev);
L
Linus Torvalds 已提交
256 257
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected that carrier.
 */
264 265
void netif_carrier_on(struct net_device *dev)
{
J
Jeff Garzik 已提交
266
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
267
		linkwatch_fire_event(dev);
J
Jeff Garzik 已提交
268 269 270
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
271
}
272
EXPORT_SYMBOL(netif_carrier_on);
/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
280 281 282 283 284
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
285
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
L
Linus Torvalds 已提交
293 294 295 296 297
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
L
Linus Torvalds 已提交
299 300 301 302
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
304 305
{
	if (net_ratelimit())
306 307
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
L
Linus Torvalds 已提交
308 309 310 311
	kfree_skb(skb);
	return NET_XMIT_CN;
}

/* Operations table for the "noop" discipline; it carries no private data. */
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

/* Static netdev_queue that noop_qdisc refers to through its .dev_queue field. */
static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
329
	.ops		=	&noop_qdisc_ops,
L
Linus Torvalds 已提交
330
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
331
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
332
	.dev_queue	=	&noop_netdev_queue,
L
Linus Torvalds 已提交
333
};
334
EXPORT_SYMBOL(noop_qdisc);
/* Operations table for the "noqueue" discipline; reuses the noop hooks. */
static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

/*
 * Tentative declaration of noqueue_qdisc so that noqueue_netdev_queue can
 * take its address before the full initializer appears further down.
 */
static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
356 357
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
L
Linus Torvalds 已提交
358 359 360
};


/*
 * Band lookup table, indexed by (skb->priority & TC_PRIO_MAX).  The value
 * is the index of the per-band list used by prio2list(); band 0 is the
 * first one drained by pfifo_fast_dequeue().
 */
static const u8 prio2band[TC_PRIO_MAX+1] = {
	1, 2, 2, 2, 1, 2, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1,
};

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
378
{
379
	struct sk_buff_head *list = prio2list(skb, qdisc);
L
Linus Torvalds 已提交
380

381 382
	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
383
		return __qdisc_enqueue_tail(skb, qdisc, list);
384
	}
385 386

	return qdisc_drop(skb, qdisc);
L
Linus Torvalds 已提交
387 388
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
390
{
391 392
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);
L
Linus Torvalds 已提交
393

394 395 396 397 398 399
	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}
400

L
Linus Torvalds 已提交
401 402 403
	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
405
{
406 407
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
L
Linus Torvalds 已提交
408 409
}

static void pfifo_fast_reset(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
411
{
412 413 414 415 416 417
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

418
	qdisc->qstats.backlog = 0;
419
	qdisc->q.qlen = 0;
L
Linus Torvalds 已提交
420 421
}

/*
 * Dump the pfifo_fast configuration into @skb as a TCA_OPTIONS attribute:
 * a struct tc_prio_qopt carrying the band count and a copy of prio2band.
 *
 * Returns skb->len on success, -1 when the attribute could not be added.
 */
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	/* NLA_PUT jumps to nla_put_failure when the attribute cannot be added. */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

/*
 * Operations table for "pfifo_fast".  The private area holds one
 * sk_buff_head per band.
 */
static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
458
			  struct Qdisc_ops *ops)
L
Linus Torvalds 已提交
459 460 461
{
	void *p;
	struct Qdisc *sch;
462 463
	unsigned int size;
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
464 465

	/* ensure that the Qdisc and the private data are 32-byte aligned */
466 467
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);
L
Linus Torvalds 已提交
468

469
	p = kzalloc(size, GFP_KERNEL);
L
Linus Torvalds 已提交
470
	if (!p)
471 472 473
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;
L
Linus Torvalds 已提交
474 475 476 477 478 479

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
480
	sch->dev_queue = dev_queue;
481
	dev_hold(qdisc_dev(sch));
L
Linus Torvalds 已提交
482
	atomic_set(&sch->refcnt, 1);
483 484 485

	return sch;
errout:
486
	return ERR_PTR(err);
487 488
}

struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
492
				 unsigned int parentid)
493 494
{
	struct Qdisc *sch;
495

496
	sch = qdisc_alloc(dev_queue, ops);
497 498
	if (IS_ERR(sch))
		goto errout;
499
	sch->parent = parentid;
500

L
Linus Torvalds 已提交
501 502 503
	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

504
	qdisc_destroy(sch);
505
errout:
L
Linus Torvalds 已提交
506 507
	return NULL;
}
508
EXPORT_SYMBOL(qdisc_create_dflt);
/* Under qdisc_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
511 512 513

void qdisc_reset(struct Qdisc *qdisc)
{
514
	const struct Qdisc_ops *ops = qdisc->ops;
L
Linus Torvalds 已提交
515 516 517 518

	if (ops->reset)
		ops->reset(qdisc);
}
519
EXPORT_SYMBOL(qdisc_reset);
/* Under qdisc_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
522

523
void qdisc_destroy(struct Qdisc *qdisc)
L
Linus Torvalds 已提交
524
{
525 526
	const struct Qdisc_ops  *ops = qdisc->ops;

527 528 529 530 531 532 533
	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

	if (qdisc->parent)
		list_del(&qdisc->list);

534
#ifdef CONFIG_NET_SCHED
535
	qdisc_put_stab(qdisc->stab);
536
#endif
537 538 539 540 541 542 543 544 545
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

546 547
	kfree_skb(qdisc->gso_skb);

L
Linus Torvalds 已提交
548 549
	kfree((char *) qdisc - qdisc->padded);
}
550
EXPORT_SYMBOL(qdisc_destroy);
/*
 * Tell whether every tx queue of @dev still has noop_qdisc as its
 * sleeping qdisc.  Returns false as soon as one queue differs.
 */
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int idx = 0;

	while (idx < dev->num_tx_queues) {
		struct netdev_queue *queue = netdev_get_tx_queue(dev, idx);

		if (queue->qdisc_sleeping != &noop_qdisc)
			return false;
		idx++;
	}

	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
573
					  &pfifo_fast_ops, TC_H_ROOT);
574 575 576 577 578 579 580 581 582 583 584 585 586 587
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
588
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
589 590
	int *need_watchdog_p = _need_watchdog;

591 592 593
	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

594
	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
595
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
596 597 598
		*need_watchdog_p = 1;
}

void dev_activate(struct net_device *dev)
{
601
	int need_watchdog;
602

L
Linus Torvalds 已提交
603
	/* No queueing discipline is attached to device;
604 605 606
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
L
Linus Torvalds 已提交
607 608
	 */

609 610
	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
L
Linus Torvalds 已提交
611

612 613 614 615
	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

616 617
	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
618
	transition_one_qdisc(dev, &dev->rx_queue, NULL);
619 620

	if (need_watchdog) {
L
Linus Torvalds 已提交
621 622 623
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
624 625
}

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
629
{
630
	struct Qdisc *qdisc_default = _qdisc_default;
631 632 633
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
634
	if (qdisc) {
635 636
		spin_lock_bh(qdisc_lock(qdisc));

637 638 639
		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

640 641
		dev_queue->qdisc = qdisc_default;
		qdisc_reset(qdisc);
642

643
		spin_unlock_bh(qdisc_lock(qdisc));
644
	}
L
Linus Torvalds 已提交
645 646
}

static bool some_qdisc_is_busy(struct net_device *dev)
648 649 650 651 652
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
653
		spinlock_t *root_lock;
654
		struct Qdisc *q;
655 656 657
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
658
		q = dev_queue->qdisc_sleeping;
659
		root_lock = qdisc_lock(q);
660

661
		spin_lock_bh(root_lock);
662

663 664
		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));
665

666
		spin_unlock_bh(root_lock);
667 668 669 670 671 672 673

		if (val)
			return true;
	}
	return false;
}

void dev_deactivate(struct net_device *dev)
{
676
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
677
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
678

L
Linus Torvalds 已提交
679 680
	dev_watchdog_down(dev);

681
	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
682
	synchronize_rcu();
L
Linus Torvalds 已提交
683

684
	/* Wait for outstanding qdisc_run calls. */
685 686
	while (some_qdisc_is_busy(dev))
		yield();
L
Linus Torvalds 已提交
687 688
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
691
				     void *_qdisc)
692
{
693 694
	struct Qdisc *qdisc = _qdisc;

695 696 697 698
	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
701
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
702
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
L
Linus Torvalds 已提交
703

704
	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
L
Linus Torvalds 已提交
705 706
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
L
Linus Torvalds 已提交
710
{
711
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
712
	struct Qdisc *qdisc_default = _qdisc_default;
713 714

	if (qdisc) {
715
		spinlock_t *root_lock = qdisc_lock(qdisc);
716

717 718
		dev_queue->qdisc = qdisc_default;
		dev_queue->qdisc_sleeping = qdisc_default;
L
Linus Torvalds 已提交
719

720
		spin_lock_bh(root_lock);
L
Linus Torvalds 已提交
721
		qdisc_destroy(qdisc);
722
		spin_unlock_bh(root_lock);
723
	}
724 725 726 727
}

void dev_shutdown(struct net_device *dev)
{
728
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
729
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
730
	WARN_ON(timer_pending(&dev->watchdog_timer));
L
Linus Torvalds 已提交
731
}