/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * the qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via the qdisc root lock
 * - ingress filtering is also serialized via the qdisc root lock
 * - updates to the tree and tree walking are done only under the rtnl mutex.
 */

static inline int qdisc_qlen(struct Qdisc *q)
{
	return q->q.qlen;
}

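/* Put the skb back at the head of the qdisc's dedicated requeue list and
 * reschedule the qdisc, so the packet is retried first on the next run.
 */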
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	__skb_queue_head(&q->requeue, skb);

	__netif_schedule(q);
	return 0;
}

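/* Pick the next packet to transmit: previously requeued skbs take
 * precedence over whatever the qdisc's own ->dequeue() would hand out.
 */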
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
{
	struct sk_buff *skb;

	skb = __skb_dequeue(&q->requeue);
	if (!skb)
		skb = q->dequeue(q);

	return skb;
}

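/* Called when the driver returned NETDEV_TX_LOCKED, i.e. its xmit lock is
 * already taken: either by ourselves (a recursion bug, drop the packet)
 * or by another CPU (a genuine collision, requeue and retry later).
 */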
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
					   struct netdev_queue *dev_queue,
					   struct Qdisc *q)
{
	int ret;

	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
		kfree_skb(skb);
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
			       "fix it urgently!\n", dev_queue->dev->name);
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
		ret = dev_requeue_skb(skb, q);
	}

	return ret;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, the other must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
static inline int qdisc_restart(struct Qdisc *q)
{
	struct netdev_queue *txq;
	int ret = NETDEV_TX_BUSY;
	struct net_device *dev;
	spinlock_t *root_lock;
	struct sk_buff *skb;

	/* Dequeue packet */
	if (unlikely((skb = dequeue_skb(q)) == NULL))
		return 0;

	root_lock = qdisc_lock(q);

	/* And release qdisc */
	spin_unlock(root_lock);

	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));

	HARD_TX_LOCK(dev, txq, smp_processor_id());
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
		ret = dev_hard_start_xmit(skb, dev, txq);
	HARD_TX_UNLOCK(dev, txq);

	spin_lock(root_lock);

	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
		ret = handle_dev_cpu_collision(skb, txq, q);
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

		ret = dev_requeue_skb(skb, q);
		break;
	}

	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
		ret = 0;

	return ret;
}

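/* Drain the qdisc until it is empty, another task needs the CPU or one
 * jiffy has elapsed; in the latter cases transmission is rescheduled via
 * __netif_schedule(). Finally drop the RUNNING bit taken by our caller.
 */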
void __qdisc_run(struct Qdisc *q)
{
	unsigned long start_time = jiffies;

	while (qdisc_restart(q)) {
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
			__netif_schedule(q);
			break;
		}
	}

	clear_bit(__QDISC_STATE_RUNNING, &q->state);
}

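/* Per-device TX watchdog timer: if the device is up and running with
 * carrier and at least one queue has been stopped for longer than
 * dev->watchdog_timeo, warn and call the driver's tx_timeout() handler.
 */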
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}

			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
				char drivername[64];
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
					  dev->name, netdev_drivername(dev, drivername, 64));
				dev->tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

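/* (Re)arm the watchdog timer, provided the driver has a tx_timeout
 * handler; a missing or bogus timeout defaults to 5 seconds.
 */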
void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state))
		linkwatch_fire_event(dev);
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

static int noop_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	if (net_ratelimit())
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
	kfree_skb(skb);
	return NET_XMIT_CN;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
};
EXPORT_SYMBOL(noop_qdisc);

static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.requeue	=	noop_requeue,
	.owner		=	THIS_MODULE,
};

static struct Qdisc noqueue_qdisc;
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc		=	&noqueue_qdisc,
};

static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
};


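/* Map skb->priority (TC_PRIO_*) to one of the three pfifo_fast bands;
 * lower bands are dequeued first, so band 0 is the highest priority.
 */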
static const u8 prio2band[TC_PRIO_MAX + 1] =
	{ 1, 2, 2, 2, 1, 2, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 };

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	struct sk_buff_head *list = prio2list(skb, qdisc);

	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
		return __qdisc_enqueue_tail(skb, qdisc, list);
	}

	return qdisc_drop(skb, qdisc);
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}

	return NULL;
}

static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc *qdisc)
{
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

	qdisc->qstats.backlog = 0;
	qdisc->q.qlen = 0;
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.requeue	=	pfifo_fast_requeue,
	.init		=	pfifo_fast_init,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
};

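/* Allocate a Qdisc together with ops->priv_size bytes of private data,
 * both aligned to QDISC_ALIGNTO; holds a reference on the underlying
 * device for the lifetime of the qdisc.
 */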
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  struct Qdisc_ops *ops)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size;
	int err = -ENOBUFS;

	/* ensure that the Qdisc and the private data are 32-byte aligned */
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);

	p = kzalloc(size, GFP_KERNEL);
	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;

	INIT_LIST_HEAD(&sch->list);
	skb_queue_head_init(&sch->requeue);
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(qdisc_dev(sch));
	atomic_set(&sch->refcnt, 1);

	return sch;
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct net_device *dev,
				struct netdev_queue *dev_queue,
				struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	sch = qdisc_alloc(dev_queue, ops);
	if (IS_ERR(sch))
		goto errout;
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
errout:
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (ops->reset)
		ops->reset(qdisc);
}
EXPORT_SYMBOL(qdisc_reset);

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_list_del(qdisc);

	qdisc_put_stab(qdisc->stab);
#endif
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	__skb_queue_purge(&qdisc->requeue);

	kfree((char *) qdisc - qdisc->padded);
}
EXPORT_SYMBOL(qdisc_destroy);

static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
					  &pfifo_fast_ops, TC_H_ROOT);
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc = &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

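/* Make the sleeping qdisc the active one for this queue and record
 * whether the caller needs to start the TX watchdog.
 */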
static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
		*need_watchdog_p = 1;
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to the device;
	   create a default one, i.e. pfifo_fast for devices
	   which need queueing and noqueue_qdisc for virtual
	   interfaces.
	 */

	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	transition_one_qdisc(dev, &dev->rx_queue, NULL);

	if (need_watchdog) {
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
}

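/* Swap the queue's active qdisc for _qdisc_default (normally &noop_qdisc)
 * and flush any packets it still holds, all under the qdisc lock.
 */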
static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

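/* True if any TX queue's sleeping qdisc is still running or scheduled
 * for transmission; checked under the per-qdisc lock.
 */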
static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;
		root_lock = qdisc_lock(q);

		spin_lock_bh(root_lock);

		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));

		spin_unlock_bh(root_lock);

		if (val)
			return true;
	}
	return false;
}

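/* Detach all qdiscs from the device, stop the watchdog, and wait until
 * every concurrent dev_queue_xmit() and qdisc_run() call has drained.
 */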
void dev_deactivate(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);

	dev_watchdog_down(dev);

	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
	synchronize_rcu();

	/* Wait for outstanding qdisc_run calls. */
	while (some_qdisc_is_busy(dev))
		yield();
}

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

void dev_init_scheduler(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);

	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

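/* Called when the device is unregistered: destroy every attached qdisc
 * and point all queues back at the built-in noop qdisc.
 */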
void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
	WARN_ON(timer_pending(&dev->watchdog_timer));
}