sch_generic.c 17.0 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <net/pkt_sched.h>

/* Main transmission queue. */

31
/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */

40 41 42 43 44
/* Return the queue-length counter (q->q.qlen) of @q as an int. */
static inline int qdisc_qlen(struct Qdisc *q)
{
	int queued = q->q.qlen;

	return queued;
}

45
static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
46
{
47
	q->gso_skb = skb;
48
	__netif_schedule(q);
49

50 51 52
	return 0;
}

53
static inline struct sk_buff *dequeue_skb(struct Qdisc *q)
54
{
55 56
	struct sk_buff *skb = q->gso_skb;

57 58 59 60 61 62
	if (unlikely(skb)) {
		struct net_device *dev = qdisc_dev(q);
		struct netdev_queue *txq;

		/* check the reason of requeuing without tx lock first */
		txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
63 64 65
		if (!netif_tx_queue_stopped(txq) && !netif_tx_queue_frozen(txq))
			q->gso_skb = NULL;
		else
66 67
			skb = NULL;
	} else {
68
		skb = q->dequeue(q);
69
	}
70 71 72 73

	return skb;
}

74
static inline int handle_dev_cpu_collision(struct sk_buff *skb,
75
					   struct netdev_queue *dev_queue,
76
					   struct Qdisc *q)
77
{
78
	int ret;
79

80
	if (unlikely(dev_queue->xmit_lock_owner == smp_processor_id())) {
81 82 83 84 85 86
		/*
		 * Same CPU holding the lock. It may be a transient
		 * configuration error, when hard_start_xmit() recurses. We
		 * detect it by checking xmit owner and drop the packet when
		 * deadloop is detected. Return OK to try the next skb.
		 */
87
		kfree_skb(skb);
88 89
		if (net_ratelimit())
			printk(KERN_WARNING "Dead loop on netdevice %s, "
90
			       "fix it urgently!\n", dev_queue->dev->name);
91 92 93 94 95 96 97
		ret = qdisc_qlen(q);
	} else {
		/*
		 * Another cpu is holding lock, requeue & delay xmits for
		 * some time.
		 */
		__get_cpu_var(netdev_rx_stat).cpu_collision++;
98
		ret = dev_requeue_skb(skb, q);
99 100
	}

101
	return ret;
102 103
}

104
/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * __QDISC_STATE_RUNNING guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note that this procedure can be called by a watchdog timer.
 *
 * Returns to the caller:
 *				0  - queue is empty or throttled.
 *				>0 - queue is not empty.
 *
 */
123
static inline int qdisc_restart(struct Qdisc *q)
L
Linus Torvalds 已提交
124
{
125
	struct netdev_queue *txq;
126
	int ret = NETDEV_TX_BUSY;
127
	struct net_device *dev;
128
	spinlock_t *root_lock;
129
	struct sk_buff *skb;
L
Linus Torvalds 已提交
130

131
	/* Dequeue packet */
132
	if (unlikely((skb = dequeue_skb(q)) == NULL))
133
		return 0;
134

135
	root_lock = qdisc_lock(q);
136 137 138

	/* And release qdisc */
	spin_unlock(root_lock);
139

140 141
	dev = qdisc_dev(q);
	txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb));
142

143
	HARD_TX_LOCK(dev, txq, smp_processor_id());
144 145
	if (!netif_tx_queue_stopped(txq) &&
	    !netif_tx_queue_frozen(txq))
146
		ret = dev_hard_start_xmit(skb, dev, txq);
147
	HARD_TX_UNLOCK(dev, txq);
148

149
	spin_lock(root_lock);
150

151 152 153 154 155 156 157 158
	switch (ret) {
	case NETDEV_TX_OK:
		/* Driver sent out skb successfully */
		ret = qdisc_qlen(q);
		break;

	case NETDEV_TX_LOCKED:
		/* Driver try lock failed */
159
		ret = handle_dev_cpu_collision(skb, txq, q);
160 161 162 163 164 165 166 167
		break;

	default:
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely (ret != NETDEV_TX_BUSY && net_ratelimit()))
			printk(KERN_WARNING "BUG %s code %d qlen %d\n",
			       dev->name, ret, q->q.qlen);

168
		ret = dev_requeue_skb(skb, q);
169 170
		break;
	}
171

172 173
	if (ret && (netif_tx_queue_stopped(txq) ||
		    netif_tx_queue_frozen(txq)))
174 175
		ret = 0;

176
	return ret;
L
Linus Torvalds 已提交
177 178
}

179
void __qdisc_run(struct Qdisc *q)
H
Herbert Xu 已提交
180
{
181 182
	unsigned long start_time = jiffies;

183
	while (qdisc_restart(q)) {
184 185 186 187 188 189
		/*
		 * Postpone processing if
		 * 1. another process needs the CPU;
		 * 2. we've been doing it for too long.
		 */
		if (need_resched() || jiffies != start_time) {
190
			__netif_schedule(q);
191
			break;
192 193
		}
	}
H
Herbert Xu 已提交
194

195
	clear_bit(__QDISC_STATE_RUNNING, &q->state);
H
Herbert Xu 已提交
196 197
}

L
Linus Torvalds 已提交
198 199 200 201
static void dev_watchdog(unsigned long arg)
{
	struct net_device *dev = (struct net_device *)arg;

H
Herbert Xu 已提交
202
	netif_tx_lock(dev);
203
	if (!qdisc_tx_is_noop(dev)) {
L
Linus Torvalds 已提交
204 205 206
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
207 208 209 210 211 212 213 214 215 216 217 218
			int some_queue_stopped = 0;
			unsigned int i;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				if (netif_tx_queue_stopped(txq)) {
					some_queue_stopped = 1;
					break;
				}
			}
219

220 221 222
			if (some_queue_stopped &&
			    time_after(jiffies, (dev->trans_start +
						 dev->watchdog_timeo))) {
223
				char drivername[64];
224
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit timed out\n",
225
				       dev->name, netdev_drivername(dev, drivername, 64));
L
Linus Torvalds 已提交
226 227
				dev->tx_timeout(dev);
			}
228 229 230
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
L
Linus Torvalds 已提交
231 232 233
				dev_hold(dev);
		}
	}
H
Herbert Xu 已提交
234
	netif_tx_unlock(dev);
L
Linus Torvalds 已提交
235 236 237 238 239 240 241 242 243

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
244 245
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
L
Linus Torvalds 已提交
246 247 248 249 250 251 252 253 254 255 256
			dev_hold(dev);
	}
}

/* File-local wrapper that simply forwards to __netdev_watchdog_up(). */
static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
H
Herbert Xu 已提交
257
	netif_tx_lock_bh(dev);
L
Linus Torvalds 已提交
258
	if (del_timer(&dev->watchdog_timer))
259
		dev_put(dev);
H
Herbert Xu 已提交
260
	netif_tx_unlock_bh(dev);
L
Linus Torvalds 已提交
261 262
}

263 264 265 266 267 268
/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
269 270
void netif_carrier_on(struct net_device *dev)
{
J
Jeff Garzik 已提交
271
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
272
		linkwatch_fire_event(dev);
J
Jeff Garzik 已提交
273 274 275
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
276
}
277
EXPORT_SYMBOL(netif_carrier_on);
278

279 280 281 282 283 284
/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
285 286 287 288 289
void netif_carrier_off(struct net_device *dev)
{
	int already_off;

	/* Fire a linkwatch event only on the set transition of NOCARRIER. */
	already_off = test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state);
	if (already_off)
		return;

	linkwatch_fire_event(dev);
}
290
EXPORT_SYMBOL(netif_carrier_off);
291

L
Linus Torvalds 已提交
292 293 294 295 296
/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

297
static int noop_enqueue(struct sk_buff *skb, struct Qdisc * qdisc)
L
Linus Torvalds 已提交
298 299 300 301 302
{
	kfree_skb(skb);
	return NET_XMIT_CN;
}

303
static struct sk_buff *noop_dequeue(struct Qdisc * qdisc)
L
Linus Torvalds 已提交
304 305 306 307
{
	return NULL;
}

308
static int noop_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
309 310
{
	if (net_ratelimit())
311 312
		printk(KERN_DEBUG "%s deferred output. It is buggy.\n",
		       skb->dev->name);
L
Linus Torvalds 已提交
313 314 315 316
	kfree_skb(skb);
	return NET_XMIT_CN;
}

317
/* Operations table of the "noop" qdisc; no private data is reserved. */
struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id        = "noop",
	.priv_size = 0,
	.enqueue   = noop_enqueue,
	.dequeue   = noop_dequeue,
	.requeue   = noop_requeue,
	.owner     = THIS_MODULE,
};

326 327 328 329
/* Static netdev_queue that noop_qdisc.dev_queue points at. */
static struct netdev_queue noop_netdev_queue = {
	.qdisc = &noop_qdisc,
};

L
Linus Torvalds 已提交
330 331 332 333
struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
334
	.ops		=	&noop_qdisc_ops,
L
Linus Torvalds 已提交
335
	.list		=	LIST_HEAD_INIT(noop_qdisc.list),
336
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
337
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
338
	.dev_queue	=	&noop_netdev_queue,
L
Linus Torvalds 已提交
339
};
340
EXPORT_SYMBOL(noop_qdisc);
L
Linus Torvalds 已提交
341

342
/*
 * Operations table of the "noqueue" qdisc.  It reuses the noop hooks
 * and reserves no private data.
 */
static struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id        = "noqueue",
	.priv_size = 0,
	.enqueue   = noop_enqueue,
	.dequeue   = noop_dequeue,
	.requeue   = noop_requeue,
	.owner     = THIS_MODULE,
};

351 352 353 354 355
/* Forward declaration: the queue below and the qdisc refer to each other. */
static struct Qdisc noqueue_qdisc;

/* Static netdev_queue that noqueue_qdisc.dev_queue points at. */
static struct netdev_queue noqueue_netdev_queue = {
	.qdisc = &noqueue_qdisc,
};

L
Linus Torvalds 已提交
356 357 358 359 360 361
static struct Qdisc noqueue_qdisc = {
	.enqueue	=	NULL,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noqueue_qdisc_ops,
	.list		=	LIST_HEAD_INIT(noqueue_qdisc.list),
362
	.requeue.lock	=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
363 364
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noqueue_qdisc.q.lock),
	.dev_queue	=	&noqueue_netdev_queue,
L
Linus Torvalds 已提交
365 366 367
};


368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384
/* Band index selected for each value of (skb->priority & TC_PRIO_MAX). */
static const u8 prio2band[TC_PRIO_MAX+1] = {
	1, 2, 2, 2, 1, 2, 0, 0,
	1, 1, 1, 1, 1, 1, 1, 1,
};

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

static inline struct sk_buff_head *prio2list(struct sk_buff *skb,
					     struct Qdisc *qdisc)
{
	struct sk_buff_head *list = qdisc_priv(qdisc);
	return list + prio2band[skb->priority & TC_PRIO_MAX];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc* qdisc)
385
{
386
	struct sk_buff_head *list = prio2list(skb, qdisc);
L
Linus Torvalds 已提交
387

388 389
	if (skb_queue_len(list) < qdisc_dev(qdisc)->tx_queue_len) {
		qdisc->q.qlen++;
390
		return __qdisc_enqueue_tail(skb, qdisc, list);
391
	}
392 393

	return qdisc_drop(skb, qdisc);
L
Linus Torvalds 已提交
394 395
}

396
static struct sk_buff *pfifo_fast_dequeue(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
397
{
398 399
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);
L
Linus Torvalds 已提交
400

401 402 403 404 405 406
	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		if (!skb_queue_empty(list + prio)) {
			qdisc->q.qlen--;
			return __qdisc_dequeue_head(qdisc, list + prio);
		}
	}
407

L
Linus Torvalds 已提交
408 409 410
	return NULL;
}

411
static int pfifo_fast_requeue(struct sk_buff *skb, struct Qdisc* qdisc)
L
Linus Torvalds 已提交
412
{
413 414
	qdisc->q.qlen++;
	return __qdisc_requeue(skb, qdisc, prio2list(skb, qdisc));
L
Linus Torvalds 已提交
415 416
}

417
static void pfifo_fast_reset(struct Qdisc* qdisc)
L
Linus Torvalds 已提交
418
{
419 420 421 422 423 424
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		__qdisc_reset_queue(qdisc, list + prio);

425
	qdisc->qstats.backlog = 0;
426
	qdisc->q.qlen = 0;
L
Linus Torvalds 已提交
427 428
}

429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
/*
 * Dump the pfifo_fast configuration into @skb as a TCA_OPTIONS
 * attribute holding a struct tc_prio_qopt (band count plus a copy of
 * prio2band[]).
 *
 * Returns skb->len on success, or -1 when control reaches the
 * nla_put_failure label.
 */
static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX+1);
	/* NOTE(review): NLA_PUT presumably jumps to nla_put_failure on error -- confirm. */
	NLA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt);
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt)
{
	int prio;
	struct sk_buff_head *list = qdisc_priv(qdisc);

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++)
		skb_queue_head_init(list + prio);

	return 0;
}

/*
 * Operations table of the "pfifo_fast" qdisc.  The private area holds
 * one sk_buff_head per band.
 */
static struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id        = "pfifo_fast",
	.priv_size = PFIFO_FAST_BANDS * sizeof(struct sk_buff_head),
	.enqueue   = pfifo_fast_enqueue,
	.dequeue   = pfifo_fast_dequeue,
	.requeue   = pfifo_fast_requeue,
	.init      = pfifo_fast_init,
	.reset     = pfifo_fast_reset,
	.dump      = pfifo_fast_dump,
	.owner     = THIS_MODULE,
};

464
struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
465
			  struct Qdisc_ops *ops)
L
Linus Torvalds 已提交
466 467 468
{
	void *p;
	struct Qdisc *sch;
469 470
	unsigned int size;
	int err = -ENOBUFS;
L
Linus Torvalds 已提交
471 472

	/* ensure that the Qdisc and the private data are 32-byte aligned */
473 474
	size = QDISC_ALIGN(sizeof(*sch));
	size += ops->priv_size + (QDISC_ALIGNTO - 1);
L
Linus Torvalds 已提交
475

476
	p = kzalloc(size, GFP_KERNEL);
L
Linus Torvalds 已提交
477
	if (!p)
478 479 480
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	sch->padded = (char *) sch - (char *) p;
L
Linus Torvalds 已提交
481 482

	INIT_LIST_HEAD(&sch->list);
483
	skb_queue_head_init(&sch->requeue);
L
Linus Torvalds 已提交
484 485 486 487
	skb_queue_head_init(&sch->q);
	sch->ops = ops;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
488
	sch->dev_queue = dev_queue;
489
	dev_hold(qdisc_dev(sch));
L
Linus Torvalds 已提交
490
	atomic_set(&sch->refcnt, 1);
491 492 493

	return sch;
errout:
494
	return ERR_PTR(err);
495 496
}

497 498 499
struct Qdisc * qdisc_create_dflt(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 struct Qdisc_ops *ops,
500
				 unsigned int parentid)
501 502
{
	struct Qdisc *sch;
503

504
	sch = qdisc_alloc(dev_queue, ops);
505 506
	if (IS_ERR(sch))
		goto errout;
507
	sch->parent = parentid;
508

L
Linus Torvalds 已提交
509 510 511
	if (!ops->init || ops->init(sch, NULL) == 0)
		return sch;

512
	qdisc_destroy(sch);
513
errout:
L
Linus Torvalds 已提交
514 515
	return NULL;
}
516
EXPORT_SYMBOL(qdisc_create_dflt);
L
Linus Torvalds 已提交
517

518
/* Under qdisc_lock(qdisc) and BH! */
L
Linus Torvalds 已提交
519 520 521

void qdisc_reset(struct Qdisc *qdisc)
{
522
	const struct Qdisc_ops *ops = qdisc->ops;
L
Linus Torvalds 已提交
523 524 525 526

	if (ops->reset)
		ops->reset(qdisc);
}
527
EXPORT_SYMBOL(qdisc_reset);
L
Linus Torvalds 已提交
528

529
void qdisc_destroy(struct Qdisc *qdisc)
L
Linus Torvalds 已提交
530
{
531 532
	const struct Qdisc_ops  *ops = qdisc->ops;

533 534 535 536
	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !atomic_dec_and_test(&qdisc->refcnt))
		return;

537
#ifdef CONFIG_NET_SCHED
538 539
	qdisc_list_del(qdisc);

540
	qdisc_put_stab(qdisc->stab);
541
#endif
542 543 544 545 546 547 548 549 550
	gen_kill_estimator(&qdisc->bstats, &qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

551
	kfree_skb(qdisc->gso_skb);
552
	__skb_queue_purge(&qdisc->requeue);
553

L
Linus Torvalds 已提交
554 555
	kfree((char *) qdisc - qdisc->padded);
}
556
EXPORT_SYMBOL(qdisc_destroy);
L
Linus Torvalds 已提交
557

558 559 560 561 562 563 564 565 566 567 568 569 570 571 572 573 574 575 576 577 578
static bool dev_all_qdisc_sleeping_noop(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *txq = netdev_get_tx_queue(dev, i);

		if (txq->qdisc_sleeping != &noop_qdisc)
			return false;
	}
	return true;
}

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;

	if (dev->tx_queue_len) {
		qdisc = qdisc_create_dflt(dev, dev_queue,
579
					  &pfifo_fast_ops, TC_H_ROOT);
580 581 582 583 584 585 586 587 588 589 590 591 592 593
		if (!qdisc) {
			printk(KERN_INFO "%s: activation failed\n", dev->name);
			return;
		}
	} else {
		qdisc =  &noqueue_qdisc;
	}
	dev_queue->qdisc_sleeping = qdisc;
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
594
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
595 596
	int *need_watchdog_p = _need_watchdog;

597 598 599
	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

600
	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
601
	if (need_watchdog_p && new_qdisc != &noqueue_qdisc)
602 603 604
		*need_watchdog_p = 1;
}

L
Linus Torvalds 已提交
605 606
void dev_activate(struct net_device *dev)
{
607
	int need_watchdog;
608

L
Linus Torvalds 已提交
609
	/* No queueing discipline is attached to device;
610 611 612
	   create default one i.e. pfifo_fast for devices,
	   which need queueing and noqueue_qdisc for
	   virtual interfaces
L
Linus Torvalds 已提交
613 614
	 */

615 616
	if (dev_all_qdisc_sleeping_noop(dev))
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
L
Linus Torvalds 已提交
617

618 619 620 621
	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

622 623
	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
624
	transition_one_qdisc(dev, &dev->rx_queue, NULL);
625 626

	if (need_watchdog) {
L
Linus Torvalds 已提交
627 628 629
		dev->trans_start = jiffies;
		dev_watchdog_up(dev);
	}
630 631
}

632 633 634
static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
635
{
636
	struct Qdisc *qdisc_default = _qdisc_default;
637 638 639
	struct Qdisc *qdisc;

	qdisc = dev_queue->qdisc;
640
	if (qdisc) {
641 642
		spin_lock_bh(qdisc_lock(qdisc));

643 644 645
		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

646
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
647
		qdisc_reset(qdisc);
648

649
		spin_unlock_bh(qdisc_lock(qdisc));
650
	}
L
Linus Torvalds 已提交
651 652
}

653
static bool some_qdisc_is_busy(struct net_device *dev)
654 655 656 657 658
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
659
		spinlock_t *root_lock;
660
		struct Qdisc *q;
661 662 663
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
664
		q = dev_queue->qdisc_sleeping;
665
		root_lock = qdisc_lock(q);
666

667
		spin_lock_bh(root_lock);
668

669 670
		val = (test_bit(__QDISC_STATE_RUNNING, &q->state) ||
		       test_bit(__QDISC_STATE_SCHED, &q->state));
671

672
		spin_unlock_bh(root_lock);
673 674 675 676 677 678 679

		if (val)
			return true;
	}
	return false;
}

L
Linus Torvalds 已提交
680 681
void dev_deactivate(struct net_device *dev)
{
682
	netdev_for_each_tx_queue(dev, dev_deactivate_queue, &noop_qdisc);
683
	dev_deactivate_queue(dev, &dev->rx_queue, &noop_qdisc);
684

L
Linus Torvalds 已提交
685 686
	dev_watchdog_down(dev);

687
	/* Wait for outstanding qdisc-less dev_queue_xmit calls. */
688
	synchronize_rcu();
L
Linus Torvalds 已提交
689

690
	/* Wait for outstanding qdisc_run calls. */
691 692
	while (some_qdisc_is_busy(dev))
		yield();
L
Linus Torvalds 已提交
693 694
}

695 696
static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
697
				     void *_qdisc)
698
{
699 700
	struct Qdisc *qdisc = _qdisc;

701 702 703 704
	dev_queue->qdisc = qdisc;
	dev_queue->qdisc_sleeping = qdisc;
}

L
Linus Torvalds 已提交
705 706
void dev_init_scheduler(struct net_device *dev)
{
707
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
708
	dev_init_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
L
Linus Torvalds 已提交
709

710
	setup_timer(&dev->watchdog_timer, dev_watchdog, (unsigned long)dev);
L
Linus Torvalds 已提交
711 712
}

713 714 715
static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
L
Linus Torvalds 已提交
716
{
717
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
718
	struct Qdisc *qdisc_default = _qdisc_default;
719 720

	if (qdisc) {
721
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
722
		dev_queue->qdisc_sleeping = qdisc_default;
L
Linus Torvalds 已提交
723 724

		qdisc_destroy(qdisc);
725
	}
726 727 728 729
}

void dev_shutdown(struct net_device *dev)
{
730
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
731
	shutdown_scheduler_queue(dev, &dev->rx_queue, &noop_qdisc);
732
	WARN_ON(timer_pending(&dev->watchdog_timer));
L
Linus Torvalds 已提交
733
}