/*
 * net/sched/sch_generic.c	Generic packet scheduler routines.
 *
 *		This program is free software; you can redistribute it and/or
 *		modify it under the terms of the GNU General Public License
 *		as published by the Free Software Foundation; either version
 *		2 of the License, or (at your option) any later version.
 *
 * Authors:	Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
 *              Jamal Hadi Salim, <hadi@cyberus.ca> 990601
 *              - Ingress support
 */

#include <linux/bitops.h>
#include <linux/module.h>
#include <linux/types.h>
#include <linux/kernel.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/errno.h>
#include <linux/netdevice.h>
#include <linux/skbuff.h>
#include <linux/rtnetlink.h>
#include <linux/init.h>
#include <linux/rcupdate.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/if_vlan.h>
#include <linux/skb_array.h>
#include <linux/if_macvlan.h>
#include <net/sch_generic.h>
#include <net/pkt_sched.h>
#include <net/dst.h>
#include <trace/events/qdisc.h>

/* Qdisc to use by default */
const struct Qdisc_ops *default_qdisc_ops = &pfifo_fast_ops;
EXPORT_SYMBOL(default_qdisc_ops);

/* Main transmission queue. */

/* Modifications to data participating in scheduling must be protected with
 * qdisc_lock(qdisc) spinlock.
 *
 * The idea is the following:
 * - enqueue, dequeue are serialized via qdisc root lock
 * - ingress filtering is also serialized via qdisc root lock
 * - updates to tree and tree walking are only done under the rtnl mutex.
 */
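
/* Illustration (a sketch, not from this file): a qdisc updating its own
 * scheduling state, e.g. from a ->change() handler, typically leans on
 * these rules as
 *
 *	sch_tree_lock(sch);	(grabs the qdisc root lock)
 *	q->limit = new_limit;	(q / new_limit are hypothetical names)
 *	sch_tree_unlock(sch);
 *
 * while grafting or walking the qdisc tree itself happens with the rtnl
 * mutex already held by the netlink handlers.
 */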

static inline struct sk_buff *__skb_dequeue_bad_txq(struct Qdisc *q)
{
	const struct netdev_queue *txq = q->dev_queue;
	spinlock_t *lock = NULL;
	struct sk_buff *skb;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	skb = skb_peek(&q->skb_bad_txq);
	if (skb) {
		/* check the reason for requeueing without taking the tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->skb_bad_txq);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
		}
	}

	if (lock)
		spin_unlock(lock);

	return skb;
}

static inline struct sk_buff *qdisc_dequeue_skb_bad_txq(struct Qdisc *q)
{
	struct sk_buff *skb = skb_peek(&q->skb_bad_txq);

	if (unlikely(skb))
		skb = __skb_dequeue_bad_txq(q);

	return skb;
}

static inline void qdisc_enqueue_skb_bad_txq(struct Qdisc *q,
					     struct sk_buff *skb)
{
	spinlock_t *lock = NULL;

	if (q->flags & TCQ_F_NOLOCK) {
		lock = qdisc_lock(q);
		spin_lock(lock);
	}

	__skb_queue_tail(&q->skb_bad_txq, skb);

	if (lock)
		spin_unlock(lock);
}

static inline int __dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	__skb_queue_head(&q->gso_skb, skb);
	q->qstats.requeues++;
	qdisc_qstats_backlog_inc(q, skb);
	q->q.qlen++;	/* it's still part of the queue */
	__netif_schedule(q);

	return 0;
}

static inline int dev_requeue_skb_locked(struct sk_buff *skb, struct Qdisc *q)
{
	spinlock_t *lock = qdisc_lock(q);

	spin_lock(lock);
	__skb_queue_tail(&q->gso_skb, skb);
	spin_unlock(lock);

	qdisc_qstats_cpu_requeues_inc(q);
	qdisc_qstats_cpu_backlog_inc(q, skb);
	qdisc_qstats_cpu_qlen_inc(q);
	__netif_schedule(q);

	return 0;
}

static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q)
{
	if (q->flags & TCQ_F_NOLOCK)
		return dev_requeue_skb_locked(skb, q);
	else
		return __dev_requeue_skb(skb, q);
}

static void try_bulk_dequeue_skb(struct Qdisc *q,
				 struct sk_buff *skb,
				 const struct netdev_queue *txq,
				 int *packets)
{
	int bytelimit = qdisc_avail_bulklimit(txq) - skb->len;

	while (bytelimit > 0) {
		struct sk_buff *nskb = q->dequeue(q);

		if (!nskb)
			break;

		bytelimit -= nskb->len; /* covers GSO len */
		skb->next = nskb;
		skb = nskb;
		(*packets)++; /* GSO counts as one pkt */
	}
	skb->next = NULL;
}
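
/* Aside (assumption, not stated in this file): qdisc_avail_bulklimit() is
 * expected to reflect the tx queue's BQL (byte queue limit) budget when
 * CONFIG_BQL is enabled, so bulking stops once the driver already has
 * enough bytes in flight.
 */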

/* This variant of try_bulk_dequeue_skb() makes sure
 * all skbs in the chain are for the same txq
 */
static void try_bulk_dequeue_skb_slow(struct Qdisc *q,
				      struct sk_buff *skb,
				      int *packets)
{
	int mapping = skb_get_queue_mapping(skb);
	struct sk_buff *nskb;
	int cnt = 0;

	do {
		nskb = q->dequeue(q);
		if (!nskb)
			break;
		if (unlikely(skb_get_queue_mapping(nskb) != mapping)) {
			qdisc_enqueue_skb_bad_txq(q, nskb);

			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_inc(q, nskb);
				qdisc_qstats_cpu_qlen_inc(q);
			} else {
				qdisc_qstats_backlog_inc(q, nskb);
				q->q.qlen++;
			}
			break;
		}
		skb->next = nskb;
		skb = nskb;
	} while (++cnt < 8);
	(*packets) += cnt;
	skb->next = NULL;
}

/* Note that dequeue_skb can possibly return a SKB list (via skb->next).
 * A requeued skb (via q->gso_skb) can also be a SKB list.
 */
static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate,
				   int *packets)
{
	const struct netdev_queue *txq = q->dev_queue;
	struct sk_buff *skb = NULL;

	*packets = 1;
	if (unlikely(!skb_queue_empty(&q->gso_skb))) {
		spinlock_t *lock = NULL;

		if (q->flags & TCQ_F_NOLOCK) {
			lock = qdisc_lock(q);
			spin_lock(lock);
		}

		skb = skb_peek(&q->gso_skb);

		/* skb may be null if another cpu pulls gso_skb off in between
		 * empty check and lock.
		 */
		if (!skb) {
			if (lock)
				spin_unlock(lock);
			goto validate;
		}

		/* skbs in gso_skb were already validated */
		*validate = false;
		/* check the reason for requeueing without taking the tx lock first */
		txq = skb_get_tx_queue(txq->dev, skb);
		if (!netif_xmit_frozen_or_stopped(txq)) {
			skb = __skb_dequeue(&q->gso_skb);
			if (qdisc_is_percpu_stats(q)) {
				qdisc_qstats_cpu_backlog_dec(q, skb);
				qdisc_qstats_cpu_qlen_dec(q);
			} else {
				qdisc_qstats_backlog_dec(q, skb);
				q->q.qlen--;
			}
		} else {
			skb = NULL;
		}
		if (lock)
			spin_unlock(lock);
		goto trace;
	}
validate:
	*validate = true;

	if ((q->flags & TCQ_F_ONETXQUEUE) &&
	    netif_xmit_frozen_or_stopped(txq))
		return skb;

	skb = qdisc_dequeue_skb_bad_txq(q);
	if (unlikely(skb))
		goto bulk;
	skb = q->dequeue(q);
	if (skb) {
bulk:
		if (qdisc_may_bulk(q))
			try_bulk_dequeue_skb(q, skb, txq, packets);
		else
			try_bulk_dequeue_skb_slow(q, skb, packets);
	}
trace:
	trace_qdisc_dequeue(q, txq, *packets, skb);
	return skb;
}

/*
 * Transmit possibly several skbs, and handle the return status as
 * required. Holding the running seqcount guarantees that
 * only one CPU can execute this function.
 *
 * Returns to the caller:
 *				false  - hardware queue frozen, back off
 *				true   - feel free to send more pkts
 */
bool sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q,
		     struct net_device *dev, struct netdev_queue *txq,
		     spinlock_t *root_lock, bool validate)
{
	int ret = NETDEV_TX_BUSY;

	/* And release qdisc */
	if (root_lock)
		spin_unlock(root_lock);

	/* Note that we validate skb (GSO, checksum, ...) outside of locks */
	if (validate)
		skb = validate_xmit_skb_list(skb, dev);

	if (likely(skb)) {
		HARD_TX_LOCK(dev, txq, smp_processor_id());
		if (!netif_xmit_frozen_or_stopped(txq))
			skb = dev_hard_start_xmit(skb, dev, txq, &ret);

		HARD_TX_UNLOCK(dev, txq);
	} else {
		if (root_lock)
			spin_lock(root_lock);
		return true;
	}

	if (root_lock)
		spin_lock(root_lock);

	if (!dev_xmit_complete(ret)) {
		/* Driver returned NETDEV_TX_BUSY - requeue skb */
		if (unlikely(ret != NETDEV_TX_BUSY))
			net_warn_ratelimited("BUG %s code %d qlen %d\n",
					     dev->name, ret, q->q.qlen);

		dev_requeue_skb(skb, q);
		return false;
	}

	if (ret && netif_xmit_frozen_or_stopped(txq))
		return false;

	return true;
}

/*
 * NOTE: Called under qdisc_lock(q) with locally disabled BH.
 *
 * running seqcount guarantees only one CPU can process
 * this qdisc at a time. qdisc_lock(q) serializes queue accesses for
 * this queue.
 *
 *  netif_tx_lock serializes accesses to device driver.
 *
 *  qdisc_lock(q) and netif_tx_lock are mutually exclusive,
 *  if one is grabbed, another must be free.
 *
 * Note, that this procedure can be called by a watchdog timer
 *
 * Returns to the caller:
 *				false - queue is empty or throttled.
 *				true  - queue is not empty.
 */
static inline bool qdisc_restart(struct Qdisc *q, int *packets)
{
	spinlock_t *root_lock = NULL;
	struct netdev_queue *txq;
	struct net_device *dev;
	struct sk_buff *skb;
	bool validate;

	/* Dequeue packet */
	skb = dequeue_skb(q, &validate, packets);
	if (unlikely(!skb))
		return false;

	if (!(q->flags & TCQ_F_NOLOCK))
		root_lock = qdisc_lock(q);

	dev = qdisc_dev(q);
	txq = skb_get_tx_queue(dev, skb);

	return sch_direct_xmit(skb, q, dev, txq, root_lock, validate);
}

void __qdisc_run(struct Qdisc *q)
{
	int quota = dev_tx_weight;
	int packets;

	while (qdisc_restart(q, &packets)) {
		/*
		 * Ordered by possible occurrence: Postpone processing if
		 * 1. we've exceeded packet quota
		 * 2. another process needs the CPU;
		 */
		quota -= packets;
		if (quota <= 0 || need_resched()) {
			__netif_schedule(q);
			break;
		}
	}
}
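
/* For context (a sketch of the caller side, assumed from the networking
 * headers of this era and not part of this file): __qdisc_run() is normally
 * reached through qdisc_run(), roughly
 *
 *	if (qdisc_run_begin(q)) {
 *		__qdisc_run(q);
 *		qdisc_run_end(q);
 *	}
 *
 * so that a single CPU owns the running seqcount while packets are pulled.
 */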

unsigned long dev_trans_start(struct net_device *dev)
{
	unsigned long val, res;
	unsigned int i;

	if (is_vlan_dev(dev))
		dev = vlan_dev_real_dev(dev);
	else if (netif_is_macvlan(dev))
		dev = macvlan_dev_real_dev(dev);
	res = netdev_get_tx_queue(dev, 0)->trans_start;
	for (i = 1; i < dev->num_tx_queues; i++) {
		val = netdev_get_tx_queue(dev, i)->trans_start;
		if (val && time_after(val, res))
			res = val;
	}

	return res;
}
EXPORT_SYMBOL(dev_trans_start);

static void dev_watchdog(struct timer_list *t)
{
	struct net_device *dev = from_timer(dev, t, watchdog_timer);

	netif_tx_lock(dev);
	if (!qdisc_tx_is_noop(dev)) {
		if (netif_device_present(dev) &&
		    netif_running(dev) &&
		    netif_carrier_ok(dev)) {
			int some_queue_timedout = 0;
			unsigned int i;
			unsigned long trans_start;

			for (i = 0; i < dev->num_tx_queues; i++) {
				struct netdev_queue *txq;

				txq = netdev_get_tx_queue(dev, i);
				trans_start = txq->trans_start;
				if (netif_xmit_stopped(txq) &&
				    time_after(jiffies, (trans_start +
							 dev->watchdog_timeo))) {
					some_queue_timedout = 1;
					txq->trans_timeout++;
					break;
				}
			}

			if (some_queue_timedout) {
				WARN_ONCE(1, KERN_INFO "NETDEV WATCHDOG: %s (%s): transmit queue %u timed out\n",
				       dev->name, netdev_drivername(dev), i);
				dev->netdev_ops->ndo_tx_timeout(dev);
			}
			if (!mod_timer(&dev->watchdog_timer,
				       round_jiffies(jiffies +
						     dev->watchdog_timeo)))
				dev_hold(dev);
		}
	}
	netif_tx_unlock(dev);

	dev_put(dev);
}

void __netdev_watchdog_up(struct net_device *dev)
{
	if (dev->netdev_ops->ndo_tx_timeout) {
		if (dev->watchdog_timeo <= 0)
			dev->watchdog_timeo = 5*HZ;
		if (!mod_timer(&dev->watchdog_timer,
			       round_jiffies(jiffies + dev->watchdog_timeo)))
			dev_hold(dev);
	}
}

static void dev_watchdog_up(struct net_device *dev)
{
	__netdev_watchdog_up(dev);
}

static void dev_watchdog_down(struct net_device *dev)
{
	netif_tx_lock_bh(dev);
	if (del_timer(&dev->watchdog_timer))
		dev_put(dev);
	netif_tx_unlock_bh(dev);
}

/**
 *	netif_carrier_on - set carrier
 *	@dev: network device
 *
 * Device has detected acquisition of carrier.
 */
void netif_carrier_on(struct net_device *dev)
{
	if (test_and_clear_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_changes);
		linkwatch_fire_event(dev);
		if (netif_running(dev))
			__netdev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_on);

/**
 *	netif_carrier_off - clear carrier
 *	@dev: network device
 *
 * Device has detected loss of carrier.
 */
void netif_carrier_off(struct net_device *dev)
{
	if (!test_and_set_bit(__LINK_STATE_NOCARRIER, &dev->state)) {
		if (dev->reg_state == NETREG_UNINITIALIZED)
			return;
		atomic_inc(&dev->carrier_changes);
		linkwatch_fire_event(dev);
	}
}
EXPORT_SYMBOL(netif_carrier_off);

/* "NOOP" scheduler: the best scheduler, recommended for all interfaces
   under all circumstances. It is difficult to invent anything faster or
   cheaper.
 */

static int noop_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			struct sk_buff **to_free)
{
	__qdisc_drop(skb, to_free);
	return NET_XMIT_CN;
}

static struct sk_buff *noop_dequeue(struct Qdisc *qdisc)
{
	return NULL;
}

struct Qdisc_ops noop_qdisc_ops __read_mostly = {
	.id		=	"noop",
	.priv_size	=	0,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static struct netdev_queue noop_netdev_queue = {
	.qdisc		=	&noop_qdisc,
	.qdisc_sleeping	=	&noop_qdisc,
};

struct Qdisc noop_qdisc = {
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.flags		=	TCQ_F_BUILTIN,
	.ops		=	&noop_qdisc_ops,
	.q.lock		=	__SPIN_LOCK_UNLOCKED(noop_qdisc.q.lock),
	.dev_queue	=	&noop_netdev_queue,
	.running	=	SEQCNT_ZERO(noop_qdisc.running),
	.busylock	=	__SPIN_LOCK_UNLOCKED(noop_qdisc.busylock),
};
EXPORT_SYMBOL(noop_qdisc);

static int noqueue_init(struct Qdisc *qdisc, struct nlattr *opt,
			struct netlink_ext_ack *extack)
{
	/* register_qdisc() assigns a default of noop_enqueue if unset,
	 * but __dev_queue_xmit() treats noqueue only as such
	 * if this is NULL - so clear it here. */
	qdisc->enqueue = NULL;
	return 0;
}

struct Qdisc_ops noqueue_qdisc_ops __read_mostly = {
	.id		=	"noqueue",
	.priv_size	=	0,
	.init		=	noqueue_init,
	.enqueue	=	noop_enqueue,
	.dequeue	=	noop_dequeue,
	.peek		=	noop_dequeue,
	.owner		=	THIS_MODULE,
};

static const u8 prio2band[TC_PRIO_MAX + 1] = {
	1, 2, 2, 2, 1, 2, 0, 0 , 1, 1, 1, 1, 1, 1, 1, 1
};
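
/* For illustration: with the map above, TC_PRIO_BESTEFFORT (0) falls in
 * band 1, TC_PRIO_BULK (2) in band 2 and TC_PRIO_INTERACTIVE (6) in band 0,
 * so interactive traffic is dequeued first and bulk traffic last.
 */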

/* 3-band FIFO queue: old style, but should be a bit faster than
   generic prio+fifo combination.
 */

#define PFIFO_FAST_BANDS 3

/*
 * Private data for a pfifo_fast scheduler containing:
 *	- rings for priority bands
 */
struct pfifo_fast_priv {
	struct skb_array q[PFIFO_FAST_BANDS];
};

static inline struct skb_array *band2list(struct pfifo_fast_priv *priv,
					  int band)
{
	return &priv->q[band];
}

static int pfifo_fast_enqueue(struct sk_buff *skb, struct Qdisc *qdisc,
			      struct sk_buff **to_free)
{
	int band = prio2band[skb->priority & TC_PRIO_MAX];
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct skb_array *q = band2list(priv, band);
	int err;

	err = skb_array_produce(q, skb);

	if (unlikely(err))
		return qdisc_drop_cpu(skb, qdisc, to_free);

	qdisc_qstats_cpu_qlen_inc(qdisc);
	qdisc_qstats_cpu_backlog_inc(qdisc, skb);
	return NET_XMIT_SUCCESS;
}

static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		if (__skb_array_empty(q))
			continue;

		skb = skb_array_consume_bh(q);
	}
	if (likely(skb)) {
		qdisc_qstats_cpu_backlog_dec(qdisc, skb);
		qdisc_bstats_cpu_update(qdisc, skb);
		qdisc_qstats_cpu_qlen_dec(qdisc);
	}

	return skb;
}

static struct sk_buff *pfifo_fast_peek(struct Qdisc *qdisc)
{
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	struct sk_buff *skb = NULL;
	int band;

	for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) {
		struct skb_array *q = band2list(priv, band);

		skb = __skb_array_peek(q);
	}

	return skb;
}

static void pfifo_fast_reset(struct Qdisc *qdisc)
{
	int i, band;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);

	for (band = 0; band < PFIFO_FAST_BANDS; band++) {
		struct skb_array *q = band2list(priv, band);
		struct sk_buff *skb;

		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
		if (!q->ring.queue)
			continue;

		while ((skb = skb_array_consume_bh(q)) != NULL)
			kfree_skb(skb);
	}

	for_each_possible_cpu(i) {
		struct gnet_stats_queue *q = per_cpu_ptr(qdisc->cpu_qstats, i);

		q->backlog = 0;
		q->qlen = 0;
	}
}

static int pfifo_fast_dump(struct Qdisc *qdisc, struct sk_buff *skb)
{
	struct tc_prio_qopt opt = { .bands = PFIFO_FAST_BANDS };

	memcpy(&opt.priomap, prio2band, TC_PRIO_MAX + 1);
	if (nla_put(skb, TCA_OPTIONS, sizeof(opt), &opt))
		goto nla_put_failure;
	return skb->len;

nla_put_failure:
	return -1;
}

static int pfifo_fast_init(struct Qdisc *qdisc, struct nlattr *opt,
			   struct netlink_ext_ack *extack)
{
	unsigned int qlen = qdisc_dev(qdisc)->tx_queue_len;
	struct pfifo_fast_priv *priv = qdisc_priv(qdisc);
	int prio;

	/* guard against zero length rings */
	if (!qlen)
		return -EINVAL;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);
		int err;

		err = skb_array_init(q, qlen, GFP_KERNEL);
		if (err)
			return -ENOMEM;
	}

	/* Can by-pass the queue discipline */
	qdisc->flags |= TCQ_F_CAN_BYPASS;
	return 0;
}

static void pfifo_fast_destroy(struct Qdisc *sch)
{
	struct pfifo_fast_priv *priv = qdisc_priv(sch);
	int prio;

	for (prio = 0; prio < PFIFO_FAST_BANDS; prio++) {
		struct skb_array *q = band2list(priv, prio);

		/* NULL ring is possible if destroy path is due to a failed
		 * skb_array_init() in pfifo_fast_init() case.
		 */
		if (!q->ring.queue)
			continue;
		/* Destroy ring but no need to kfree_skb because a call to
		 * pfifo_fast_reset() has already done that work.
		 */
		ptr_ring_cleanup(&q->ring, NULL);
	}
}

struct Qdisc_ops pfifo_fast_ops __read_mostly = {
	.id		=	"pfifo_fast",
	.priv_size	=	sizeof(struct pfifo_fast_priv),
	.enqueue	=	pfifo_fast_enqueue,
	.dequeue	=	pfifo_fast_dequeue,
	.peek		=	pfifo_fast_peek,
	.init		=	pfifo_fast_init,
	.destroy	=	pfifo_fast_destroy,
	.reset		=	pfifo_fast_reset,
	.dump		=	pfifo_fast_dump,
	.owner		=	THIS_MODULE,
	.static_flags	=	TCQ_F_NOLOCK | TCQ_F_CPUSTATS,
};
EXPORT_SYMBOL(pfifo_fast_ops);
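
/* For reference (a sketch, not code from this file): a modular scheduler
 * would normally expose its struct Qdisc_ops via register_qdisc(&my_ops)
 * in module init and unregister_qdisc(&my_ops) on exit; my_ops here is a
 * hypothetical name.
 */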

static struct lock_class_key qdisc_tx_busylock;
static struct lock_class_key qdisc_running_key;

struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
			  const struct Qdisc_ops *ops,
			  struct netlink_ext_ack *extack)
{
	void *p;
	struct Qdisc *sch;
	unsigned int size = QDISC_ALIGN(sizeof(*sch)) + ops->priv_size;
	int err = -ENOBUFS;
	struct net_device *dev;

	if (!dev_queue) {
		NL_SET_ERR_MSG(extack, "No device queue given");
		err = -EINVAL;
		goto errout;
	}

	dev = dev_queue->dev;
	p = kzalloc_node(size, GFP_KERNEL,
			 netdev_queue_numa_node_read(dev_queue));

	if (!p)
		goto errout;
	sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
	/* if we got non aligned memory, ask more and do alignment ourself */
	if (sch != p) {
		kfree(p);
		p = kzalloc_node(size + QDISC_ALIGNTO - 1, GFP_KERNEL,
				 netdev_queue_numa_node_read(dev_queue));
		if (!p)
			goto errout;
		sch = (struct Qdisc *) QDISC_ALIGN((unsigned long) p);
		sch->padded = (char *) sch - (char *) p;
	}
	__skb_queue_head_init(&sch->gso_skb);
	__skb_queue_head_init(&sch->skb_bad_txq);
	qdisc_skb_head_init(&sch->q);
	spin_lock_init(&sch->q.lock);

	if (ops->static_flags & TCQ_F_CPUSTATS) {
		sch->cpu_bstats =
			netdev_alloc_pcpu_stats(struct gnet_stats_basic_cpu);
		if (!sch->cpu_bstats)
			goto errout1;

		sch->cpu_qstats = alloc_percpu(struct gnet_stats_queue);
		if (!sch->cpu_qstats) {
			free_percpu(sch->cpu_bstats);
			goto errout1;
		}
	}

	spin_lock_init(&sch->busylock);
	lockdep_set_class(&sch->busylock,
			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);

	seqcount_init(&sch->running);
	lockdep_set_class(&sch->running,
			  dev->qdisc_running_key ?: &qdisc_running_key);

	sch->ops = ops;
	sch->flags = ops->static_flags;
	sch->enqueue = ops->enqueue;
	sch->dequeue = ops->dequeue;
	sch->dev_queue = dev_queue;
	dev_hold(dev);
	refcount_set(&sch->refcnt, 1);

	return sch;
errout1:
	kfree(p);
errout:
	return ERR_PTR(err);
}

struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
				const struct Qdisc_ops *ops,
				unsigned int parentid)
{
	struct Qdisc *sch;

	if (!try_module_get(ops->owner))
		return NULL;

	sch = qdisc_alloc(dev_queue, ops, NULL);
	if (IS_ERR(sch)) {
		module_put(ops->owner);
		return NULL;
	}
	sch->parent = parentid;

	if (!ops->init || ops->init(sch, NULL, NULL) == 0)
		return sch;

	qdisc_destroy(sch);
	return NULL;
}
EXPORT_SYMBOL(qdisc_create_dflt);

/* Under qdisc_lock(qdisc) and BH! */

void qdisc_reset(struct Qdisc *qdisc)
{
	const struct Qdisc_ops *ops = qdisc->ops;
	struct sk_buff *skb, *tmp;

	if (ops->reset)
		ops->reset(qdisc);

	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
		__skb_unlink(skb, &qdisc->gso_skb);
		kfree_skb_list(skb);
	}

	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
		__skb_unlink(skb, &qdisc->skb_bad_txq);
		kfree_skb_list(skb);
	}

	qdisc->q.qlen = 0;
	qdisc->qstats.backlog = 0;
}
EXPORT_SYMBOL(qdisc_reset);

static void qdisc_free(struct Qdisc *qdisc)
{
	if (qdisc_is_percpu_stats(qdisc)) {
		free_percpu(qdisc->cpu_bstats);
		free_percpu(qdisc->cpu_qstats);
	}

	kfree((char *) qdisc - qdisc->padded);
}

void qdisc_destroy(struct Qdisc *qdisc)
{
	const struct Qdisc_ops  *ops = qdisc->ops;
	struct sk_buff *skb, *tmp;

	if (qdisc->flags & TCQ_F_BUILTIN ||
	    !refcount_dec_and_test(&qdisc->refcnt))
		return;

#ifdef CONFIG_NET_SCHED
	qdisc_hash_del(qdisc);

	qdisc_put_stab(rtnl_dereference(qdisc->stab));
#endif
	gen_kill_estimator(&qdisc->rate_est);
	if (ops->reset)
		ops->reset(qdisc);
	if (ops->destroy)
		ops->destroy(qdisc);

	module_put(ops->owner);
	dev_put(qdisc_dev(qdisc));

	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
		__skb_unlink(skb, &qdisc->gso_skb);
		kfree_skb_list(skb);
	}

	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
		__skb_unlink(skb, &qdisc->skb_bad_txq);
		kfree_skb_list(skb);
	}

	qdisc_free(qdisc);
}
EXPORT_SYMBOL(qdisc_destroy);

/* Attach toplevel qdisc to device queue. */
struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue,
			      struct Qdisc *qdisc)
{
	struct Qdisc *oqdisc = dev_queue->qdisc_sleeping;
	spinlock_t *root_lock;

	root_lock = qdisc_lock(oqdisc);
	spin_lock_bh(root_lock);

	/* ... and graft new one */
	if (qdisc == NULL)
		qdisc = &noop_qdisc;
	dev_queue->qdisc_sleeping = qdisc;
	rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc);

	spin_unlock_bh(root_lock);

	return oqdisc;
}
EXPORT_SYMBOL(dev_graft_qdisc);

static void attach_one_default_qdisc(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_unused)
{
	struct Qdisc *qdisc;
	const struct Qdisc_ops *ops = default_qdisc_ops;

	if (dev->priv_flags & IFF_NO_QUEUE)
		ops = &noqueue_qdisc_ops;

	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT);
	if (!qdisc) {
		netdev_info(dev, "activation failed\n");
		return;
	}
	if (!netif_is_multiqueue(dev))
		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
	dev_queue->qdisc_sleeping = qdisc;
}

static void attach_default_qdiscs(struct net_device *dev)
{
	struct netdev_queue *txq;
	struct Qdisc *qdisc;

	txq = netdev_get_tx_queue(dev, 0);

	if (!netif_is_multiqueue(dev) ||
	    dev->priv_flags & IFF_NO_QUEUE) {
		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
		dev->qdisc = txq->qdisc_sleeping;
		qdisc_refcount_inc(dev->qdisc);
	} else {
		qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT);
		if (qdisc) {
			dev->qdisc = qdisc;
			qdisc->ops->attach(qdisc);
		}
	}
#ifdef CONFIG_NET_SCHED
	if (dev->qdisc != &noop_qdisc)
		qdisc_hash_add(dev->qdisc, false);
#endif
}

static void transition_one_qdisc(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_need_watchdog)
{
	struct Qdisc *new_qdisc = dev_queue->qdisc_sleeping;
	int *need_watchdog_p = _need_watchdog;

	if (!(new_qdisc->flags & TCQ_F_BUILTIN))
		clear_bit(__QDISC_STATE_DEACTIVATED, &new_qdisc->state);

	rcu_assign_pointer(dev_queue->qdisc, new_qdisc);
	if (need_watchdog_p) {
		dev_queue->trans_start = 0;
		*need_watchdog_p = 1;
	}
}

void dev_activate(struct net_device *dev)
{
	int need_watchdog;

	/* No queueing discipline is attached to device;
	 * create default one for devices, which need queueing
	 * and noqueue_qdisc for virtual interfaces
	 */

	if (dev->qdisc == &noop_qdisc)
		attach_default_qdiscs(dev);

	if (!netif_carrier_ok(dev))
		/* Delay activation until next carrier-on event */
		return;

	need_watchdog = 0;
	netdev_for_each_tx_queue(dev, transition_one_qdisc, &need_watchdog);
	if (dev_ingress_queue(dev))
		transition_one_qdisc(dev, dev_ingress_queue(dev), NULL);

	if (need_watchdog) {
		netif_trans_update(dev);
		dev_watchdog_up(dev);
	}
}
EXPORT_SYMBOL(dev_activate);

static void dev_deactivate_queue(struct net_device *dev,
				 struct netdev_queue *dev_queue,
				 void *_qdisc_default)
{
	struct Qdisc *qdisc_default = _qdisc_default;
	struct Qdisc *qdisc;

	qdisc = rtnl_dereference(dev_queue->qdisc);
	if (qdisc) {
		spin_lock_bh(qdisc_lock(qdisc));

		if (!(qdisc->flags & TCQ_F_BUILTIN))
			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);

		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		qdisc_reset(qdisc);

		spin_unlock_bh(qdisc_lock(qdisc));
	}
}

static bool some_qdisc_is_busy(struct net_device *dev)
{
	unsigned int i;

	for (i = 0; i < dev->num_tx_queues; i++) {
		struct netdev_queue *dev_queue;
		spinlock_t *root_lock;
		struct Qdisc *q;
		int val;

		dev_queue = netdev_get_tx_queue(dev, i);
		q = dev_queue->qdisc_sleeping;

		if (q->flags & TCQ_F_NOLOCK) {
			val = test_bit(__QDISC_STATE_SCHED, &q->state);
		} else {
			root_lock = qdisc_lock(q);
			spin_lock_bh(root_lock);

			val = (qdisc_is_running(q) ||
			       test_bit(__QDISC_STATE_SCHED, &q->state));

			spin_unlock_bh(root_lock);
		}

		if (val)
			return true;
	}
	return false;
}

static void dev_qdisc_reset(struct net_device *dev,
			    struct netdev_queue *dev_queue,
			    void *none)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;

	if (qdisc)
		qdisc_reset(qdisc);
}

/**
 * 	dev_deactivate_many - deactivate transmissions on several devices
 * 	@head: list of devices to deactivate
 *
 *	This function returns only when all outstanding transmissions
 *	have completed, unless all devices are in dismantle phase.
 */
void dev_deactivate_many(struct list_head *head)
{
	struct net_device *dev;

	list_for_each_entry(dev, head, close_list) {
		netdev_for_each_tx_queue(dev, dev_deactivate_queue,
					 &noop_qdisc);
		if (dev_ingress_queue(dev))
			dev_deactivate_queue(dev, dev_ingress_queue(dev),
					     &noop_qdisc);

		dev_watchdog_down(dev);
	}

	/* Wait for outstanding qdisc-less dev_queue_xmit calls.
	 * This is avoided if all devices are in dismantle phase :
	 * Caller will call synchronize_net() for us
	 */
	synchronize_net();

	/* Wait for outstanding qdisc_run calls. */
	list_for_each_entry(dev, head, close_list) {
		while (some_qdisc_is_busy(dev))
			yield();
		/* The new qdisc is assigned at this point so we can safely
		 * unwind stale skb lists and qdisc statistics
		 */
		netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
		if (dev_ingress_queue(dev))
			dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
	}
}

void dev_deactivate(struct net_device *dev)
{
	LIST_HEAD(single);

	list_add(&dev->close_list, &single);
	dev_deactivate_many(&single);
	list_del(&single);
}
EXPORT_SYMBOL(dev_deactivate);

static void dev_init_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc)
{
	struct Qdisc *qdisc = _qdisc;

	rcu_assign_pointer(dev_queue->qdisc, qdisc);
	dev_queue->qdisc_sleeping = qdisc;
	__skb_queue_head_init(&qdisc->gso_skb);
	__skb_queue_head_init(&qdisc->skb_bad_txq);
}

void dev_init_scheduler(struct net_device *dev)
{
	dev->qdisc = &noop_qdisc;
	netdev_for_each_tx_queue(dev, dev_init_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		dev_init_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);

	timer_setup(&dev->watchdog_timer, dev_watchdog, 0);
}

static void shutdown_scheduler_queue(struct net_device *dev,
				     struct netdev_queue *dev_queue,
				     void *_qdisc_default)
{
	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
	struct Qdisc *qdisc_default = _qdisc_default;

	if (qdisc) {
		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
		dev_queue->qdisc_sleeping = qdisc_default;

		qdisc_destroy(qdisc);
	}
}

void dev_shutdown(struct net_device *dev)
{
	netdev_for_each_tx_queue(dev, shutdown_scheduler_queue, &noop_qdisc);
	if (dev_ingress_queue(dev))
		shutdown_scheduler_queue(dev, dev_ingress_queue(dev), &noop_qdisc);
	qdisc_destroy(dev->qdisc);
	dev->qdisc = &noop_qdisc;

	WARN_ON(timer_pending(&dev->watchdog_timer));
}

void psched_ratecfg_precompute(struct psched_ratecfg *r,
			       const struct tc_ratespec *conf,
			       u64 rate64)
{
	memset(r, 0, sizeof(*r));
	r->overhead = conf->overhead;
	r->rate_bytes_ps = max_t(u64, conf->rate, rate64);
	r->linklayer = (conf->linklayer & TC_LINKLAYER_MASK);
	r->mult = 1;
	/*
	 * The deal here is to replace a divide by a reciprocal one
	 * in fast path (a reciprocal divide is a multiply and a shift)
	 *
	 * Normal formula would be :
	 *  time_in_ns = (NSEC_PER_SEC * len) / rate_bps
	 *
	 * We compute mult/shift to use instead :
	 *  time_in_ns = (len * mult) >> shift;
	 *
	 * We try to get the highest possible mult value for accuracy,
	 * but have to make sure no overflows will ever happen.
	 */
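	/*
	 * Worked example (illustrative, not from the original source): for
	 * rate_bytes_ps = 125,000,000 (1 Gbit/s), the loop below settles on
	 * mult/shift with mult >> shift ~= 8, so a 1500 byte packet costs
	 * about 1500 * 8 = 12000 ns, i.e. NSEC_PER_SEC * 1500 / 125000000.
	 */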
	if (r->rate_bytes_ps > 0) {
		u64 factor = NSEC_PER_SEC;

		for (;;) {
			r->mult = div64_u64(factor, r->rate_bytes_ps);
			if (r->mult & (1U << 31) || factor & (1ULL << 63))
				break;
			factor <<= 1;
			r->shift++;
		}
	}
}
EXPORT_SYMBOL(psched_ratecfg_precompute);

static void mini_qdisc_rcu_func(struct rcu_head *head)
{
}

void mini_qdisc_pair_swap(struct mini_Qdisc_pair *miniqp,
			  struct tcf_proto *tp_head)
{
	struct mini_Qdisc *miniq_old = rtnl_dereference(*miniqp->p_miniq);
	struct mini_Qdisc *miniq;

	if (!tp_head) {
		RCU_INIT_POINTER(*miniqp->p_miniq, NULL);
		return;
	}

	miniq = !miniq_old || miniq_old == &miniqp->miniq2 ?
		&miniqp->miniq1 : &miniqp->miniq2;

	/* We need to make sure that readers won't see the miniq
	 * we are about to modify. So wait until previous call_rcu_bh callback
	 * is done.
	 */
	rcu_barrier_bh();
	miniq->filter_list = tp_head;
	rcu_assign_pointer(*miniqp->p_miniq, miniq);

	if (miniq_old)
		/* This is counterpart of the rcu barrier above. We need to
		 * block potential new user of miniq_old until all readers
		 * are not seeing it.
		 */
		call_rcu_bh(&miniq_old->rcu, mini_qdisc_rcu_func);
}
EXPORT_SYMBOL(mini_qdisc_pair_swap);

void mini_qdisc_pair_init(struct mini_Qdisc_pair *miniqp, struct Qdisc *qdisc,
			  struct mini_Qdisc __rcu **p_miniq)
{
	miniqp->miniq1.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq1.cpu_qstats = qdisc->cpu_qstats;
	miniqp->miniq2.cpu_bstats = qdisc->cpu_bstats;
	miniqp->miniq2.cpu_qstats = qdisc->cpu_qstats;
	miniqp->p_miniq = p_miniq;
}
EXPORT_SYMBOL(mini_qdisc_pair_init);