提交 0f9f32ac 编写于 作者: S Stephen Hemminger 提交者: David S. Miller

[PKT_SCHED] netem: use only inner qdisc -- no private skbuff queue

Netem works better if packets are just queued in the inner discipline
rather than having a separate delayed queue. Change to use dequeue/requeue
to peek, like TBF does.

By doing this, potential qlen problems with the old method are avoided. The problems
happened when netem_run, which moved packets from the private delayed queue to the inner
discipline, failed (because the inner queue was full). This happened in dequeue, so the
effective qlen of the netem would be decreased (because of the drop), but there was
no way to keep the outer qdisc (caller of netem dequeue) in sync.

The problem window is still there, since this patch doesn't address the issue of
requeue failing in netem_dequeue, but that shouldn't happen since the sequence dequeue/requeue
should always work.  The long-term correct fix is to implement qdisc->peek in all the qdiscs
to allow for this (needed by several other qdiscs as well).
Signed-off-by: Stephen Hemminger <shemminger@osdl.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 0afb51e7
...@@ -53,7 +53,6 @@ ...@@ -53,7 +53,6 @@
struct netem_sched_data { struct netem_sched_data {
struct Qdisc *qdisc; struct Qdisc *qdisc;
struct sk_buff_head delayed;
struct timer_list timer; struct timer_list timer;
u32 latency; u32 latency;
...@@ -137,72 +136,6 @@ static long tabledist(unsigned long mu, long sigma, ...@@ -137,72 +136,6 @@ static long tabledist(unsigned long mu, long sigma,
return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu; return x / NETEM_DIST_SCALE + (sigma / NETEM_DIST_SCALE) * t + mu;
} }
/* Put skb in the private delayed queue. */
static int netem_delay(struct Qdisc *sch, struct sk_buff *skb)
{
struct netem_sched_data *q = qdisc_priv(sch);
psched_tdiff_t td;
psched_time_t now;
PSCHED_GET_TIME(now);
td = tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist);
/* Always queue at tail to keep packets in order */
if (likely(q->delayed.qlen < q->limit)) {
struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
PSCHED_TADD2(now, td, cb->time_to_send);
pr_debug("netem_delay: skb=%p now=%llu tosend=%llu\n", skb,
now, cb->time_to_send);
__skb_queue_tail(&q->delayed, skb);
return NET_XMIT_SUCCESS;
}
pr_debug("netem_delay: queue over limit %d\n", q->limit);
sch->qstats.overlimits++;
kfree_skb(skb);
return NET_XMIT_DROP;
}
/*
* Move a packet that is ready to send from the delay holding
* list to the underlying qdisc.
*/
static int netem_run(struct Qdisc *sch)
{
struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb;
psched_time_t now;
PSCHED_GET_TIME(now);
skb = skb_peek(&q->delayed);
if (skb) {
const struct netem_skb_cb *cb
= (const struct netem_skb_cb *)skb->cb;
long delay
= PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
/* if more time remaining? */
if (delay > 0) {
mod_timer(&q->timer, jiffies + delay);
return 1;
}
__skb_unlink(skb, &q->delayed);
if (q->qdisc->enqueue(skb, q->qdisc)) {
sch->q.qlen--;
sch->qstats.drops++;
}
}
return 0;
}
/* /*
* Insert one skb into qdisc. * Insert one skb into qdisc.
* Note: parent depends on return value to account for queue length. * Note: parent depends on return value to account for queue length.
...@@ -212,6 +145,7 @@ static int netem_run(struct Qdisc *sch) ...@@ -212,6 +145,7 @@ static int netem_run(struct Qdisc *sch)
static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
struct netem_skb_cb *cb = (struct netem_skb_cb *)skb->cb;
struct sk_buff *skb2; struct sk_buff *skb2;
int ret; int ret;
int count = 1; int count = 1;
...@@ -246,18 +180,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch) ...@@ -246,18 +180,24 @@ static int netem_enqueue(struct sk_buff *skb, struct Qdisc *sch)
q->duplicate = dupsave; q->duplicate = dupsave;
} }
/* If doing simple delay then gap == 0 so all packets /*
* go into the delayed holding queue * Do re-ordering by putting one out of N packets at the front
* otherwise if doing out of order only "1 out of gap" * of the queue.
* packets will be delayed. * gap == 0 is special case for no-reordering.
*/ */
if (q->counter < q->gap) { if (q->gap == 0 || q->counter != q->gap) {
psched_time_t now;
PSCHED_GET_TIME(now);
PSCHED_TADD2(now,
tabledist(q->latency, q->jitter, &q->delay_cor, q->delay_dist),
cb->time_to_send);
++q->counter; ++q->counter;
ret = q->qdisc->enqueue(skb, q->qdisc); ret = q->qdisc->enqueue(skb, q->qdisc);
} else { } else {
q->counter = 0; q->counter = 0;
ret = netem_delay(sch, skb); PSCHED_GET_TIME(cb->time_to_send);
netem_run(sch); ret = q->qdisc->ops->requeue(skb, q->qdisc);
} }
if (likely(ret == NET_XMIT_SUCCESS)) { if (likely(ret == NET_XMIT_SUCCESS)) {
...@@ -301,22 +241,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch) ...@@ -301,22 +241,33 @@ static struct sk_buff *netem_dequeue(struct Qdisc *sch)
{ {
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
struct sk_buff *skb; struct sk_buff *skb;
int pending;
pending = netem_run(sch);
skb = q->qdisc->dequeue(q->qdisc); skb = q->qdisc->dequeue(q->qdisc);
if (skb) { if (skb) {
const struct netem_skb_cb *cb
= (const struct netem_skb_cb *)skb->cb;
psched_time_t now;
long delay;
/* if more time remaining? */
PSCHED_GET_TIME(now);
delay = PSCHED_US2JIFFIE(PSCHED_TDIFF(cb->time_to_send, now));
pr_debug("netem_run: skb=%p delay=%ld\n", skb, delay);
if (delay <= 0) {
pr_debug("netem_dequeue: return skb=%p\n", skb); pr_debug("netem_dequeue: return skb=%p\n", skb);
sch->q.qlen--; sch->q.qlen--;
sch->flags &= ~TCQ_F_THROTTLED; sch->flags &= ~TCQ_F_THROTTLED;
return skb;
} }
else if (pending) {
pr_debug("netem_dequeue: throttling\n"); mod_timer(&q->timer, jiffies + delay);
sch->flags |= TCQ_F_THROTTLED; sch->flags |= TCQ_F_THROTTLED;
if (q->qdisc->ops->requeue(skb, q->qdisc) != 0)
sch->qstats.drops++;
} }
return skb; return NULL;
} }
static void netem_watchdog(unsigned long arg) static void netem_watchdog(unsigned long arg)
...@@ -333,8 +284,6 @@ static void netem_reset(struct Qdisc *sch) ...@@ -333,8 +284,6 @@ static void netem_reset(struct Qdisc *sch)
struct netem_sched_data *q = qdisc_priv(sch); struct netem_sched_data *q = qdisc_priv(sch);
qdisc_reset(q->qdisc); qdisc_reset(q->qdisc);
skb_queue_purge(&q->delayed);
sch->q.qlen = 0; sch->q.qlen = 0;
sch->flags &= ~TCQ_F_THROTTLED; sch->flags &= ~TCQ_F_THROTTLED;
del_timer_sync(&q->timer); del_timer_sync(&q->timer);
...@@ -460,7 +409,6 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) ...@@ -460,7 +409,6 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt)
if (!opt) if (!opt)
return -EINVAL; return -EINVAL;
skb_queue_head_init(&q->delayed);
init_timer(&q->timer); init_timer(&q->timer);
q->timer.function = netem_watchdog; q->timer.function = netem_watchdog;
q->timer.data = (unsigned long) sch; q->timer.data = (unsigned long) sch;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册