From 300ce174ebc2fcf2b5111a50fa42f79d891927dd Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 30 Oct 2005 13:47:34 -0800 Subject: [NETEM]: Support time based reordering Change netem to support packets getting reordered because of variations in delay. Introduce a special case version of FIFO that queues packets in order based on the netem delay. Since netem is classful, those users that don't want jitter based reordering can just insert a pfifo instead of the default. This required changes to generic skbuff code to allow finer grain manipulation of sk_buff_head. Insertion into the middle and reverse walk. Signed-off-by: Stephen Hemminger Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/skbuff.h | 38 +++++++++++++++++----- net/sched/sch_netem.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 114 insertions(+), 9 deletions(-) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4286d832166..fdfb8fe8c38 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -603,29 +603,46 @@ static inline void skb_queue_head_init(struct sk_buff_head *list) */ /** - * __skb_queue_head - queue a buffer at the list head + * __skb_queue_after - queue a buffer at the list head * @list: list to use + * @prev: place after this buffer * @newsk: buffer to queue * - * Queue a buffer at the start of a list. This function takes no locks + * Queue a buffer int the middle of a list. This function takes no locks * and you must therefore hold required locks before calling it. * * A buffer cannot be placed on two lists at the same time. */ -extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); -static inline void __skb_queue_head(struct sk_buff_head *list, - struct sk_buff *newsk) +static inline void __skb_queue_after(struct sk_buff_head *list, + struct sk_buff *prev, + struct sk_buff *newsk) { - struct sk_buff *prev, *next; - + struct sk_buff *next; list->qlen++; - prev = (struct sk_buff *)list; + next = prev->next; newsk->next = next; newsk->prev = prev; next->prev = prev->next = newsk; } +/** + * __skb_queue_head - queue a buffer at the list head + * @list: list to use + * @newsk: buffer to queue + * + * Queue a buffer at the start of a list. This function takes no locks + * and you must therefore hold required locks before calling it. + * + * A buffer cannot be placed on two lists at the same time. + */ +extern void skb_queue_head(struct sk_buff_head *list, struct sk_buff *newsk); +static inline void __skb_queue_head(struct sk_buff_head *list, + struct sk_buff *newsk) +{ + __skb_queue_after(list, (struct sk_buff *)list, newsk); +} + /** * __skb_queue_tail - queue a buffer at the list tail * @list: list to use @@ -1203,6 +1220,11 @@ static inline void kunmap_skb_frag(void *vaddr) prefetch(skb->next), (skb != (struct sk_buff *)(queue)); \ skb = skb->next) +#define skb_queue_reverse_walk(queue, skb) \ + for (skb = (queue)->prev; \ + prefetch(skb->prev), (skb != (struct sk_buff *)(queue)); \ + skb = skb->prev) + extern struct sk_buff *skb_recv_datagram(struct sock *sk, unsigned flags, int noblock, int *err); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index d871fe7f81a..7c10ef3457d 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -300,11 +300,16 @@ static void netem_reset(struct Qdisc *sch) del_timer_sync(&q->timer); } +/* Pass size change message down to embedded FIFO */ static int set_fifo_limit(struct Qdisc *q, int limit) { struct rtattr *rta; int ret = -ENOMEM; + /* Hack to avoid sending change message to non-FIFO */ + if (strncmp(q->ops->id + 1, "fifo", 4) != 0) + return 0; + rta = kmalloc(RTA_LENGTH(sizeof(struct tc_fifo_qopt)), GFP_KERNEL); if (rta) { rta->rta_type = RTM_NEWQDISC; @@ -436,6 +441,84 @@ static int netem_change(struct Qdisc *sch, struct rtattr *opt) return 0; } +/* + * Special case version of FIFO queue for use by netem. + * It queues in order based on timestamps in skb's + */ +struct fifo_sched_data { + u32 limit; +}; + +static int tfifo_enqueue(struct sk_buff *nskb, struct Qdisc *sch) +{ + struct fifo_sched_data *q = qdisc_priv(sch); + struct sk_buff_head *list = &sch->q; + const struct netem_skb_cb *ncb + = (const struct netem_skb_cb *)nskb->cb; + struct sk_buff *skb; + + if (likely(skb_queue_len(list) < q->limit)) { + skb_queue_reverse_walk(list, skb) { + const struct netem_skb_cb *cb + = (const struct netem_skb_cb *)skb->cb; + + if (PSCHED_TLESS(cb->time_to_send, ncb->time_to_send)) + break; + } + + __skb_queue_after(list, skb, nskb); + + sch->qstats.backlog += nskb->len; + sch->bstats.bytes += nskb->len; + sch->bstats.packets++; + + return NET_XMIT_SUCCESS; + } + + return qdisc_drop(nskb, sch); +} + +static int tfifo_init(struct Qdisc *sch, struct rtattr *opt) +{ + struct fifo_sched_data *q = qdisc_priv(sch); + + if (opt) { + struct tc_fifo_qopt *ctl = RTA_DATA(opt); + if (RTA_PAYLOAD(opt) < sizeof(*ctl)) + return -EINVAL; + + q->limit = ctl->limit; + } else + q->limit = max_t(u32, sch->dev->tx_queue_len, 1); + + return 0; +} + +static int tfifo_dump(struct Qdisc *sch, struct sk_buff *skb) +{ + struct fifo_sched_data *q = qdisc_priv(sch); + struct tc_fifo_qopt opt = { .limit = q->limit }; + + RTA_PUT(skb, TCA_OPTIONS, sizeof(opt), &opt); + return skb->len; + +rtattr_failure: + return -1; +} + +static struct Qdisc_ops tfifo_qdisc_ops = { + .id = "tfifo", + .priv_size = sizeof(struct fifo_sched_data), + .enqueue = tfifo_enqueue, + .dequeue = qdisc_dequeue_head, + .requeue = qdisc_requeue, + .drop = qdisc_queue_drop, + .init = tfifo_init, + .reset = qdisc_reset_queue, + .change = tfifo_init, + .dump = tfifo_dump, +}; + static int netem_init(struct Qdisc *sch, struct rtattr *opt) { struct netem_sched_data *q = qdisc_priv(sch); @@ -448,7 +531,7 @@ static int netem_init(struct Qdisc *sch, struct rtattr *opt) q->timer.function = netem_watchdog; q->timer.data = (unsigned long) sch; - q->qdisc = qdisc_create_dflt(sch->dev, &pfifo_qdisc_ops); + q->qdisc = qdisc_create_dflt(sch->dev, &tfifo_qdisc_ops); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); return -ENOMEM; -- cgit v1.2.3