From c9f1d0389b962521af1e2b699c8ee5e299d77b85 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:13 +0000 Subject: net_sched: fix class grafting errno codes If the parent qdisc doesn't support classes, use EOPNOTSUPP. If the parent class doesn't exist, use ENOENT. Currently EINVAL is returned in both cases. Additionally check whether grafting is supported and remove a now unnecessary graft function from sch_ingress. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 24d17ce9c294..bef2d645a366 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -728,14 +728,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } else { const struct Qdisc_class_ops *cops = parent->ops->cl_ops; - err = -EINVAL; - - if (cops) { + err = -EOPNOTSUPP; + if (cops && cops->graft) { unsigned long cl = cops->get(parent, classid); if (cl) { err = cops->graft(parent, cl, new, &old); cops->put(parent, cl); - } + } else + err = -ENOENT; } if (!err) notify_and_destroy(skb, n, classid, old, new); -- cgit v1.2.3 From de6d5cdf881353f83006d5f3e28ac4fffd42145e Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:16 +0000 Subject: net_sched: make cls_ops->change and cls_ops->delete optional Some schedulers don't support creating, changing or deleting classes. Make the respective callbacks optionally and consistently return -EOPNOTSUPP for unsupported operations, instead of currently either -EOPNOTSUPP, -ENOSYS or no error. In case of sch_prio and sch_multiq, the removed operations additionally checked for an invalid class. This is not necessary since the class argument can only orginate from ->get() or in case of ->change is 0 for creation of new classes, in which case ->change() incorrectly returned -ENOENT. As a side-effect, this patch fixes a possible (root-only) NULL pointer function call in sch_ingress, which didn't implement a so far mandatory ->delete() operation. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index bef2d645a366..166fcca86e7a 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1417,7 +1417,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) goto out; break; case RTM_DELTCLASS: - err = cops->delete(q, cl); + err = -EOPNOTSUPP; + if (cops->delete) + err = cops->delete(q, cl); if (err == 0) tclass_notify(skb, n, q, cl, RTM_DELTCLASS); goto out; @@ -1431,7 +1433,9 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } new_cl = cl; - err = cops->change(q, clid, pid, tca, &new_cl); + err = -EOPNOTSUPP; + if (cops->change) + err = cops->change(q, clid, pid, tca, &new_cl); if (err == 0) tclass_notify(skb, n, q, new_cl, RTM_NEWTCLASS); -- cgit v1.2.3 From af356afa010f3cd2c8b8fcc3bce90f7a7b7ec02a Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:18 +0000 Subject: net_sched: reintroduce dev->qdisc for use by sch_api Currently the multiqueue integration with the qdisc API suffers from a few problems: - with multiple queues, all root qdiscs use the same handle. This means they can't be exposed to userspace in a backwards compatible fashion. - all API operations always refer to queue number 0. Newly created qdiscs are automatically shared between all queues, its not possible to address individual queues or restore multiqueue behaviour once a shared qdisc has been attached. - Dumps only contain the root qdisc of queue 0, in case of non-shared qdiscs this means the statistics are incomplete. This patch reintroduces dev->qdisc, which points to the (single) root qdisc from userspace's point of view. Currently it either points to the first (non-shared) default qdisc, or a qdisc shared between all queues. The following patches will introduce a classful dummy qdisc, which will be used as root qdisc and contain the per-queue qdiscs as children. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 41 ++++++++++++++++------------------------- 1 file changed, 16 insertions(+), 25 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 166fcca86e7a..8aa9a0c5a9eb 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -207,7 +207,7 @@ static struct Qdisc *qdisc_match_from_root(struct Qdisc *root, u32 handle) static void qdisc_list_add(struct Qdisc *q) { if ((q->parent != TC_H_ROOT) && !(q->flags & TCQ_F_INGRESS)) - list_add_tail(&q->list, &qdisc_root_sleeping(q)->list); + list_add_tail(&q->list, &qdisc_dev(q)->qdisc->list); } void qdisc_list_del(struct Qdisc *q) @@ -219,17 +219,11 @@ EXPORT_SYMBOL(qdisc_list_del); struct Qdisc *qdisc_lookup(struct net_device *dev, u32 handle) { - unsigned int i; struct Qdisc *q; - for (i = 0; i < dev->num_tx_queues; i++) { - struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - struct Qdisc *txq_root = txq->qdisc_sleeping; - - q = qdisc_match_from_root(txq_root, handle); - if (q) - goto out; - } + q = qdisc_match_from_root(dev->qdisc, handle); + if (q) + goto out; q = qdisc_match_from_root(dev->rx_queue.qdisc_sleeping, handle); out: @@ -720,9 +714,14 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (new && i > 0) atomic_inc(&new->refcnt); - notify_and_destroy(skb, n, classid, old, new); + qdisc_destroy(old); } + notify_and_destroy(skb, n, classid, dev->qdisc, new); + if (new) + atomic_inc(&new->refcnt); + dev->qdisc = new ? : &noop_qdisc; + if (dev->flags & IFF_UP) dev_activate(dev); } else { @@ -974,9 +973,7 @@ static int tc_get_qdisc(struct sk_buff *skb, struct nlmsghdr *n, void *arg) q = dev->rx_queue.qdisc_sleeping; } } else { - struct netdev_queue *dev_queue; - dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; } if (!q) return -ENOENT; @@ -1044,9 +1041,7 @@ replay: q = dev->rx_queue.qdisc_sleeping; } } else { - struct netdev_queue *dev_queue; - dev_queue = netdev_get_tx_queue(dev, 0); - q = dev_queue->qdisc_sleeping; + q = dev->qdisc; } /* It may be default qdisc, ignore it */ @@ -1291,8 +1286,7 @@ static int tc_dump_qdisc(struct sk_buff *skb, struct netlink_callback *cb) s_q_idx = 0; q_idx = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_qdisc_root(dev_queue->qdisc_sleeping, skb, cb, &q_idx, s_q_idx) < 0) + if (tc_dump_qdisc_root(dev->qdisc, skb, cb, &q_idx, s_q_idx) < 0) goto done; dev_queue = &dev->rx_queue; @@ -1323,7 +1317,6 @@ done: static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); - struct netdev_queue *dev_queue; struct tcmsg *tcm = NLMSG_DATA(n); struct nlattr *tca[TCA_MAX + 1]; struct net_device *dev; @@ -1361,7 +1354,6 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - dev_queue = netdev_get_tx_queue(dev, 0); if (pid != TC_H_ROOT) { u32 qid1 = TC_H_MAJ(pid); @@ -1372,7 +1364,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) } else if (qid1) { qid = qid1; } else if (qid == 0) - qid = dev_queue->qdisc_sleeping->handle; + qid = dev->qdisc->handle; /* Now qid is genuine qdisc handle consistent both with parent and child. @@ -1383,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) pid = TC_H_MAKE(qid, pid); } else { if (qid == 0) - qid = dev_queue->qdisc_sleeping->handle; + qid = dev->qdisc->handle; } /* OK. Locate qdisc */ @@ -1588,8 +1580,7 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb) s_t = cb->args[0]; t = 0; - dev_queue = netdev_get_tx_queue(dev, 0); - if (tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb, &t, s_t) < 0) + if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0) goto done; dev_queue = &dev->rx_queue; -- cgit v1.2.3 From 589983cd21f4a2e4ed74a958805a90fa676845c5 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Fri, 4 Sep 2009 06:41:20 +0000 Subject: net_sched: move dev_graft_qdisc() to sch_generic.c It will be used in a following patch by the multiqueue qdisc. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 8aa9a0c5a9eb..d71f12be6e29 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -610,32 +610,6 @@ static u32 qdisc_alloc_handle(struct net_device *dev) return i>0 ? autohandle : 0; } -/* Attach toplevel qdisc to device queue. */ - -static struct Qdisc *dev_graft_qdisc(struct netdev_queue *dev_queue, - struct Qdisc *qdisc) -{ - struct Qdisc *oqdisc = dev_queue->qdisc_sleeping; - spinlock_t *root_lock; - - root_lock = qdisc_lock(oqdisc); - spin_lock_bh(root_lock); - - /* Prune old scheduler */ - if (oqdisc && atomic_read(&oqdisc->refcnt) <= 1) - qdisc_reset(oqdisc); - - /* ... and graft new one */ - if (qdisc == NULL) - qdisc = &noop_qdisc; - dev_queue->qdisc_sleeping = qdisc; - rcu_assign_pointer(dev_queue->qdisc, &noop_qdisc); - - spin_unlock_bh(root_lock); - - return oqdisc; -} - void qdisc_tree_decrease_qlen(struct Qdisc *sch, unsigned int n) { const struct Qdisc_class_ops *cops; -- cgit v1.2.3 From 6ec1c69a8f6492fd25722f4762721921da074c12 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 6 Sep 2009 01:58:51 -0700 Subject: net_sched: add classful multiqueue dummy scheduler This patch adds a classful dummy scheduler which can be used as root qdisc for multiqueue devices and exposes each device queue as a child class. This allows to address queues individually and graft them similar to regular classes. Additionally it presents an accumulated view of the statistics of all real root qdiscs in the dummy root. Two new callbacks are added to the qdisc_ops and qdisc_class_ops: - cl_ops->select_queue selects the tx queue number for new child classes. - qdisc_ops->attach() overrides root qdisc device grafting to attach non-shared qdiscs to the queues. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index d71f12be6e29..2a78d5410154 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -678,6 +678,11 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, if (dev->flags & IFF_UP) dev_deactivate(dev); + if (new && new->ops->attach) { + new->ops->attach(new); + num_q = 0; + } + for (i = 0; i < num_q; i++) { struct netdev_queue *dev_queue = &dev->rx_queue; @@ -692,7 +697,7 @@ static int qdisc_graft(struct net_device *dev, struct Qdisc *parent, } notify_and_destroy(skb, n, classid, dev->qdisc, new); - if (new) + if (new && !new->ops->attach) atomic_inc(&new->refcnt); dev->qdisc = new ? : &noop_qdisc; @@ -1095,10 +1100,16 @@ create_n_graft: q = qdisc_create(dev, &dev->rx_queue, tcm->tcm_parent, tcm->tcm_parent, tca, &err); - else - q = qdisc_create(dev, netdev_get_tx_queue(dev, 0), + else { + unsigned int ntx = 0; + + if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) + ntx = p->ops->cl_ops->select_queue(p, tcm); + + q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), tcm->tcm_parent, tcm->tcm_handle, tca, &err); + } if (q == NULL) { if (err == -EAGAIN) goto replay; @@ -1674,6 +1685,7 @@ static int __init pktsched_init(void) { register_qdisc(&pfifo_qdisc_ops); register_qdisc(&bfifo_qdisc_ops); + register_qdisc(&mq_qdisc_ops); proc_net_fops_create(&init_net, "psched", 0, &psched_fops); rtnl_register(PF_UNSPEC, RTM_NEWQDISC, tc_modify_qdisc, NULL); -- cgit v1.2.3 From 23bcf634c8bc0d84607a5b863333191d58baee4c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 9 Sep 2009 18:11:23 -0700 Subject: net_sched: fix estimator lock selection for mq child qdiscs When new child qdiscs are attached to the mq qdisc, they are actually attached as root qdiscs to the device queues. The lock selection for new estimators incorrectly picks the root lock of the existing and to be replaced qdisc, which results in a use-after-free once the old qdisc has been destroyed. Mark mq qdisc instances with a new flag and treat qdiscs attached to mq as children similar to regular root qdiscs. Additionally prevent estimators from being attached to the mq qdisc itself since it only updates its byte and packet counters during dumps. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- net/sched/sch_api.c | 42 ++++++++++++++++++++++++++---------------- 1 file changed, 26 insertions(+), 16 deletions(-) (limited to 'net/sched/sch_api.c') diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 2a78d5410154..3af106140f35 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -733,7 +733,8 @@ static struct lock_class_key qdisc_rx_lock; static struct Qdisc * qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, - u32 parent, u32 handle, struct nlattr **tca, int *errp) + struct Qdisc *p, u32 parent, u32 handle, + struct nlattr **tca, int *errp) { int err; struct nlattr *kind = tca[TCA_KIND]; @@ -810,24 +811,21 @@ qdisc_create(struct net_device *dev, struct netdev_queue *dev_queue, if (tca[TCA_RATE]) { spinlock_t *root_lock; + err = -EOPNOTSUPP; + if (sch->flags & TCQ_F_MQROOT) + goto err_out4; + if ((sch->parent != TC_H_ROOT) && - !(sch->flags & TCQ_F_INGRESS)) + !(sch->flags & TCQ_F_INGRESS) && + (!p || !(p->flags & TCQ_F_MQROOT))) root_lock = qdisc_root_sleeping_lock(sch); else root_lock = qdisc_lock(sch); err = gen_new_estimator(&sch->bstats, &sch->rate_est, root_lock, tca[TCA_RATE]); - if (err) { - /* - * Any broken qdiscs that would require - * a ops->reset() here? The qdisc was never - * in action so it shouldn't be necessary. - */ - if (ops->destroy) - ops->destroy(sch); - goto err_out3; - } + if (err) + goto err_out4; } qdisc_list_add(sch); @@ -843,6 +841,15 @@ err_out2: err_out: *errp = err; return NULL; + +err_out4: + /* + * Any broken qdiscs that would require a ops->reset() here? + * The qdisc was never in action so it shouldn't be necessary. + */ + if (ops->destroy) + ops->destroy(sch); + goto err_out3; } static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) @@ -867,13 +874,16 @@ static int qdisc_change(struct Qdisc *sch, struct nlattr **tca) qdisc_put_stab(sch->stab); sch->stab = stab; - if (tca[TCA_RATE]) + if (tca[TCA_RATE]) { /* NB: ignores errors from replace_estimator because change can't be undone. */ + if (sch->flags & TCQ_F_MQROOT) + goto out; gen_replace_estimator(&sch->bstats, &sch->rate_est, qdisc_root_sleeping_lock(sch), tca[TCA_RATE]); - + } +out: return 0; } @@ -1097,7 +1107,7 @@ create_n_graft: if (!(n->nlmsg_flags&NLM_F_CREATE)) return -ENOENT; if (clid == TC_H_INGRESS) - q = qdisc_create(dev, &dev->rx_queue, + q = qdisc_create(dev, &dev->rx_queue, p, tcm->tcm_parent, tcm->tcm_parent, tca, &err); else { @@ -1106,7 +1116,7 @@ create_n_graft: if (p && p->ops->cl_ops && p->ops->cl_ops->select_queue) ntx = p->ops->cl_ops->select_queue(p, tcm); - q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), + q = qdisc_create(dev, netdev_get_tx_queue(dev, ntx), p, tcm->tcm_parent, tcm->tcm_handle, tca, &err); } -- cgit v1.2.3