From 6ecf23afab13c39d3bb0e2d826d0984b0dd53733 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:14:59 -0800 Subject: block: extend queue bypassing to cover blkcg policies Extend queue bypassing such that dying queue is always bypassing and blk-throttle is drained on bypass. With blkcg policies updated to test blk_queue_bypass() instead of blk_queue_dead(), this ensures that no bio or request is held by or going through blkcg policies on a bypassing queue. This will be used to implement blkg cleanup on elevator switches and policy changes. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 5eed6a76721..702c0e64e09 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -310,7 +310,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) struct request_queue *q = td->queue; /* no throttling for dead queue */ - if (unlikely(blk_queue_dead(q))) + if (unlikely(blk_queue_bypass(q))) return NULL; rcu_read_lock(); @@ -335,7 +335,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) spin_lock_irq(q->queue_lock); /* Make sure @q is still alive */ - if (unlikely(blk_queue_dead(q))) { + if (unlikely(blk_queue_bypass(q))) { kfree(tg); return NULL; } -- cgit v1.2.3 From 72e06c255181537d0b3e1f657a9ed81655d745b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:00 -0800 Subject: blkcg: shoot down blkio_groups on elevator switch Elevator switch may involve changes to blkcg policies. Implement shoot down of blkio_groups. Combined with the previous bypass updates, the end goal is updating blkcg core such that it can ensure that blkcg's being affected become quiescent and don't have any per-blkg data hanging around before commencing any policy updates. Until queues are made aware of the policies that applies to them, as an interim step, all per-policy blkg data will be shot down. * blk-throtl doesn't need this change as it can't be disabled for a live queue; however, update it anyway as the scheduled blkg unification requires this behavior change. This means that blk-throtl configuration will be unnecessarily lost over elevator switch. This oddity will be removed after blkcg learns to associate individual policies with request_queues. * blk-throtl dosen't shoot down root_tg. This is to ease transition. Unified blkg will always have persistent root group and not shooting down root_tg for now eases transition to that point by avoiding having to update td->root_tg and is safe as blk-throtl can never be disabled -v2: Vivek pointed out that group list is not guaranteed to be empty on return from clear function if it raced cgroup removal and lost. Fix it by waiting a bit and retrying. This kludge will soon be removed once locking is updated such that blkg is never in limbo state between blkcg and request_queue locks. blk-throtl no longer shoots down root_tg to avoid breaking td->root_tg. Also, Nest queue_lock inside blkio_list_lock not the other way around to avoid introduce possible deadlock via blkcg lock. -v3: blkcg_clear_queue() repositioned and renamed to blkg_destroy_all() to increase consistency with later changes. cfq_clear_queue() updated to check q->elevator before dereferencing it to avoid NULL dereference on not fully initialized queues (used by later change). Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 702c0e64e09..3699ab40d49 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -989,12 +989,17 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) td->nr_undestroyed_grps--; } -static void throtl_release_tgs(struct throtl_data *td) +static bool throtl_release_tgs(struct throtl_data *td, bool release_root) { struct hlist_node *pos, *n; struct throtl_grp *tg; + bool empty = true; hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + /* skip root? */ + if (!release_root && tg == td->root_tg) + continue; + /* * If cgroup removal path got to blk_group first and removed * it from cgroup list, then it will take care of destroying @@ -1002,7 +1007,10 @@ static void throtl_release_tgs(struct throtl_data *td) */ if (!blkiocg_del_blkio_group(&tg->blkg)) throtl_destroy_tg(td, tg); + else + empty = false; } + return empty; } /* @@ -1029,6 +1037,20 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) spin_unlock_irqrestore(td->queue->queue_lock, flags); } +static bool throtl_clear_queue(struct request_queue *q) +{ + lockdep_assert_held(q->queue_lock); + + /* + * Clear tgs but leave the root one alone. This is necessary + * because root_tg is expected to be persistent and safe because + * blk-throtl can never be disabled while @q is alive. This is a + * kludge to prepare for unified blkg. This whole function will be + * removed soon. + */ + return throtl_release_tgs(q->td, false); +} + static void throtl_update_blkio_group_common(struct throtl_data *td, struct throtl_grp *tg) { @@ -1097,6 +1119,7 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_unlink_group_fn = throtl_unlink_blkio_group, + .blkio_clear_queue_fn = throtl_clear_queue, .blkio_update_group_read_bps_fn = throtl_update_blkio_group_read_bps, .blkio_update_group_write_bps_fn = @@ -1282,7 +1305,7 @@ void blk_throtl_exit(struct request_queue *q) throtl_shutdown_wq(q); spin_lock_irq(q->queue_lock); - throtl_release_tgs(td); + throtl_release_tgs(td, true); /* If there are other groups */ if (td->nr_undestroyed_grps > 0) -- cgit v1.2.3 From 2a7f124414b35645049e9c1b125a6f0b470aa5ae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:01 -0800 Subject: blkcg: move rcu_read_lock() outside of blkio_group get functions rcu_read_lock() in throtl_get_tb() and cfq_get_cfqg() holds onto @blkcg while looking up blkg. For API cleanup, the next patch will make the caller responsible for determining @blkcg to look blkg from and let them specify it as a parameter. Move rcu read locking out to the callers to prepare for the change. -v2: Originally this patch was described as a fix for RCU read locking bug around @blkg, which Vivek pointed out to be incorrect. It was from misunderstanding the role of rcu locking as protecting @blkg not @blkcg. Patch description updated. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 3699ab40d49..9beaac7fb39 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -313,25 +313,23 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) if (unlikely(blk_queue_bypass(q))) return NULL; - rcu_read_lock(); blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); - if (tg) { - rcu_read_unlock(); + if (tg) return tg; - } /* * Need to allocate a group. Allocation of group also needs allocation * of per cpu stats which in-turn takes a mutex() and can block. Hence * we need to drop rcu lock and queue_lock before we call alloc. */ - rcu_read_unlock(); spin_unlock_irq(q->queue_lock); + rcu_read_unlock(); tg = throtl_alloc_tg(td); /* Group allocated and queue is still alive. take the lock */ + rcu_read_lock(); spin_lock_irq(q->queue_lock); /* Make sure @q is still alive */ @@ -343,7 +341,6 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) /* * Initialize the new group. After sleeping, read the blkcg again. */ - rcu_read_lock(); blkcg = task_blkio_cgroup(current); /* @@ -354,7 +351,6 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) if (__tg) { kfree(tg); - rcu_read_unlock(); return __tg; } @@ -365,7 +361,6 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) } throtl_init_add_tg_lists(td, tg, blkcg); - rcu_read_unlock(); return tg; } @@ -1150,7 +1145,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * basic fields like stats and io rates. If a group has no rules, * just update the dispatch stats in lockless manner and return. */ - rcu_read_lock(); blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); @@ -1160,11 +1154,9 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); - rcu_read_unlock(); - goto out; + goto out_unlock_rcu; } } - rcu_read_unlock(); /* * Either group has not been allocated yet or it is not an unlimited @@ -1222,6 +1214,8 @@ queue_bio: out_unlock: spin_unlock_irq(q->queue_lock); +out_unlock_rcu: + rcu_read_unlock(); out: return throttled; } -- cgit v1.2.3 From 0a5a7d0e32be6643b881f0e7cd9d0d06fadde27a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:02 -0800 Subject: blkcg: update blkg get functions take blkio_cgroup as parameter In both blkg get functions - throtl_get_tg() and cfq_get_cfqg(), instead of obtaining blkcg of %current explicitly, let the caller specify the blkcg to use as parameter and make both functions hold on to the blkcg. This is part of block cgroup interface cleanup and will help making blkcg API more modular. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 9beaac7fb39..c252df9169d 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -303,21 +303,23 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) return tg; } -static struct throtl_grp * throtl_get_tg(struct throtl_data *td) +static struct throtl_grp *throtl_get_tg(struct throtl_data *td, + struct blkio_cgroup *blkcg) { struct throtl_grp *tg = NULL, *__tg = NULL; - struct blkio_cgroup *blkcg; struct request_queue *q = td->queue; /* no throttling for dead queue */ if (unlikely(blk_queue_bypass(q))) return NULL; - blkcg = task_blkio_cgroup(current); tg = throtl_find_tg(td, blkcg); if (tg) return tg; + if (!css_tryget(&blkcg->css)) + return NULL; + /* * Need to allocate a group. Allocation of group also needs allocation * of per cpu stats which in-turn takes a mutex() and can block. Hence @@ -331,6 +333,7 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) /* Group allocated and queue is still alive. take the lock */ rcu_read_lock(); spin_lock_irq(q->queue_lock); + css_put(&blkcg->css); /* Make sure @q is still alive */ if (unlikely(blk_queue_bypass(q))) { @@ -338,11 +341,6 @@ static struct throtl_grp * throtl_get_tg(struct throtl_data *td) return NULL; } - /* - * Initialize the new group. After sleeping, read the blkcg again. - */ - blkcg = task_blkio_cgroup(current); - /* * If some other thread already allocated the group while we were * not holding queue lock, free up the group @@ -1163,7 +1161,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * IO group */ spin_lock_irq(q->queue_lock); - tg = throtl_get_tg(td); + tg = throtl_get_tg(td, blkcg); if (unlikely(!tg)) goto out_unlock; -- cgit v1.2.3 From ca32aefc7f2539ed88d42763330d54ee3e61769a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:03 -0800 Subject: blkcg: use q and plid instead of opaque void * for blkio_group association blkgio_group is association between a block cgroup and a queue for a given policy. Using opaque void * for association makes things confusing and hinders factoring of common code. Use request_queue * and, if necessary, policy id instead. This will help block cgroup API cleanup. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 50 +++++++++++++++++++++++--------------------------- 1 file changed, 23 insertions(+), 27 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c252df9169d..6613de78e36 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -252,7 +252,7 @@ static void throtl_init_add_tg_lists(struct throtl_data *td, __throtl_tg_fill_dev_details(td, tg); /* Add group onto cgroup list */ - blkiocg_add_blkio_group(blkcg, &tg->blkg, (void *)td, + blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue, tg->blkg.dev, BLKIO_POLICY_THROTL); tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); @@ -288,7 +288,6 @@ static struct throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { struct throtl_grp *tg = NULL; - void *key = td; /* * This is the common case when there are no blkio cgroups. @@ -297,7 +296,8 @@ throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) tg = td->root_tg; else - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); + tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, + BLKIO_POLICY_THROTL)); __throtl_tg_fill_dev_details(td, tg); return tg; @@ -1012,22 +1012,22 @@ static bool throtl_release_tgs(struct throtl_data *td, bool release_root) * no new IO will come in this group. So get rid of this group as soon as * any pending IO in the group is finished. * - * This function is called under rcu_read_lock(). key is the rcu protected - * pointer. That means "key" is a valid throtl_data pointer as long as we are - * rcu read lock. + * This function is called under rcu_read_lock(). @q is the rcu protected + * pointer. That means @q is a valid request_queue pointer as long as we + * are rcu read lock. * - * "key" was fetched from blkio_group under blkio_cgroup->lock. That means + * @q was fetched from blkio_group under blkio_cgroup->lock. That means * it should not be NULL as even if queue was going away, cgroup deltion * path got to it first. */ -void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) +void throtl_unlink_blkio_group(struct request_queue *q, + struct blkio_group *blkg) { unsigned long flags; - struct throtl_data *td = key; - spin_lock_irqsave(td->queue->queue_lock, flags); - throtl_destroy_tg(td, tg_of_blkg(blkg)); - spin_unlock_irqrestore(td->queue->queue_lock, flags); + spin_lock_irqsave(q->queue_lock, flags); + throtl_destroy_tg(q->td, tg_of_blkg(blkg)); + spin_unlock_irqrestore(q->queue_lock, flags); } static bool throtl_clear_queue(struct request_queue *q) @@ -1054,52 +1054,48 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, } /* - * For all update functions, key should be a valid pointer because these + * For all update functions, @q should be a valid pointer because these * update functions are called under blkcg_lock, that means, blkg is - * valid and in turn key is valid. queue exit path can not race because + * valid and in turn @q is valid. queue exit path can not race because * of blkcg_lock * * Can not take queue lock in update functions as queue lock under blkcg_lock * is not allowed. Under other paths we take blkcg_lock under queue_lock. */ -static void throtl_update_blkio_group_read_bps(void *key, +static void throtl_update_blkio_group_read_bps(struct request_queue *q, struct blkio_group *blkg, u64 read_bps) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->bps[READ] = read_bps; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } -static void throtl_update_blkio_group_write_bps(void *key, +static void throtl_update_blkio_group_write_bps(struct request_queue *q, struct blkio_group *blkg, u64 write_bps) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->bps[WRITE] = write_bps; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } -static void throtl_update_blkio_group_read_iops(void *key, +static void throtl_update_blkio_group_read_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int read_iops) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->iops[READ] = read_iops; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } -static void throtl_update_blkio_group_write_iops(void *key, +static void throtl_update_blkio_group_write_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops) { - struct throtl_data *td = key; struct throtl_grp *tg = tg_of_blkg(blkg); tg->iops[WRITE] = write_iops; - throtl_update_blkio_group_common(td, tg); + throtl_update_blkio_group_common(q->td, tg); } static void throtl_shutdown_wq(struct request_queue *q) @@ -1306,7 +1302,7 @@ void blk_throtl_exit(struct request_queue *q) spin_unlock_irq(q->queue_lock); /* - * Wait for tg->blkg->key accessors to exit their grace periods. + * Wait for tg->blkg->q accessors to exit their grace periods. * Do this wait only if there are other undestroyed groups out * there (other than root group). This can happen if cgroup deletion * path claimed the responsibility of cleaning up a group before -- cgit v1.2.3 From f51b802c17e2a21926b29911493f5e7ddf6eee87 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:05 -0800 Subject: blkcg: use the usual get blkg path for root blkio_group For root blkg, blk_throtl_init() was using throtl_alloc_tg() explicitly and cfq_init_queue() was manually initializing embedded cfqd->root_group, adding unnecessarily different code paths to blkg handling. Make both use the usual blkio_group get functions - throtl_get_tg() and cfq_get_cfqg() - for the root blkio_group too. Note that blk_throtl_init() callsite is pushed downwards in blk_alloc_queue_node() so that @q is sufficiently initialized for throtl_get_tg(). This simplifies root blkg handling noticeably for cfq and will allow further modularization of blkcg API. -v2: Vivek pointed out that using cfq_get_cfqg() won't work if CONFIG_CFQ_GROUP_IOSCHED is disabled. Fix it by factoring out initialization of base part of cfqg into cfq_init_cfqg_base() and alloc/init/free explicitly if !CONFIG_CFQ_GROUP_IOSCHED. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 6613de78e36..aeeb798d1cd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1252,7 +1252,6 @@ void blk_throtl_drain(struct request_queue *q) int blk_throtl_init(struct request_queue *q) { struct throtl_data *td; - struct throtl_grp *tg; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) @@ -1265,19 +1264,20 @@ int blk_throtl_init(struct request_queue *q) /* alloc and Init root group. */ td->queue = q; - tg = throtl_alloc_tg(td); - if (!tg) { - kfree(td); - return -ENOMEM; - } + rcu_read_lock(); + spin_lock_irq(q->queue_lock); - td->root_tg = tg; + td->root_tg = throtl_get_tg(td, &blkio_root_cgroup); - rcu_read_lock(); - throtl_init_add_tg_lists(td, tg, &blkio_root_cgroup); + spin_unlock_irq(q->queue_lock); rcu_read_unlock(); + if (!td->root_tg) { + kfree(td); + return -ENOMEM; + } + /* Attach throtl data to request queue */ q->td = td; return 0; -- cgit v1.2.3 From cd1604fab4f95f7cfc227d3955fd7ae14da61f38 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:06 -0800 Subject: blkcg: factor out blkio_group creation Currently both blk-throttle and cfq-iosched implement their own blkio_group creation code in throtl_get_tg() and cfq_get_cfqg(). This patch factors out the common code into blkg_lookup_create(), which returns ERR_PTR value so that transitional failures due to queue bypass can be distinguished from other failures. * New plkio_policy_ops methods blkio_alloc_group_fn() and blkio_link_group_fn added. Both are transitional and will be removed once the blkg management code is fully moved into blk-cgroup.c. * blkio_alloc_group_fn() allocates policy-specific blkg which is usually a larger data structure with blkg as the first entry and intiailizes it. Note that initialization of blkg proper, including percpu stats, is responsibility of blk-cgroup proper. Note that default config (weight, bps...) initialization is done from this method; otherwise, we end up violating locking order between blkcg and q locks via blkcg_get_CONF() functions. * blkio_link_group_fn() is called under queue_lock and responsible for linking the blkg to the queue. blkcg side is handled by blk-cgroup proper. * The common blkg creation function is named blkg_lookup_create() and blkiocg_lookup_group() is renamed to blkg_lookup() for consistency. Also, throtl / cfq related functions are similarly [re]named for consistency. This simplifies blkcg policy implementations and enables further cleanup. -v2: Vivek noticed that blkg_lookup_create() incorrectly tested blk_queue_dead() instead of blk_queue_bypass() leading a user of the function ending up creating a new blkg on bypassing queue. This is a bug introduced while relocating bypass patches before this one. Fixed. -v3: ERR_PTR patch folded into this one. @for_root added to blkg_lookup_create() to allow creating root group on a bypassed queue during elevator switch. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 155 +++++++++++++++++---------------------------------- 1 file changed, 52 insertions(+), 103 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index aeeb798d1cd..2ae637b9e80 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -181,17 +181,25 @@ static void throtl_put_tg(struct throtl_grp *tg) call_rcu(&tg->rcu_head, throtl_free_tg); } -static void throtl_init_group(struct throtl_grp *tg) +static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, + struct blkio_cgroup *blkcg) { + struct throtl_grp *tg; + + tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node); + if (!tg) + return NULL; + INIT_HLIST_NODE(&tg->tg_node); RB_CLEAR_NODE(&tg->rb_node); bio_list_init(&tg->bio_lists[0]); bio_list_init(&tg->bio_lists[1]); tg->limits_changed = false; - /* Practically unlimited BW */ - tg->bps[0] = tg->bps[1] = -1; - tg->iops[0] = tg->iops[1] = -1; + tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); + tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); + tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); + tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); /* * Take the initial reference that will be released on destroy @@ -200,14 +208,8 @@ static void throtl_init_group(struct throtl_grp *tg) * exit or cgroup deletion path depending on who is exiting first. */ atomic_set(&tg->ref, 1); -} -/* Should be called with rcu read lock held (needed for blkcg) */ -static void -throtl_add_group_to_td_list(struct throtl_data *td, struct throtl_grp *tg) -{ - hlist_add_head(&tg->tg_node, &td->tg_list); - td->nr_undestroyed_grps++; + return &tg->blkg; } static void @@ -246,119 +248,62 @@ throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg) spin_unlock_irq(td->queue->queue_lock); } -static void throtl_init_add_tg_lists(struct throtl_data *td, - struct throtl_grp *tg, struct blkio_cgroup *blkcg) +static void throtl_link_blkio_group(struct request_queue *q, + struct blkio_group *blkg) { - __throtl_tg_fill_dev_details(td, tg); - - /* Add group onto cgroup list */ - blkiocg_add_blkio_group(blkcg, &tg->blkg, td->queue, - tg->blkg.dev, BLKIO_POLICY_THROTL); - - tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); - tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); - tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); - tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); - - throtl_add_group_to_td_list(td, tg); -} - -/* Should be called without queue lock and outside of rcu period */ -static struct throtl_grp *throtl_alloc_tg(struct throtl_data *td) -{ - struct throtl_grp *tg = NULL; - int ret; - - tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, td->queue->node); - if (!tg) - return NULL; - - ret = blkio_alloc_blkg_stats(&tg->blkg); + struct throtl_data *td = q->td; + struct throtl_grp *tg = tg_of_blkg(blkg); - if (ret) { - kfree(tg); - return NULL; - } + __throtl_tg_fill_dev_details(td, tg); - throtl_init_group(tg); - return tg; + hlist_add_head(&tg->tg_node, &td->tg_list); + td->nr_undestroyed_grps++; } static struct -throtl_grp *throtl_find_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) +throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { struct throtl_grp *tg = NULL; /* * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case - */ + * Avoid lookup in this case + */ if (blkcg == &blkio_root_cgroup) tg = td->root_tg; else - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, td->queue, - BLKIO_POLICY_THROTL)); + tg = tg_of_blkg(blkg_lookup(blkcg, td->queue, + BLKIO_POLICY_THROTL)); __throtl_tg_fill_dev_details(td, tg); return tg; } -static struct throtl_grp *throtl_get_tg(struct throtl_data *td, - struct blkio_cgroup *blkcg) +static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, + struct blkio_cgroup *blkcg) { - struct throtl_grp *tg = NULL, *__tg = NULL; struct request_queue *q = td->queue; - - /* no throttling for dead queue */ - if (unlikely(blk_queue_bypass(q))) - return NULL; - - tg = throtl_find_tg(td, blkcg); - if (tg) - return tg; - - if (!css_tryget(&blkcg->css)) - return NULL; - - /* - * Need to allocate a group. Allocation of group also needs allocation - * of per cpu stats which in-turn takes a mutex() and can block. Hence - * we need to drop rcu lock and queue_lock before we call alloc. - */ - spin_unlock_irq(q->queue_lock); - rcu_read_unlock(); - - tg = throtl_alloc_tg(td); - - /* Group allocated and queue is still alive. take the lock */ - rcu_read_lock(); - spin_lock_irq(q->queue_lock); - css_put(&blkcg->css); - - /* Make sure @q is still alive */ - if (unlikely(blk_queue_bypass(q))) { - kfree(tg); - return NULL; - } + struct throtl_grp *tg = NULL; /* - * If some other thread already allocated the group while we were - * not holding queue lock, free up the group + * This is the common case when there are no blkio cgroups. + * Avoid lookup in this case */ - __tg = throtl_find_tg(td, blkcg); + if (blkcg == &blkio_root_cgroup) { + tg = td->root_tg; + } else { + struct blkio_group *blkg; - if (__tg) { - kfree(tg); - return __tg; - } + blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false); - /* Group allocation failed. Account the IO to root group */ - if (!tg) { - tg = td->root_tg; - return tg; + /* if %NULL and @q is alive, fall back to root_tg */ + if (!IS_ERR(blkg)) + tg = tg_of_blkg(blkg); + else if (!blk_queue_dead(q)) + tg = td->root_tg; } - throtl_init_add_tg_lists(td, tg, blkcg); + __throtl_tg_fill_dev_details(td, tg); return tg; } @@ -1107,6 +1052,8 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { + .blkio_alloc_group_fn = throtl_alloc_blkio_group, + .blkio_link_group_fn = throtl_link_blkio_group, .blkio_unlink_group_fn = throtl_unlink_blkio_group, .blkio_clear_queue_fn = throtl_clear_queue, .blkio_update_group_read_bps_fn = @@ -1141,7 +1088,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) */ rcu_read_lock(); blkcg = task_blkio_cgroup(current); - tg = throtl_find_tg(td, blkcg); + tg = throtl_lookup_tg(td, blkcg); if (tg) { throtl_tg_fill_dev_details(td, tg); @@ -1157,7 +1104,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * IO group */ spin_lock_irq(q->queue_lock); - tg = throtl_get_tg(td, blkcg); + tg = throtl_lookup_create_tg(td, blkcg); if (unlikely(!tg)) goto out_unlock; @@ -1252,6 +1199,7 @@ void blk_throtl_drain(struct request_queue *q) int blk_throtl_init(struct request_queue *q) { struct throtl_data *td; + struct blkio_group *blkg; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) @@ -1262,13 +1210,17 @@ int blk_throtl_init(struct request_queue *q) td->limits_changed = false; INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); - /* alloc and Init root group. */ + q->td = td; td->queue = q; + /* alloc and init root group. */ rcu_read_lock(); spin_lock_irq(q->queue_lock); - td->root_tg = throtl_get_tg(td, &blkio_root_cgroup); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, + true); + if (!IS_ERR(blkg)) + td->root_tg = tg_of_blkg(blkg); spin_unlock_irq(q->queue_lock); rcu_read_unlock(); @@ -1277,9 +1229,6 @@ int blk_throtl_init(struct request_queue *q) kfree(td); return -ENOMEM; } - - /* Attach throtl data to request queue */ - q->td = td; return 0; } -- cgit v1.2.3 From e56da7e287967667474a58c4f60c286279e3f487 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:07 -0800 Subject: blkcg: don't allow or retain configuration of missing devices blkcg is very peculiar in that it allows setting and remembering configurations for non-existent devices by maintaining separate data structures for configuration. This behavior is completely out of the usual norms and outright confusing; furthermore, it uses dev_t number to match the configuration to devices, which is unpredictable to begin with and becomes completely unuseable if EXT_DEVT is fully used. It is wholely unnecessary - we already have fully functional userland mechanism to program devices being hotplugged which has full access to device identification, connection topology and filesystem information. Add a new struct blkio_group_conf which contains all blkcg configurations to blkio_group and let blkio_group, which can be created iff the associated device exists and is removed when the associated device goes away, carry all configurations. Note that, after this patch, all newly created blkg's will always have the default configuration (unlimited for throttling and blkcg's weight for propio). This patch makes blkio_policy_node meaningless but doesn't remove it. The next patch will. -v2: Updated to retry after short sleep if blkg lookup/creation failed due to the queue being temporarily bypassed as indicated by -EBUSY return. Pointed out by Vivek. Signed-off-by: Tejun Heo Cc: Vivek Goyal Cc: Kay Sievers Signed-off-by: Jens Axboe --- block/blk-throttle.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 2ae637b9e80..791b10719e4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -196,10 +196,10 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, bio_list_init(&tg->bio_lists[1]); tg->limits_changed = false; - tg->bps[READ] = blkcg_get_read_bps(blkcg, tg->blkg.dev); - tg->bps[WRITE] = blkcg_get_write_bps(blkcg, tg->blkg.dev); - tg->iops[READ] = blkcg_get_read_iops(blkcg, tg->blkg.dev); - tg->iops[WRITE] = blkcg_get_write_iops(blkcg, tg->blkg.dev); + tg->bps[READ] = -1; + tg->bps[WRITE] = -1; + tg->iops[READ] = -1; + tg->iops[WRITE] = -1; /* * Take the initial reference that will be released on destroy -- cgit v1.2.3 From 7a4dd281ec66224f802093962d1d903d86b09560 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:09 -0800 Subject: blkcg: kill the mind-bending blkg->dev blkg->dev is dev_t recording the device number of the block device for the associated request_queue. It is used to identify the associated block device when printing out configuration or stats. This is redundant to begin with. A blkg is an association between a cgroup and a request_queue and it of course is possible to reach request_queue from blkg and synchronization conventions are in place for safe q dereferencing, so this shouldn't be necessary from the beginning. Furthermore, it's initialized by sscanf()ing the device name of backing_dev_info. The mind boggles. Anyways, if blkg is visible under rcu lock, we *know* that the associated request_queue hasn't gone away yet and its bdi is registered and alive - blkg can't be created for request_queue which hasn't been fully initialized and it can't go away before blkg is removed. Let stat and conf read functions get device name from blkg->q->backing_dev_info.dev and pass it down to printing functions and remove blkg->dev. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 51 ++------------------------------------------------- 1 file changed, 2 insertions(+), 49 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 791b10719e4..52a429397d3 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -212,50 +212,12 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, return &tg->blkg; } -static void -__throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg) -{ - struct backing_dev_info *bdi = &td->queue->backing_dev_info; - unsigned int major, minor; - - if (!tg || tg->blkg.dev) - return; - - /* - * Fill in device details for a group which might not have been - * filled at group creation time as queue was being instantiated - * and driver had not attached a device yet - */ - if (bdi->dev && dev_name(bdi->dev)) { - sscanf(dev_name(bdi->dev), "%u:%u", &major, &minor); - tg->blkg.dev = MKDEV(major, minor); - } -} - -/* - * Should be called with without queue lock held. Here queue lock will be - * taken rarely. It will be taken only once during life time of a group - * if need be - */ -static void -throtl_tg_fill_dev_details(struct throtl_data *td, struct throtl_grp *tg) -{ - if (!tg || tg->blkg.dev) - return; - - spin_lock_irq(td->queue->queue_lock); - __throtl_tg_fill_dev_details(td, tg); - spin_unlock_irq(td->queue->queue_lock); -} - static void throtl_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { struct throtl_data *td = q->td; struct throtl_grp *tg = tg_of_blkg(blkg); - __throtl_tg_fill_dev_details(td, tg); - hlist_add_head(&tg->tg_node, &td->tg_list); td->nr_undestroyed_grps++; } @@ -263,20 +225,14 @@ static void throtl_link_blkio_group(struct request_queue *q, static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { - struct throtl_grp *tg = NULL; - /* * This is the common case when there are no blkio cgroups. * Avoid lookup in this case */ if (blkcg == &blkio_root_cgroup) - tg = td->root_tg; - else - tg = tg_of_blkg(blkg_lookup(blkcg, td->queue, - BLKIO_POLICY_THROTL)); + return td->root_tg; - __throtl_tg_fill_dev_details(td, tg); - return tg; + return tg_of_blkg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, @@ -303,7 +259,6 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, tg = td->root_tg; } - __throtl_tg_fill_dev_details(td, tg); return tg; } @@ -1090,8 +1045,6 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) blkcg = task_blkio_cgroup(current); tg = throtl_lookup_tg(td, blkcg); if (tg) { - throtl_tg_fill_dev_details(td, tg); - if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); -- cgit v1.2.3 From 7ee9c5620504906e98451dc9a1945b2b9e892cb8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:11 -0800 Subject: blkcg: let blkio_group point to blkio_cgroup directly Currently, blkg points to the associated blkcg via its css_id. This unnecessarily complicates dereferencing blkcg. Let blkg hold a reference to the associated blkcg and point directly to it and disable css_id on blkio_subsys. This change requires splitting blkiocg_destroy() into blkiocg_pre_destroy() and blkiocg_destroy() so that all blkg's can be destroyed and all the blkcg references held by them dropped during cgroup removal. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 52a429397d3..fe6a442b848 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -169,6 +169,9 @@ static void throtl_put_tg(struct throtl_grp *tg) if (!atomic_dec_and_test(&tg->ref)) return; + /* release the extra blkcg reference this blkg has been holding */ + css_put(&tg->blkg.blkcg->css); + /* * A group is freed in rcu manner. But having an rcu lock does not * mean that one can access all the fields of blkg and assume these -- cgit v1.2.3 From 5efd611351d1a847c72d74fb12ff4bd187c0cb2c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:12 -0800 Subject: blkcg: add blkcg_{init|drain|exit}_queue() Currently block core calls directly into blk-throttle for init, drain and exit. This patch adds blkcg_{init|drain|exit}_queue() which wraps the blk-throttle functions. This is to give more control and visiblity to blkcg core layer for proper layering. Further patches will add logic common to blkcg policies to the functions. While at it, collapse blk_throtl_release() into blk_throtl_exit(). There's no reason to keep them separate. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index fe6a442b848..ac6d0fe6e4e 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1226,10 +1226,7 @@ void blk_throtl_exit(struct request_queue *q) * it. */ throtl_shutdown_wq(q); -} -void blk_throtl_release(struct request_queue *q) -{ kfree(q->td); } -- cgit v1.2.3 From 0381411e4b1a52cee134eb73750e5e3cc1155d09 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:14 -0800 Subject: blkcg: let blkcg core handle policy private data allocation Currently, blkg's are embedded in private data blkcg policy private data structure and thus allocated and freed by policies. This leads to duplicate codes in policies, hinders implementing common part in blkcg core with strong semantics, and forces duplicate blkg's for the same cgroup-q association. This patch introduces struct blkg_policy_data which is a separate data structure chained from blkg. Policies specifies the amount of private data it needs in its blkio_policy_type->pdata_size and blkcg core takes care of allocating them along with blkg which can be accessed using blkg_to_pdata(). blkg can be determined from pdata using pdata_to_blkg(). blkio_alloc_group_fn() method is accordingly updated to blkio_init_group_fn(). For consistency, tg_of_blkg() and cfqg_of_blkg() are replaced with blkg_to_tg() and blkg_to_cfqg() respectively, and functions to map in the reverse direction are added. Except that policy specific data now lives in a separate data structure from blkg, this patch doesn't introduce any functional difference. This will be used to unify blkg's for different policies. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 79 ++++++++++++++++++++++++++-------------------------- 1 file changed, 40 insertions(+), 39 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index ac6d0fe6e4e..9c8a12477e1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -21,6 +21,8 @@ static int throtl_quantum = 32; /* Throttling is performed over 100ms slice and after that slice is renewed */ static unsigned long throtl_slice = HZ/10; /* 100 ms */ +static struct blkio_policy_type blkio_policy_throtl; + /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; static void throtl_schedule_delayed_work(struct throtl_data *td, @@ -52,7 +54,6 @@ struct throtl_grp { */ unsigned long disptime; - struct blkio_group blkg; atomic_t ref; unsigned int flags; @@ -108,6 +109,16 @@ struct throtl_data int limits_changed; }; +static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) +{ + return blkg_to_pdata(blkg, &blkio_policy_throtl); +} + +static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) +{ + return pdata_to_blkg(tg, &blkio_policy_throtl); +} + enum tg_state_flags { THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ }; @@ -130,19 +141,11 @@ THROTL_TG_FNS(on_rr); #define throtl_log_tg(td, tg, fmt, args...) \ blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ - blkg_path(&(tg)->blkg), ##args); \ + blkg_path(tg_to_blkg(tg)), ##args); \ #define throtl_log(td, fmt, args...) \ blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) -static inline struct throtl_grp *tg_of_blkg(struct blkio_group *blkg) -{ - if (blkg) - return container_of(blkg, struct throtl_grp, blkg); - - return NULL; -} - static inline unsigned int total_nr_queued(struct throtl_data *td) { return td->nr_queued[0] + td->nr_queued[1]; @@ -156,21 +159,24 @@ static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) static void throtl_free_tg(struct rcu_head *head) { - struct throtl_grp *tg; + struct throtl_grp *tg = container_of(head, struct throtl_grp, rcu_head); + struct blkio_group *blkg = tg_to_blkg(tg); - tg = container_of(head, struct throtl_grp, rcu_head); - free_percpu(tg->blkg.stats_cpu); - kfree(tg); + free_percpu(blkg->stats_cpu); + kfree(blkg->pd); + kfree(blkg); } static void throtl_put_tg(struct throtl_grp *tg) { + struct blkio_group *blkg = tg_to_blkg(tg); + BUG_ON(atomic_read(&tg->ref) <= 0); if (!atomic_dec_and_test(&tg->ref)) return; /* release the extra blkcg reference this blkg has been holding */ - css_put(&tg->blkg.blkcg->css); + css_put(&blkg->blkcg->css); /* * A group is freed in rcu manner. But having an rcu lock does not @@ -184,14 +190,9 @@ static void throtl_put_tg(struct throtl_grp *tg) call_rcu(&tg->rcu_head, throtl_free_tg); } -static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, - struct blkio_cgroup *blkcg) +static void throtl_init_blkio_group(struct blkio_group *blkg) { - struct throtl_grp *tg; - - tg = kzalloc_node(sizeof(*tg), GFP_ATOMIC, q->node); - if (!tg) - return NULL; + struct throtl_grp *tg = blkg_to_tg(blkg); INIT_HLIST_NODE(&tg->tg_node); RB_CLEAR_NODE(&tg->rb_node); @@ -211,15 +212,13 @@ static struct blkio_group *throtl_alloc_blkio_group(struct request_queue *q, * exit or cgroup deletion path depending on who is exiting first. */ atomic_set(&tg->ref, 1); - - return &tg->blkg; } static void throtl_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { struct throtl_data *td = q->td; - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); hlist_add_head(&tg->tg_node, &td->tg_list); td->nr_undestroyed_grps++; @@ -235,7 +234,7 @@ throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) return td->root_tg; - return tg_of_blkg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); + return blkg_to_tg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, @@ -257,7 +256,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) - tg = tg_of_blkg(blkg); + tg = blkg_to_tg(blkg); else if (!blk_queue_dead(q)) tg = td->root_tg; } @@ -639,7 +638,7 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, rw, sync); + blkiocg_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, rw, sync); } static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, @@ -901,7 +900,7 @@ static bool throtl_release_tgs(struct throtl_data *td, bool release_root) * it from cgroup list, then it will take care of destroying * cfqg also. */ - if (!blkiocg_del_blkio_group(&tg->blkg)) + if (!blkiocg_del_blkio_group(tg_to_blkg(tg))) throtl_destroy_tg(td, tg); else empty = false; @@ -929,7 +928,7 @@ void throtl_unlink_blkio_group(struct request_queue *q, unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - throtl_destroy_tg(q->td, tg_of_blkg(blkg)); + throtl_destroy_tg(q->td, blkg_to_tg(blkg)); spin_unlock_irqrestore(q->queue_lock, flags); } @@ -968,7 +967,7 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, static void throtl_update_blkio_group_read_bps(struct request_queue *q, struct blkio_group *blkg, u64 read_bps) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[READ] = read_bps; throtl_update_blkio_group_common(q->td, tg); @@ -977,7 +976,7 @@ static void throtl_update_blkio_group_read_bps(struct request_queue *q, static void throtl_update_blkio_group_write_bps(struct request_queue *q, struct blkio_group *blkg, u64 write_bps) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[WRITE] = write_bps; throtl_update_blkio_group_common(q->td, tg); @@ -986,7 +985,7 @@ static void throtl_update_blkio_group_write_bps(struct request_queue *q, static void throtl_update_blkio_group_read_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int read_iops) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[READ] = read_iops; throtl_update_blkio_group_common(q->td, tg); @@ -995,7 +994,7 @@ static void throtl_update_blkio_group_read_iops(struct request_queue *q, static void throtl_update_blkio_group_write_iops(struct request_queue *q, struct blkio_group *blkg, unsigned int write_iops) { - struct throtl_grp *tg = tg_of_blkg(blkg); + struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[WRITE] = write_iops; throtl_update_blkio_group_common(q->td, tg); @@ -1010,7 +1009,7 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { - .blkio_alloc_group_fn = throtl_alloc_blkio_group, + .blkio_init_group_fn = throtl_init_blkio_group, .blkio_link_group_fn = throtl_link_blkio_group, .blkio_unlink_group_fn = throtl_unlink_blkio_group, .blkio_clear_queue_fn = throtl_clear_queue, @@ -1024,6 +1023,7 @@ static struct blkio_policy_type blkio_policy_throtl = { throtl_update_blkio_group_write_iops, }, .plid = BLKIO_POLICY_THROTL, + .pdata_size = sizeof(struct throtl_grp), }; bool blk_throtl_bio(struct request_queue *q, struct bio *bio) @@ -1049,8 +1049,9 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { - blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, - rw, rw_is_sync(bio->bi_rw)); + blkiocg_update_dispatch_stats(tg_to_blkg(tg), + bio->bi_size, rw, + rw_is_sync(bio->bi_rw)); goto out_unlock_rcu; } } @@ -1176,7 +1177,7 @@ int blk_throtl_init(struct request_queue *q) blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, true); if (!IS_ERR(blkg)) - td->root_tg = tg_of_blkg(blkg); + td->root_tg = blkg_to_tg(blkg); spin_unlock_irq(q->queue_lock); rcu_read_unlock(); @@ -1207,7 +1208,7 @@ void blk_throtl_exit(struct request_queue *q) spin_unlock_irq(q->queue_lock); /* - * Wait for tg->blkg->q accessors to exit their grace periods. + * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods. * Do this wait only if there are other undestroyed groups out * there (other than root group). This can happen if cgroup deletion * path claimed the responsibility of cleaning up a group before -- cgit v1.2.3 From 1adaf3dde37a8b9b59ea59c5f58fed7761178383 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:15 -0800 Subject: blkcg: move refcnt to blkcg core Currently, blkcg policy implementations manage blkg refcnt duplicating mostly identical code in both policies. This patch moves refcnt to blkg and let blkcg core handle refcnt and freeing of blkgs. * cfq blkgs now also get freed via RCU. * cfq blkgs lose RB_EMPTY_ROOT() sanity check on blkg free. If necessary, we can add blkio_exit_group_fn() to resurrect this. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 58 ++++------------------------------------------------ 1 file changed, 4 insertions(+), 54 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 9c8a12477e1..153ba509446 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -54,7 +54,6 @@ struct throtl_grp { */ unsigned long disptime; - atomic_t ref; unsigned int flags; /* Two lists for READ and WRITE */ @@ -80,8 +79,6 @@ struct throtl_grp { /* Some throttle limits got updated for the group */ int limits_changed; - - struct rcu_head rcu_head; }; struct throtl_data @@ -151,45 +148,6 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) return td->nr_queued[0] + td->nr_queued[1]; } -static inline struct throtl_grp *throtl_ref_get_tg(struct throtl_grp *tg) -{ - atomic_inc(&tg->ref); - return tg; -} - -static void throtl_free_tg(struct rcu_head *head) -{ - struct throtl_grp *tg = container_of(head, struct throtl_grp, rcu_head); - struct blkio_group *blkg = tg_to_blkg(tg); - - free_percpu(blkg->stats_cpu); - kfree(blkg->pd); - kfree(blkg); -} - -static void throtl_put_tg(struct throtl_grp *tg) -{ - struct blkio_group *blkg = tg_to_blkg(tg); - - BUG_ON(atomic_read(&tg->ref) <= 0); - if (!atomic_dec_and_test(&tg->ref)) - return; - - /* release the extra blkcg reference this blkg has been holding */ - css_put(&blkg->blkcg->css); - - /* - * A group is freed in rcu manner. But having an rcu lock does not - * mean that one can access all the fields of blkg and assume these - * are valid. For example, don't try to follow throtl_data and - * request queue links. - * - * Having a reference to blkg under an rcu allows acess to only - * values local to groups like group stats and group rate limits - */ - call_rcu(&tg->rcu_head, throtl_free_tg); -} - static void throtl_init_blkio_group(struct blkio_group *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -204,14 +162,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) tg->bps[WRITE] = -1; tg->iops[READ] = -1; tg->iops[WRITE] = -1; - - /* - * Take the initial reference that will be released on destroy - * This can be thought of a joint reference by cgroup and - * request queue which will be dropped by either request queue - * exit or cgroup deletion path depending on who is exiting first. - */ - atomic_set(&tg->ref, 1); } static void throtl_link_blkio_group(struct request_queue *q, @@ -648,7 +598,7 @@ static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, bio_list_add(&tg->bio_lists[rw], bio); /* Take a bio reference on tg */ - throtl_ref_get_tg(tg); + blkg_get(tg_to_blkg(tg)); tg->nr_queued[rw]++; td->nr_queued[rw]++; throtl_enqueue_tg(td, tg); @@ -681,8 +631,8 @@ static void tg_dispatch_one_bio(struct throtl_data *td, struct throtl_grp *tg, bio = bio_list_pop(&tg->bio_lists[rw]); tg->nr_queued[rw]--; - /* Drop bio reference on tg */ - throtl_put_tg(tg); + /* Drop bio reference on blkg */ + blkg_put(tg_to_blkg(tg)); BUG_ON(td->nr_queued[rw] <= 0); td->nr_queued[rw]--; @@ -880,7 +830,7 @@ throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ - throtl_put_tg(tg); + blkg_put(tg_to_blkg(tg)); td->nr_undestroyed_grps--; } -- cgit v1.2.3 From c1768268f9424410761da57ea71107acae7b03cc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:17 -0800 Subject: blkcg: don't use blkg->plid in stat related functions blkg is scheduled to be unified for all policies and thus there won't be one-to-one mapping from blkg to policy. Update stat related functions to take explicit @pol or @plid arguments and not use blkg->plid. This is painful for now but most of specific stat interface functions will be replaced with a handful of generic helpers. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 153ba509446..b2fddaf20b9 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -588,7 +588,8 @@ static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - blkiocg_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, rw, sync); + blkiocg_update_dispatch_stats(tg_to_blkg(tg), &blkio_policy_throtl, + bio->bi_size, rw, sync); } static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, @@ -1000,6 +1001,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) if (tg) { if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(tg_to_blkg(tg), + &blkio_policy_throtl, bio->bi_size, rw, rw_is_sync(bio->bi_rw)); goto out_unlock_rcu; -- cgit v1.2.3 From 4eef3049986e8397d5003916aed8cad6567a5e02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:18 -0800 Subject: blkcg: move per-queue blkg list heads and counters to queue and blkg Currently, specific policy implementations are responsible for maintaining list and number of blkgs. This duplicates code unnecessarily, and hinders factoring common code and providing blkcg API with better defined semantics. After this patch, request_queue hosts list heads and counters and blkg has list nodes for both policies. This patch only relocates the necessary fields and the next patch will actually move management code into blkcg core. Note that request_queue->blkg_list[] and ->nr_blkgs[] are hardcoded to have 2 elements. This is to avoid include dependency and will be removed by the next patch. This patch doesn't introduce any behavior change. -v2: Now unnecessary conditional on CONFIG_BLK_CGROUP_MODULE removed as pointed out by Vivek. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 49 +++++++++++++++++++++++-------------------------- 1 file changed, 23 insertions(+), 26 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index b2fddaf20b9..c15d38307e1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -41,9 +41,6 @@ struct throtl_rb_root { #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) struct throtl_grp { - /* List of throtl groups on the request queue*/ - struct hlist_node tg_node; - /* active throtl group service_tree member */ struct rb_node rb_node; @@ -83,9 +80,6 @@ struct throtl_grp { struct throtl_data { - /* List of throtl groups */ - struct hlist_head tg_list; - /* service tree for active throtl groups */ struct throtl_rb_root tg_service_tree; @@ -152,7 +146,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); - INIT_HLIST_NODE(&tg->tg_node); RB_CLEAR_NODE(&tg->rb_node); bio_list_init(&tg->bio_lists[0]); bio_list_init(&tg->bio_lists[1]); @@ -167,11 +160,9 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) static void throtl_link_blkio_group(struct request_queue *q, struct blkio_group *blkg) { - struct throtl_data *td = q->td; - struct throtl_grp *tg = blkg_to_tg(blkg); - - hlist_add_head(&tg->tg_node, &td->tg_list); - td->nr_undestroyed_grps++; + list_add(&blkg->q_node[BLKIO_POLICY_THROTL], + &q->blkg_list[BLKIO_POLICY_THROTL]); + q->nr_blkgs[BLKIO_POLICY_THROTL]++; } static struct @@ -711,8 +702,8 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) static void throtl_process_limit_change(struct throtl_data *td) { - struct throtl_grp *tg; - struct hlist_node *pos, *n; + struct request_queue *q = td->queue; + struct blkio_group *blkg, *n; if (!td->limits_changed) return; @@ -721,7 +712,10 @@ static void throtl_process_limit_change(struct throtl_data *td) throtl_log(td, "limits changed"); - hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], + q_node[BLKIO_POLICY_THROTL]) { + struct throtl_grp *tg = blkg_to_tg(blkg); + if (!tg->limits_changed) continue; @@ -822,26 +816,31 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) static void throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) { + struct blkio_group *blkg = tg_to_blkg(tg); + /* Something wrong if we are trying to remove same group twice */ - BUG_ON(hlist_unhashed(&tg->tg_node)); + WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL])); - hlist_del_init(&tg->tg_node); + list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]); /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. */ blkg_put(tg_to_blkg(tg)); - td->nr_undestroyed_grps--; + td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--; } static bool throtl_release_tgs(struct throtl_data *td, bool release_root) { - struct hlist_node *pos, *n; - struct throtl_grp *tg; + struct request_queue *q = td->queue; + struct blkio_group *blkg, *n; bool empty = true; - hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { + list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], + q_node[BLKIO_POLICY_THROTL]) { + struct throtl_grp *tg = blkg_to_tg(blkg); + /* skip root? */ if (!release_root && tg == td->root_tg) continue; @@ -851,7 +850,7 @@ static bool throtl_release_tgs(struct throtl_data *td, bool release_root) * it from cgroup list, then it will take care of destroying * cfqg also. */ - if (!blkiocg_del_blkio_group(tg_to_blkg(tg))) + if (!blkiocg_del_blkio_group(blkg)) throtl_destroy_tg(td, tg); else empty = false; @@ -1114,7 +1113,6 @@ int blk_throtl_init(struct request_queue *q) if (!td) return -ENOMEM; - INIT_HLIST_HEAD(&td->tg_list); td->tg_service_tree = THROTL_RB_ROOT; td->limits_changed = false; INIT_DELAYED_WORK(&td->throtl_work, blk_throtl_work); @@ -1144,7 +1142,7 @@ int blk_throtl_init(struct request_queue *q) void blk_throtl_exit(struct request_queue *q) { struct throtl_data *td = q->td; - bool wait = false; + bool wait; BUG_ON(!td); @@ -1154,8 +1152,7 @@ void blk_throtl_exit(struct request_queue *q) throtl_release_tgs(td, true); /* If there are other groups */ - if (td->nr_undestroyed_grps > 0) - wait = true; + wait = q->nr_blkgs[BLKIO_POLICY_THROTL]; spin_unlock_irq(q->queue_lock); -- cgit v1.2.3 From 03aa264ac15637b6f98374270bcdf31400965505 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:19 -0800 Subject: blkcg: let blkcg core manage per-queue blkg list and counter With the previous patch to move blkg list heads and counters to request_queue and blkg, logic to manage them in both policies are almost identical and can be moved to blkcg core. This patch moves blkg link logic into blkg_lookup_create(), implements common blkg unlink code in blkg_destroy(), and updates blkg_destory_all() so that it's policy specific and can skip root group. The updated blkg_destroy_all() is now used to both clear queue for bypassing and elv switching, and release all blkgs on q exit. This patch introduces a race window where policy [de]registration may race against queue blkg clearing. This can only be a problem on cfq unload and shouldn't be a real problem in practice (and we have many other places where this race already exists). Future patches will remove these unlikely races. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 99 ++-------------------------------------------------- 1 file changed, 2 insertions(+), 97 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index c15d38307e1..132941260e5 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -157,14 +157,6 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) tg->iops[WRITE] = -1; } -static void throtl_link_blkio_group(struct request_queue *q, - struct blkio_group *blkg) -{ - list_add(&blkg->q_node[BLKIO_POLICY_THROTL], - &q->blkg_list[BLKIO_POLICY_THROTL]); - q->nr_blkgs[BLKIO_POLICY_THROTL]++; -} - static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) { @@ -813,89 +805,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -static void -throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) -{ - struct blkio_group *blkg = tg_to_blkg(tg); - - /* Something wrong if we are trying to remove same group twice */ - WARN_ON_ONCE(list_empty(&blkg->q_node[BLKIO_POLICY_THROTL])); - - list_del_init(&blkg->q_node[BLKIO_POLICY_THROTL]); - - /* - * Put the reference taken at the time of creation so that when all - * queues are gone, group can be destroyed. - */ - blkg_put(tg_to_blkg(tg)); - td->queue->nr_blkgs[BLKIO_POLICY_THROTL]--; -} - -static bool throtl_release_tgs(struct throtl_data *td, bool release_root) -{ - struct request_queue *q = td->queue; - struct blkio_group *blkg, *n; - bool empty = true; - - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], - q_node[BLKIO_POLICY_THROTL]) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - /* skip root? */ - if (!release_root && tg == td->root_tg) - continue; - - /* - * If cgroup removal path got to blk_group first and removed - * it from cgroup list, then it will take care of destroying - * cfqg also. - */ - if (!blkiocg_del_blkio_group(blkg)) - throtl_destroy_tg(td, tg); - else - empty = false; - } - return empty; -} - -/* - * Blk cgroup controller notification saying that blkio_group object is being - * delinked as associated cgroup object is going away. That also means that - * no new IO will come in this group. So get rid of this group as soon as - * any pending IO in the group is finished. - * - * This function is called under rcu_read_lock(). @q is the rcu protected - * pointer. That means @q is a valid request_queue pointer as long as we - * are rcu read lock. - * - * @q was fetched from blkio_group under blkio_cgroup->lock. That means - * it should not be NULL as even if queue was going away, cgroup deltion - * path got to it first. - */ -void throtl_unlink_blkio_group(struct request_queue *q, - struct blkio_group *blkg) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - throtl_destroy_tg(q->td, blkg_to_tg(blkg)); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static bool throtl_clear_queue(struct request_queue *q) -{ - lockdep_assert_held(q->queue_lock); - - /* - * Clear tgs but leave the root one alone. This is necessary - * because root_tg is expected to be persistent and safe because - * blk-throtl can never be disabled while @q is alive. This is a - * kludge to prepare for unified blkg. This whole function will be - * removed soon. - */ - return throtl_release_tgs(q->td, false); -} - static void throtl_update_blkio_group_common(struct throtl_data *td, struct throtl_grp *tg) { @@ -960,9 +869,6 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_init_group_fn = throtl_init_blkio_group, - .blkio_link_group_fn = throtl_link_blkio_group, - .blkio_unlink_group_fn = throtl_unlink_blkio_group, - .blkio_clear_queue_fn = throtl_clear_queue, .blkio_update_group_read_bps_fn = throtl_update_blkio_group_read_bps, .blkio_update_group_write_bps_fn = @@ -1148,12 +1054,11 @@ void blk_throtl_exit(struct request_queue *q) throtl_shutdown_wq(q); - spin_lock_irq(q->queue_lock); - throtl_release_tgs(td, true); + blkg_destroy_all(q, BLKIO_POLICY_THROTL, true); /* If there are other groups */ + spin_lock_irq(q->queue_lock); wait = q->nr_blkgs[BLKIO_POLICY_THROTL]; - spin_unlock_irq(q->queue_lock); /* -- cgit v1.2.3 From e8989fae38d9831c72b20375a206a919ca468c52 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:20 -0800 Subject: blkcg: unify blkg's for blkcg policies Currently, blkg is per cgroup-queue-policy combination. This is unnatural and leads to various convolutions in partially used duplicate fields in blkg, config / stat access, and general management of blkgs. This patch make blkg's per cgroup-queue and let them serve all policies. blkgs are now created and destroyed by blkcg core proper. This will allow further consolidation of common management logic into blkcg core and API with better defined semantics and layering. As a transitional step to untangle blkg management, elvswitch and policy [de]registration, all blkgs except the root blkg are being shot down during elvswitch and bypass. This patch adds blkg_root_update() to update root blkg in place on policy change. This is hacky and racy but should be good enough as interim step until we get locking simplified and switch over to proper in-place update for all blkgs. -v2: Root blkgs need to be updated on elvswitch too and blkg_alloc() comment wasn't updated according to the function change. Fixed. Both pointed out by Vivek. -v3: v2 updated blkg_destroy_all() to invoke update_root_blkg_pd() for all policies. This freed root pd during elvswitch before the last queue finished exiting and led to oops. Directly invoke update_root_blkg_pd() only on BLKIO_POLICY_PROP from cfq_exit_queue(). This also is closer to what will be done with proper in-place blkg update. Reported by Vivek. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 132941260e5..e35ee7aeea6 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -167,7 +167,7 @@ throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) if (blkcg == &blkio_root_cgroup) return td->root_tg; - return blkg_to_tg(blkg_lookup(blkcg, td->queue, BLKIO_POLICY_THROTL)); + return blkg_to_tg(blkg_lookup(blkcg, td->queue)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, @@ -704,8 +704,7 @@ static void throtl_process_limit_change(struct throtl_data *td) throtl_log(td, "limits changed"); - list_for_each_entry_safe(blkg, n, &q->blkg_list[BLKIO_POLICY_THROTL], - q_node[BLKIO_POLICY_THROTL]) { + list_for_each_entry_safe(blkg, n, &q->blkg_list, q_node) { struct throtl_grp *tg = blkg_to_tg(blkg); if (!tg->limits_changed) @@ -1054,11 +1053,9 @@ void blk_throtl_exit(struct request_queue *q) throtl_shutdown_wq(q); - blkg_destroy_all(q, BLKIO_POLICY_THROTL, true); - /* If there are other groups */ spin_lock_irq(q->queue_lock); - wait = q->nr_blkgs[BLKIO_POLICY_THROTL]; + wait = q->nr_blkgs; spin_unlock_irq(q->queue_lock); /* -- cgit v1.2.3 From c875f4d0250a1f070fa26087a73bdd8f54c48100 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:22 -0800 Subject: blkcg: drop unnecessary RCU locking Now that blkg additions / removals are always done under both q and blkcg locks, the only places RCU locking is necessary are blkg_lookup[_create]() for lookup w/o blkcg lock. This patch drops unncessary RCU locking replacing it with plain blkcg locking as necessary. * blkiocg_pre_destroy() already perform proper locking and don't need RCU. Dropped. * blkio_read_blkg_stats() now uses blkcg->lock instead of RCU read lock. This isn't a hot path. * Now unnecessary synchronize_rcu() from queue exit paths removed. This makes q->nr_blkgs unnecessary. Dropped. * RCU annotation on blkg->q removed. -v2: Vivek pointed out that blkg_lookup_create() still needs to be called under rcu_read_lock(). Updated. -v3: After the update, stats_lock locking in blkio_read_blkg_stats() shouldn't be using _irq variant as it otherwise ends up enabling irq while blkcg->lock is locked. Fixed. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e35ee7aeea6..bfa5168249e 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1046,39 +1046,8 @@ int blk_throtl_init(struct request_queue *q) void blk_throtl_exit(struct request_queue *q) { - struct throtl_data *td = q->td; - bool wait; - - BUG_ON(!td); - + BUG_ON(!q->td); throtl_shutdown_wq(q); - - /* If there are other groups */ - spin_lock_irq(q->queue_lock); - wait = q->nr_blkgs; - spin_unlock_irq(q->queue_lock); - - /* - * Wait for tg_to_blkg(tg)->q accessors to exit their grace periods. - * Do this wait only if there are other undestroyed groups out - * there (other than root group). This can happen if cgroup deletion - * path claimed the responsibility of cleaning up a group before - * queue cleanup code get to the group. - * - * Do not call synchronize_rcu() unconditionally as there are drivers - * which create/delete request queue hundreds of times during scan/boot - * and synchronize_rcu() can take significant time and slow down boot. - */ - if (wait) - synchronize_rcu(); - - /* - * Just being safe to make sure after previous flush if some body did - * update limits through cgroup and another work got queued, cancel - * it. - */ - throtl_shutdown_wq(q); - kfree(q->td); } -- cgit v1.2.3 From 4f85cb96d9d2fbbb7160db855a6beee1baced5e5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:28 -0800 Subject: block: make block cgroup policies follow bio task association Implement bio_blkio_cgroup() which returns the blkcg associated with the bio if exists or %current's blkcg, and use it in blk-throttle and cfq-iosched propio. This makes both cgroup policies honor task association for the bio instead of always assuming %current. As nobody is using bio_set_task() yet, this doesn't introduce any behavior change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bfa5168249e..08b7ab292a8 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -900,7 +900,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * just update the dispatch stats in lockless manner and return. */ rcu_read_lock(); - blkcg = task_blkio_cgroup(current); + blkcg = bio_blkio_cgroup(bio); tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { -- cgit v1.2.3 From 671058fb2a2aac4e70f01b316b06bc59b98bd138 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 5 Mar 2012 13:15:29 -0800 Subject: block: make blk-throttle preserve the issuing task on delayed bios Make blk-throttle call bio_associate_current() on bios being delayed such that they get issued to block layer with the original io_context. This allows stacking blk-throttle and cfq-iosched propio policies. bios will always be issued with the correct ioc and blkcg whether it gets delayed by blk-throttle or not. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 08b7ab292a8..4ba141820a2 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -894,6 +894,9 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) goto out; } + /* bio_associate_current() needs ioc, try creating */ + create_io_context(GFP_ATOMIC, q->node); + /* * A throtl_grp pointer retrieved under rcu can be used to access * basic fields like stats and io rates. If a group has no rules, @@ -958,6 +961,7 @@ queue_bio: tg->io_disp[rw], tg->iops[rw], tg->nr_queued[READ], tg->nr_queued[WRITE]); + bio_associate_current(bio); throtl_add_bio_tg(q->td, tg, bio); throttled = true; -- cgit v1.2.3 From aaec55a002a29bf940588dc03253099a4cd543bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:42 -0700 Subject: blkcg: remove unused @pol and @plid parameters @pol to blkg_to_pdata() and @plid to blkg_lookup_create() are no longer necessary. Drop them. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 4ba141820a2..1cc6c23de2c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -107,7 +107,7 @@ static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) { - return pdata_to_blkg(tg, &blkio_policy_throtl); + return pdata_to_blkg(tg); } enum tg_state_flags { @@ -185,7 +185,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, } else { struct blkio_group *blkg; - blkg = blkg_lookup_create(blkcg, q, BLKIO_POLICY_THROTL, false); + blkg = blkg_lookup_create(blkcg, q, false); /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) @@ -1033,8 +1033,7 @@ int blk_throtl_init(struct request_queue *q) rcu_read_lock(); spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(&blkio_root_cgroup, q, BLKIO_POLICY_THROTL, - true); + blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); if (!IS_ERR(blkg)) td->root_tg = blkg_to_tg(blkg); -- cgit v1.2.3 From 60c2bc2d5a12369deef395cda41638d7e6b6bf19 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:43 -0700 Subject: blkcg: move conf/stat file handling code to policies blkcg conf/stat handling is convoluted in that details which belong to specific policy implementations are all out in blkcg core and then policies hook into core layer to access and manipulate confs and stats. This sadly achieves both inflexibility (confs/stats can't be modified without messing with blkcg core) and complexity (all the call-ins and call-backs). The previous patches restructured conf and stat handling code such that they can be separated out. This patch relocates the file handling part. All conf/stat file handling code which belongs to BLKIO_POLICY_PROP is moved to cfq-iosched.c and all BKLIO_POLICY_THROTL code to blk-throtl.c. The move is verbatim except for blkio_update_group_{weight|bps|iops}() callbacks which relays conf changes to policies. The configuration settings are handled in policies themselves so the relaying isn't necessary. Conf setting functions are modified to directly call per-policy update functions and the relaying mechanism is dropped. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 163 ++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 134 insertions(+), 29 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 1cc6c23de2c..fb6f25778fb 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -804,6 +804,11 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } +/* + * Can not take queue lock in update functions as queue lock under + * blkcg_lock is not allowed. Under other paths we take blkcg_lock under + * queue_lock. + */ static void throtl_update_blkio_group_common(struct throtl_data *td, struct throtl_grp *tg) { @@ -813,51 +818,158 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, throtl_schedule_delayed_work(td, 0); } -/* - * For all update functions, @q should be a valid pointer because these - * update functions are called under blkcg_lock, that means, blkg is - * valid and in turn @q is valid. queue exit path can not race because - * of blkcg_lock - * - * Can not take queue lock in update functions as queue lock under blkcg_lock - * is not allowed. Under other paths we take blkcg_lock under queue_lock. - */ -static void throtl_update_blkio_group_read_bps(struct request_queue *q, - struct blkio_group *blkg, u64 read_bps) +static u64 blkg_prfill_conf_u64(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + u64 v = *(u64 *)((void *)&pd->conf + off); + + if (!v) + return 0; + return __blkg_prfill_u64(sf, pd, v); +} + +static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), + blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, + cft->private, false); + return 0; +} + +static void throtl_update_blkio_group_read_bps(struct blkio_group *blkg, + u64 read_bps) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[READ] = read_bps; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); } -static void throtl_update_blkio_group_write_bps(struct request_queue *q, - struct blkio_group *blkg, u64 write_bps) +static void throtl_update_blkio_group_write_bps(struct blkio_group *blkg, + u64 write_bps) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->bps[WRITE] = write_bps; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); } -static void throtl_update_blkio_group_read_iops(struct request_queue *q, - struct blkio_group *blkg, unsigned int read_iops) +static void throtl_update_blkio_group_read_iops(struct blkio_group *blkg, + u64 read_iops) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[READ] = read_iops; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); } -static void throtl_update_blkio_group_write_iops(struct request_queue *q, - struct blkio_group *blkg, unsigned int write_iops) +static void throtl_update_blkio_group_write_iops(struct blkio_group *blkg, + u64 write_iops) { struct throtl_grp *tg = blkg_to_tg(blkg); tg->iops[WRITE] = write_iops; - throtl_update_blkio_group_common(q->td, tg); + throtl_update_blkio_group_common(blkg->q->td, tg); +} + +static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, + const char *buf, + void (*update)(struct blkio_group *, u64)) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkg_policy_data *pd; + struct blkg_conf_ctx ctx; + int ret; + + ret = blkg_conf_prep(blkcg, buf, &ctx); + if (ret) + return ret; + + ret = -EINVAL; + pd = ctx.blkg->pd[BLKIO_POLICY_THROTL]; + if (pd) { + *(u64 *)((void *)&pd->conf + cft->private) = ctx.v; + update(ctx.blkg, ctx.v ?: -1); + ret = 0; + } + + blkg_conf_finish(&ctx); + return ret; } +static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_read_bps); +} + +static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_write_bps); +} + +static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_read_iops); +} + +static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft, + const char *buf) +{ + return blkcg_set_conf_u64(cgrp, cft, buf, + throtl_update_blkio_group_write_iops); +} + +static struct cftype throtl_files[] = { + { + .name = "throttle.read_bps_device", + .private = offsetof(struct blkio_group_conf, bps[READ]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_bps_r, + .max_write_len = 256, + }, + { + .name = "throttle.write_bps_device", + .private = offsetof(struct blkio_group_conf, bps[WRITE]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_bps_w, + .max_write_len = 256, + }, + { + .name = "throttle.read_iops_device", + .private = offsetof(struct blkio_group_conf, iops[READ]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_iops_r, + .max_write_len = 256, + }, + { + .name = "throttle.write_iops_device", + .private = offsetof(struct blkio_group_conf, iops[WRITE]), + .read_seq_string = blkcg_print_conf_u64, + .write_string = blkcg_set_conf_iops_w, + .max_write_len = 256, + }, + { + .name = "throttle.io_service_bytes", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, + offsetof(struct blkio_group_stats_cpu, service_bytes)), + .read_seq_string = blkcg_print_cpu_rwstat, + }, + { + .name = "throttle.io_serviced", + .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, + offsetof(struct blkio_group_stats_cpu, serviced)), + .read_seq_string = blkcg_print_cpu_rwstat, + }, + { } /* terminate */ +}; + static void throtl_shutdown_wq(struct request_queue *q) { struct throtl_data *td = q->td; @@ -868,17 +980,10 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_init_group_fn = throtl_init_blkio_group, - .blkio_update_group_read_bps_fn = - throtl_update_blkio_group_read_bps, - .blkio_update_group_write_bps_fn = - throtl_update_blkio_group_write_bps, - .blkio_update_group_read_iops_fn = - throtl_update_blkio_group_read_iops, - .blkio_update_group_write_iops_fn = - throtl_update_blkio_group_write_iops, }, .plid = BLKIO_POLICY_THROTL, .pdata_size = sizeof(struct throtl_grp), + .cftypes = throtl_files, }; bool blk_throtl_bio(struct request_queue *q, struct bio *bio) -- cgit v1.2.3 From 629ed0b10209ffc4e1d439e5508d52d5e3a090b8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move statistics update code to policies As with conf/stats file handling code, there's no reason for stat update code to live in blkcg core with policies calling into update them. The current organization is both inflexible and complex. This patch moves stat update code to specific policies. All blkiocg_update_*_stats() functions which deal with BLKIO_POLICY_PROP stats are collapsed into their cfq_blkiocg_update_*_stats() counterparts. blkiocg_update_dispatch_stats() is used by both policies and duplicated as throtl_update_dispatch_stats() and cfq_blkiocg_update_dispatch_stats(). This will be cleaned up later. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index fb6f25778fb..5d647edc02a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -562,17 +562,42 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, return 0; } +static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, + int rw) +{ + struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_THROTL]; + struct blkio_group_stats_cpu *stats_cpu; + unsigned long flags; + + /* If per cpu stats are not allocated yet, don't do any accounting. */ + if (pd->stats_cpu == NULL) + return; + + /* + * Disabling interrupts to provide mutual exclusion between two + * writes on same cpu. It probably is not needed for 64bit. Not + * optimizing that case yet. + */ + local_irq_save(flags); + + stats_cpu = this_cpu_ptr(pd->stats_cpu); + + blkg_stat_add(&stats_cpu->sectors, bytes >> 9); + blkg_rwstat_add(&stats_cpu->serviced, rw, 1); + blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); + + local_irq_restore(flags); +} + static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - bool sync = rw_is_sync(bio->bi_rw); /* Charge the bio to the group */ tg->bytes_disp[rw] += bio->bi_size; tg->io_disp[rw]++; - blkiocg_update_dispatch_stats(tg_to_blkg(tg), &blkio_policy_throtl, - bio->bi_size, rw, sync); + throtl_update_dispatch_stats(tg_to_blkg(tg), bio->bi_size, bio->bi_rw); } static void throtl_add_bio_tg(struct throtl_data *td, struct throtl_grp *tg, @@ -1012,10 +1037,8 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { - blkiocg_update_dispatch_stats(tg_to_blkg(tg), - &blkio_policy_throtl, - bio->bi_size, rw, - rw_is_sync(bio->bi_rw)); + throtl_update_dispatch_stats(tg_to_blkg(tg), + bio->bi_size, bio->bi_rw); goto out_unlock_rcu; } } -- cgit v1.2.3 From 41b38b6d540f951c49315d8573e6f6195a6e736d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: cfq doesn't need per-cpu dispatch stats blkio_group_stats_cpu is used to count dispatch stats using per-cpu counters. This is used by both blk-throtl and cfq-iosched but the sharing is rather silly. * cfq-iosched doesn't need per-cpu dispatch stats. cfq always updates those stats while holding queue_lock. * blk-throtl needs per-cpu dispatch stats but only service_bytes and serviced. It doesn't make use of sectors. This patch makes cfq add and use global stats for service_bytes, serviced and sectors, removes per-cpu sectors counter and moves per-cpu stat printing code to blk-throttle.c. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 5d647edc02a..cb259bc46f4 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -582,7 +582,6 @@ static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, stats_cpu = this_cpu_ptr(pd->stats_cpu); - blkg_stat_add(&stats_cpu->sectors, bytes >> 9); blkg_rwstat_add(&stats_cpu->serviced, rw, 1); blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); @@ -843,6 +842,36 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, throtl_schedule_delayed_work(td, 0); } +static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) +{ + struct blkg_rwstat rwstat = { }, tmp; + int i, cpu; + + for_each_possible_cpu(cpu) { + struct blkio_group_stats_cpu *sc = + per_cpu_ptr(pd->stats_cpu, cpu); + + tmp = blkg_rwstat_read((void *)sc + off); + for (i = 0; i < BLKG_RWSTAT_NR; i++) + rwstat.cnt[i] += tmp.cnt[i]; + } + + return __blkg_prfill_rwstat(sf, pd, &rwstat); +} + +/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ +static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) +{ + struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + + blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, + BLKCG_STAT_POL(cft->private), + BLKCG_STAT_OFF(cft->private), true); + return 0; +} + static u64 blkg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, int off) { -- cgit v1.2.3 From 8a3d26151f24e2a2ffa550890144c3d54d2edb15 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move blkio_group_stats_cpu and friends to blk-throttle.c blkio_group_stats_cpu is used only by blk-throtl and has no reason to be defined in blkcg core. * Move blkio_group_stats_cpu to blk-throttle.c and rename it to tg_stats_cpu. * blkg_policy_data->stats_cpu is replaced with throtl_grp->stats_cpu. prfill functions updated accordingly. * All related macros / functions are renamed so that they have tg_ prefix and the unnecessary @pol arguments are dropped. * Per-cpu stats allocation code is also moved from blk-cgroup.c to blk-throttle.c and gets simplified to only deal with BLKIO_POLICY_THROTL. percpu stat free is performed by the exit method throtl_exit_blkio_group(). * throtl_reset_group_stats() implemented for blkio_reset_group_stats_fn method so that tg->stats_cpu can be reset. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 128 +++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 113 insertions(+), 15 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index cb259bc46f4..27f7960dd42 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -40,6 +40,14 @@ struct throtl_rb_root { #define rb_entry_tg(node) rb_entry((node), struct throtl_grp, rb_node) +/* Per-cpu group stats */ +struct tg_stats_cpu { + /* total bytes transferred */ + struct blkg_rwstat service_bytes; + /* total IOs serviced, post merge */ + struct blkg_rwstat serviced; +}; + struct throtl_grp { /* active throtl group service_tree member */ struct rb_node rb_node; @@ -76,6 +84,12 @@ struct throtl_grp { /* Some throttle limits got updated for the group */ int limits_changed; + + /* Per cpu stats pointer */ + struct tg_stats_cpu __percpu *stats_cpu; + + /* List of tgs waiting for per cpu stats memory to be allocated */ + struct list_head stats_alloc_node; }; struct throtl_data @@ -100,6 +114,13 @@ struct throtl_data int limits_changed; }; +/* list and work item to allocate percpu group stats */ +static DEFINE_SPINLOCK(tg_stats_alloc_lock); +static LIST_HEAD(tg_stats_alloc_list); + +static void tg_stats_alloc_fn(struct work_struct *); +static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); + static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) { return blkg_to_pdata(blkg, &blkio_policy_throtl); @@ -142,6 +163,44 @@ static inline unsigned int total_nr_queued(struct throtl_data *td) return td->nr_queued[0] + td->nr_queued[1]; } +/* + * Worker for allocating per cpu stat for tgs. This is scheduled on the + * system_nrt_wq once there are some groups on the alloc_list waiting for + * allocation. + */ +static void tg_stats_alloc_fn(struct work_struct *work) +{ + static struct tg_stats_cpu *stats_cpu; /* this fn is non-reentrant */ + struct delayed_work *dwork = to_delayed_work(work); + bool empty = false; + +alloc_stats: + if (!stats_cpu) { + stats_cpu = alloc_percpu(struct tg_stats_cpu); + if (!stats_cpu) { + /* allocation failed, try again after some time */ + queue_delayed_work(system_nrt_wq, dwork, + msecs_to_jiffies(10)); + return; + } + } + + spin_lock_irq(&tg_stats_alloc_lock); + + if (!list_empty(&tg_stats_alloc_list)) { + struct throtl_grp *tg = list_first_entry(&tg_stats_alloc_list, + struct throtl_grp, + stats_alloc_node); + swap(tg->stats_cpu, stats_cpu); + list_del_init(&tg->stats_alloc_node); + } + + empty = list_empty(&tg_stats_alloc_list); + spin_unlock_irq(&tg_stats_alloc_lock); + if (!empty) + goto alloc_stats; +} + static void throtl_init_blkio_group(struct blkio_group *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -155,6 +214,43 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) tg->bps[WRITE] = -1; tg->iops[READ] = -1; tg->iops[WRITE] = -1; + + /* + * Ugh... We need to perform per-cpu allocation for tg->stats_cpu + * but percpu allocator can't be called from IO path. Queue tg on + * tg_stats_alloc_list and allocate from work item. + */ + spin_lock(&tg_stats_alloc_lock); + list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); + queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); + spin_unlock(&tg_stats_alloc_lock); +} + +static void throtl_exit_blkio_group(struct blkio_group *blkg) +{ + struct throtl_grp *tg = blkg_to_tg(blkg); + + spin_lock(&tg_stats_alloc_lock); + list_del_init(&tg->stats_alloc_node); + spin_unlock(&tg_stats_alloc_lock); + + free_percpu(tg->stats_cpu); +} + +static void throtl_reset_group_stats(struct blkio_group *blkg) +{ + struct throtl_grp *tg = blkg_to_tg(blkg); + int cpu; + + if (tg->stats_cpu == NULL) + return; + + for_each_possible_cpu(cpu) { + struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); + + blkg_rwstat_reset(&sc->service_bytes); + blkg_rwstat_reset(&sc->serviced); + } } static struct @@ -565,12 +661,12 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, int rw) { - struct blkg_policy_data *pd = blkg->pd[BLKIO_POLICY_THROTL]; - struct blkio_group_stats_cpu *stats_cpu; + struct throtl_grp *tg = blkg_to_tg(blkg); + struct tg_stats_cpu *stats_cpu; unsigned long flags; /* If per cpu stats are not allocated yet, don't do any accounting. */ - if (pd->stats_cpu == NULL) + if (tg->stats_cpu == NULL) return; /* @@ -580,7 +676,7 @@ static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, */ local_irq_save(flags); - stats_cpu = this_cpu_ptr(pd->stats_cpu); + stats_cpu = this_cpu_ptr(tg->stats_cpu); blkg_rwstat_add(&stats_cpu->serviced, rw, 1); blkg_rwstat_add(&stats_cpu->service_bytes, rw, bytes); @@ -842,15 +938,15 @@ static void throtl_update_blkio_group_common(struct throtl_data *td, throtl_schedule_delayed_work(td, 0); } -static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { + struct throtl_grp *tg = (void *)pd->pdata; struct blkg_rwstat rwstat = { }, tmp; int i, cpu; for_each_possible_cpu(cpu) { - struct blkio_group_stats_cpu *sc = - per_cpu_ptr(pd->stats_cpu, cpu); + struct tg_stats_cpu *sc = per_cpu_ptr(tg->stats_cpu, cpu); tmp = blkg_rwstat_read((void *)sc + off); for (i = 0; i < BLKG_RWSTAT_NR; i++) @@ -861,12 +957,12 @@ static u64 blkg_prfill_cpu_rwstat(struct seq_file *sf, } /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ -static int blkcg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, blkg_prfill_cpu_rwstat, + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, BLKCG_STAT_POL(cft->private), BLKCG_STAT_OFF(cft->private), true); return 0; @@ -1012,14 +1108,14 @@ static struct cftype throtl_files[] = { { .name = "throttle.io_service_bytes", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct blkio_group_stats_cpu, service_bytes)), - .read_seq_string = blkcg_print_cpu_rwstat, + offsetof(struct tg_stats_cpu, service_bytes)), + .read_seq_string = tg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct blkio_group_stats_cpu, serviced)), - .read_seq_string = blkcg_print_cpu_rwstat, + offsetof(struct tg_stats_cpu, serviced)), + .read_seq_string = tg_print_cpu_rwstat, }, { } /* terminate */ }; @@ -1034,6 +1130,8 @@ static void throtl_shutdown_wq(struct request_queue *q) static struct blkio_policy_type blkio_policy_throtl = { .ops = { .blkio_init_group_fn = throtl_init_blkio_group, + .blkio_exit_group_fn = throtl_exit_blkio_group, + .blkio_reset_group_stats_fn = throtl_reset_group_stats, }, .plid = BLKIO_POLICY_THROTL, .pdata_size = sizeof(struct throtl_grp), -- cgit v1.2.3 From af133ceb261033eb43c03d161a991c3b772e8c56 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: move blkio_group_conf->iops and ->bps to blk-throttle blkio_cgroup_conf->iops and ->bps are owned by blk-throttle and has no reason to be defined in blkcg core. Drop them and let conf setting functions directly manipulate throtl_grp->bps[] and ->iops[]. This makes blkio_group_conf empty. Drop it. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 153 +++++++++++++++++++-------------------------------- 1 file changed, 58 insertions(+), 95 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 27f7960dd42..004964bb6fd 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -924,20 +924,6 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -/* - * Can not take queue lock in update functions as queue lock under - * blkcg_lock is not allowed. Under other paths we take blkcg_lock under - * queue_lock. - */ -static void throtl_update_blkio_group_common(struct throtl_data *td, - struct throtl_grp *tg) -{ - xchg(&tg->limits_changed, true); - xchg(&td->limits_changed, true); - /* Schedule a work now to process the limit change */ - throtl_schedule_delayed_work(td, 0); -} - static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off) { @@ -968,68 +954,48 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } -static u64 blkg_prfill_conf_u64(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - u64 v = *(u64 *)((void *)&pd->conf + off); + u64 v = *(u64 *)((void *)pd->pdata + off); - if (!v) + if (v == -1) return 0; return __blkg_prfill_u64(sf, pd, v); } -static int blkcg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, - struct seq_file *sf) +static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), - blkg_prfill_conf_u64, BLKIO_POLICY_THROTL, - cft->private, false); - return 0; -} + unsigned int v = *(unsigned int *)((void *)pd->pdata + off); -static void throtl_update_blkio_group_read_bps(struct blkio_group *blkg, - u64 read_bps) -{ - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->bps[READ] = read_bps; - throtl_update_blkio_group_common(blkg->q->td, tg); -} - -static void throtl_update_blkio_group_write_bps(struct blkio_group *blkg, - u64 write_bps) -{ - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->bps[WRITE] = write_bps; - throtl_update_blkio_group_common(blkg->q->td, tg); + if (v == -1) + return 0; + return __blkg_prfill_u64(sf, pd, v); } -static void throtl_update_blkio_group_read_iops(struct blkio_group *blkg, - u64 read_iops) +static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->iops[READ] = read_iops; - throtl_update_blkio_group_common(blkg->q->td, tg); + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, + BLKIO_POLICY_THROTL, cft->private, false); + return 0; } -static void throtl_update_blkio_group_write_iops(struct blkio_group *blkg, - u64 write_iops) +static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, + struct seq_file *sf) { - struct throtl_grp *tg = blkg_to_tg(blkg); - - tg->iops[WRITE] = write_iops; - throtl_update_blkio_group_common(blkg->q->td, tg); + blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, + BLKIO_POLICY_THROTL, cft->private, false); + return 0; } -static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, - const char *buf, - void (*update)(struct blkio_group *, u64)) +static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, + bool is_u64) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - struct blkg_policy_data *pd; struct blkg_conf_ctx ctx; + struct throtl_grp *tg; int ret; ret = blkg_conf_prep(blkcg, buf, &ctx); @@ -1037,10 +1003,23 @@ static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, return ret; ret = -EINVAL; - pd = ctx.blkg->pd[BLKIO_POLICY_THROTL]; - if (pd) { - *(u64 *)((void *)&pd->conf + cft->private) = ctx.v; - update(ctx.blkg, ctx.v ?: -1); + tg = blkg_to_tg(ctx.blkg); + if (tg) { + struct throtl_data *td = ctx.blkg->q->td; + + if (!ctx.v) + ctx.v = -1; + + if (is_u64) + *(u64 *)((void *)tg + cft->private) = ctx.v; + else + *(unsigned int *)((void *)tg + cft->private) = ctx.v; + + /* XXX: we don't need the following deferred processing */ + xchg(&tg->limits_changed, true); + xchg(&td->limits_changed, true); + throtl_schedule_delayed_work(td, 0); + ret = 0; } @@ -1048,61 +1027,45 @@ static int blkcg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, return ret; } -static int blkcg_set_conf_bps_r(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_read_bps); -} - -static int blkcg_set_conf_bps_w(struct cgroup *cgrp, struct cftype *cft, - const char *buf) -{ - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_write_bps); -} - -static int blkcg_set_conf_iops_r(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, + const char *buf) { - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_read_iops); + return tg_set_conf(cgrp, cft, buf, true); } -static int blkcg_set_conf_iops_w(struct cgroup *cgrp, struct cftype *cft, - const char *buf) +static int tg_set_conf_uint(struct cgroup *cgrp, struct cftype *cft, + const char *buf) { - return blkcg_set_conf_u64(cgrp, cft, buf, - throtl_update_blkio_group_write_iops); + return tg_set_conf(cgrp, cft, buf, false); } static struct cftype throtl_files[] = { { .name = "throttle.read_bps_device", - .private = offsetof(struct blkio_group_conf, bps[READ]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_bps_r, + .private = offsetof(struct throtl_grp, bps[READ]), + .read_seq_string = tg_print_conf_u64, + .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.write_bps_device", - .private = offsetof(struct blkio_group_conf, bps[WRITE]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_bps_w, + .private = offsetof(struct throtl_grp, bps[WRITE]), + .read_seq_string = tg_print_conf_u64, + .write_string = tg_set_conf_u64, .max_write_len = 256, }, { .name = "throttle.read_iops_device", - .private = offsetof(struct blkio_group_conf, iops[READ]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_iops_r, + .private = offsetof(struct throtl_grp, iops[READ]), + .read_seq_string = tg_print_conf_uint, + .write_string = tg_set_conf_uint, .max_write_len = 256, }, { .name = "throttle.write_iops_device", - .private = offsetof(struct blkio_group_conf, iops[WRITE]), - .read_seq_string = blkcg_print_conf_u64, - .write_string = blkcg_set_conf_iops_w, + .private = offsetof(struct throtl_grp, iops[WRITE]), + .read_seq_string = tg_print_conf_uint, + .write_string = tg_set_conf_uint, .max_write_len = 256, }, { -- cgit v1.2.3 From d366e7ec41882791c970dfb7c67b737be8c3a174 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:44 -0700 Subject: blkcg: pass around pd->pdata instead of pd itself in prfill functions Now that all conf and stat fields are moved into policy specific blkio_policy_data->pdata areas, there's no reason to use blkio_policy_data itself in prfill functions. Pass around @pd->pdata instead of @pd. Signed-off-by: Tejun Heo --- block/blk-throttle.c | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 004964bb6fd..bd6dbfe1e4e 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -924,10 +924,9 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, - struct blkg_policy_data *pd, int off) +static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) { - struct throtl_grp *tg = (void *)pd->pdata; + struct throtl_grp *tg = pdata; struct blkg_rwstat rwstat = { }, tmp; int i, cpu; @@ -939,7 +938,7 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, rwstat.cnt[i] += tmp.cnt[i]; } - return __blkg_prfill_rwstat(sf, pd, &rwstat); + return __blkg_prfill_rwstat(sf, pdata, &rwstat); } /* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ @@ -954,24 +953,22 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } -static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, - int off) +static u64 tg_prfill_conf_u64(struct seq_file *sf, void *pdata, int off) { - u64 v = *(u64 *)((void *)pd->pdata + off); + u64 v = *(u64 *)(pdata + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pd, v); + return __blkg_prfill_u64(sf, pdata, v); } -static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, - int off) +static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) { - unsigned int v = *(unsigned int *)((void *)pd->pdata + off); + unsigned int v = *(unsigned int *)(pdata + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pd, v); + return __blkg_prfill_u64(sf, pdata, v); } static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, -- cgit v1.2.3 From 5bc4afb1ec6aa562fac4d9aba34d957ee42f5813 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Sun, 1 Apr 2012 14:38:45 -0700 Subject: blkcg: drop BLKCG_STAT_{PRIV|POL|OFF} macros Now that all stat handling code lives in policy implementations, there's no need to encode policy ID in cft->private. * Export blkcg_prfill_[rw]stat() from blkcg, remove blkcg_print_[rw]stat(), and implement cfqg_print_[rw]stat() which use hard-code BLKIO_POLICY_PROP. * Use cft->private for offset of the target field directly and drop BLKCG_STAT_{PRIV|POL|OFF}(). Signed-off-by: Tejun Heo --- block/blk-throttle.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index bd6dbfe1e4e..60240142f5a 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -941,15 +941,13 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) return __blkg_prfill_rwstat(sf, pdata, &rwstat); } -/* print per-cpu blkg_rwstat specified by BLKCG_STAT_PRIV() */ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, - BLKCG_STAT_POL(cft->private), - BLKCG_STAT_OFF(cft->private), true); + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, BLKIO_POLICY_THROTL, + cft->private, true); return 0; } @@ -1067,14 +1065,12 @@ static struct cftype throtl_files[] = { }, { .name = "throttle.io_service_bytes", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct tg_stats_cpu, service_bytes)), + .private = offsetof(struct tg_stats_cpu, service_bytes), .read_seq_string = tg_print_cpu_rwstat, }, { .name = "throttle.io_serviced", - .private = BLKCG_STAT_PRIV(BLKIO_POLICY_THROTL, - offsetof(struct tg_stats_cpu, serviced)), + .private = offsetof(struct tg_stats_cpu, serviced), .read_seq_string = tg_print_cpu_rwstat, }, { } /* terminate */ -- cgit v1.2.3 From ec399347d39fb2337ebace928cf4a2855bd0ec37 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:27 -0700 Subject: blkcg: use @pol instead of @plid in update_root_blkg_pd() and blkcg_print_blkgs() The two functions were taking "enum blkio_policy_id plid". Make them take "const struct blkio_policy_type *pol" instead. This is to prepare for per-queue policy activation and doesn't cause any functional difference. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 60240142f5a..07c17c27a62 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -946,7 +946,7 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, { struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, BLKIO_POLICY_THROTL, + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkio_policy_throtl, cft->private, true); return 0; } @@ -973,7 +973,7 @@ static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, - BLKIO_POLICY_THROTL, cft->private, false); + &blkio_policy_throtl, cft->private, false); return 0; } @@ -981,7 +981,7 @@ static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, - BLKIO_POLICY_THROTL, cft->private, false); + &blkio_policy_throtl, cft->private, false); return 0; } -- cgit v1.2.3 From 8bd435b30ecacb69bbb8b2d3e251f770b807c5b2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:28 -0700 Subject: blkcg: remove static policy ID enums Remove BLKIO_POLICY_* enums and let blkio_policy_register() allocate @pol->plid dynamically on registration. The maximum number of blkcg policies which can be registered at the same time is defined by BLKCG_MAX_POLS constant added to include/linux/blkdev.h. Note that blkio_policy_register() now may fail. Policy init functions updated accordingly and unnecessary ifdefs removed from cfq_init(). Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 07c17c27a62..0dc4645aa7f 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1089,7 +1089,6 @@ static struct blkio_policy_type blkio_policy_throtl = { .blkio_exit_group_fn = throtl_exit_blkio_group, .blkio_reset_group_stats_fn = throtl_reset_group_stats, }, - .plid = BLKIO_POLICY_THROTL, .pdata_size = sizeof(struct throtl_grp), .cftypes = throtl_files, }; @@ -1271,8 +1270,7 @@ static int __init throtl_init(void) if (!kthrotld_workqueue) panic("Failed to create kthrotld\n"); - blkio_policy_register(&blkio_policy_throtl); - return 0; + return blkio_policy_register(&blkio_policy_throtl); } module_init(throtl_init); -- cgit v1.2.3 From da8b066262e12d1d0a3b1e6d3486e500169bf730 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:29 -0700 Subject: blkcg: make blkg_conf_prep() take @pol and return with queue lock held Add @pol to blkg_conf_prep() and let it return with queue lock held (to be released by blkg_conf_finish()). Note that @pol isn't used yet. This is to prepare for per-queue policy activation and doesn't cause any visible difference. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 0dc4645aa7f..6f1bfdf9a1b 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -993,7 +993,7 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, struct throtl_grp *tg; int ret; - ret = blkg_conf_prep(blkcg, buf, &ctx); + ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); if (ret) return ret; -- cgit v1.2.3 From 03d8e11142a893ad322285d3c8a08e88b570cda1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:32 -0700 Subject: blkcg: add request_queue->root_blkg With per-queue policy activation, root blkg creation will be moved to blkcg core. Add q->root_blkg in preparation. For blk-throtl, this replaces throtl_data->root_tg; however, cfq needs to keep cfqd->root_group for !CONFIG_CFQ_GROUP_IOSCHED. This is to prepare for per-queue policy activation and doesn't cause any functional difference. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 6f1bfdf9a1b..8c520fad688 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -97,7 +97,6 @@ struct throtl_data /* service tree for active throtl groups */ struct throtl_rb_root tg_service_tree; - struct throtl_grp *root_tg; struct request_queue *queue; /* Total Number of queued bios on READ and WRITE lists */ @@ -131,6 +130,11 @@ static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) return pdata_to_blkg(tg); } +static inline struct throtl_grp *td_root_tg(struct throtl_data *td) +{ + return blkg_to_tg(td->queue->root_blkg); +} + enum tg_state_flags { THROTL_TG_FLAG_on_rr = 0, /* on round-robin busy list */ }; @@ -261,7 +265,7 @@ throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) * Avoid lookup in this case */ if (blkcg == &blkio_root_cgroup) - return td->root_tg; + return td_root_tg(td); return blkg_to_tg(blkg_lookup(blkcg, td->queue)); } @@ -277,7 +281,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, * Avoid lookup in this case */ if (blkcg == &blkio_root_cgroup) { - tg = td->root_tg; + tg = td_root_tg(td); } else { struct blkio_group *blkg; @@ -287,7 +291,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, if (!IS_ERR(blkg)) tg = blkg_to_tg(blkg); else if (!blk_queue_dead(q)) - tg = td->root_tg; + tg = td_root_tg(td); } return tg; @@ -1245,12 +1249,12 @@ int blk_throtl_init(struct request_queue *q) blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); if (!IS_ERR(blkg)) - td->root_tg = blkg_to_tg(blkg); + q->root_blkg = blkg; spin_unlock_irq(q->queue_lock); rcu_read_unlock(); - if (!td->root_tg) { + if (!q->root_blkg) { kfree(td); return -ENOMEM; } -- cgit v1.2.3 From a2b1693bac45ea3fe3ba612fd22c45f17449f610 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:33 -0700 Subject: blkcg: implement per-queue policy activation All blkcg policies were assumed to be enabled on all request_queues. Due to various implementation obstacles, during the recent blkcg core updates, this was temporarily implemented as shooting down all !root blkgs on elevator switch and policy [de]registration combined with half-broken in-place root blkg updates. In addition to being buggy and racy, this meant losing all blkcg configurations across those events. Now that blkcg is cleaned up enough, this patch replaces the temporary implementation with proper per-queue policy activation. Each blkcg policy should call the new blkcg_[de]activate_policy() to enable and disable the policy on a specific queue. blkcg_activate_policy() allocates and installs policy data for the policy for all existing blkgs. blkcg_deactivate_policy() does the reverse. If a policy is not enabled for a given queue, blkg printing / config functions skip the respective blkg for the queue. blkcg_activate_policy() also takes care of root blkg creation, and cfq_init_queue() and blk_throtl_init() are updated accordingly. This replaces blkcg_bypass_{start|end}() and update_root_blkg_pd() unnecessary. Dropped. v2: cfq_init_queue() was returning uninitialized @ret on root_group alloc failure if !CONFIG_CFQ_GROUP_IOSCHED. Fixed. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 52 +++++++++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 33 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 8c520fad688..2fc964e06ea 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -995,35 +995,31 @@ static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); struct blkg_conf_ctx ctx; struct throtl_grp *tg; + struct throtl_data *td; int ret; ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); if (ret) return ret; - ret = -EINVAL; tg = blkg_to_tg(ctx.blkg); - if (tg) { - struct throtl_data *td = ctx.blkg->q->td; - - if (!ctx.v) - ctx.v = -1; + td = ctx.blkg->q->td; - if (is_u64) - *(u64 *)((void *)tg + cft->private) = ctx.v; - else - *(unsigned int *)((void *)tg + cft->private) = ctx.v; + if (!ctx.v) + ctx.v = -1; - /* XXX: we don't need the following deferred processing */ - xchg(&tg->limits_changed, true); - xchg(&td->limits_changed, true); - throtl_schedule_delayed_work(td, 0); + if (is_u64) + *(u64 *)((void *)tg + cft->private) = ctx.v; + else + *(unsigned int *)((void *)tg + cft->private) = ctx.v; - ret = 0; - } + /* XXX: we don't need the following deferred processing */ + xchg(&tg->limits_changed, true); + xchg(&td->limits_changed, true); + throtl_schedule_delayed_work(td, 0); blkg_conf_finish(&ctx); - return ret; + return 0; } static int tg_set_conf_u64(struct cgroup *cgrp, struct cftype *cft, @@ -1230,7 +1226,7 @@ void blk_throtl_drain(struct request_queue *q) int blk_throtl_init(struct request_queue *q) { struct throtl_data *td; - struct blkio_group *blkg; + int ret; td = kzalloc_node(sizeof(*td), GFP_KERNEL, q->node); if (!td) @@ -1243,28 +1239,18 @@ int blk_throtl_init(struct request_queue *q) q->td = td; td->queue = q; - /* alloc and init root group. */ - rcu_read_lock(); - spin_lock_irq(q->queue_lock); - - blkg = blkg_lookup_create(&blkio_root_cgroup, q, true); - if (!IS_ERR(blkg)) - q->root_blkg = blkg; - - spin_unlock_irq(q->queue_lock); - rcu_read_unlock(); - - if (!q->root_blkg) { + /* activate policy */ + ret = blkcg_activate_policy(q, &blkio_policy_throtl); + if (ret) kfree(td); - return -ENOMEM; - } - return 0; + return ret; } void blk_throtl_exit(struct request_queue *q) { BUG_ON(!q->td); throtl_shutdown_wq(q); + blkcg_deactivate_policy(q, &blkio_policy_throtl); kfree(q->td); } -- cgit v1.2.3 From 3c96cb32d318f323c1bf972a4c66821f8499e34d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 13 Apr 2012 13:11:34 -0700 Subject: blkcg: drop stuff unused after per-queue policy activation update * All_q_list is unused. Drop all_q_{mutex|list}. * @for_root of blkg_lookup_create() is always %false when called from outside blk-cgroup.c proper. Factor out __blkg_lookup_create() so that it doesn't check whether @q is bypassing and use the underscored version for the @for_root callsite. * blkg_destroy_all() is used only from blkcg proper and @destroy_root is always %true. Make it static and drop @destroy_root. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 2fc964e06ea..e2aaf27e1f1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -285,7 +285,7 @@ static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, } else { struct blkio_group *blkg; - blkg = blkg_lookup_create(blkcg, q, false); + blkg = blkg_lookup_create(blkcg, q); /* if %NULL and @q is alive, fall back to root_tg */ if (!IS_ERR(blkg)) -- cgit v1.2.3 From 54e7ed12bad1e3aa2a28558fab6850240465f973 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:23 -0700 Subject: blkcg: remove blkio_group->path[] blkio_group->path[] stores the path of the associated cgroup and is used only for debug messages. Just format the path from blkg->cgroup when printing debug messages. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e2aaf27e1f1..e9b7a47f6da 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -155,9 +155,12 @@ static inline int throtl_tg_##name(const struct throtl_grp *tg) \ THROTL_TG_FNS(on_rr); -#define throtl_log_tg(td, tg, fmt, args...) \ - blk_add_trace_msg((td)->queue, "throtl %s " fmt, \ - blkg_path(tg_to_blkg(tg)), ##args); \ +#define throtl_log_tg(td, tg, fmt, args...) do { \ + char __pbuf[128]; \ + \ + blkg_path(tg_to_blkg(tg), __pbuf, sizeof(__pbuf)); \ + blk_add_trace_msg((td)->queue, "throtl %s " fmt, __pbuf, ##args); \ +} while (0) #define throtl_log(td, fmt, args...) \ blk_add_trace_msg((td)->queue, "throtl " fmt, ##args) -- cgit v1.2.3 From 3c798398e393e5f9502dbab2b51e6c25e2e8f2ac Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:25 -0700 Subject: blkcg: mass rename of blkcg API During the recent blkcg cleanup, most of blkcg API has changed to such extent that mass renaming wouldn't cause any noticeable pain. Take the chance and cleanup the naming. * Rename blkio_cgroup to blkcg. * Drop blkio / blkiocg prefixes and consistently use blkcg. * Rename blkio_group to blkcg_gq, which is consistent with io_cq but keep the blkg prefix / variable name. * Rename policy method type and field names to signify they're dealing with policy data. * Rename blkio_policy_type to blkcg_policy. This patch doesn't cause any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 72 ++++++++++++++++++++++++++-------------------------- 1 file changed, 36 insertions(+), 36 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index e9b7a47f6da..00c7eff66ec 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -21,7 +21,7 @@ static int throtl_quantum = 32; /* Throttling is performed over 100ms slice and after that slice is renewed */ static unsigned long throtl_slice = HZ/10; /* 100 ms */ -static struct blkio_policy_type blkio_policy_throtl; +static struct blkcg_policy blkcg_policy_throtl; /* A workqueue to queue throttle related work */ static struct workqueue_struct *kthrotld_workqueue; @@ -120,12 +120,12 @@ static LIST_HEAD(tg_stats_alloc_list); static void tg_stats_alloc_fn(struct work_struct *); static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); -static inline struct throtl_grp *blkg_to_tg(struct blkio_group *blkg) +static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) { - return blkg_to_pdata(blkg, &blkio_policy_throtl); + return blkg_to_pdata(blkg, &blkcg_policy_throtl); } -static inline struct blkio_group *tg_to_blkg(struct throtl_grp *tg) +static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg) { return pdata_to_blkg(tg); } @@ -208,7 +208,7 @@ alloc_stats: goto alloc_stats; } -static void throtl_init_blkio_group(struct blkio_group *blkg) +static void throtl_pd_init(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -233,7 +233,7 @@ static void throtl_init_blkio_group(struct blkio_group *blkg) spin_unlock(&tg_stats_alloc_lock); } -static void throtl_exit_blkio_group(struct blkio_group *blkg) +static void throtl_pd_exit(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -244,7 +244,7 @@ static void throtl_exit_blkio_group(struct blkio_group *blkg) free_percpu(tg->stats_cpu); } -static void throtl_reset_group_stats(struct blkio_group *blkg) +static void throtl_pd_reset_stats(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); int cpu; @@ -260,33 +260,33 @@ static void throtl_reset_group_stats(struct blkio_group *blkg) } } -static struct -throtl_grp *throtl_lookup_tg(struct throtl_data *td, struct blkio_cgroup *blkcg) +static struct throtl_grp *throtl_lookup_tg(struct throtl_data *td, + struct blkcg *blkcg) { /* - * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case + * This is the common case when there are no blkcgs. Avoid lookup + * in this case */ - if (blkcg == &blkio_root_cgroup) + if (blkcg == &blkcg_root) return td_root_tg(td); return blkg_to_tg(blkg_lookup(blkcg, td->queue)); } static struct throtl_grp *throtl_lookup_create_tg(struct throtl_data *td, - struct blkio_cgroup *blkcg) + struct blkcg *blkcg) { struct request_queue *q = td->queue; struct throtl_grp *tg = NULL; /* - * This is the common case when there are no blkio cgroups. - * Avoid lookup in this case + * This is the common case when there are no blkcgs. Avoid lookup + * in this case */ - if (blkcg == &blkio_root_cgroup) { + if (blkcg == &blkcg_root) { tg = td_root_tg(td); } else { - struct blkio_group *blkg; + struct blkcg_gq *blkg; blkg = blkg_lookup_create(blkcg, q); @@ -665,7 +665,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, return 0; } -static void throtl_update_dispatch_stats(struct blkio_group *blkg, u64 bytes, +static void throtl_update_dispatch_stats(struct blkcg_gq *blkg, u64 bytes, int rw) { struct throtl_grp *tg = blkg_to_tg(blkg); @@ -822,7 +822,7 @@ static int throtl_select_dispatch(struct throtl_data *td, struct bio_list *bl) static void throtl_process_limit_change(struct throtl_data *td) { struct request_queue *q = td->queue; - struct blkio_group *blkg, *n; + struct blkcg_gq *blkg, *n; if (!td->limits_changed) return; @@ -951,9 +951,9 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); - blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkio_policy_throtl, + blkcg_print_blkgs(sf, blkcg, tg_prfill_cpu_rwstat, &blkcg_policy_throtl, cft->private, true); return 0; } @@ -979,29 +979,29 @@ static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_u64, - &blkio_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_u64, + &blkcg_policy_throtl, cft->private, false); return 0; } static int tg_print_conf_uint(struct cgroup *cgrp, struct cftype *cft, struct seq_file *sf) { - blkcg_print_blkgs(sf, cgroup_to_blkio_cgroup(cgrp), tg_prfill_conf_uint, - &blkio_policy_throtl, cft->private, false); + blkcg_print_blkgs(sf, cgroup_to_blkcg(cgrp), tg_prfill_conf_uint, + &blkcg_policy_throtl, cft->private, false); return 0; } static int tg_set_conf(struct cgroup *cgrp, struct cftype *cft, const char *buf, bool is_u64) { - struct blkio_cgroup *blkcg = cgroup_to_blkio_cgroup(cgrp); + struct blkcg *blkcg = cgroup_to_blkcg(cgrp); struct blkg_conf_ctx ctx; struct throtl_grp *tg; struct throtl_data *td; int ret; - ret = blkg_conf_prep(blkcg, &blkio_policy_throtl, buf, &ctx); + ret = blkg_conf_prep(blkcg, &blkcg_policy_throtl, buf, &ctx); if (ret) return ret; @@ -1086,11 +1086,11 @@ static void throtl_shutdown_wq(struct request_queue *q) cancel_delayed_work_sync(&td->throtl_work); } -static struct blkio_policy_type blkio_policy_throtl = { +static struct blkcg_policy blkcg_policy_throtl = { .ops = { - .blkio_init_group_fn = throtl_init_blkio_group, - .blkio_exit_group_fn = throtl_exit_blkio_group, - .blkio_reset_group_stats_fn = throtl_reset_group_stats, + .pd_init_fn = throtl_pd_init, + .pd_exit_fn = throtl_pd_exit, + .pd_reset_stats_fn = throtl_pd_reset_stats, }, .pdata_size = sizeof(struct throtl_grp), .cftypes = throtl_files, @@ -1101,7 +1101,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) struct throtl_data *td = q->td; struct throtl_grp *tg; bool rw = bio_data_dir(bio), update_disptime = true; - struct blkio_cgroup *blkcg; + struct blkcg *blkcg; bool throttled = false; if (bio->bi_rw & REQ_THROTTLED) { @@ -1118,7 +1118,7 @@ bool blk_throtl_bio(struct request_queue *q, struct bio *bio) * just update the dispatch stats in lockless manner and return. */ rcu_read_lock(); - blkcg = bio_blkio_cgroup(bio); + blkcg = bio_blkcg(bio); tg = throtl_lookup_tg(td, blkcg); if (tg) { if (tg_no_rule_group(tg, rw)) { @@ -1243,7 +1243,7 @@ int blk_throtl_init(struct request_queue *q) td->queue = q; /* activate policy */ - ret = blkcg_activate_policy(q, &blkio_policy_throtl); + ret = blkcg_activate_policy(q, &blkcg_policy_throtl); if (ret) kfree(td); return ret; @@ -1253,7 +1253,7 @@ void blk_throtl_exit(struct request_queue *q) { BUG_ON(!q->td); throtl_shutdown_wq(q); - blkcg_deactivate_policy(q, &blkio_policy_throtl); + blkcg_deactivate_policy(q, &blkcg_policy_throtl); kfree(q->td); } @@ -1263,7 +1263,7 @@ static int __init throtl_init(void) if (!kthrotld_workqueue) panic("Failed to create kthrotld\n"); - return blkio_policy_register(&blkio_policy_throtl); + return blkcg_policy_register(&blkcg_policy_throtl); } module_init(throtl_init); -- cgit v1.2.3 From f95a04afa80c0f4ddd645ef6a84ed118b5d1ad46 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:26 -0700 Subject: blkcg: embed struct blkg_policy_data in policy specific data Currently blkg_policy_data carries policy specific data as char flex array instead of being embedded in policy specific data. This was forced by oddities around blkg allocation which are all gone now. This patch makes blkg_policy_data embedded in policy specific data - throtl_grp and cfq_group so that it's more conventional and consistent with how io_cq is handled. * blkcg_policy->pdata_size is renamed to ->pd_size. * Functions which used to take void *pdata now takes struct blkg_policy_data *pd. * blkg_to_pdata/pdata_to_blkg() updated to blkg_to_pd/pd_to_blkg(). * Dummy struct blkg_policy_data definition added. Dummy pdata_to_blkg() definition was unused and inconsistent with the non-dummy version - correct dummy pd_to_blkg() added. * throtl and cfq updated accordingly. * As dummy blkg_to_pd/pd_to_blkg() are provided, blkg_to_cfqg/cfqg_to_blkg() don't need to be ifdef'd. Moved outside ifdef block. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 00c7eff66ec..6a0a17a8386 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -49,6 +49,9 @@ struct tg_stats_cpu { }; struct throtl_grp { + /* must be the first member */ + struct blkg_policy_data pd; + /* active throtl group service_tree member */ struct rb_node rb_node; @@ -120,14 +123,19 @@ static LIST_HEAD(tg_stats_alloc_list); static void tg_stats_alloc_fn(struct work_struct *); static DECLARE_DELAYED_WORK(tg_stats_alloc_work, tg_stats_alloc_fn); +static inline struct throtl_grp *pd_to_tg(struct blkg_policy_data *pd) +{ + return pd ? container_of(pd, struct throtl_grp, pd) : NULL; +} + static inline struct throtl_grp *blkg_to_tg(struct blkcg_gq *blkg) { - return blkg_to_pdata(blkg, &blkcg_policy_throtl); + return pd_to_tg(blkg_to_pd(blkg, &blkcg_policy_throtl)); } static inline struct blkcg_gq *tg_to_blkg(struct throtl_grp *tg) { - return pdata_to_blkg(tg); + return pd_to_blkg(&tg->pd); } static inline struct throtl_grp *td_root_tg(struct throtl_data *td) @@ -931,9 +939,10 @@ throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) } } -static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) +static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, + struct blkg_policy_data *pd, int off) { - struct throtl_grp *tg = pdata; + struct throtl_grp *tg = pd_to_tg(pd); struct blkg_rwstat rwstat = { }, tmp; int i, cpu; @@ -945,7 +954,7 @@ static u64 tg_prfill_cpu_rwstat(struct seq_file *sf, void *pdata, int off) rwstat.cnt[i] += tmp.cnt[i]; } - return __blkg_prfill_rwstat(sf, pdata, &rwstat); + return __blkg_prfill_rwstat(sf, pd, &rwstat); } static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, @@ -958,22 +967,26 @@ static int tg_print_cpu_rwstat(struct cgroup *cgrp, struct cftype *cft, return 0; } -static u64 tg_prfill_conf_u64(struct seq_file *sf, void *pdata, int off) +static u64 tg_prfill_conf_u64(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - u64 v = *(u64 *)(pdata + off); + struct throtl_grp *tg = pd_to_tg(pd); + u64 v = *(u64 *)((void *)tg + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pdata, v); + return __blkg_prfill_u64(sf, pd, v); } -static u64 tg_prfill_conf_uint(struct seq_file *sf, void *pdata, int off) +static u64 tg_prfill_conf_uint(struct seq_file *sf, struct blkg_policy_data *pd, + int off) { - unsigned int v = *(unsigned int *)(pdata + off); + struct throtl_grp *tg = pd_to_tg(pd); + unsigned int v = *(unsigned int *)((void *)tg + off); if (v == -1) return 0; - return __blkg_prfill_u64(sf, pdata, v); + return __blkg_prfill_u64(sf, pd, v); } static int tg_print_conf_u64(struct cgroup *cgrp, struct cftype *cft, @@ -1092,7 +1105,7 @@ static struct blkcg_policy blkcg_policy_throtl = { .pd_exit_fn = throtl_pd_exit, .pd_reset_stats_fn = throtl_pd_reset_stats, }, - .pdata_size = sizeof(struct throtl_grp), + .pd_size = sizeof(struct throtl_grp), .cftypes = throtl_files, }; -- cgit v1.2.3 From f9fcc2d3919b8eb575b3cee9274feefafb641bca Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 16 Apr 2012 13:57:27 -0700 Subject: blkcg: collapse blkcg_policy_ops into blkcg_policy There's no reason to keep blkcg_policy_ops separate. Collapse it into blkcg_policy. This patch doesn't introduce any functional change. Signed-off-by: Tejun Heo Cc: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-throttle.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 6a0a17a8386..46310ec93d1 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -1100,13 +1100,12 @@ static void throtl_shutdown_wq(struct request_queue *q) } static struct blkcg_policy blkcg_policy_throtl = { - .ops = { - .pd_init_fn = throtl_pd_init, - .pd_exit_fn = throtl_pd_exit, - .pd_reset_stats_fn = throtl_pd_reset_stats, - }, - .pd_size = sizeof(struct throtl_grp), - .cftypes = throtl_files, + .pd_size = sizeof(struct throtl_grp), + .cftypes = throtl_files, + + .pd_init_fn = throtl_pd_init, + .pd_exit_fn = throtl_pd_exit, + .pd_reset_stats_fn = throtl_pd_reset_stats, }; bool blk_throtl_bio(struct request_queue *q, struct bio *bio) -- cgit v1.2.3 From ff26eaadf4d914e397872b99885d45756104e9ae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 23 May 2012 12:16:21 +0200 Subject: blkcg: tg_stats_alloc_lock is an irq lock tg_stats_alloc_lock nests inside queue lock and should always be held with irq disabled. throtl_pd_{init|exit}() were using non-irqsafe spinlock ops which triggered inverse lock ordering via irq warning via RCU freeing of blkg invoking throtl_pd_exit() w/o disabling IRQ. Update both functions to use irq safe operations. Signed-off-by: Tejun Heo Reported-by: Sasha Levin LKML-Reference: <1335339396.16988.80.camel@lappy> Signed-off-by: Jens Axboe --- block/blk-throttle.c | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'block/blk-throttle.c') diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 14dedecfc7e..5b065951204 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -219,6 +219,7 @@ alloc_stats: static void throtl_pd_init(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); + unsigned long flags; RB_CLEAR_NODE(&tg->rb_node); bio_list_init(&tg->bio_lists[0]); @@ -235,19 +236,20 @@ static void throtl_pd_init(struct blkcg_gq *blkg) * but percpu allocator can't be called from IO path. Queue tg on * tg_stats_alloc_list and allocate from work item. */ - spin_lock(&tg_stats_alloc_lock); + spin_lock_irqsave(&tg_stats_alloc_lock, flags); list_add(&tg->stats_alloc_node, &tg_stats_alloc_list); queue_delayed_work(system_nrt_wq, &tg_stats_alloc_work, 0); - spin_unlock(&tg_stats_alloc_lock); + spin_unlock_irqrestore(&tg_stats_alloc_lock, flags); } static void throtl_pd_exit(struct blkcg_gq *blkg) { struct throtl_grp *tg = blkg_to_tg(blkg); + unsigned long flags; - spin_lock(&tg_stats_alloc_lock); + spin_lock_irqsave(&tg_stats_alloc_lock, flags); list_del_init(&tg->stats_alloc_node); - spin_unlock(&tg_stats_alloc_lock); + spin_unlock_irqrestore(&tg_stats_alloc_lock, flags); free_percpu(tg->stats_cpu); } -- cgit v1.2.3