summaryrefslogtreecommitdiff
path: root/block
diff options
context:
space:
mode:
authorAlberto Garcia <berto@igalia.com>2015-06-08 18:17:44 +0200
committerStefan Hajnoczi <stefanha@redhat.com>2015-06-12 14:00:00 +0100
commit76f4afb40fa076ed23fe0ab42c7a768ddb71123f (patch)
tree38d8b57c8c56a457a060866197d03d537653c2a3 /block
parent1fee955f9cc5903b3c7f79bbd90929aefad583a6 (diff)
downloadqemu-76f4afb40fa076ed23fe0ab42c7a768ddb71123f.tar.gz
qemu-76f4afb40fa076ed23fe0ab42c7a768ddb71123f.tar.bz2
qemu-76f4afb40fa076ed23fe0ab42c7a768ddb71123f.zip
throttle: Add throttle group support
The throttle group support uses a cooperative round-robin scheduling algorithm. The principles of the algorithm are simple: - Each BDS of the group is used as a token in a circular way. - The active BDS computes whether a wait must be done and arms the right timer. - If a wait must be done the token timer will be armed so the token will become the next active BDS. Signed-off-by: Alberto Garcia <berto@igalia.com> Reviewed-by: Stefan Hajnoczi <stefanha@redhat.com> Message-id: f0082a86f3ac01c46170f7eafe2101a92e8fde39.1433779731.git.berto@igalia.com Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Diffstat (limited to 'block')
-rw-r--r--block/io.c75
-rw-r--r--block/qapi.c5
-rw-r--r--block/throttle-groups.c214
3 files changed, 230 insertions, 64 deletions
diff --git a/block/io.c b/block/io.c
index 61a9d1da28..bb4f78784e 100644
--- a/block/io.c
+++ b/block/io.c
@@ -23,9 +23,9 @@
*/
#include "trace.h"
-#include "sysemu/qtest.h"
#include "block/blockjob.h"
#include "block/block_int.h"
+#include "block/throttle-groups.h"
#define NOT_DONE 0x7fffffff /* used while emulated sync operation in progress */
@@ -65,7 +65,7 @@ void bdrv_set_io_limits(BlockDriverState *bs,
{
int i;
- throttle_config(&bs->throttle_state, &bs->throttle_timers, cfg);
+ throttle_group_config(bs, cfg);
for (i = 0; i < 2; i++) {
qemu_co_enter_next(&bs->throttled_reqs[i]);
@@ -95,76 +95,33 @@ static bool bdrv_start_throttled_reqs(BlockDriverState *bs)
void bdrv_io_limits_disable(BlockDriverState *bs)
{
bs->io_limits_enabled = false;
-
bdrv_start_throttled_reqs(bs);
-
- throttle_timers_destroy(&bs->throttle_timers);
-}
-
-static void bdrv_throttle_read_timer_cb(void *opaque)
-{
- BlockDriverState *bs = opaque;
- qemu_co_enter_next(&bs->throttled_reqs[0]);
-}
-
-static void bdrv_throttle_write_timer_cb(void *opaque)
-{
- BlockDriverState *bs = opaque;
- qemu_co_enter_next(&bs->throttled_reqs[1]);
+ throttle_group_unregister_bs(bs);
}
/* should be called before bdrv_set_io_limits if a limit is set */
-void bdrv_io_limits_enable(BlockDriverState *bs)
+void bdrv_io_limits_enable(BlockDriverState *bs, const char *group)
{
- int clock_type = QEMU_CLOCK_REALTIME;
-
- if (qtest_enabled()) {
- /* For testing block IO throttling only */
- clock_type = QEMU_CLOCK_VIRTUAL;
- }
assert(!bs->io_limits_enabled);
- throttle_init(&bs->throttle_state);
- throttle_timers_init(&bs->throttle_timers,
- bdrv_get_aio_context(bs),
- clock_type,
- bdrv_throttle_read_timer_cb,
- bdrv_throttle_write_timer_cb,
- bs);
+ throttle_group_register_bs(bs, group);
bs->io_limits_enabled = true;
}
-/* This function makes an IO wait if needed
- *
- * @nb_sectors: the number of sectors of the IO
- * @is_write: is the IO a write
- */
-static void bdrv_io_limits_intercept(BlockDriverState *bs,
- unsigned int bytes,
- bool is_write)
+void bdrv_io_limits_update_group(BlockDriverState *bs, const char *group)
{
- /* does this io must wait */
- bool must_wait = throttle_schedule_timer(&bs->throttle_state,
- &bs->throttle_timers,
- is_write);
-
- /* if must wait or any request of this type throttled queue the IO */
- if (must_wait ||
- !qemu_co_queue_empty(&bs->throttled_reqs[is_write])) {
- qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+ /* this bs is not part of any group */
+ if (!bs->throttle_state) {
+ return;
}
- /* the IO will be executed, do the accounting */
- throttle_account(&bs->throttle_state, is_write, bytes);
-
-
- /* if the next request must wait -> do nothing */
- if (throttle_schedule_timer(&bs->throttle_state, &bs->throttle_timers,
- is_write)) {
+ /* this bs is already part of the group we want */
+ if (!g_strcmp0(throttle_group_get_name(bs), group)) {
return;
}
- /* else queue next request for execution */
- qemu_co_queue_next(&bs->throttled_reqs[is_write]);
+ /* need to change the group this bs belongs to */
+ bdrv_io_limits_disable(bs);
+ bdrv_io_limits_enable(bs, group);
}
void bdrv_setup_io_funcs(BlockDriver *bdrv)
@@ -971,7 +928,7 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs,
/* throttling disk I/O */
if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, bytes, false);
+ throttle_group_co_io_limits_intercept(bs, bytes, false);
}
/* Align read if necessary by padding qiov */
@@ -1301,7 +1258,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs,
/* throttling disk I/O */
if (bs->io_limits_enabled) {
- bdrv_io_limits_intercept(bs, bytes, true);
+ throttle_group_co_io_limits_intercept(bs, bytes, true);
}
/*
diff --git a/block/qapi.c b/block/qapi.c
index 18d2b95f54..a5ac31268d 100644
--- a/block/qapi.c
+++ b/block/qapi.c
@@ -24,6 +24,7 @@
#include "block/qapi.h"
#include "block/block_int.h"
+#include "block/throttle-groups.h"
#include "block/write-threshold.h"
#include "qmp-commands.h"
#include "qapi-visit.h"
@@ -65,7 +66,9 @@ BlockDeviceInfo *bdrv_block_device_info(BlockDriverState *bs, Error **errp)
if (bs->io_limits_enabled) {
ThrottleConfig cfg;
- throttle_get_config(&bs->throttle_state, &cfg);
+
+ throttle_group_get_config(bs, &cfg);
+
info->bps = cfg.buckets[THROTTLE_BPS_TOTAL].avg;
info->bps_rd = cfg.buckets[THROTTLE_BPS_READ].avg;
info->bps_wr = cfg.buckets[THROTTLE_BPS_WRITE].avg;
diff --git a/block/throttle-groups.c b/block/throttle-groups.c
index 352077f98c..da8c70c4a6 100644
--- a/block/throttle-groups.c
+++ b/block/throttle-groups.c
@@ -23,6 +23,9 @@
*/
#include "block/throttle-groups.h"
+#include "qemu/queue.h"
+#include "qemu/thread.h"
+#include "sysemu/qtest.h"
/* The ThrottleGroup structure (with its ThrottleState) is shared
* among different BlockDriverState and it's independent from
@@ -160,6 +163,153 @@ static BlockDriverState *throttle_group_next_bs(BlockDriverState *bs)
return next;
}
+/* Return the next BlockDriverState in the round-robin sequence with
+ * pending I/O requests.
+ *
+ * The search starts from the group's current token for this operation
+ * type (tg->tokens[is_write]), not from bs itself; bs is only used to
+ * find the group and as the fallback result.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @bs: the current BlockDriverState
+ * @is_write: the type of operation (read/write)
+ * @ret: the next BlockDriverState with pending requests, or bs
+ * if there is none.
+ */
+static BlockDriverState *next_throttle_token(BlockDriverState *bs,
+ bool is_write)
+{
+ ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ BlockDriverState *token, *start;
+
+ start = token = tg->tokens[is_write];
+
+ /* Step to the member after the current token, then keep stepping
+ * until a member with pending requests of this type is found or
+ * the walk wraps around to where it started.
+ */
+ token = throttle_group_next_bs(token);
+ while (token != start && !token->pending_reqs[is_write]) {
+ token = throttle_group_next_bs(token);
+ }
+
+ /* If the walk wrapped around and the starting token has no pending
+ * requests of this type either, then no member of the group has any
+ * queued. In that case pick the current bs as the token, since the
+ * request it is handling now is the one most likely to be queued.
+ */
+ if (token == start && !token->pending_reqs[is_write]) {
+ token = bs;
+ }
+
+ return token;
+}
+
+/* Check if the next I/O request for a BlockDriverState needs to be
+ * throttled or not. If there's no timer set in this group, set one
+ * and update the token accordingly.
+ *
+ * At most one timer per operation type is tracked for the whole group
+ * through tg->any_timer_armed[is_write]: while that flag is set, every
+ * caller is told to wait and no further timer is scheduled here.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @bs: the current BlockDriverState
+ * @is_write: the type of operation (read/write)
+ * @ret: whether the I/O request needs to be throttled or not
+ */
+static bool throttle_group_schedule_timer(BlockDriverState *bs,
+ bool is_write)
+{
+ ThrottleState *ts = bs->throttle_state;
+ ThrottleTimers *tt = &bs->throttle_timers;
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+ bool must_wait;
+
+ /* If a timer of this type is already armed somewhere in the group,
+ * the request must wait; leave the token and the timers untouched.
+ */
+ if (tg->any_timer_armed[is_write]) {
+ return true;
+ }
+
+ must_wait = throttle_schedule_timer(ts, tt, is_write);
+
+ /* A true result is taken to mean that bs's timer just got armed
+ * (NOTE(review): relies on throttle_schedule_timer(), not shown
+ * here). Record bs as the current token and flag the group so
+ * that later callers take the early return above.
+ */
+ if (must_wait) {
+ tg->tokens[is_write] = bs;
+ tg->any_timer_armed[is_write] = true;
+ }
+
+ return must_wait;
+}
+
+/* Look for the next pending I/O request and schedule it.
+ *
+ * Nothing is done if no member of the group has a pending request of
+ * this type. Otherwise the next token is either throttled (a timer is
+ * or gets armed for it) or made runnable right away.
+ *
+ * This assumes that tg->lock is held.
+ *
+ * @bs: the current BlockDriverState
+ * @is_write: the type of operation (read/write)
+ */
+static void schedule_next_request(BlockDriverState *bs, bool is_write)
+{
+ ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ bool must_wait;
+ BlockDriverState *token;
+
+ /* Check if there's any pending request to schedule next.
+ * next_throttle_token() falls back to bs when nothing is queued,
+ * so the pending counter still has to be tested here.
+ */
+ token = next_throttle_token(bs, is_write);
+ if (!token->pending_reqs[is_write]) {
+ return;
+ }
+
+ /* Set a timer for the request if it needs to be throttled */
+ must_wait = throttle_group_schedule_timer(token, is_write);
+
+ /* If it doesn't have to wait, queue it for immediate execution.
+ * (When it does have to wait, the token update, if any, was already
+ * done by throttle_group_schedule_timer().)
+ */
+ if (!must_wait) {
+ /* Give preference to requests from the current bs: when running
+ * in a coroutine and bs has a queued request of this type, that
+ * one is scheduled and bs becomes the token.
+ */
+ if (qemu_in_coroutine() &&
+ qemu_co_queue_next(&bs->throttled_reqs[is_write])) {
+ token = bs;
+ } else {
+ /* Otherwise arm the token's own timer to expire 1 ns from
+ * now on its clock, so that its timer callback runs and
+ * wakes the request; the group is marked as having an
+ * armed timer until that callback clears the flag.
+ */
+ ThrottleTimers *tt = &token->throttle_timers;
+ int64_t now = qemu_clock_get_ns(tt->clock_type);
+ timer_mod(tt->timers[is_write], now + 1);
+ tg->any_timer_armed[is_write] = true;
+ }
+ tg->tokens[is_write] = token;
+ }
+}
+
+/* Check if an I/O request needs to be throttled, wait and set a timer
+ * if necessary, and schedule the next request using a round robin
+ * algorithm.
+ *
+ * Must be called from coroutine context: the caller may be suspended
+ * in bs->throttled_reqs[is_write] until it is woken up. tg->lock is
+ * taken here and is dropped for the duration of that wait.
+ *
+ * @bs: the current BlockDriverState
+ * @bytes: the number of bytes for this I/O
+ * @is_write: the type of operation (read/write)
+ */
+void coroutine_fn throttle_group_co_io_limits_intercept(BlockDriverState *bs,
+ unsigned int bytes,
+ bool is_write)
+{
+ bool must_wait;
+ BlockDriverState *token;
+
+ ThrottleGroup *tg = container_of(bs->throttle_state, ThrottleGroup, ts);
+ qemu_mutex_lock(&tg->lock);
+
+ /* First we check if this I/O has to be throttled. The check is made
+ * against the next token of the group, which is bs itself when no
+ * member has requests pending.
+ */
+ token = next_throttle_token(bs, is_write);
+ must_wait = throttle_group_schedule_timer(token, is_write);
+
+ /* Wait if there's a timer set or queued requests of this type.
+ * pending_reqs counts the requests of this bs that are currently
+ * waiting; it is only modified with tg->lock held, and the lock is
+ * released while this coroutine sleeps in the queue.
+ */
+ if (must_wait || bs->pending_reqs[is_write]) {
+ bs->pending_reqs[is_write]++;
+ qemu_mutex_unlock(&tg->lock);
+ qemu_co_queue_wait(&bs->throttled_reqs[is_write]);
+ qemu_mutex_lock(&tg->lock);
+ bs->pending_reqs[is_write]--;
+ }
+
+ /* The I/O will be executed, so do the accounting */
+ throttle_account(bs->throttle_state, is_write, bytes);
+
+ /* Schedule the next request */
+ schedule_next_request(bs, is_write);
+
+ qemu_mutex_unlock(&tg->lock);
+}
+
/* Update the throttle configuration for a particular group. Similar
* to throttle_config(), but guarantees atomicity within the
* throttling group.
@@ -195,9 +345,49 @@ void throttle_group_get_config(BlockDriverState *bs, ThrottleConfig *cfg)
qemu_mutex_unlock(&tg->lock);
}
-/* Register a BlockDriverState in the throttling group, also updating
- * its throttle_state pointer to point to it. If a throttling group
- * with that name does not exist yet, it will be created.
+/* ThrottleTimers callback. This wakes up a request that was waiting
+ * because it had been throttled.
+ *
+ * tg->lock is taken only around the updates of the group state; it is
+ * not held while the woken request runs.
+ *
+ * @bs: the BlockDriverState whose request had been throttled
+ * @is_write: the type of operation (read/write)
+ */
+static void timer_cb(BlockDriverState *bs, bool is_write)
+{
+ ThrottleState *ts = bs->throttle_state;
+ ThrottleGroup *tg = container_of(ts, ThrottleGroup, ts);
+ bool empty_queue;
+
+ /* The timer has just been fired, so we can update the flag */
+ qemu_mutex_lock(&tg->lock);
+ tg->any_timer_armed[is_write] = false;
+ qemu_mutex_unlock(&tg->lock);
+
+ /* Run the request that was waiting for this timer. A false result
+ * from qemu_co_enter_next() is treated as "no request was queued".
+ */
+ empty_queue = !qemu_co_enter_next(&bs->throttled_reqs[is_write]);
+
+ /* If the request queue was empty then we have to take care of
+ * scheduling the next one; otherwise the request that was just
+ * entered is left to do it (presumably on its way out of
+ * throttle_group_co_io_limits_intercept() - TODO confirm). */
+ if (empty_queue) {
+ qemu_mutex_lock(&tg->lock);
+ schedule_next_request(bs, is_write);
+ qemu_mutex_unlock(&tg->lock);
+ }
+}
+
+static void read_timer_cb(void *opaque)
+{
+ timer_cb(opaque, false); /* opaque is used as the BlockDriverState; false selects the read queue */
+}
+
+static void write_timer_cb(void *opaque)
+{
+ timer_cb(opaque, true); /* opaque is used as the BlockDriverState; true selects the write queue */
+}
+
+/* Register a BlockDriverState in the throttling group, also
+ * initializing its timers and updating its throttle_state pointer to
+ * point to it. If a throttling group with that name does not exist
+ * yet, it will be created.
*
* @bs: the BlockDriverState to insert
* @groupname: the name of the group
@@ -206,6 +396,12 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
{
int i;
ThrottleGroup *tg = throttle_group_incref(groupname);
+ int clock_type = QEMU_CLOCK_REALTIME;
+
+ if (qtest_enabled()) {
+ /* For testing block IO throttling only */
+ clock_type = QEMU_CLOCK_VIRTUAL;
+ }
bs->throttle_state = &tg->ts;
@@ -218,11 +414,20 @@ void throttle_group_register_bs(BlockDriverState *bs, const char *groupname)
}
QLIST_INSERT_HEAD(&tg->head, bs, round_robin);
+
+ throttle_timers_init(&bs->throttle_timers,
+ bdrv_get_aio_context(bs),
+ clock_type,
+ read_timer_cb,
+ write_timer_cb,
+ bs);
+
qemu_mutex_unlock(&tg->lock);
}
/* Unregister a BlockDriverState from its group, removing it from the
- * list and setting the throttle_state pointer to NULL.
+ * list, destroying the timers and setting the throttle_state pointer
+ * to NULL.
*
* The group will be destroyed if it's empty after this operation.
*
@@ -247,6 +452,7 @@ void throttle_group_unregister_bs(BlockDriverState *bs)
/* remove the current bs from the list */
QLIST_REMOVE(bs, round_robin);
+ throttle_timers_destroy(&bs->throttle_timers);
qemu_mutex_unlock(&tg->lock);
throttle_group_unref(tg);