blk-mq: fix tag_get wait task can't be awakened

In case of shared tags, there might be more than one hctx which allocates from the same tags, and each hctx is limited to allocate at most: hctx_max_depth = max((bt->sb.depth + users - 1) / users, 4U); tag idle detection is lazy, and may be delayed for 30sec, so there could be just one real active hctx(queue) but all others are actually idle and still accounted as active because of the lazy idle detection. Then if wake_batch is > hctx_max_depth, driver tag allocation may wait forever on this real active hctx. Fix this by recalculating wake_batch when inc or dec active_queues. Fixes: 0d2602ca30e41 ("blk-mq: improve support for shared tags maps") Suggested-by: Ming Lei <ming.lei@redhat.com> Suggested-by: John Garry <john.garry@huawei.com> Signed-off-by: Laibin Qiu <qiulaibin@huawei.com> Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com> Link: https://lore.kernel.org/r/20220113025536.1479653-1-qiulaibin@huawei.com Signed-off-by: Jens Axboe <axboe@kernel.dk>
author: Laibin Qiu <qiulaibin@huawei.com> 2022-01-13 10:55:36 +0800
committer: Jens Axboe <axboe@kernel.dk> 2022-01-13 12:52:14 -0700
commit: 180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 (patch)
tree: c19baba7f6fcbc98312e5f261fb0aa37c9f434a5 /block
parent: fb3b0673b7d5b477ed104949450cd511337ba3c6 (diff)
download: linux-rpi-180dccb0dba4f5e84a4a70c1be1d34cbb6528b32.tar.gz
linux-rpi-180dccb0dba4f5e84a4a70c1be1d34cbb6528b32.tar.bz2
linux-rpi-180dccb0dba4f5e84a4a70c1be1d34cbb6528b32.zip
1 files changed, 33 insertions, 7 deletions
diff --git a/block/blk-mq-tag.c b/block/blk-mq-tag.c
index e55a6834c9a6..845f74e8dd7b 100644
--- a/block/blk-mq-tag.c
+++ b/block/blk-mq-tag.c
@@ -17,6 +17,21 @@
 #include "blk-mq-tag.h"
 
 /*
+ * Recalculate wakeup batch when tag is shared by hctx.
+ */
+static void blk_mq_update_wake_batch(struct blk_mq_tags *tags,
+		unsigned int users)
+{
+	if (!users)
+		return;
+
+	sbitmap_queue_recalculate_wake_batch(&tags->bitmap_tags,
+			users);
+	sbitmap_queue_recalculate_wake_batch(&tags->breserved_tags,
+			users);
+}
+
+/*
  * If a previously inactive queue goes active, bump the active user count.
  * We need to do this before try to allocate driver tag, then even if fail
  * to get tag when first time, the other shared-tag users could reserve
@@ -24,18 +39,26 @@
  */
 bool __blk_mq_tag_busy(struct blk_mq_hw_ctx *hctx)
 {
+	unsigned int users;
+
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
 
-		if (!test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) &&
-		    !test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags))
-			atomic_inc(&hctx->tags->active_queues);
+		if (test_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags) ||
+		    test_and_set_bit(QUEUE_FLAG_HCTX_ACTIVE, &q->queue_flags)) {
+			return true;
+		}
 	} else {
-		if (!test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) &&
-		    !test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state))
-			atomic_inc(&hctx->tags->active_queues);
+		if (test_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state) ||
+		    test_and_set_bit(BLK_MQ_S_TAG_ACTIVE, &hctx->state)) {
+			return true;
+		}
 	}
 
+	users = atomic_inc_return(&hctx->tags->active_queues);
+
+	blk_mq_update_wake_batch(hctx->tags, users);
+
 	return true;
 }
 
@@ -56,6 +79,7 @@ void blk_mq_tag_wakeup_all(struct blk_mq_tags *tags, bool include_reserve)
 void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 {
 	struct blk_mq_tags *tags = hctx->tags;
+	unsigned int users;
 
 	if (blk_mq_is_shared_tags(hctx->flags)) {
 		struct request_queue *q = hctx->queue;
@@ -68,7 +92,9 @@ void __blk_mq_tag_idle(struct blk_mq_hw_ctx *hctx)
 			return;
 	}
 
-	atomic_dec(&tags->active_queues);
+	users = atomic_dec_return(&tags->active_queues);
+
+	blk_mq_update_wake_batch(tags, users);
 
 	blk_mq_tag_wakeup_all(tags, false);
 }
author	Laibin Qiu <qiulaibin@huawei.com>	2022-01-13 10:55:36 +0800
committer	Jens Axboe <axboe@kernel.dk>	2022-01-13 12:52:14 -0700
commit	180dccb0dba4f5e84a4a70c1be1d34cbb6528b32 (patch)
tree	c19baba7f6fcbc98312e5f261fb0aa37c9f434a5 /block
parent	fb3b0673b7d5b477ed104949450cd511337ba3c6 (diff)
download	linux-rpi-180dccb0dba4f5e84a4a70c1be1d34cbb6528b32.tar.gz linux-rpi-180dccb0dba4f5e84a4a70c1be1d34cbb6528b32.tar.bz2 linux-rpi-180dccb0dba4f5e84a4a70c1be1d34cbb6528b32.zip