summaryrefslogtreecommitdiff
path: root/fs/f2fs/iostat.c
diff options
context:
space:
mode:
authorDaeho Jeong <daehojeong@google.com>2021-08-20 15:29:09 -0700
committerJaegeuk Kim <jaegeuk@kernel.org>2021-08-23 10:25:51 -0700
commita4b6817625e71d5d4aee16cacf7a7fec077c6dbe (patch)
tree5053fae607f6ddd332a4058797939228d70194a5 /fs/f2fs/iostat.c
parent521187439abfb3e1c946796dc2187c443e5457ab (diff)
downloadlinux-rpi-a4b6817625e71d5d4aee16cacf7a7fec077c6dbe.tar.gz
linux-rpi-a4b6817625e71d5d4aee16cacf7a7fec077c6dbe.tar.bz2
linux-rpi-a4b6817625e71d5d4aee16cacf7a7fec077c6dbe.zip
f2fs: introduce periodic iostat io latency traces
Whenever we notice some sluggish issues on our machines, we are always curious about how well all types of I/O in the f2fs filesystem are handled. But, it's hard to get this kind of real data. First of all, we need to reproduce the issue while turning on the profiling tool like blktrace, but the issue doesn't happen again easily. Second, with the intervention of any tools, the overall timing of the issue will be slightly changed and it sometimes makes us hard to figure it out. So, I added the feature printing out IO latency statistics tracepoint events, which are minimal things to understand filesystem's I/O related behaviors, into F2FS_IOSTAT kernel config. With "iostat_enable" sysfs node on, we can get this statistics info in a periodic way and it would cause the least overhead. [samples] f2fs_ckpt-254:1-507 [003] .... 2842.439683: f2fs_iostat_latency: dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count], rd_data [136/1/801], rd_node [136/1/1704], rd_meta [4/2/4], wr_sync_data [164/16/3331], wr_sync_node [152/3/648], wr_sync_meta [160/2/4243], wr_async_data [24/13/15], wr_async_node [0/0/0], wr_async_meta [0/0/0] f2fs_ckpt-254:1-507 [002] .... 2845.450514: f2fs_iostat_latency: dev = (254,11), iotype [peak lat.(ms)/avg lat.(ms)/count], rd_data [60/3/456], rd_node [60/3/1258], rd_meta [0/0/1], wr_sync_data [120/12/2285], wr_sync_node [88/5/428], wr_sync_meta [52/6/2990], wr_async_data [4/1/3], wr_async_node [0/0/0], wr_async_meta [0/0/0] Signed-off-by: Daeho Jeong <daehojeong@google.com> Reviewed-by: Chao Yu <chao@kernel.org> Signed-off-by: Jaegeuk Kim <jaegeuk@kernel.org>
Diffstat (limited to 'fs/f2fs/iostat.c')
-rw-r--r--fs/f2fs/iostat.c133
1 files changed, 133 insertions, 0 deletions
diff --git a/fs/f2fs/iostat.c b/fs/f2fs/iostat.c
index 21c29e121a86..cdcf54ae0db8 100644
--- a/fs/f2fs/iostat.c
+++ b/fs/f2fs/iostat.c
@@ -14,6 +14,10 @@
#include "iostat.h"
#include <trace/events/f2fs.h>
+#define NUM_PREALLOC_IOSTAT_CTXS 128
+static struct kmem_cache *bio_iostat_ctx_cache;
+static mempool_t *bio_iostat_ctx_pool;
+
int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
{
struct super_block *sb = seq->private;
@@ -81,6 +85,32 @@ int __maybe_unused iostat_info_seq_show(struct seq_file *seq, void *offset)
return 0;
}
+static inline void __record_iostat_latency(struct f2fs_sb_info *sbi)
+{
+ int io, idx = 0;
+ unsigned int cnt;
+ struct f2fs_iostat_latency iostat_lat[MAX_IO_TYPE][NR_PAGE_TYPE];
+ struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+
+ spin_lock_irq(&sbi->iostat_lat_lock);
+ for (idx = 0; idx < MAX_IO_TYPE; idx++) {
+ for (io = 0; io < NR_PAGE_TYPE; io++) {
+ cnt = io_lat->bio_cnt[idx][io];
+ iostat_lat[idx][io].peak_lat =
+ jiffies_to_msecs(io_lat->peak_lat[idx][io]);
+ iostat_lat[idx][io].cnt = cnt;
+ iostat_lat[idx][io].avg_lat = cnt ?
+ jiffies_to_msecs(io_lat->sum_lat[idx][io]) / cnt : 0;
+ io_lat->sum_lat[idx][io] = 0;
+ io_lat->peak_lat[idx][io] = 0;
+ io_lat->bio_cnt[idx][io] = 0;
+ }
+ }
+ spin_unlock_irq(&sbi->iostat_lat_lock);
+
+ trace_f2fs_iostat_latency(sbi, iostat_lat);
+}
+
static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
{
unsigned long long iostat_diff[NR_IO_TYPE];
@@ -106,10 +136,13 @@ static inline void f2fs_record_iostat(struct f2fs_sb_info *sbi)
spin_unlock(&sbi->iostat_lock);
trace_f2fs_iostat(sbi, iostat_diff);
+
+ __record_iostat_latency(sbi);
}
void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
{
+ struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
int i;
spin_lock(&sbi->iostat_lock);
@@ -118,6 +151,10 @@ void f2fs_reset_iostat(struct f2fs_sb_info *sbi)
sbi->prev_rw_iostat[i] = 0;
}
spin_unlock(&sbi->iostat_lock);
+
+ spin_lock_irq(&sbi->iostat_lat_lock);
+ memset(io_lat, 0, sizeof(struct iostat_lat_info));
+ spin_unlock_irq(&sbi->iostat_lat_lock);
}
void f2fs_update_iostat(struct f2fs_sb_info *sbi,
@@ -143,12 +180,108 @@ void f2fs_update_iostat(struct f2fs_sb_info *sbi,
f2fs_record_iostat(sbi);
}
+static inline void __update_iostat_latency(struct bio_iostat_ctx *iostat_ctx,
+ int rw, bool is_sync)
+{
+ unsigned long ts_diff;
+ unsigned int iotype = iostat_ctx->type;
+ unsigned long flags;
+ struct f2fs_sb_info *sbi = iostat_ctx->sbi;
+ struct iostat_lat_info *io_lat = sbi->iostat_io_lat;
+ int idx;
+
+ if (!sbi->iostat_enable)
+ return;
+
+ ts_diff = jiffies - iostat_ctx->submit_ts;
+ if (iotype >= META_FLUSH)
+ iotype = META;
+
+ if (rw == 0) {
+ idx = READ_IO;
+ } else {
+ if (is_sync)
+ idx = WRITE_SYNC_IO;
+ else
+ idx = WRITE_ASYNC_IO;
+ }
+
+ spin_lock_irqsave(&sbi->iostat_lat_lock, flags);
+ io_lat->sum_lat[idx][iotype] += ts_diff;
+ io_lat->bio_cnt[idx][iotype]++;
+ if (ts_diff > io_lat->peak_lat[idx][iotype])
+ io_lat->peak_lat[idx][iotype] = ts_diff;
+ spin_unlock_irqrestore(&sbi->iostat_lat_lock, flags);
+}
+
+void iostat_update_and_unbind_ctx(struct bio *bio, int rw)
+{
+ struct bio_iostat_ctx *iostat_ctx = bio->bi_private;
+ bool is_sync = bio->bi_opf & REQ_SYNC;
+
+ if (rw == 0)
+ bio->bi_private = iostat_ctx->post_read_ctx;
+ else
+ bio->bi_private = iostat_ctx->sbi;
+ __update_iostat_latency(iostat_ctx, rw, is_sync);
+ mempool_free(iostat_ctx, bio_iostat_ctx_pool);
+}
+
+void iostat_alloc_and_bind_ctx(struct f2fs_sb_info *sbi,
+ struct bio *bio, struct bio_post_read_ctx *ctx)
+{
+ struct bio_iostat_ctx *iostat_ctx;
+ /* Due to the mempool, this never fails. */
+ iostat_ctx = mempool_alloc(bio_iostat_ctx_pool, GFP_NOFS);
+ iostat_ctx->sbi = sbi;
+ iostat_ctx->submit_ts = 0;
+ iostat_ctx->type = 0;
+ iostat_ctx->post_read_ctx = ctx;
+ bio->bi_private = iostat_ctx;
+}
+
+int __init f2fs_init_iostat_processing(void)
+{
+ bio_iostat_ctx_cache =
+ kmem_cache_create("f2fs_bio_iostat_ctx",
+ sizeof(struct bio_iostat_ctx), 0, 0, NULL);
+ if (!bio_iostat_ctx_cache)
+ goto fail;
+ bio_iostat_ctx_pool =
+ mempool_create_slab_pool(NUM_PREALLOC_IOSTAT_CTXS,
+ bio_iostat_ctx_cache);
+ if (!bio_iostat_ctx_pool)
+ goto fail_free_cache;
+ return 0;
+
+fail_free_cache:
+ kmem_cache_destroy(bio_iostat_ctx_cache);
+fail:
+ return -ENOMEM;
+}
+
+void f2fs_destroy_iostat_processing(void)
+{
+ mempool_destroy(bio_iostat_ctx_pool);
+ kmem_cache_destroy(bio_iostat_ctx_cache);
+}
+
int f2fs_init_iostat(struct f2fs_sb_info *sbi)
{
/* init iostat info */
spin_lock_init(&sbi->iostat_lock);
+ spin_lock_init(&sbi->iostat_lat_lock);
sbi->iostat_enable = false;
sbi->iostat_period_ms = DEFAULT_IOSTAT_PERIOD_MS;
+ sbi->iostat_io_lat = f2fs_kzalloc(sbi, sizeof(struct iostat_lat_info),
+ GFP_KERNEL);
+ if (!sbi->iostat_io_lat)
+ return -ENOMEM;
return 0;
}
+
+void f2fs_destroy_iostat(struct f2fs_sb_info *sbi)
+{
+ kfree(sbi->iostat_io_lat);
+}