From 9ed74f2dba6ebf9f30b80554290bfc73cc3ef083 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Fri, 11 Sep 2009 16:12:44 -0400 Subject: Btrfs: proper -ENOSPC handling At the start of a transaction we do a btrfs_reserve_metadata_space() and specify how many items we plan on modifying. Then once we've done our modifications and such, just call btrfs_unreserve_metadata_space() for the same number of items we reserved. For keeping track of metadata needed for data I've had to add an extent_io op for when we merge extents. This lets us track space properly when we are doing sequential writes, so we don't end up reserving way more metadata space than what we need. The only place where the metadata space accounting is not done is in the relocation code. This is because Yan is going to be reworking that code in the near future, so running btrfs-vol -b could still possibly result in a ENOSPC related panic. This patch also turns off the metadata_ratio stuff in order to allow users to more efficiently use their disk space. This patch makes it so we track how much metadata we need for an inode's delayed allocation extents by tracking how many extents are currently waiting for allocation. It introduces two new callbacks for the extent_io tree's, merge_extent_hook and split_extent_hook. These help us keep track of when we merge delalloc extents together and split them up. Reservations are handled prior to any actually dirty'ing occurs, and then we unreserve after we dirty. btrfs_unreserve_metadata_for_delalloc() will make the appropriate unreservations as needed based on the number of reservations we currently have and the number of extents we currently have. Doing the reservation outside of doing any of the actual dirty'ing lets us do things like filemap_flush() the inode to try and force delalloc to happen, or as a last resort actually start allocation on all delalloc inodes in the fs. This has survived dbench, fs_mark and an fsx torture test. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'fs/btrfs/extent_io.h') diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 14ed16fd862..4794ec891fe 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -60,8 +60,13 @@ struct extent_io_ops { struct extent_state *state, int uptodate); int (*set_bit_hook)(struct inode *inode, u64 start, u64 end, unsigned long old, unsigned long bits); - int (*clear_bit_hook)(struct inode *inode, u64 start, u64 end, - unsigned long old, unsigned long bits); + int (*clear_bit_hook)(struct inode *inode, struct extent_state *state, + unsigned long bits); + int (*merge_extent_hook)(struct inode *inode, + struct extent_state *new, + struct extent_state *other); + int (*split_extent_hook)(struct inode *inode, + struct extent_state *orig, u64 split); int (*write_cache_pages_lock_hook)(struct page *page); }; @@ -79,10 +84,14 @@ struct extent_state { u64 start; u64 end; /* inclusive */ struct rb_node rb_node; + + /* ADD NEW ELEMENTS AFTER THIS */ struct extent_io_tree *tree; wait_queue_head_t wq; atomic_t refs; unsigned long state; + u64 split_start; + u64 split_end; /* for use by the FS */ u64 private; -- cgit v1.2.3 From a791e35e12ff672e8a0e140abeeaf900c3b2ea77 Mon Sep 17 00:00:00 2001 From: Chris Mason Date: Thu, 8 Oct 2009 11:27:10 -0400 Subject: Btrfs: cleanup extent_clear_unlock_delalloc flags extent_clear_unlock_delalloc has a growing set of ugly parameters that is very difficult to read and maintain. This switches to a flag field and well named flag defines. Signed-off-by: Chris Mason --- fs/btrfs/extent_io.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'fs/btrfs/extent_io.h') diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 4794ec891fe..41d2a47ecf3 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -25,6 +25,15 @@ #define EXTENT_BUFFER_BLOCKING 1 #define EXTENT_BUFFER_DIRTY 2 +/* these are flags for extent_clear_unlock_delalloc */ +#define EXTENT_CLEAR_UNLOCK_PAGE 0x1 +#define EXTENT_CLEAR_UNLOCK 0x2 +#define EXTENT_CLEAR_DELALLOC 0x4 +#define EXTENT_CLEAR_DIRTY 0x8 +#define EXTENT_SET_WRITEBACK 0x10 +#define EXTENT_END_WRITEBACK 0x20 +#define EXTENT_SET_PRIVATE2 0x40 + /* * page->private values. Every page that is controlled by the extent * map has page->private set to one. @@ -288,10 +297,5 @@ int extent_range_uptodate(struct extent_io_tree *tree, int extent_clear_unlock_delalloc(struct inode *inode, struct extent_io_tree *tree, u64 start, u64 end, struct page *locked_page, - int unlock_page, - int clear_unlock, - int clear_delalloc, int clear_dirty, - int set_writeback, - int end_writeback, - int set_private2); + unsigned long op); #endif -- cgit v1.2.3 From 32c00aff718bb54a214b39146bdd9ac01511cd25 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Thu, 8 Oct 2009 13:34:05 -0400 Subject: Btrfs: release delalloc reservations on extent item insertion This patch fixes an issue with the delalloc metadata space reservation code. The problem is we used to free the reservation as soon as we allocated the delalloc region. The problem with this is if we are not inserting an inline extent, we don't actually insert the extent item until after the ordered extent is written out. This patch does 3 things, 1) It moves the reservation clearing stuff into the ordered code, so when we remove the ordered extent we remove the reservation. 2) It adds a EXTENT_DO_ACCOUNTING flag that gets passed when we clear delalloc bits in the cases where we want to clear the metadata reservation when we clear the delalloc extent, in the case that we do an inline extent or we invalidate the page. 3) It adds another waitqueue to the space info so that when we start a fs wide delalloc flush, anybody else who also hits that area will simply wait for the flush to finish and then try to make their allocation. This has been tested thoroughly to make sure we did not regress on performance. Signed-off-by: Josef Bacik Signed-off-by: Chris Mason --- fs/btrfs/extent_io.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'fs/btrfs/extent_io.h') diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 41d2a47ecf3..36de250a7b2 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -15,6 +15,7 @@ #define EXTENT_BUFFER_FILLED (1 << 8) #define EXTENT_BOUNDARY (1 << 9) #define EXTENT_NODATASUM (1 << 10) +#define EXTENT_DO_ACCOUNTING (1 << 11) #define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) /* flags for bio submission */ @@ -33,6 +34,7 @@ #define EXTENT_SET_WRITEBACK 0x10 #define EXTENT_END_WRITEBACK 0x20 #define EXTENT_SET_PRIVATE2 0x40 +#define EXTENT_CLEAR_ACCOUNTING 0x80 /* * page->private values. Every page that is controlled by the extent -- cgit v1.2.3