summaryrefslogtreecommitdiff
path: root/fs/super.c
diff options
context:
space:
mode:
authorDave Chinner <dchinner@redhat.com>2011-07-08 14:14:42 +1000
committerAl Viro <viro@zeniv.linux.org.uk>2011-07-20 20:47:10 -0400
commitb0d40c92adafde7c2d81203ce7c1c69275f41140 (patch)
treef75a19dcd1a37aff23dc43323b58f014b1297c6b /fs/super.c
parent12ad3ab66103e6582ca69c0c9de18b13487eaaef (diff)
downloadkernel-common-b0d40c92adafde7c2d81203ce7c1c69275f41140.tar.gz
kernel-common-b0d40c92adafde7c2d81203ce7c1c69275f41140.tar.bz2
kernel-common-b0d40c92adafde7c2d81203ce7c1c69275f41140.zip
superblock: introduce per-sb cache shrinker infrastructure
With context based shrinkers, we can implement a per-superblock shrinker that shrinks the caches attached to the superblock. We currently have global shrinkers for the inode and dentry caches that split up into per-superblock operations via a coarse proportioning method that does not batch very well. The global shrinkers also have a dependency - dentries pin inodes - so we have to be very careful about how we register the global shrinkers so that the implicit call order is always correct. With a per-sb shrinker callout, we can encode this dependency directly into the per-sb shrinker, hence avoiding the need for strictly ordering shrinker registrations. We also have no need for any proportioning code for the shrinker subsystem already provides this functionality across all shrinkers. Allowing the shrinker to operate on a single superblock at a time means that we do less superblock list traversals and locking and reclaim should batch more effectively. This should result in less CPU overhead for reclaim and potentially faster reclaim of items from each filesystem. Signed-off-by: Dave Chinner <dchinner@redhat.com> Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Diffstat (limited to 'fs/super.c')
-rw-r--r--fs/super.c51
1 files changed, 50 insertions, 1 deletions
diff --git a/fs/super.c b/fs/super.c
index e63c754447ce..37a75410079e 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -38,6 +38,48 @@
LIST_HEAD(super_blocks);
DEFINE_SPINLOCK(sb_lock);
+/*
+ * One thing we have to be careful of with a per-sb shrinker is that we don't
+ * drop the last active reference to the superblock from within the shrinker.
+ * If that happens we could trigger unregistering the shrinker from within the
+ * shrinker path and that leads to deadlock on the shrinker_rwsem. Hence we
+ * take a passive reference to the superblock to avoid this from occurring.
+ */
+static int prune_super(struct shrinker *shrink, struct shrink_control *sc)
+{
+ struct super_block *sb;
+ int count;
+
+ sb = container_of(shrink, struct super_block, s_shrink);
+
+ /*
+ * Deadlock avoidance. We may hold various FS locks, and we don't want
+ * to recurse into the FS that called us in clear_inode() and friends..
+ */
+ if (sc->nr_to_scan && !(sc->gfp_mask & __GFP_FS))
+ return -1;
+
+ if (!grab_super_passive(sb))
+ return -1;
+
+ if (sc->nr_to_scan) {
+ /* proportion the scan between the two caches */
+ int total;
+
+ total = sb->s_nr_dentry_unused + sb->s_nr_inodes_unused + 1;
+ count = (sc->nr_to_scan * sb->s_nr_dentry_unused) / total;
+
+ /* prune dcache first as icache is pinned by it */
+ prune_dcache_sb(sb, count);
+ prune_icache_sb(sb, sc->nr_to_scan - count);
+ }
+
+ count = ((sb->s_nr_dentry_unused + sb->s_nr_inodes_unused) / 100)
+ * sysctl_vfs_cache_pressure;
+ drop_super(sb);
+ return count;
+}
+
/**
* alloc_super - create new superblock
* @type: filesystem type superblock should belong to
@@ -116,6 +158,9 @@ static struct super_block *alloc_super(struct file_system_type *type)
s->s_op = &default_op;
s->s_time_gran = 1000000000;
s->cleancache_poolid = -1;
+
+ s->s_shrink.seeks = DEFAULT_SEEKS;
+ s->s_shrink.shrink = prune_super;
}
out:
return s;
@@ -183,6 +228,10 @@ void deactivate_locked_super(struct super_block *s)
if (atomic_dec_and_test(&s->s_active)) {
cleancache_flush_fs(s);
fs->kill_sb(s);
+
+ /* caches are now gone, we can safely kill the shrinker now */
+ unregister_shrinker(&s->s_shrink);
+
/*
* We need to call rcu_barrier so all the delayed rcu free
* inodes are flushed before we release the fs module.
@@ -311,7 +360,6 @@ void generic_shutdown_super(struct super_block *sb)
{
const struct super_operations *sop = sb->s_op;
-
if (sb->s_root) {
shrink_dcache_for_umount(sb);
sync_filesystem(sb);
@@ -399,6 +447,7 @@ retry:
list_add(&s->s_instances, &type->fs_supers);
spin_unlock(&sb_lock);
get_filesystem(type);
+ register_shrinker(&s->s_shrink);
return s;
}