diff options
author | Jonathan Corbet <corbet@lwn.net> | 2009-02-01 14:26:59 -0700 |
---|---|---|
committer | Jonathan Corbet <corbet@lwn.net> | 2009-03-16 08:32:27 -0600 |
commit | 76398425bb06b07cc3a3b1ce169c67dc9d6874ed (patch) | |
tree | e6e1800edda88b5592617a950daacf2199587a33 | |
parent | db1dd4d376134eba0e08af523b61cc566a4ea1cd (diff) | |
download | linux-3.10-76398425bb06b07cc3a3b1ce169c67dc9d6874ed.tar.gz linux-3.10-76398425bb06b07cc3a3b1ce169c67dc9d6874ed.tar.bz2 linux-3.10-76398425bb06b07cc3a3b1ce169c67dc9d6874ed.zip |
Move FASYNC bit handling to f_op->fasync()
Removing the BKL from FASYNC handling ran into the challenge of keeping the
setting of the FASYNC bit in filp->f_flags atomic with regard to calls to
the underlying fasync() function. Andi Kleen suggested moving the handling
of that bit into fasync(); this patch does exactly that. As a result, we
have a couple of internal API changes: fasync() must now manage the FASYNC
bit, and it will be called without the BKL held.
As it happens, every fasync() implementation in the kernel with one
exception calls fasync_helper(). So, if we make fasync_helper() set the
FASYNC bit, we can avoid making any changes to the other fasync()
functions - as long as those functions, themselves, have proper locking.
Most fasync() implementations do nothing but call fasync_helper() - which
has its own lock - so they are easily verified as correct. The BKL had
already been pushed down into the rest.
The networking code has its own version of fasync_helper(), so that code
has been augmented with explicit FASYNC bit handling.
Cc: Al Viro <viro@ZenIV.linux.org.uk>
Cc: David Miller <davem@davemloft.net>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jonathan Corbet <corbet@lwn.net>
-rw-r--r-- | Documentation/filesystems/Locking | 7 | ||||
-rw-r--r-- | fs/fcntl.c | 29 | ||||
-rw-r--r-- | fs/ioctl.c | 13 | ||||
-rw-r--r-- | net/socket.c | 7 |
4 files changed, 29 insertions, 27 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ec6a9392a17..4e78ce67784 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -437,8 +437,11 @@ grab BKL for cases when we close a file that had been opened r/w, but that can and should be done using the internal locking with smaller critical areas). Current worst offender is ext2_get_block()... -->fasync() is a mess. This area needs a big cleanup and that will probably -affect locking. +->fasync() is called without BKL protection, and is responsible for +maintaining the FASYNC bit in filp->f_flags. Most instances call +fasync_helper(), which does that maintenance, so it's not normally +something one needs to worry about. Return values > 0 will be mapped to +zero in the VFS layer. ->readdir() and ->ioctl() on directories must be changed. Ideally we would move ->readdir() to inode_operations and use a separate method for directory diff --git a/fs/fcntl.c b/fs/fcntl.c index 04df8570a2d..431bb645927 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -141,7 +141,7 @@ SYSCALL_DEFINE1(dup, unsigned int, fildes) return ret; } -#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | FASYNC | O_DIRECT | O_NOATIME) +#define SETFL_MASK (O_APPEND | O_NONBLOCK | O_NDELAY | O_DIRECT | O_NOATIME) static int setfl(int fd, struct file * filp, unsigned long arg) { @@ -177,23 +177,19 @@ static int setfl(int fd, struct file * filp, unsigned long arg) return error; /* - * We still need a lock here for now to keep multiple FASYNC calls - * from racing with each other. + * ->fasync() is responsible for setting the FASYNC bit. */ - lock_kernel(); - if ((arg ^ filp->f_flags) & FASYNC) { - if (filp->f_op && filp->f_op->fasync) { - error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); - if (error < 0) - goto out; - } + if (((arg ^ filp->f_flags) & FASYNC) && filp->f_op && + filp->f_op->fasync) { + error = filp->f_op->fasync(fd, filp, (arg & FASYNC) != 0); + if (error < 0) + goto out; } - spin_lock(&filp->f_lock); filp->f_flags = (arg & SETFL_MASK) | (filp->f_flags & ~SETFL_MASK); spin_unlock(&filp->f_lock); + out: - unlock_kernel(); return error; } @@ -518,7 +514,7 @@ static DEFINE_RWLOCK(fasync_lock); static struct kmem_cache *fasync_cache __read_mostly; /* - * fasync_helper() is used by some character device drivers (mainly mice) + * fasync_helper() is used by almost all character device drivers * to set up the fasync queue. It returns negative on error, 0 if it did * no changes and positive if it added/deleted the entry. */ @@ -557,6 +553,13 @@ int fasync_helper(int fd, struct file * filp, int on, struct fasync_struct **fap result = 1; } out: + /* Fix up FASYNC bit while still holding fasync_lock */ + spin_lock(&filp->f_lock); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); write_unlock_irq(&fasync_lock); return result; } diff --git a/fs/ioctl.c b/fs/ioctl.c index 421aab465da..e8e89edba57 100644 --- a/fs/ioctl.c +++ b/fs/ioctl.c @@ -427,19 +427,11 @@ static int ioctl_fioasync(unsigned int fd, struct file *filp, /* Did FASYNC state change ? */ if ((flag ^ filp->f_flags) & FASYNC) { if (filp->f_op && filp->f_op->fasync) + /* fasync() adjusts filp->f_flags */ error = filp->f_op->fasync(fd, filp, on); else error = -ENOTTY; } - if (error) - return error; - - spin_lock(&filp->f_lock); - if (on) - filp->f_flags |= FASYNC; - else - filp->f_flags &= ~FASYNC; - spin_unlock(&filp->f_lock); return error; } @@ -507,10 +499,7 @@ int do_vfs_ioctl(struct file *filp, unsigned int fd, unsigned int cmd, break; case FIOASYNC: - /* BKL needed to avoid races tweaking f_flags */ - lock_kernel(); error = ioctl_fioasync(fd, filp, argp); - unlock_kernel(); break; case FIOQSIZE: diff --git a/net/socket.c b/net/socket.c index 35dd7371752..0f75746ab06 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1030,6 +1030,13 @@ static int sock_fasync(int fd, struct file *filp, int on) lock_sock(sk); + spin_lock(&filp->f_lock); + if (on) + filp->f_flags |= FASYNC; + else + filp->f_flags &= ~FASYNC; + spin_unlock(&filp->f_lock); + prev = &(sock->fasync_list); for (fa = *prev; fa != NULL; prev = &fa->fa_next, fa = *prev) |