summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Lachlan McIlroy <lachlan@sgi.com> 2007-05-08 13:49:39 +1000
committer Tim Shimmin <tes@sgi.com> 2007-05-08 13:49:39 +1000
commit 2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1 (patch)
tree 97b38a1d7c69bad94fafeee75102a0701c44f27e
parent e6d29426bc8a5d07d0eebd0842fe0cf6ecc862cd (diff)
download linux-3.10-2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1.tar.gz
linux-3.10-2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1.tar.bz2
linux-3.10-2a32963130aec5e157b58ff7dfa3dfa1afdf7ca1.zip
[XFS] Fix race condition in xfs_write().
This change addresses a race in xfs_write() where, for direct I/O, the flags need_i_mutex and need_flush are set up before the iolock is acquired. The conditions tested to set up the flags may change between setting the flags and acquiring the iolock, resulting in these flags having incorrect values. For example, if a file is not currently cached then need_i_mutex is set to zero, and if the file then becomes cached before the iolock is acquired we will fail to do the flushinval before the direct write.

The flush (and also the call to xfs_zero_eof()) need to be done with the iolock held exclusive, so we need to acquire the iolock before checking for cached data (or whether the write begins after eof) to prevent this state from changing. For direct I/O I've chosen to always acquire the iolock in shared mode initially and, if there is a need to promote it, drop it and reacquire it.

There are also some other tidy-ups, including removing the O_APPEND offset adjustment since that work is done in generic_write_checks() (and we don't use offset as an input parameter anywhere).

SGI-PV: 962170
SGI-Modid: xfs-linux-melb:xfs-kern:28319a

Signed-off-by: Lachlan McIlroy <lachlan@sgi.com>
Signed-off-by: David Chinner <dgc@sgi.com>
Signed-off-by: Tim Shimmin <tes@sgi.com>
-rw-r--r-- fs/xfs/linux-2.6/xfs_lrw.c 61
1 files changed, 32 insertions, 29 deletions
diff --git a/fs/xfs/linux-2.6/xfs_lrw.c b/fs/xfs/linux-2.6/xfs_lrw.c
index 8e46c9798fb..80fe3123347 100644
--- a/fs/xfs/linux-2.6/xfs_lrw.c
+++ b/fs/xfs/linux-2.6/xfs_lrw.c
@@ -649,7 +649,7 @@ xfs_write(
bhv_vrwlock_t locktype;
size_t ocount = 0, count;
loff_t pos;
- int need_i_mutex = 1, need_flush = 0;
+ int need_i_mutex;
XFS_STATS_INC(xs_write_calls);
@@ -689,39 +689,20 @@ xfs_write(
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
- if (ioflags & IO_ISDIRECT) {
- xfs_buftarg_t *target =
- (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
- mp->m_rtdev_targp : mp->m_ddev_targp;
-
- if ((pos & target->bt_smask) || (count & target->bt_smask))
- return XFS_ERROR(-EINVAL);
-
- if (!VN_CACHED(vp) && pos < i_size_read(inode))
- need_i_mutex = 0;
-
- if (VN_CACHED(vp))
- need_flush = 1;
- }
-
relock:
- if (need_i_mutex) {
+ if (ioflags & IO_ISDIRECT) {
+ iolock = XFS_IOLOCK_SHARED;
+ locktype = VRWLOCK_WRITE_DIRECT;
+ need_i_mutex = 0;
+ } else {
iolock = XFS_IOLOCK_EXCL;
locktype = VRWLOCK_WRITE;
-
+ need_i_mutex = 1;
mutex_lock(&inode->i_mutex);
- } else {
- iolock = XFS_IOLOCK_SHARED;
- locktype = VRWLOCK_WRITE_DIRECT;
}
xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
- isize = i_size_read(inode);
-
- if (file->f_flags & O_APPEND)
- *offset = isize;
-
start:
error = -generic_write_checks(file, &pos, &count,
S_ISBLK(inode->i_mode));
@@ -730,6 +711,29 @@ start:
goto out_unlock_mutex;
}
+ isize = i_size_read(inode);
+
+ if (ioflags & IO_ISDIRECT) {
+ xfs_buftarg_t *target =
+ (xip->i_d.di_flags & XFS_DIFLAG_REALTIME) ?
+ mp->m_rtdev_targp : mp->m_ddev_targp;
+
+ if ((pos & target->bt_smask) || (count & target->bt_smask)) {
+ xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+ return XFS_ERROR(-EINVAL);
+ }
+
+ if (!need_i_mutex && (VN_CACHED(vp) || pos > isize)) {
+ xfs_iunlock(xip, XFS_ILOCK_EXCL|iolock);
+ iolock = XFS_IOLOCK_EXCL;
+ locktype = VRWLOCK_WRITE;
+ need_i_mutex = 1;
+ mutex_lock(&inode->i_mutex);
+ xfs_ilock(xip, XFS_ILOCK_EXCL|iolock);
+ goto start;
+ }
+ }
+
new_size = pos + count;
if (new_size > isize)
io->io_new_size = new_size;
@@ -761,7 +765,6 @@ start:
* what allows the size to change in the first place.
*/
if ((file->f_flags & O_APPEND) && savedsize != isize) {
- pos = isize = xip->i_d.di_size;
goto start;
}
}
@@ -815,7 +818,8 @@ retry:
current->backing_dev_info = mapping->backing_dev_info;
if ((ioflags & IO_ISDIRECT)) {
- if (need_flush) {
+ if (VN_CACHED(vp)) {
+ WARN_ON(need_i_mutex == 0);
xfs_inval_cached_trace(io, pos, -1,
ctooff(offtoct(pos)), -1);
error = bhv_vop_flushinval_pages(vp, ctooff(offtoct(pos)),
@@ -849,7 +853,6 @@ retry:
pos += ret;
count -= ret;
- need_i_mutex = 1;
ioflags &= ~IO_ISDIRECT;
xfs_iunlock(xip, iolock);
goto relock;