diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/dlm/lowcomms.c | 2 | ||||
-rw-r--r-- | fs/fat/inode.c | 13 | ||||
-rw-r--r-- | fs/gfs2/incore.h | 2 | ||||
-rw-r--r-- | fs/iomap.c | 17 | ||||
-rw-r--r-- | fs/ocfs2/cluster/tcp.c | 2 | ||||
-rw-r--r-- | fs/overlayfs/util.c | 1 | ||||
-rw-r--r-- | fs/timerfd.c | 8 | ||||
-rw-r--r-- | fs/userfaultfd.c | 75 | ||||
-rw-r--r-- | fs/xfs/kmem.c | 18 | ||||
-rw-r--r-- | fs/xfs/kmem.h | 2 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap.c | 34 | ||||
-rw-r--r-- | fs/xfs/libxfs/xfs_bmap_btree.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_aops.c | 59 | ||||
-rw-r--r-- | fs/xfs/xfs_icache.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_inode.c | 2 | ||||
-rw-r--r-- | fs/xfs/xfs_iomap.c | 25 | ||||
-rw-r--r-- | fs/xfs/xfs_itable.c | 6 | ||||
-rw-r--r-- | fs/xfs/xfs_mount.c | 3 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.c | 23 | ||||
-rw-r--r-- | fs/xfs/xfs_reflink.h | 4 | ||||
-rw-r--r-- | fs/xfs/xfs_super.c | 2 |
21 files changed, 148 insertions, 158 deletions
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 7d398d300e97..9382db998ec9 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -743,7 +743,7 @@ static int tcp_accept_from_sock(struct connection *con) newsock->type = con->sock->type; newsock->ops = con->sock->ops; - result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK); + result = con->sock->ops->accept(con->sock, newsock, O_NONBLOCK, true); if (result < 0) goto accept_err; diff --git a/fs/fat/inode.c b/fs/fat/inode.c index 338d2f73eb29..a2c05f2ada6d 100644 --- a/fs/fat/inode.c +++ b/fs/fat/inode.c @@ -1359,6 +1359,16 @@ out: return 0; } +static void fat_dummy_inode_init(struct inode *inode) +{ + /* Initialize this dummy inode to work as no-op. */ + MSDOS_I(inode)->mmu_private = 0; + MSDOS_I(inode)->i_start = 0; + MSDOS_I(inode)->i_logstart = 0; + MSDOS_I(inode)->i_attrs = 0; + MSDOS_I(inode)->i_pos = 0; +} + static int fat_read_root(struct inode *inode) { struct msdos_sb_info *sbi = MSDOS_SB(inode->i_sb); @@ -1803,12 +1813,13 @@ int fat_fill_super(struct super_block *sb, void *data, int silent, int isvfat, fat_inode = new_inode(sb); if (!fat_inode) goto out_fail; - MSDOS_I(fat_inode)->i_pos = 0; + fat_dummy_inode_init(fat_inode); sbi->fat_inode = fat_inode; fsinfo_inode = new_inode(sb); if (!fsinfo_inode) goto out_fail; + fat_dummy_inode_init(fsinfo_inode); fsinfo_inode->i_ino = MSDOS_FSINFO_INO; sbi->fsinfo_inode = fsinfo_inode; insert_inode_hash(fsinfo_inode); diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h index c45084ac642d..511e1ed7e2de 100644 --- a/fs/gfs2/incore.h +++ b/fs/gfs2/incore.h @@ -207,7 +207,7 @@ struct lm_lockname { struct gfs2_sbd *ln_sbd; u64 ln_number; unsigned int ln_type; -}; +} __packed __aligned(sizeof(int)); #define lm_name_equal(name1, name2) \ (((name1)->ln_number == (name2)->ln_number) && \ diff --git a/fs/iomap.c b/fs/iomap.c index 3ca1a8e44135..141c3cd55a8b 100644 --- a/fs/iomap.c +++ b/fs/iomap.c @@ -846,7 +846,8 @@ iomap_dio_rw(struct kiocb *iocb, struct 
iov_iter *iter, struct address_space *mapping = iocb->ki_filp->f_mapping; struct inode *inode = file_inode(iocb->ki_filp); size_t count = iov_iter_count(iter); - loff_t pos = iocb->ki_pos, end = iocb->ki_pos + count - 1, ret = 0; + loff_t pos = iocb->ki_pos, start = pos; + loff_t end = iocb->ki_pos + count - 1, ret = 0; unsigned int flags = IOMAP_DIRECT; struct blk_plug plug; struct iomap_dio *dio; @@ -887,12 +888,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, } if (mapping->nrpages) { - ret = filemap_write_and_wait_range(mapping, iocb->ki_pos, end); + ret = filemap_write_and_wait_range(mapping, start, end); if (ret) goto out_free_dio; ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); + start >> PAGE_SHIFT, end >> PAGE_SHIFT); WARN_ON_ONCE(ret); ret = 0; } @@ -941,6 +942,8 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, __set_current_state(TASK_RUNNING); } + ret = iomap_dio_complete(dio); + /* * Try again to invalidate clean pages which might have been cached by * non-direct readahead, or faulted in by get_user_pages() if the source @@ -949,12 +952,12 @@ iomap_dio_rw(struct kiocb *iocb, struct iov_iter *iter, * this invalidation fails, tough, the write still worked... 
*/ if (iov_iter_rw(iter) == WRITE && mapping->nrpages) { - ret = invalidate_inode_pages2_range(mapping, - iocb->ki_pos >> PAGE_SHIFT, end >> PAGE_SHIFT); - WARN_ON_ONCE(ret); + int err = invalidate_inode_pages2_range(mapping, + start >> PAGE_SHIFT, end >> PAGE_SHIFT); + WARN_ON_ONCE(err); } - return iomap_dio_complete(dio); + return ret; out_free_dio: kfree(dio); diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c index 4348027384f5..d0ab7e56d0b4 100644 --- a/fs/ocfs2/cluster/tcp.c +++ b/fs/ocfs2/cluster/tcp.c @@ -1863,7 +1863,7 @@ static int o2net_accept_one(struct socket *sock, int *more) new_sock->type = sock->type; new_sock->ops = sock->ops; - ret = sock->ops->accept(sock, new_sock, O_NONBLOCK); + ret = sock->ops->accept(sock, new_sock, O_NONBLOCK, false); if (ret < 0) goto out; diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 1953986ee6bc..6e610a205e15 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -12,7 +12,6 @@ #include <linux/slab.h> #include <linux/cred.h> #include <linux/xattr.h> -#include <linux/sched/signal.h> #include "overlayfs.h" #include "ovl_entry.h" diff --git a/fs/timerfd.c b/fs/timerfd.c index 384fa759a563..c543cdb5f8ed 100644 --- a/fs/timerfd.c +++ b/fs/timerfd.c @@ -400,9 +400,9 @@ SYSCALL_DEFINE2(timerfd_create, int, clockid, int, flags) clockid != CLOCK_BOOTTIME_ALARM)) return -EINVAL; - if (!capable(CAP_WAKE_ALARM) && - (clockid == CLOCK_REALTIME_ALARM || - clockid == CLOCK_BOOTTIME_ALARM)) + if ((clockid == CLOCK_REALTIME_ALARM || + clockid == CLOCK_BOOTTIME_ALARM) && + !capable(CAP_WAKE_ALARM)) return -EPERM; ctx = kzalloc(sizeof(*ctx), GFP_KERNEL); @@ -449,7 +449,7 @@ static int do_timerfd_settime(int ufd, int flags, return ret; ctx = f.file->private_data; - if (!capable(CAP_WAKE_ALARM) && isalarm(ctx)) { + if (isalarm(ctx) && !capable(CAP_WAKE_ALARM)) { fdput(f); return -EPERM; } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 973607df579d..1d227b0fcf49 100644 --- a/fs/userfaultfd.c +++ 
b/fs/userfaultfd.c @@ -138,8 +138,6 @@ out: * userfaultfd_ctx_get - Acquires a reference to the internal userfaultfd * context. * @ctx: [in] Pointer to the userfaultfd context. - * - * Returns: In case of success, returns not zero. */ static void userfaultfd_ctx_get(struct userfaultfd_ctx *ctx) { @@ -267,6 +265,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, { struct mm_struct *mm = ctx->mm; pgd_t *pgd; + p4d_t *p4d; pud_t *pud; pmd_t *pmd, _pmd; pte_t *pte; @@ -277,7 +276,10 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) goto out; - pud = pud_offset(pgd, address); + p4d = p4d_offset(pgd, address); + if (!p4d_present(*p4d)) + goto out; + pud = pud_offset(p4d, address); if (!pud_present(*pud)) goto out; pmd = pmd_offset(pud, address); @@ -490,7 +492,7 @@ int handle_userfault(struct vm_fault *vmf, unsigned long reason) * in such case. */ down_read(&mm->mmap_sem); - ret = 0; + ret = VM_FAULT_NOPAGE; } } @@ -527,10 +529,11 @@ out: return ret; } -static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, - struct userfaultfd_wait_queue *ewq) +static void userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, + struct userfaultfd_wait_queue *ewq) { - int ret = 0; + if (WARN_ON_ONCE(current->flags & PF_EXITING)) + goto out; ewq->ctx = ctx; init_waitqueue_entry(&ewq->wq, current); @@ -547,8 +550,16 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, break; if (ACCESS_ONCE(ctx->released) || fatal_signal_pending(current)) { - ret = -1; __remove_wait_queue(&ctx->event_wqh, &ewq->wq); + if (ewq->msg.event == UFFD_EVENT_FORK) { + struct userfaultfd_ctx *new; + + new = (struct userfaultfd_ctx *) + (unsigned long) + ewq->msg.arg.reserved.reserved1; + + userfaultfd_ctx_put(new); + } break; } @@ -566,9 +577,8 @@ static int userfaultfd_event_wait_completion(struct userfaultfd_ctx *ctx, * ctx may go away after this if the 
userfault pseudo fd is * already released. */ - +out: userfaultfd_ctx_put(ctx); - return ret; } static void userfaultfd_event_complete(struct userfaultfd_ctx *ctx, @@ -626,7 +636,7 @@ int dup_userfaultfd(struct vm_area_struct *vma, struct list_head *fcs) return 0; } -static int dup_fctx(struct userfaultfd_fork_ctx *fctx) +static void dup_fctx(struct userfaultfd_fork_ctx *fctx) { struct userfaultfd_ctx *ctx = fctx->orig; struct userfaultfd_wait_queue ewq; @@ -636,17 +646,15 @@ static int dup_fctx(struct userfaultfd_fork_ctx *fctx) ewq.msg.event = UFFD_EVENT_FORK; ewq.msg.arg.reserved.reserved1 = (unsigned long)fctx->new; - return userfaultfd_event_wait_completion(ctx, &ewq); + userfaultfd_event_wait_completion(ctx, &ewq); } void dup_userfaultfd_complete(struct list_head *fcs) { - int ret = 0; struct userfaultfd_fork_ctx *fctx, *n; list_for_each_entry_safe(fctx, n, fcs, list) { - if (!ret) - ret = dup_fctx(fctx); + dup_fctx(fctx); list_del(&fctx->list); kfree(fctx); } @@ -689,8 +697,7 @@ void mremap_userfaultfd_complete(struct vm_userfaultfd_ctx *vm_ctx, userfaultfd_event_wait_completion(ctx, &ewq); } -void userfaultfd_remove(struct vm_area_struct *vma, - struct vm_area_struct **prev, +bool userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end) { struct mm_struct *mm = vma->vm_mm; @@ -699,13 +706,11 @@ void userfaultfd_remove(struct vm_area_struct *vma, ctx = vma->vm_userfaultfd_ctx.ctx; if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_REMOVE)) - return; + return true; userfaultfd_ctx_get(ctx); up_read(&mm->mmap_sem); - *prev = NULL; /* We wait for ACK w/o the mmap semaphore */ - msg_init(&ewq.msg); ewq.msg.event = UFFD_EVENT_REMOVE; @@ -714,7 +719,7 @@ void userfaultfd_remove(struct vm_area_struct *vma, userfaultfd_event_wait_completion(ctx, &ewq); - down_read(&mm->mmap_sem); + return false; } static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, @@ -775,34 +780,6 @@ void userfaultfd_unmap_complete(struct 
mm_struct *mm, struct list_head *uf) } } -void userfaultfd_exit(struct mm_struct *mm) -{ - struct vm_area_struct *vma = mm->mmap; - - /* - * We can do the vma walk without locking because the caller - * (exit_mm) knows it now has exclusive access - */ - while (vma) { - struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; - - if (ctx && (ctx->features & UFFD_FEATURE_EVENT_EXIT)) { - struct userfaultfd_wait_queue ewq; - - userfaultfd_ctx_get(ctx); - - msg_init(&ewq.msg); - ewq.msg.event = UFFD_EVENT_EXIT; - - userfaultfd_event_wait_completion(ctx, &ewq); - - ctx->features &= ~UFFD_FEATURE_EVENT_EXIT; - } - - vma = vma->vm_next; - } -} - static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; diff --git a/fs/xfs/kmem.c b/fs/xfs/kmem.c index 2dfdc62f795e..70a5b55e0870 100644 --- a/fs/xfs/kmem.c +++ b/fs/xfs/kmem.c @@ -25,24 +25,6 @@ #include "kmem.h" #include "xfs_message.h" -/* - * Greedy allocation. May fail and may return vmalloced memory. 
- */ -void * -kmem_zalloc_greedy(size_t *size, size_t minsize, size_t maxsize) -{ - void *ptr; - size_t kmsize = maxsize; - - while (!(ptr = vzalloc(kmsize))) { - if ((kmsize >>= 1) <= minsize) - kmsize = minsize; - } - if (ptr) - *size = kmsize; - return ptr; -} - void * kmem_alloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/kmem.h b/fs/xfs/kmem.h index 689f746224e7..f0fc84fcaac2 100644 --- a/fs/xfs/kmem.h +++ b/fs/xfs/kmem.h @@ -69,8 +69,6 @@ static inline void kmem_free(const void *ptr) } -extern void *kmem_zalloc_greedy(size_t *, size_t, size_t); - static inline void * kmem_zalloc(size_t size, xfs_km_flags_t flags) { diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c index a9c66d47757a..9bd104f32908 100644 --- a/fs/xfs/libxfs/xfs_bmap.c +++ b/fs/xfs/libxfs/xfs_bmap.c @@ -763,8 +763,8 @@ xfs_bmap_extents_to_btree( args.type = XFS_ALLOCTYPE_START_BNO; args.fsbno = XFS_INO_TO_FSB(mp, ip->i_ino); } else if (dfops->dop_low) { -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: args.fsbno = *firstblock; } else { args.type = XFS_ALLOCTYPE_NEAR_BNO; @@ -790,13 +790,17 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - dfops->dop_low = true; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { + xfs_iroot_realloc(ip, -1, whichfork); + xfs_btree_del_cursor(cur, XFS_BTREE_ERROR); + return -ENOSPC; + } /* * Allocation can't fail, the space was reserved. */ - ASSERT(args.fsbno != NULLFSBLOCK); ASSERT(*firstblock == NULLFSBLOCK || args.agno >= XFS_FSB_TO_AGNO(mp, *firstblock)); *firstblock = cur->bc_private.b.firstblock = args.fsbno; @@ -4150,6 +4154,19 @@ xfs_bmapi_read( return 0; } +/* + * Add a delayed allocation extent to an inode. Blocks are reserved from the + * global pool and the extent inserted into the inode in-core extent tree. 
+ * + * On entry, got refers to the first extent beyond the offset of the extent to + * allocate or eof is specified if no such extent exists. On return, got refers + * to the extent record that was inserted to the inode fork. + * + * Note that the allocated extent may have been merged with contiguous extents + * during insertion into the inode fork. Thus, got does not reflect the current + * state of the inode fork on return. If necessary, the caller can use lastx to + * look up the updated record in the inode fork. + */ int xfs_bmapi_reserve_delalloc( struct xfs_inode *ip, @@ -4236,13 +4253,8 @@ xfs_bmapi_reserve_delalloc( got->br_startblock = nullstartblock(indlen); got->br_blockcount = alen; got->br_state = XFS_EXT_NORM; - xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); - /* - * Update our extent pointer, given that xfs_bmap_add_extent_hole_delay - * might have merged it into one of the neighbouring ones. - */ - xfs_bmbt_get_all(xfs_iext_get_ext(ifp, *lastx), got); + xfs_bmap_add_extent_hole_delay(ip, whichfork, lastx, got); /* * Tag the inode if blocks were preallocated. Note that COW fork @@ -4254,10 +4266,6 @@ xfs_bmapi_reserve_delalloc( if (whichfork == XFS_COW_FORK && (prealloc || aoff < off || alen > len)) xfs_inode_set_cowblocks_tag(ip); - ASSERT(got->br_startoff <= aoff); - ASSERT(got->br_startoff + got->br_blockcount >= aoff + alen); - ASSERT(isnullstartblock(got->br_startblock)); - ASSERT(got->br_state == XFS_EXT_NORM); return 0; out_unreserve_blocks: diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c index f93072b58a58..fd55db479385 100644 --- a/fs/xfs/libxfs/xfs_bmap_btree.c +++ b/fs/xfs/libxfs/xfs_bmap_btree.c @@ -447,8 +447,8 @@ xfs_bmbt_alloc_block( if (args.fsbno == NULLFSBLOCK) { args.fsbno = be64_to_cpu(start->l); -try_another_ag: args.type = XFS_ALLOCTYPE_START_BNO; +try_another_ag: /* * Make sure there is sufficient room left in the AG to * complete a full tree split for an extent insert. 
If @@ -488,8 +488,8 @@ try_another_ag: if (xfs_sb_version_hasreflink(&cur->bc_mp->m_sb) && args.fsbno == NULLFSBLOCK && args.type == XFS_ALLOCTYPE_NEAR_BNO) { - cur->bc_private.b.dfops->dop_low = true; args.fsbno = cur->bc_private.b.firstblock; + args.type = XFS_ALLOCTYPE_FIRST_AG; goto try_another_ag; } @@ -506,7 +506,7 @@ try_another_ag: goto error0; cur->bc_private.b.dfops->dop_low = true; } - if (args.fsbno == NULLFSBLOCK) { + if (WARN_ON_ONCE(args.fsbno == NULLFSBLOCK)) { XFS_BTREE_TRACE_CURSOR(cur, XBT_EXIT); *stat = 0; return 0; diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c index bf65a9ea8642..61494295d92f 100644 --- a/fs/xfs/xfs_aops.c +++ b/fs/xfs/xfs_aops.c @@ -274,54 +274,49 @@ xfs_end_io( struct xfs_ioend *ioend = container_of(work, struct xfs_ioend, io_work); struct xfs_inode *ip = XFS_I(ioend->io_inode); + xfs_off_t offset = ioend->io_offset; + size_t size = ioend->io_size; int error = ioend->io_bio->bi_error; /* - * Set an error if the mount has shut down and proceed with end I/O - * processing so it can perform whatever cleanups are necessary. + * Just clean up the in-memory structures if the fs has been shut down. */ - if (XFS_FORCED_SHUTDOWN(ip->i_mount)) + if (XFS_FORCED_SHUTDOWN(ip->i_mount)) { error = -EIO; + goto done; + } /* - * For a CoW extent, we need to move the mapping from the CoW fork - * to the data fork. If instead an error happened, just dump the - * new blocks. + * Clean up any COW blocks on an I/O error. 
*/ - if (ioend->io_type == XFS_IO_COW) { - if (error) - goto done; - if (ioend->io_bio->bi_error) { - error = xfs_reflink_cancel_cow_range(ip, - ioend->io_offset, ioend->io_size); - goto done; + if (unlikely(error)) { + switch (ioend->io_type) { + case XFS_IO_COW: + xfs_reflink_cancel_cow_range(ip, offset, size, true); + break; } - error = xfs_reflink_end_cow(ip, ioend->io_offset, - ioend->io_size); - if (error) - goto done; + + goto done; } /* - * For unwritten extents we need to issue transactions to convert a - * range to normal written extens after the data I/O has finished. - * Detecting and handling completion IO errors is done individually - * for each case as different cleanup operations need to be performed - * on error. + * Success: commit the COW or unwritten blocks if needed. */ - if (ioend->io_type == XFS_IO_UNWRITTEN) { - if (error) - goto done; - error = xfs_iomap_write_unwritten(ip, ioend->io_offset, - ioend->io_size); - } else if (ioend->io_append_trans) { - error = xfs_setfilesize_ioend(ioend, error); - } else { - ASSERT(!xfs_ioend_is_append(ioend) || - ioend->io_type == XFS_IO_COW); + switch (ioend->io_type) { + case XFS_IO_COW: + error = xfs_reflink_end_cow(ip, offset, size); + break; + case XFS_IO_UNWRITTEN: + error = xfs_iomap_write_unwritten(ip, offset, size); + break; + default: + ASSERT(!xfs_ioend_is_append(ioend) || ioend->io_append_trans); + break; } done: + if (ioend->io_append_trans) + error = xfs_setfilesize_ioend(ioend, error); xfs_destroy_ioend(ioend, error); } diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c index 7234b9748c36..3531f8f72fa5 100644 --- a/fs/xfs/xfs_icache.c +++ b/fs/xfs/xfs_icache.c @@ -1608,7 +1608,7 @@ xfs_inode_free_cowblocks( xfs_ilock(ip, XFS_IOLOCK_EXCL); xfs_ilock(ip, XFS_MMAPLOCK_EXCL); - ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + ret = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, false); xfs_iunlock(ip, XFS_MMAPLOCK_EXCL); xfs_iunlock(ip, XFS_IOLOCK_EXCL); diff --git 
a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c index edfa6a55b064..7eaf1ef74e3c 100644 --- a/fs/xfs/xfs_inode.c +++ b/fs/xfs/xfs_inode.c @@ -1615,7 +1615,7 @@ xfs_itruncate_extents( /* Remove all pending CoW reservations. */ error = xfs_reflink_cancel_cow_blocks(ip, &tp, first_unmap_block, - last_block); + last_block, true); if (error) goto out; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index 41662fb14e87..288ee5b840d7 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -630,6 +630,11 @@ retry: goto out_unlock; } + /* + * Flag newly allocated delalloc blocks with IOMAP_F_NEW so we punch + * them out if the write happens to fail. + */ + iomap->flags = IOMAP_F_NEW; trace_xfs_iomap_alloc(ip, offset, count, 0, &got); done: if (isnullstartblock(got.br_startblock)) @@ -1071,16 +1076,22 @@ xfs_file_iomap_end_delalloc( struct xfs_inode *ip, loff_t offset, loff_t length, - ssize_t written) + ssize_t written, + struct iomap *iomap) { struct xfs_mount *mp = ip->i_mount; xfs_fileoff_t start_fsb; xfs_fileoff_t end_fsb; int error = 0; - /* behave as if the write failed if drop writes is enabled */ - if (xfs_mp_drop_writes(mp)) + /* + * Behave as if the write failed if drop writes is enabled. Set the NEW + * flag to force delalloc cleanup. + */ + if (xfs_mp_drop_writes(mp)) { + iomap->flags |= IOMAP_F_NEW; written = 0; + } /* * start_fsb refers to the first unused block after a short write. If @@ -1094,14 +1105,14 @@ xfs_file_iomap_end_delalloc( end_fsb = XFS_B_TO_FSB(mp, offset + length); /* - * Trim back delalloc blocks if we didn't manage to write the whole - * range reserved. + * Trim delalloc blocks if they were allocated by this write and we + * didn't manage to write the whole range. * * We don't need to care about racing delalloc as we hold i_mutex * across the reserve/allocate/unreserve calls. If there are delalloc * blocks in the range, they are ours. 
*/ - if (start_fsb < end_fsb) { + if ((iomap->flags & IOMAP_F_NEW) && start_fsb < end_fsb) { truncate_pagecache_range(VFS_I(ip), XFS_FSB_TO_B(mp, start_fsb), XFS_FSB_TO_B(mp, end_fsb) - 1); @@ -1131,7 +1142,7 @@ xfs_file_iomap_end( { if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, - length, written); + length, written, iomap); return 0; } diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c index 66e881790c17..2a6d9b1558e0 100644 --- a/fs/xfs/xfs_itable.c +++ b/fs/xfs/xfs_itable.c @@ -361,7 +361,6 @@ xfs_bulkstat( xfs_agino_t agino; /* inode # in allocation group */ xfs_agnumber_t agno; /* allocation group number */ xfs_btree_cur_t *cur; /* btree cursor for ialloc btree */ - size_t irbsize; /* size of irec buffer in bytes */ xfs_inobt_rec_incore_t *irbuf; /* start of irec buffer */ int nirbuf; /* size of irbuf */ int ubcount; /* size of user's buffer */ @@ -388,11 +387,10 @@ xfs_bulkstat( *ubcountp = 0; *done = 0; - irbuf = kmem_zalloc_greedy(&irbsize, PAGE_SIZE, PAGE_SIZE * 4); + irbuf = kmem_zalloc_large(PAGE_SIZE * 4, KM_SLEEP); if (!irbuf) return -ENOMEM; - - nirbuf = irbsize / sizeof(*irbuf); + nirbuf = (PAGE_SIZE * 4) / sizeof(*irbuf); /* * Loop over the allocation groups, starting from the last diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c index 450bde68bb75..688ebff1f663 100644 --- a/fs/xfs/xfs_mount.c +++ b/fs/xfs/xfs_mount.c @@ -513,8 +513,7 @@ STATIC void xfs_set_inoalignment(xfs_mount_t *mp) { if (xfs_sb_version_hasalign(&mp->m_sb) && - mp->m_sb.sb_inoalignmt >= - XFS_B_TO_FSBT(mp, mp->m_inode_cluster_size)) + mp->m_sb.sb_inoalignmt >= xfs_icluster_size_fsb(mp)) mp->m_inoalign_mask = mp->m_sb.sb_inoalignmt - 1; else mp->m_inoalign_mask = 0; diff --git a/fs/xfs/xfs_reflink.c b/fs/xfs/xfs_reflink.c index da6d08fb359c..4a84c5ea266d 100644 --- a/fs/xfs/xfs_reflink.c +++ b/fs/xfs/xfs_reflink.c @@ -548,14 +548,18 @@ xfs_reflink_trim_irec_to_next_cow( } /* - * Cancel all pending CoW 
reservations for some block range of an inode. + * Cancel CoW reservations for some block range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_blocks( struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb) + xfs_fileoff_t end_fsb, + bool cancel_real) { struct xfs_ifork *ifp = XFS_IFORK_PTR(ip, XFS_COW_FORK); struct xfs_bmbt_irec got, del; @@ -579,7 +583,7 @@ xfs_reflink_cancel_cow_blocks( &idx, &got, &del); if (error) break; - } else { + } else if (del.br_state == XFS_EXT_UNWRITTEN || cancel_real) { xfs_trans_ijoin(*tpp, ip, 0); xfs_defer_init(&dfops, &firstfsb); @@ -621,13 +625,17 @@ xfs_reflink_cancel_cow_blocks( } /* - * Cancel all pending CoW reservations for some byte range of an inode. + * Cancel CoW reservations for some byte range of an inode. + * + * If cancel_real is true this function cancels all COW fork extents for the + * inode; if cancel_real is false, real extents are not cleared. */ int xfs_reflink_cancel_cow_range( struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count) + xfs_off_t count, + bool cancel_real) { struct xfs_trans *tp; xfs_fileoff_t offset_fsb; @@ -653,7 +661,8 @@ xfs_reflink_cancel_cow_range( xfs_trans_ijoin(tp, ip, 0); /* Scrape out the old CoW reservations */ - error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb); + error = xfs_reflink_cancel_cow_blocks(ip, &tp, offset_fsb, end_fsb, + cancel_real); if (error) goto out_cancel; @@ -1450,7 +1459,7 @@ next: * We didn't find any shared blocks so turn off the reflink flag. * First, get rid of any leftover CoW mappings. 
*/ - error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_blocks(ip, tpp, 0, NULLFILEOFF, true); if (error) return error; diff --git a/fs/xfs/xfs_reflink.h b/fs/xfs/xfs_reflink.h index 33ac9b8db683..d29a7967f029 100644 --- a/fs/xfs/xfs_reflink.h +++ b/fs/xfs/xfs_reflink.h @@ -39,9 +39,9 @@ extern void xfs_reflink_trim_irec_to_next_cow(struct xfs_inode *ip, extern int xfs_reflink_cancel_cow_blocks(struct xfs_inode *ip, struct xfs_trans **tpp, xfs_fileoff_t offset_fsb, - xfs_fileoff_t end_fsb); + xfs_fileoff_t end_fsb, bool cancel_real); extern int xfs_reflink_cancel_cow_range(struct xfs_inode *ip, xfs_off_t offset, - xfs_off_t count); + xfs_off_t count, bool cancel_real); extern int xfs_reflink_end_cow(struct xfs_inode *ip, xfs_off_t offset, xfs_off_t count); extern int xfs_reflink_recover_cow(struct xfs_mount *mp); diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 890862f2447c..685c042a120f 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -953,7 +953,7 @@ xfs_fs_destroy_inode( XFS_STATS_INC(ip->i_mount, vn_remove); if (xfs_is_reflink_inode(ip)) { - error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF); + error = xfs_reflink_cancel_cow_range(ip, 0, NULLFILEOFF, true); if (error && !XFS_FORCED_SHUTDOWN(ip->i_mount)) xfs_warn(ip->i_mount, "Error %d while evicting CoW blocks for inode %llu.", |