diff options
Diffstat (limited to 'fs')
57 files changed, 646 insertions, 271 deletions
diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index 699941e90667..511078586fa1 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -451,9 +451,9 @@ void v9fs_evict_inode(struct inode *inode) { struct v9fs_inode *v9inode = V9FS_I(inode); - truncate_inode_pages_final(inode->i_mapping); + truncate_inode_pages_final(&inode->i_data); clear_inode(inode); - filemap_fdatawrite(inode->i_mapping); + filemap_fdatawrite(&inode->i_data); v9fs_cache_inode_put_cookie(inode); /* clunk the fid stashed in writeback_fid */ diff --git a/fs/Kconfig b/fs/Kconfig index 7d1bef1999af..916d8348705b 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -46,6 +46,12 @@ config FS_DAX or if unsure, say N. Saying Y will increase the size of the kernel by about 5kB. +config FS_DAX_PMD + bool + default FS_DAX + depends on FS_DAX + depends on BROKEN + endif # BLOCK # Posix ACL utility routines diff --git a/fs/block_dev.c b/fs/block_dev.c index bb0dfb1c7af1..44d4a1e9244e 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -390,9 +390,17 @@ int bdev_read_page(struct block_device *bdev, sector_t sector, struct page *page) { const struct block_device_operations *ops = bdev->bd_disk->fops; + int result = -EOPNOTSUPP; + if (!ops->rw_page || bdev_get_integrity(bdev)) - return -EOPNOTSUPP; - return ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + return result; + + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, READ); + blk_queue_exit(bdev->bd_queue); + return result; } EXPORT_SYMBOL_GPL(bdev_read_page); @@ -421,14 +429,20 @@ int bdev_write_page(struct block_device *bdev, sector_t sector, int result; int rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE; const struct block_device_operations *ops = bdev->bd_disk->fops; + if (!ops->rw_page || bdev_get_integrity(bdev)) return -EOPNOTSUPP; + result = blk_queue_enter(bdev->bd_queue, GFP_KERNEL); + if (result) + return result; + set_page_writeback(page); result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, rw); if (result) end_page_writeback(page); else unlock_page(page); + blk_queue_exit(bdev->bd_queue); return result; } EXPORT_SYMBOL_GPL(bdev_write_page); @@ -1509,11 +1523,14 @@ static void __blkdev_put(struct block_device *bdev, fmode_t mode, int for_part) WARN_ON_ONCE(bdev->bd_holders); sync_blockdev(bdev); kill_bdev(bdev); + + bdev_write_inode(bdev); /* - * ->release can cause the queue to disappear, so flush all - * dirty data before. + * Detaching bdev inode from its wb in __destroy_inode() + * is too late: the queue which embeds its bdi (along with + * root wb) can be gone as soon as we put_disk() below. */ - bdev_write_inode(bdev); + inode_detach_wb(bdev->bd_inode); } if (bdev->bd_contains == bdev) { if (disk->fops->release) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 6dcdb2ec9211..d453d62ab0c6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -355,7 +355,7 @@ static int __resolve_indirect_ref(struct btrfs_fs_info *fs_info, index = srcu_read_lock(&fs_info->subvol_srcu); - root = btrfs_read_fs_root_no_name(fs_info, &root_key); + root = btrfs_get_fs_root(fs_info, &root_key, false); if (IS_ERR(root)) { srcu_read_unlock(&fs_info->subvol_srcu, index); ret = PTR_ERR(root); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 8c58191249cc..35489e7129a7 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3416,6 +3416,7 @@ int btrfs_cross_ref_exist(struct btrfs_trans_handle *trans, struct btrfs_block_group_cache *btrfs_lookup_block_group( struct btrfs_fs_info *info, u64 bytenr); +void btrfs_get_block_group(struct btrfs_block_group_cache *cache); void btrfs_put_block_group(struct btrfs_block_group_cache *cache); int get_block_group_index(struct btrfs_block_group_cache *cache); struct extent_buffer *btrfs_alloc_tree_block(struct btrfs_trans_handle *trans, @@ -3479,6 +3480,9 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 bytes_used, u64 type, u64 chunk_objectid, u64 chunk_offset, u64 size); +struct btrfs_trans_handle *btrfs_start_trans_remove_block_group( + struct btrfs_fs_info *fs_info, + const u64 chunk_offset); int btrfs_remove_block_group(struct btrfs_trans_handle *trans, struct btrfs_root *root, u64 group_start, struct extent_map *em); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index acf3ed11cfb6..c4661db2b72a 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -124,7 +124,7 @@ static int block_group_bits(struct btrfs_block_group_cache *cache, u64 bits) return (cache->flags & bits) == bits; } -static void btrfs_get_block_group(struct btrfs_block_group_cache *cache) +void btrfs_get_block_group(struct btrfs_block_group_cache *cache) { atomic_inc(&cache->count); } @@ -5915,19 +5915,6 @@ static int update_block_group(struct btrfs_trans_handle *trans, set_extent_dirty(info->pinned_extents, bytenr, bytenr + num_bytes - 1, GFP_NOFS | __GFP_NOFAIL); - /* - * No longer have used bytes in this block group, queue - * it for deletion. - */ - if (old_val == 0) { - spin_lock(&info->unused_bgs_lock); - if (list_empty(&cache->bg_list)) { - btrfs_get_block_group(cache); - list_add_tail(&cache->bg_list, - &info->unused_bgs); - } - spin_unlock(&info->unused_bgs_lock); - } } spin_lock(&trans->transaction->dirty_bgs_lock); @@ -5939,6 +5926,22 @@ static int update_block_group(struct btrfs_trans_handle *trans, } spin_unlock(&trans->transaction->dirty_bgs_lock); + /* + * No longer have used bytes in this block group, queue it for + * deletion. We do this after adding the block group to the + * dirty list to avoid races between cleaner kthread and space + * cache writeout. + */ + if (!alloc && old_val == 0) { + spin_lock(&info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &info->unused_bgs); + } + spin_unlock(&info->unused_bgs_lock); + } + btrfs_put_block_group(cache); total -= num_bytes; bytenr += num_bytes; @@ -8105,21 +8108,47 @@ reada: } /* - * TODO: Modify related function to add related node/leaf to dirty_extent_root, - * for later qgroup accounting. - * - * Current, this function does nothing. + * These may not be seen by the usual inc/dec ref code so we have to + * add them here. */ +static int record_one_subtree_extent(struct btrfs_trans_handle *trans, + struct btrfs_root *root, u64 bytenr, + u64 num_bytes) +{ + struct btrfs_qgroup_extent_record *qrecord; + struct btrfs_delayed_ref_root *delayed_refs; + + qrecord = kmalloc(sizeof(*qrecord), GFP_NOFS); + if (!qrecord) + return -ENOMEM; + + qrecord->bytenr = bytenr; + qrecord->num_bytes = num_bytes; + qrecord->old_roots = NULL; + + delayed_refs = &trans->transaction->delayed_refs; + spin_lock(&delayed_refs->lock); + if (btrfs_qgroup_insert_dirty_extent(delayed_refs, qrecord)) + kfree(qrecord); + spin_unlock(&delayed_refs->lock); + + return 0; +} + static int account_leaf_items(struct btrfs_trans_handle *trans, struct btrfs_root *root, struct extent_buffer *eb) { int nr = btrfs_header_nritems(eb); - int i, extent_type; + int i, extent_type, ret; struct btrfs_key key; struct btrfs_file_extent_item *fi; u64 bytenr, num_bytes; + /* We can be called directly from walk_up_proc() */ + if (!root->fs_info->quota_enabled) + return 0; + for (i = 0; i < nr; i++) { btrfs_item_key_to_cpu(eb, &key, i); @@ -8138,6 +8167,10 @@ static int account_leaf_items(struct btrfs_trans_handle *trans, continue; num_bytes = btrfs_file_extent_disk_num_bytes(eb, fi); + + ret = record_one_subtree_extent(trans, root, bytenr, num_bytes); + if (ret) + return ret; } return 0; } @@ -8206,8 +8239,6 @@ static int adjust_slots_upwards(struct btrfs_root *root, /* * root_eb is the subtree root and is locked before this function is called. - * TODO: Modify this function to mark all (including complete shared node) - * to dirty_extent_root to allow it get accounted in qgroup. */ static int account_shared_subtree(struct btrfs_trans_handle *trans, struct btrfs_root *root, @@ -8285,6 +8316,11 @@ walk_down: btrfs_tree_read_lock(eb); btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); path->locks[level] = BTRFS_READ_LOCK_BLOCKING; + + ret = record_one_subtree_extent(trans, root, child_bytenr, + root->nodesize); + if (ret) + goto out; } if (level == 0) { @@ -10256,6 +10292,47 @@ out: return ret; } +struct btrfs_trans_handle * +btrfs_start_trans_remove_block_group(struct btrfs_fs_info *fs_info, + const u64 chunk_offset) +{ + struct extent_map_tree *em_tree = &fs_info->mapping_tree.map_tree; + struct extent_map *em; + struct map_lookup *map; + unsigned int num_items; + + read_lock(&em_tree->lock); + em = lookup_extent_mapping(em_tree, chunk_offset, 1); + read_unlock(&em_tree->lock); + ASSERT(em && em->start == chunk_offset); + + /* + * We need to reserve 3 + N units from the metadata space info in order + * to remove a block group (done at btrfs_remove_chunk() and at + * btrfs_remove_block_group()), which are used for: + * + * 1 unit for adding the free space inode's orphan (located in the tree + * of tree roots). + * 1 unit for deleting the block group item (located in the extent + * tree). + * 1 unit for deleting the free space item (located in tree of tree + * roots). + * N units for deleting N device extent items corresponding to each + * stripe (located in the device tree). + * + * In order to remove a block group we also need to reserve units in the + * system space info in order to update the chunk tree (update one or + * more device items and remove one chunk item), but this is done at + * btrfs_remove_chunk() through a call to check_system_chunk(). + */ + map = (struct map_lookup *)em->bdev; + num_items = 3 + map->num_stripes; + free_extent_map(em); + + return btrfs_start_transaction_fallback_global_rsv(fs_info->extent_root, + num_items, 1); +} + /* * Process the unused_bgs list and remove any that don't have any allocated * space inside of them. @@ -10322,8 +10399,8 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * Want to do this before we do anything else so we can recover * properly if we fail to join the transaction. */ - /* 1 for btrfs_orphan_reserve_metadata() */ - trans = btrfs_start_transaction(root, 1); + trans = btrfs_start_trans_remove_block_group(fs_info, + block_group->key.objectid); if (IS_ERR(trans)) { btrfs_dec_block_group_ro(root, block_group); ret = PTR_ERR(trans); @@ -10403,11 +10480,15 @@ void btrfs_delete_unused_bgs(struct btrfs_fs_info *fs_info) * until transaction commit to do the actual discard. */ if (trimming) { - WARN_ON(!list_empty(&block_group->bg_list)); - spin_lock(&trans->transaction->deleted_bgs_lock); + spin_lock(&fs_info->unused_bgs_lock); + /* + * A concurrent scrub might have added us to the list + * fs_info->unused_bgs, so use a list_move operation + * to add the block group to the deleted_bgs list. + */ list_move(&block_group->bg_list, &trans->transaction->deleted_bgs); - spin_unlock(&trans->transaction->deleted_bgs_lock); + spin_unlock(&fs_info->unused_bgs_lock); btrfs_get_block_group(block_group); } end_trans: diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index 977e715f0bf2..0f09526aa7d9 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -1291,7 +1291,8 @@ out: * on error we return an unlocked page and the error value * on success we return a locked page and 0 */ -static int prepare_uptodate_page(struct page *page, u64 pos, +static int prepare_uptodate_page(struct inode *inode, + struct page *page, u64 pos, bool force_uptodate) { int ret = 0; @@ -1306,6 +1307,10 @@ static int prepare_uptodate_page(struct page *page, u64 pos, unlock_page(page); return -EIO; } + if (page->mapping != inode->i_mapping) { + unlock_page(page); + return -EAGAIN; + } } return 0; } @@ -1324,6 +1329,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, int faili; for (i = 0; i < num_pages; i++) { +again: pages[i] = find_or_create_page(inode->i_mapping, index + i, mask | __GFP_WRITE); if (!pages[i]) { @@ -1333,13 +1339,17 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, } if (i == 0) - err = prepare_uptodate_page(pages[i], pos, + err = prepare_uptodate_page(inode, pages[i], pos, force_uptodate); - if (i == num_pages - 1) - err = prepare_uptodate_page(pages[i], + if (!err && i == num_pages - 1) + err = prepare_uptodate_page(inode, pages[i], pos + write_bytes, false); if (err) { page_cache_release(pages[i]); + if (err == -EAGAIN) { + err = 0; + goto again; + } faili = i - 1; goto fail; } @@ -1882,8 +1892,13 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) struct btrfs_log_ctx ctx; int ret = 0; bool full_sync = 0; - const u64 len = end - start + 1; + u64 len; + /* + * The range length can be represented by u64, we have to do the typecasts + * to avoid signed overflow if it's [0, LLONG_MAX] eg. from fsync() + */ + len = (u64)end - (u64)start + 1; trace_btrfs_sync_file(file, datasync); /* @@ -2071,8 +2086,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync) } } if (!full_sync) { - ret = btrfs_wait_ordered_range(inode, start, - end - start + 1); + ret = btrfs_wait_ordered_range(inode, start, len); if (ret) { btrfs_end_transaction(trans, root); goto out; diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index 85a1f8621b51..cfe99bec49de 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -891,7 +891,7 @@ out: spin_unlock(&block_group->lock); ret = 0; - btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuild it now", + btrfs_warn(fs_info, "failed to load free space cache for block group %llu, rebuilding it now", block_group->key.objectid); } @@ -2972,7 +2972,7 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, u64 cont1_bytes, u64 min_bytes) { struct btrfs_free_space_ctl *ctl = block_group->free_space_ctl; - struct btrfs_free_space *entry; + struct btrfs_free_space *entry = NULL; int ret = -ENOSPC; u64 bitmap_offset = offset_to_bitmap(ctl, offset); @@ -2983,8 +2983,10 @@ setup_cluster_bitmap(struct btrfs_block_group_cache *block_group, * The bitmap that covers offset won't be in the list unless offset * is just its start offset. */ - entry = list_first_entry(bitmaps, struct btrfs_free_space, list); - if (entry->offset != bitmap_offset) { + if (!list_empty(bitmaps)) + entry = list_first_entry(bitmaps, struct btrfs_free_space, list); + + if (!entry || entry->offset != bitmap_offset) { entry = tree_search_offset(ctl, bitmap_offset, 1, 0); if (entry && list_empty(&entry->list)) list_add(&entry->list, bitmaps); diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 994490d5fa64..a70c5790f8f5 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -4046,9 +4046,7 @@ int btrfs_unlink_inode(struct btrfs_trans_handle *trans, */ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) { - struct btrfs_trans_handle *trans; struct btrfs_root *root = BTRFS_I(dir)->root; - int ret; /* * 1 for the possible orphan item @@ -4057,27 +4055,7 @@ static struct btrfs_trans_handle *__unlink_start_trans(struct inode *dir) * 1 for the inode ref * 1 for the inode */ - trans = btrfs_start_transaction(root, 5); - if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) - return trans; - - if (PTR_ERR(trans) == -ENOSPC) { - u64 num_bytes = btrfs_calc_trans_metadata_size(root, 5); - - trans = btrfs_start_transaction(root, 0); - if (IS_ERR(trans)) - return trans; - ret = btrfs_cond_migrate_bytes(root->fs_info, - &root->fs_info->trans_block_rsv, - num_bytes, 5); - if (ret) { - btrfs_end_transaction(trans, root); - return ERR_PTR(ret); - } - trans->block_rsv = &root->fs_info->trans_block_rsv; - trans->bytes_reserved = num_bytes; - } - return trans; + return btrfs_start_transaction_fallback_global_rsv(root, 5, 5); } static int btrfs_unlink(struct inode *dir, struct dentry *dentry) diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 93e12c18ffd7..5279fdae7142 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -993,9 +993,10 @@ int btrfs_quota_disable(struct btrfs_trans_handle *trans, mutex_lock(&fs_info->qgroup_ioctl_lock); if (!fs_info->quota_root) goto out; - spin_lock(&fs_info->qgroup_lock); fs_info->quota_enabled = 0; fs_info->pending_quota_state = 0; + btrfs_qgroup_wait_for_completion(fs_info); + spin_lock(&fs_info->qgroup_lock); quota_root = fs_info->quota_root; fs_info->quota_root = NULL; fs_info->qgroup_flags &= ~BTRFS_QGROUP_STATUS_FLAG_ON; @@ -1461,6 +1462,8 @@ struct btrfs_qgroup_extent_record struct btrfs_qgroup_extent_record *entry; u64 bytenr = record->bytenr; + assert_spin_locked(&delayed_refs->lock); + while (*p) { parent_node = *p; entry = rb_entry(parent_node, struct btrfs_qgroup_extent_record, diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 2907a77fb1f6..b091d94ceef6 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -3432,7 +3432,9 @@ out: static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, struct btrfs_device *scrub_dev, u64 chunk_offset, u64 length, - u64 dev_offset, int is_dev_replace) + u64 dev_offset, + struct btrfs_block_group_cache *cache, + int is_dev_replace) { struct btrfs_mapping_tree *map_tree = &sctx->dev_root->fs_info->mapping_tree; @@ -3445,8 +3447,18 @@ static noinline_for_stack int scrub_chunk(struct scrub_ctx *sctx, em = lookup_extent_mapping(&map_tree->map_tree, chunk_offset, 1); read_unlock(&map_tree->map_tree.lock); - if (!em) - return -EINVAL; + if (!em) { + /* + * Might have been an unused block group deleted by the cleaner + * kthread or relocation. + */ + spin_lock(&cache->lock); + if (!cache->removed) + ret = -EINVAL; + spin_unlock(&cache->lock); + + return ret; + } map = (struct map_lookup *)em->bdev; if (em->start != chunk_offset) @@ -3483,6 +3495,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, u64 length; u64 chunk_offset; int ret = 0; + int ro_set; int slot; struct extent_buffer *l; struct btrfs_key key; @@ -3568,7 +3581,21 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_on(fs_info); ret = btrfs_inc_block_group_ro(root, cache); scrub_pause_off(fs_info); - if (ret) { + + if (ret == 0) { + ro_set = 1; + } else if (ret == -ENOSPC) { + /* + * btrfs_inc_block_group_ro return -ENOSPC when it + * failed in creating new chunk for metadata. + * It is not a problem for scrub/replace, because + * metadata are always cowed, and our scrub paused + * commit_transactions. + */ + ro_set = 0; + } else { + btrfs_warn(fs_info, "failed setting block group ro, ret=%d\n", + ret); btrfs_put_block_group(cache); break; } @@ -3577,7 +3604,7 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, dev_replace->cursor_left = found_key.offset; dev_replace->item_needs_writeback = 1; ret = scrub_chunk(sctx, scrub_dev, chunk_offset, length, - found_key.offset, is_dev_replace); + found_key.offset, cache, is_dev_replace); /* * flush, submit all pending read and write bios, afterwards @@ -3611,7 +3638,30 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx, scrub_pause_off(fs_info); - btrfs_dec_block_group_ro(root, cache); + if (ro_set) + btrfs_dec_block_group_ro(root, cache); + + /* + * We might have prevented the cleaner kthread from deleting + * this block group if it was already unused because we raced + * and set it to RO mode first. So add it back to the unused + * list, otherwise it might not ever be deleted unless a manual + * balance is triggered or it becomes used and unused again. + */ + spin_lock(&cache->lock); + if (!cache->removed && !cache->ro && cache->reserved == 0 && + btrfs_block_group_used(&cache->item) == 0) { + spin_unlock(&cache->lock); + spin_lock(&fs_info->unused_bgs_lock); + if (list_empty(&cache->bg_list)) { + btrfs_get_block_group(cache); + list_add_tail(&cache->bg_list, + &fs_info->unused_bgs); + } + spin_unlock(&fs_info->unused_bgs_lock); + } else { + spin_unlock(&cache->lock); + } btrfs_put_block_group(cache); if (ret) diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c index c8c3d70c31ff..8b72b005bfb9 100644 --- a/fs/btrfs/tests/free-space-tests.c +++ b/fs/btrfs/tests/free-space-tests.c @@ -898,8 +898,10 @@ int btrfs_test_free_space_cache(void) } root = btrfs_alloc_dummy_root(); - if (!root) + if (IS_ERR(root)) { + ret = PTR_ERR(root); goto out; + } root->fs_info = btrfs_alloc_dummy_fs_info(); if (!root->fs_info) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 418c6a2ad7d8..be8eae80ff65 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -274,7 +274,6 @@ loop: cur_trans->num_dirty_bgs = 0; spin_lock_init(&cur_trans->dirty_bgs_lock); INIT_LIST_HEAD(&cur_trans->deleted_bgs); - spin_lock_init(&cur_trans->deleted_bgs_lock); spin_lock_init(&cur_trans->dropped_roots_lock); list_add_tail(&cur_trans->list, &fs_info->trans_list); extent_io_tree_init(&cur_trans->dirty_pages, @@ -592,6 +591,38 @@ struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, return start_transaction(root, num_items, TRANS_START, BTRFS_RESERVE_FLUSH_ALL); } +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor) +{ + struct btrfs_trans_handle *trans; + u64 num_bytes; + int ret; + + trans = btrfs_start_transaction(root, num_items); + if (!IS_ERR(trans) || PTR_ERR(trans) != -ENOSPC) + return trans; + + trans = btrfs_start_transaction(root, 0); + if (IS_ERR(trans)) + return trans; + + num_bytes = btrfs_calc_trans_metadata_size(root, num_items); + ret = btrfs_cond_migrate_bytes(root->fs_info, + &root->fs_info->trans_block_rsv, + num_bytes, + min_factor); + if (ret) { + btrfs_end_transaction(trans, root); + return ERR_PTR(ret); + } + + trans->block_rsv = &root->fs_info->trans_block_rsv; + trans->bytes_reserved = num_bytes; + + return trans; +} struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h index b05b2f64d913..64c8221b6165 100644 --- a/fs/btrfs/transaction.h +++ b/fs/btrfs/transaction.h @@ -77,8 +77,8 @@ struct btrfs_transaction { */ struct mutex cache_write_mutex; spinlock_t dirty_bgs_lock; + /* Protected by spin lock fs_info->unused_bgs_lock. */ struct list_head deleted_bgs; - spinlock_t deleted_bgs_lock; spinlock_t dropped_roots_lock; struct btrfs_delayed_ref_root delayed_refs; int aborted; @@ -185,6 +185,10 @@ int btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_root *root); struct btrfs_trans_handle *btrfs_start_transaction(struct btrfs_root *root, unsigned int num_items); +struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv( + struct btrfs_root *root, + unsigned int num_items, + int min_factor); struct btrfs_trans_handle *btrfs_start_transaction_lflush( struct btrfs_root *root, unsigned int num_items); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a6df8fdc1312..a23399e8e3ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1973,8 +1973,7 @@ void btrfs_rm_dev_replace_remove_srcdev(struct btrfs_fs_info *fs_info, if (srcdev->writeable) { fs_devices->rw_devices--; /* zero out the old super if it is writable */ - btrfs_scratch_superblocks(srcdev->bdev, - rcu_str_deref(srcdev->name)); + btrfs_scratch_superblocks(srcdev->bdev, srcdev->name->str); } if (srcdev->bdev) @@ -2024,8 +2023,7 @@ void btrfs_destroy_dev_replace_tgtdev(struct btrfs_fs_info *fs_info, btrfs_sysfs_rm_device_link(fs_info->fs_devices, tgtdev); if (tgtdev->bdev) { - btrfs_scratch_superblocks(tgtdev->bdev, - rcu_str_deref(tgtdev->name)); + btrfs_scratch_superblocks(tgtdev->bdev, tgtdev->name->str); fs_info->fs_devices->open_devices--; } fs_info->fs_devices->num_devices--; @@ -2853,7 +2851,8 @@ static int btrfs_relocate_chunk(struct btrfs_root *root, u64 chunk_offset) if (ret) return ret; - trans = btrfs_start_transaction(root, 0); + trans = btrfs_start_trans_remove_block_group(root->fs_info, + chunk_offset); if (IS_ERR(trans)) { ret = PTR_ERR(trans); btrfs_std_error(root->fs_info, ret, NULL); @@ -3123,7 +3122,7 @@ static int chunk_profiles_filter(u64 chunk_type, return 1; } -static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, +static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; @@ -3156,7 +3155,7 @@ static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, return ret; } -static int chunk_usage_range_filter(struct btrfs_fs_info *fs_info, +static int chunk_usage_filter(struct btrfs_fs_info *fs_info, u64 chunk_offset, struct btrfs_balance_args *bargs) { struct btrfs_block_group_cache *cache; @@ -3549,12 +3548,11 @@ again: ret = btrfs_force_chunk_alloc(trans, chunk_root, BTRFS_BLOCK_GROUP_DATA); + btrfs_end_transaction(trans, chunk_root); if (ret < 0) { mutex_unlock(&fs_info->delete_unused_bgs_mutex); goto error; } - - btrfs_end_transaction(trans, chunk_root); chunk_reserved = 1; } diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ec5712372732..d5c84f6b1353 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -382,7 +382,7 @@ struct map_lookup { #define BTRFS_BALANCE_ARGS_LIMIT (1ULL << 5) #define BTRFS_BALANCE_ARGS_LIMIT_RANGE (1ULL << 6) #define BTRFS_BALANCE_ARGS_STRIPES_RANGE (1ULL << 7) -#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 8) +#define BTRFS_BALANCE_ARGS_USAGE_RANGE (1ULL << 10) #define BTRFS_BALANCE_ARGS_MASK \ (BTRFS_BALANCE_ARGS_PROFILES | \ diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index 7a6b02f72787..c0f3da3926a0 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -879,7 +879,7 @@ int cachefiles_write_page(struct fscache_storage *op, struct page *page) loff_t pos, eof; size_t len; void *data; - int ret; + int ret = -ENOBUFS; ASSERT(op != NULL); ASSERT(page != NULL); diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 6b66dd5d1540..a329f5ba35aa 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1831,11 +1831,11 @@ cifs_invalidate_mapping(struct inode *inode) * @word: long word containing the bit lock */ static int -cifs_wait_bit_killable(struct wait_bit_key *key) +cifs_wait_bit_killable(struct wait_bit_key *key, int mode) { - if (fatal_signal_pending(current)) - return -ERESTARTSYS; freezable_schedule_unsafe(); + if (signal_pending_state(mode, current)) + return -ERESTARTSYS; return 0; } diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c index c81ce7f200a6..a7a1b218f308 100644 --- a/fs/configfs/dir.c +++ b/fs/configfs/dir.c @@ -1636,6 +1636,116 @@ const struct file_operations configfs_dir_operations = { .iterate = configfs_readdir, }; +/** + * configfs_register_group - creates a parent-child relation between two groups + * @parent_group: parent group + * @group: child group + * + * link groups, creates dentry for the child and attaches it to the + * parent dentry. + * + * Return: 0 on success, negative errno code on error + */ +int configfs_register_group(struct config_group *parent_group, + struct config_group *group) +{ + struct configfs_subsystem *subsys = parent_group->cg_subsys; + struct dentry *parent; + int ret; + + mutex_lock(&subsys->su_mutex); + link_group(parent_group, group); + mutex_unlock(&subsys->su_mutex); + + parent = parent_group->cg_item.ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + ret = create_default_group(parent_group, group); + if (!ret) { + spin_lock(&configfs_dirent_lock); + configfs_dir_set_ready(group->cg_item.ci_dentry->d_fsdata); + spin_unlock(&configfs_dirent_lock); + } + mutex_unlock(&d_inode(parent)->i_mutex); + return ret; +} +EXPORT_SYMBOL(configfs_register_group); + +/** + * configfs_unregister_group() - unregisters a child group from its parent + * @group: parent group to be unregistered + * + * Undoes configfs_register_group() + */ +void configfs_unregister_group(struct config_group *group) +{ + struct configfs_subsystem *subsys = group->cg_subsys; + struct dentry *dentry = group->cg_item.ci_dentry; + struct dentry *parent = group->cg_item.ci_parent->ci_dentry; + + mutex_lock_nested(&d_inode(parent)->i_mutex, I_MUTEX_PARENT); + spin_lock(&configfs_dirent_lock); + configfs_detach_prep(dentry, NULL); + spin_unlock(&configfs_dirent_lock); + + configfs_detach_group(&group->cg_item); + d_inode(dentry)->i_flags |= S_DEAD; + dont_mount(dentry); + d_delete(dentry); + mutex_unlock(&d_inode(parent)->i_mutex); + + dput(dentry); + + mutex_lock(&subsys->su_mutex); + unlink_group(group); + mutex_unlock(&subsys->su_mutex); +} +EXPORT_SYMBOL(configfs_unregister_group); + +/** + * configfs_register_default_group() - allocates and registers a child group + * @parent_group: parent group + * @name: child group name + * @item_type: child item type description + * + * boilerplate to allocate and register a child group with its parent. We need + * kzalloc'ed memory because child's default_group is initially empty. + * + * Return: allocated config group or ERR_PTR() on error + */ +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type) +{ + int ret; + struct config_group *group; + + group = kzalloc(sizeof(*group), GFP_KERNEL); + if (!group) + return ERR_PTR(-ENOMEM); + config_group_init_type_name(group, name, item_type); + + ret = configfs_register_group(parent_group, group); + if (ret) { + kfree(group); + return ERR_PTR(ret); + } + return group; +} +EXPORT_SYMBOL(configfs_register_default_group); + +/** + * configfs_unregister_default_group() - unregisters and frees a child group + * @group: the group to act on + */ +void configfs_unregister_default_group(struct config_group *group) +{ + configfs_unregister_group(group); + kfree(group); +} +EXPORT_SYMBOL(configfs_unregister_default_group); + int configfs_register_subsystem(struct configfs_subsystem *subsys) { int err; @@ -541,6 +541,10 @@ int __dax_pmd_fault(struct vm_area_struct *vma, unsigned long address, unsigned long pfn; int result = 0; + /* dax pmd mappings are broken wrt gup and fork */ + if (!IS_ENABLED(CONFIG_FS_DAX_PMD)) + return VM_FAULT_FALLBACK; + /* Fall back to PTEs if we're going to COW */ if (write && !(vma->vm_flags & VM_SHARED)) return VM_FAULT_FALLBACK; diff --git a/fs/direct-io.c b/fs/direct-io.c index cb5337d8c273..602e8441bc0f 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -1169,6 +1169,16 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, } } + /* Once we sampled i_size check for reads beyond EOF */ + dio->i_size = i_size_read(inode); + if (iov_iter_rw(iter) == READ && offset >= dio->i_size) { + if (dio->flags & DIO_LOCKING) + mutex_unlock(&inode->i_mutex); + kmem_cache_free(dio_cache, dio); + retval = 0; + goto out; + } + /* * For file extending writes updating i_size before data writeouts * complete can expose uninitialized blocks in dumb filesystems. @@ -1222,7 +1232,6 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, sdio.next_block_for_io = -1; dio->iocb = iocb; - dio->i_size = i_size_read(inode); spin_lock_init(&dio->bio_lock); dio->refcount = 1; diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c index 87e9d796cf7d..3a37bd3f9637 100644 --- a/fs/dlm/lowcomms.c +++ b/fs/dlm/lowcomms.c @@ -421,7 +421,7 @@ static void lowcomms_write_space(struct sock *sk) if (test_and_clear_bit(CF_APP_LIMITED, &con->flags)) { con->sock->sk->sk_write_pending--; - clear_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags); + clear_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags); } if (!test_and_set_bit(CF_WRITE_PENDING, &con->flags)) @@ -1448,7 +1448,7 @@ static void send_to_sock(struct connection *con) msg_flags); if (ret == -EAGAIN || ret == 0) { if (ret == -EAGAIN && - test_bit(SOCK_ASYNC_NOSPACE, &con->sock->flags) && + test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && !test_and_set_bit(CF_APP_LIMITED, &con->flags)) { /* Notify TCP that we're limited by the * application window size. diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c index 73c64daa0f55..60f03b78914e 100644 --- a/fs/exofs/inode.c +++ b/fs/exofs/inode.c @@ -592,10 +592,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) } unlock_page(page); } - if (PageDirty(page) || PageWriteback(page)) - *uptodate = true; - else - *uptodate = PageUptodate(page); + *uptodate = PageUptodate(page); EXOFS_DBGMSG2("index=0x%lx uptodate=%d\n", index, *uptodate); return page; } else { diff --git a/fs/ext2/super.c b/fs/ext2/super.c index 3a71cea68420..748d35afc902 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -569,6 +569,8 @@ static int parse_options(char *options, struct super_block *sb) /* Fall through */ case Opt_dax: #ifdef CONFIG_FS_DAX + ext2_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); set_opt(sbi->s_mount_opt, DAX); #else ext2_msg(sb, KERN_INFO, "dax option not supported"); diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c index af06830bfc00..1a0835073663 100644 --- a/fs/ext4/crypto.c +++ b/fs/ext4/crypto.c @@ -389,7 +389,7 @@ int ext4_encrypted_zeroout(struct inode *inode, struct ext4_extent *ex) struct ext4_crypto_ctx *ctx; struct page *ciphertext_page = NULL; struct bio *bio; - ext4_lblk_t lblk = ex->ee_block; + ext4_lblk_t lblk = le32_to_cpu(ex->ee_block); ext4_fsblk_t pblk = ext4_ext_pblock(ex); unsigned int len = ext4_ext_get_actual_len(ex); int ret, err = 0; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 750063f7a50c..cc7ca4e87144 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -26,6 +26,7 @@ #include <linux/seqlock.h> #include <linux/mutex.h> #include <linux/timer.h> +#include <linux/version.h> #include <linux/wait.h> #include <linux/blockgroup_lock.h> #include <linux/percpu_counter.h> @@ -727,19 +728,55 @@ struct move_extent { <= (EXT4_GOOD_OLD_INODE_SIZE + \ (einode)->i_extra_isize)) \ +/* + * We use an encoding that preserves the times for extra epoch "00": + * + * extra msb of adjust for signed + * epoch 32-bit 32-bit tv_sec to + * bits time decoded 64-bit tv_sec 64-bit tv_sec valid time range + * 0 0 1 -0x80000000..-0x00000001 0x000000000 1901-12-13..1969-12-31 + * 0 0 0 0x000000000..0x07fffffff 0x000000000 1970-01-01..2038-01-19 + * 0 1 1 0x080000000..0x0ffffffff 0x100000000 2038-01-19..2106-02-07 + * 0 1 0 0x100000000..0x17fffffff 0x100000000 2106-02-07..2174-02-25 + * 1 0 1 0x180000000..0x1ffffffff 0x200000000 2174-02-25..2242-03-16 + * 1 0 0 0x200000000..0x27fffffff 0x200000000 2242-03-16..2310-04-04 + * 1 1 1 0x280000000..0x2ffffffff 0x300000000 2310-04-04..2378-04-22 + * 1 1 0 0x300000000..0x37fffffff 0x300000000 2378-04-22..2446-05-10 + * + * Note that previous versions of the kernel on 64-bit systems would + * incorrectly use extra epoch bits 1,1 for dates between 1901 and + * 1970. e2fsck will correct this, assuming that it is run on the + * affected filesystem before 2242. + */ + static inline __le32 ext4_encode_extra_time(struct timespec *time) { - return cpu_to_le32((sizeof(time->tv_sec) > 4 ? - (time->tv_sec >> 32) & EXT4_EPOCH_MASK : 0) | - ((time->tv_nsec << EXT4_EPOCH_BITS) & EXT4_NSEC_MASK)); + u32 extra = sizeof(time->tv_sec) > 4 ? + ((time->tv_sec - (s32)time->tv_sec) >> 32) & EXT4_EPOCH_MASK : 0; + return cpu_to_le32(extra | (time->tv_nsec << EXT4_EPOCH_BITS)); } static inline void ext4_decode_extra_time(struct timespec *time, __le32 extra) { - if (sizeof(time->tv_sec) > 4) - time->tv_sec |= (__u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) - << 32; - time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; + if (unlikely(sizeof(time->tv_sec) > 4 && + (extra & cpu_to_le32(EXT4_EPOCH_MASK)))) { +#if LINUX_VERSION_CODE < KERNEL_VERSION(4,20,0) + /* Handle legacy encoding of pre-1970 dates with epoch + * bits 1,1. We assume that by kernel version 4.20, + * everyone will have run fsck over the affected + * filesystems to correct the problem. (This + * backwards compatibility may be removed before this + * time, at the discretion of the ext4 developers.) + */ + u64 extra_bits = le32_to_cpu(extra) & EXT4_EPOCH_MASK; + if (extra_bits == 3 && ((time->tv_sec) & 0x80000000) != 0) + extra_bits = 0; + time->tv_sec += extra_bits << 32; +#else + time->tv_sec += (u64)(le32_to_cpu(extra) & EXT4_EPOCH_MASK) << 32; +#endif + } + time->tv_nsec = (le32_to_cpu(extra) & EXT4_NSEC_MASK) >> EXT4_EPOCH_BITS; } #define EXT4_INODE_SET_XTIME(xtime, inode, raw_inode) \ diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 753f4e68b820..c9ab67da6e5a 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1664,8 +1664,12 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, } sbi->s_jquota_fmt = m->mount_opt; #endif -#ifndef CONFIG_FS_DAX } else if (token == Opt_dax) { +#ifdef CONFIG_FS_DAX + ext4_msg(sb, KERN_WARNING, + "DAX enabled. Warning: EXPERIMENTAL, use at your own risk"); + sbi->s_mount_opt |= m->mount_opt; +#else ext4_msg(sb, KERN_INFO, "dax option not supported"); return -1; #endif diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c index abe2401ce405..e8e7af62ac95 100644 --- a/fs/ext4/symlink.c +++ b/fs/ext4/symlink.c @@ -52,7 +52,7 @@ static const char *ext4_encrypted_follow_link(struct dentry *dentry, void **cook /* Symlink is encrypted */ sd = (struct ext4_encrypted_symlink_data *)caddr; cstr.name = sd->encrypted_path; - cstr.len = le32_to_cpu(sd->len); + cstr.len = le16_to_cpu(sd->len); if ((cstr.len + sizeof(struct ext4_encrypted_symlink_data) - 1) > max_size) { diff --git a/fs/ext4/sysfs.c b/fs/ext4/sysfs.c index 1b57c72f4a00..1420a3c614af 100644 --- a/fs/ext4/sysfs.c +++ b/fs/ext4/sysfs.c @@ -358,7 +358,7 @@ static int name##_open(struct inode *inode, struct file *file) \ return single_open(file, ext4_seq_##name##_show, PDE_DATA(inode)); \ } \ \ -const struct file_operations ext4_seq_##name##_fops = { \ +static const struct file_operations ext4_seq_##name##_fops = { \ .owner = THIS_MODULE, \ .open = name##_open, \ .read = seq_read, \ diff --git a/fs/fat/dir.c b/fs/fat/dir.c index 4afc4d9d2e41..8b2127ffb226 100644 --- a/fs/fat/dir.c +++ b/fs/fat/dir.c @@ -610,9 +610,9 @@ parse_record: int status = fat_parse_long(inode, &cpos, &bh, &de, &unicode, &nr_slots); if (status < 0) { - ctx->pos = cpos; + bh = NULL; ret = status; - goto out; + goto end_of_dir; } else if (status == PARSE_INVALID) goto record_end; else if (status == PARSE_NOT_LONGNAME) @@ -654,8 +654,9 @@ parse_record: fill_len = short_len; start_filldir: - if (!fake_offset) - ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + ctx->pos = cpos - (nr_slots + 1) * sizeof(struct msdos_dir_entry); + if (fake_offset && ctx->pos < 2) + ctx->pos = 2; if (!memcmp(de->name, MSDOS_DOT, MSDOS_NAME)) { if (!dir_emit_dot(file, ctx)) @@ -681,14 +682,19 @@ record_end: fake_offset = 0; ctx->pos = cpos; goto get_new; + end_of_dir: - ctx->pos = cpos; + if (fake_offset && cpos < 2) + ctx->pos = 2; + else + ctx->pos = cpos; fill_failed: brelse(bh); if (unicode) __putname(unicode); out: mutex_unlock(&sbi->s_lock); + return ret; } diff --git a/fs/fuse/cuse.c b/fs/fuse/cuse.c index eae2c11268bc..8e3ee1936c7e 100644 --- a/fs/fuse/cuse.c +++ b/fs/fuse/cuse.c @@ -549,6 +549,8 @@ static int cuse_channel_release(struct inode *inode, struct file *file) unregister_chrdev_region(cc->cdev->dev, 1); cdev_del(cc->cdev); } + /* Base reference is now owned by "fud" */ + fuse_conn_put(&cc->fc); rc = fuse_dev_release(inode, file); /* puts the base reference */ diff --git a/fs/fuse/file.c b/fs/fuse/file.c index e0faf8f2c868..570ca4053c80 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -1049,6 +1049,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, tmp = iov_iter_copy_from_user_atomic(page, ii, offset, bytes); flush_dcache_page(page); + iov_iter_advance(ii, tmp); if (!tmp) { unlock_page(page); page_cache_release(page); @@ -1061,7 +1062,6 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req, req->page_descs[req->num_pages].length = tmp; req->num_pages++; - iov_iter_advance(ii, tmp); count += tmp; pos += tmp; offset += tmp; diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 316adb968b65..de4bdfac0cec 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -332,12 +332,17 @@ static void remove_huge_page(struct page *page) * truncation is indicated by end of range being LLONG_MAX * In this case, we first scan the range and release found pages. * After releasing pages, hugetlb_unreserve_pages cleans up region/reserv - * maps and global counts. + * maps and global counts. Page faults can not race with truncation + * in this routine. hugetlb_no_page() prevents page faults in the + * truncated range. It checks i_size before allocation, and again after + * with the page table lock for the page held. The same lock must be + * acquired to unmap a page. * hole punch is indicated if end is not LLONG_MAX * In the hole punch case we scan the range and release found pages. * Only when releasing a page is the associated region/reserv map * deleted. The region/reserv map for ranges without associated - * pages are not modified. + * pages are not modified. Page faults can race with hole punch. + * This is indicated if we find a mapped page. * Note: If the passed end of range value is beyond the end of file, but * not LLONG_MAX this routine still performs a hole punch operation. */ @@ -361,46 +366,37 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, next = start; while (next < end) { /* - * Make sure to never grab more pages that we - * might possibly need. + * Don't grab more pages than the number left in the range. */ if (end - next < lookup_nr) lookup_nr = end - next; /* - * This pagevec_lookup() may return pages past 'end', - * so we must check for page->index > end. + * When no more pages are found, we are done. */ - if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) { - if (next == start) - break; - next = start; - continue; - } + if (!pagevec_lookup(&pvec, mapping, next, lookup_nr)) + break; for (i = 0; i < pagevec_count(&pvec); ++i) { struct page *page = pvec.pages[i]; u32 hash; + /* + * The page (index) could be beyond end. This is + * only possible in the punch hole case as end is + * max page offset in the truncate case. + */ + next = page->index; + if (next >= end) + break; + hash = hugetlb_fault_mutex_hash(h, current->mm, &pseudo_vma, mapping, next, 0); mutex_lock(&hugetlb_fault_mutex_table[hash]); lock_page(page); - if (page->index >= end) { - unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); - next = end; /* we are done */ - break; - } - - /* - * If page is mapped, it was faulted in after being - * unmapped. Do nothing in this race case. In the - * normal case page is not mapped. - */ - if (!page_mapped(page)) { + if (likely(!page_mapped(page))) { bool rsv_on_error = !PagePrivate(page); /* * We must free the huge page and remove @@ -421,17 +417,23 @@ static void remove_inode_hugepages(struct inode *inode, loff_t lstart, hugetlb_fix_reserve_counts( inode, rsv_on_error); } + } else { + /* + * If page is mapped, it was faulted in after + * being unmapped. It indicates a race between + * hole punch and page fault. Do nothing in + * this case. Getting here in a truncate + * operation is a bug. + */ + BUG_ON(truncate_op); } - if (page->index > next) - next = page->index; - - ++next; unlock_page(page); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); } + ++next; huge_pagevec_release(&pvec); + cond_resched(); } if (truncate_op) @@ -647,9 +649,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset, if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) i_size_write(inode, offset + len); inode->i_ctime = CURRENT_TIME; - spin_lock(&inode->i_lock); - inode->i_private = NULL; - spin_unlock(&inode->i_lock); out: mutex_unlock(&inode->i_mutex); return error; diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c index 89463eee6791..ca181e81c765 100644 --- a/fs/jbd2/transaction.c +++ b/fs/jbd2/transaction.c @@ -1009,7 +1009,8 @@ out: } /* Fast check whether buffer is already attached to the required transaction */ -static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) +static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh, + bool undo) { struct journal_head *jh; bool ret = false; @@ -1036,6 +1037,9 @@ static bool jbd2_write_access_granted(handle_t *handle, struct buffer_head *bh) jh = READ_ONCE(bh->b_private); if (!jh) goto out; + /* For undo access buffer must have data copied */ + if (undo && !jh->b_committed_data) + goto out; if (jh->b_transaction != handle->h_transaction && jh->b_next_transaction != handle->h_transaction) goto out; @@ -1073,7 +1077,7 @@ int jbd2_journal_get_write_access(handle_t *handle, struct buffer_head *bh) struct journal_head *jh; int rc; - if (jbd2_write_access_granted(handle, bh)) + if (jbd2_write_access_granted(handle, bh, false)) return 0; jh = jbd2_journal_add_journal_head(bh); @@ -1210,7 +1214,7 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh) char *committed_data = NULL; JBUFFER_TRACE(jh, "entry"); - if (jbd2_write_access_granted(handle, bh)) + if (jbd2_write_access_granted(handle, bh, true)) return 0; jh = jbd2_journal_add_journal_head(bh); @@ -2152,6 +2156,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, if (!buffer_dirty(bh)) { /* bdflush has written it. We can drop it now */ + __jbd2_journal_remove_checkpoint(jh); goto zap_buffer; } @@ -2181,6 +2186,7 @@ static int journal_unmap_buffer(journal_t *journal, struct buffer_head *bh, /* The orphan record's transaction has * committed. We can cleanse this buffer */ clear_buffer_jbddirty(bh); + __jbd2_journal_remove_checkpoint(jh); goto zap_buffer; } } diff --git a/fs/namei.c b/fs/namei.c index d84d7c7515fc..0c3974cd3ecd 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1996,7 +1996,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) nd->last_type = LAST_ROOT; /* if there are only slashes... */ nd->flags = flags | LOOKUP_JUMPED | LOOKUP_PARENT; nd->depth = 0; - nd->total_link_count = 0; if (flags & LOOKUP_ROOT) { struct dentry *root = nd->root.dentry; struct inode *inode = root->d_inode; diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 79b113048eac..0a3f9b594602 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -525,6 +525,8 @@ static long __ncp_ioctl(struct inode *inode, unsigned int cmd, unsigned long arg switch (rqdata.cmd) { case NCP_LOCK_EX: case NCP_LOCK_SH: + if (rqdata.timeout < 0) + return -EINVAL; if (rqdata.timeout == 0) rqdata.timeout = NCP_LOCK_DEFAULT_TIMEOUT; else if (rqdata.timeout > NCP_LOCK_MAX_TIMEOUT) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 326d9e10d833..c7e8b87da5b2 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -75,11 +75,11 @@ nfs_fattr_to_ino_t(struct nfs_fattr *fattr) * nfs_wait_bit_killable - helper for functions that are sleeping on bit locks * @word: long word containing the bit lock */ -int nfs_wait_bit_killable(struct wait_bit_key *key) +int nfs_wait_bit_killable(struct wait_bit_key *key, int mode) { - if (fatal_signal_pending(current)) - return -ERESTARTSYS; freezable_schedule_unsafe(); + if (signal_pending_state(mode, current)) + return -ERESTARTSYS; return 0; } EXPORT_SYMBOL_GPL(nfs_wait_bit_killable); @@ -618,7 +618,10 @@ void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, nfs_inc_stats(inode, NFSIOS_SETATTRTRUNC); nfs_vmtruncate(inode, attr->ia_size); } - nfs_update_inode(inode, fattr); + if (fattr->valid) + nfs_update_inode(inode, fattr); + else + NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATTR; spin_unlock(&inode->i_lock); } EXPORT_SYMBOL_GPL(nfs_setattr_update_inode); @@ -1824,7 +1827,11 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) if ((long)fattr->gencount - (long)nfsi->attr_gencount > 0) nfsi->attr_gencount = fattr->gencount; } - invalid &= ~NFS_INO_INVALID_ATTR; + + /* Don't declare attrcache up to date if there were no attrs! */ + if (fattr->valid != 0) + invalid &= ~NFS_INO_INVALID_ATTR; + /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 56cfde26fb9c..9dea85f7f918 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -379,7 +379,7 @@ extern int nfs_drop_inode(struct inode *); extern void nfs_clear_inode(struct inode *); extern void nfs_evict_inode(struct inode *); void nfs_zap_acl_cache(struct inode *inode); -extern int nfs_wait_bit_killable(struct wait_bit_key *key); +extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode); /* super.c */ extern const struct super_operations nfs_sops; diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 3e92a3cde15d..6b1ce9825430 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -14,7 +14,7 @@ #include "pnfs.h" #include "internal.h" -#define NFSDBG_FACILITY NFSDBG_PNFS +#define NFSDBG_FACILITY NFSDBG_PROC static int nfs42_set_rw_stateid(nfs4_stateid *dst, struct file *file, fmode_t fmode) @@ -284,6 +284,7 @@ static int _nfs42_proc_clone(struct rpc_message *msg, struct file *src_f, .dst_fh = NFS_FH(dst_inode), .src_offset = src_offset, .dst_offset = dst_offset, + .count = count, .dst_bitmask = server->cache_consistency_bitmask, }; struct nfs42_clone_res res = { diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 223bedda64ae..10410e8b5853 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -33,7 +33,7 @@ static int nfs_get_cb_ident_idr(struct nfs_client *clp, int minorversion) return ret; idr_preload(GFP_KERNEL); spin_lock(&nn->nfs_client_lock); - ret = idr_alloc(&nn->cb_ident_idr, clp, 0, 0, GFP_NOWAIT); + ret = idr_alloc(&nn->cb_ident_idr, clp, 1, 0, GFP_NOWAIT); if (ret >= 0) clp->cl_cb_ident = ret; spin_unlock(&nn->nfs_client_lock); diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index 4aa571956cd6..db9b5fea5b3e 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -7,6 +7,7 @@ #include <linux/file.h> #include <linux/falloc.h> #include <linux/nfs_fs.h> +#include <uapi/linux/btrfs.h> /* BTRFS_IOC_CLONE/BTRFS_IOC_CLONE_RANGE */ #include "delegation.h" #include "internal.h" #include "iostat.h" @@ -203,6 +204,7 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, struct fd src_file; struct inode *src_inode; unsigned int bs = server->clone_blksize; + bool same_inode = false; int ret; /* dst file must be opened for writing */ @@ -221,10 +223,8 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, src_inode = file_inode(src_file.file); - /* src and dst must be different files */ - ret = -EINVAL; if (src_inode == dst_inode) - goto out_fput; + same_inode = true; /* src file must be opened for reading */ if (!(src_file.file->f_mode & FMODE_READ)) @@ -249,8 +249,16 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, goto out_fput; } + /* verify if ranges are overlapped within the same file */ + if (same_inode) { + if (dst_off + count > src_off && dst_off < src_off + count) + goto out_fput; + } + /* XXX: do we lock at all? what if server needs CB_RECALL_LAYOUT? */ - if (dst_inode < src_inode) { + if (same_inode) { + mutex_lock(&src_inode->i_mutex); + } else if (dst_inode < src_inode) { mutex_lock_nested(&dst_inode->i_mutex, I_MUTEX_PARENT); mutex_lock_nested(&src_inode->i_mutex, I_MUTEX_CHILD); } else { @@ -275,7 +283,9 @@ nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, truncate_inode_pages_range(&dst_inode->i_data, dst_off, dst_off + count - 1); out_unlock: - if (dst_inode < src_inode) { + if (same_inode) { + mutex_unlock(&src_inode->i_mutex); + } else if (dst_inode < src_inode) { mutex_unlock(&src_inode->i_mutex); mutex_unlock(&dst_inode->i_mutex); } else { @@ -291,46 +301,31 @@ out_drop_write: static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) { - struct nfs_ioctl_clone_range_args args; + struct btrfs_ioctl_clone_range_args args; if (copy_from_user(&args, argp, sizeof(args))) return -EFAULT; - return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_off, args.dst_off, args.count); -} -#else -static long nfs42_ioctl_clone(struct file *dst_file, unsigned long srcfd, - u64 src_off, u64 dst_off, u64 count) -{ - return -ENOTTY; -} - -static long nfs42_ioctl_clone_range(struct file *dst_file, void __user *argp) -{ - return -ENOTTY; + return nfs42_ioctl_clone(dst_file, args.src_fd, args.src_offset, + args.dest_offset, args.src_length); } -#endif /* CONFIG_NFS_V4_2 */ long nfs4_ioctl(struct file *file, unsigned int cmd, unsigned long arg) { void __user *argp = (void __user *)arg; switch (cmd) { - case NFS_IOC_CLONE: + case BTRFS_IOC_CLONE: return nfs42_ioctl_clone(file, arg, 0, 0, 0); - case NFS_IOC_CLONE_RANGE: + case BTRFS_IOC_CLONE_RANGE: return nfs42_ioctl_clone_range(file, argp); } return -ENOTTY; } +#endif /* CONFIG_NFS_V4_2 */ const struct file_operations nfs4_file_operations = { -#ifdef CONFIG_NFS_V4_2 - .llseek = nfs4_file_llseek, -#else - .llseek = nfs_file_llseek, -#endif .read_iter = nfs_file_read, .write_iter = nfs_file_write, .mmap = nfs_file_mmap, @@ -342,14 +337,14 @@ const struct file_operations nfs4_file_operations = { .flock = nfs_flock, .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, -#ifdef CONFIG_NFS_V4_2 - .fallocate = nfs42_fallocate, -#endif /* CONFIG_NFS_V4_2 */ .check_flags = nfs_check_flags, .setlease = simple_nosetlease, -#ifdef CONFIG_COMPAT +#ifdef CONFIG_NFS_V4_2 + .llseek = nfs4_file_llseek, + .fallocate = nfs42_fallocate, .unlocked_ioctl = nfs4_ioctl, -#else .compat_ioctl = nfs4_ioctl, -#endif /* CONFIG_COMPAT */ +#else + .llseek = nfs_file_llseek, +#endif }; diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 765a03559363..89818036f035 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7866,7 +7866,7 @@ static void nfs4_layoutget_done(struct rpc_task *task, void *calldata) spin_unlock(&inode->i_lock); goto out_restart; } - if (nfs4_async_handle_error(task, server, state, NULL) == -EAGAIN) + if (nfs4_async_handle_error(task, server, state, &lgp->timeout) == -EAGAIN) goto out_restart; out: dprintk("<-- %s\n", __func__); diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index dfed4f5c8fcc..4e4441216804 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -3615,6 +3615,7 @@ static int decode_attr_fs_locations(struct xdr_stream *xdr, uint32_t *bitmap, st status = 0; if (unlikely(!(bitmap[0] & FATTR4_WORD0_FS_LOCATIONS))) goto out; + bitmap[0] &= ~FATTR4_WORD0_FS_LOCATIONS; status = -EIO; /* Ignore borken servers that return unrequested attrs */ if (unlikely(res == NULL)) diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c index 5c0c6b58157f..9aebffb40505 100644 --- a/fs/nfs/objlayout/objio_osd.c +++ b/fs/nfs/objlayout/objio_osd.c @@ -476,10 +476,7 @@ static struct page *__r4w_get_page(void *priv, u64 offset, bool *uptodate) } unlock_page(page); } - if (PageDirty(page) || PageWriteback(page)) - *uptodate = true; - else - *uptodate = PageUptodate(page); + *uptodate = PageUptodate(page); dprintk("%s: index=0x%lx uptodate=%d\n", __func__, index, *uptodate); return page; } diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c index fe3ddd20ff89..452a011ba0d8 100644 --- a/fs/nfs/pagelist.c +++ b/fs/nfs/pagelist.c @@ -129,7 +129,7 @@ __nfs_iocounter_wait(struct nfs_io_counter *c) set_bit(NFS_IO_INPROGRESS, &c->flags); if (atomic_read(&c->io_count) == 0) break; - ret = nfs_wait_bit_killable(&q.key); + ret = nfs_wait_bit_killable(&q.key, TASK_KILLABLE); } while (atomic_read(&c->io_count) != 0 && !ret); finish_wait(wq, &q.wait); return ret; diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 93496c059837..bec0384499f7 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -872,33 +872,38 @@ send_layoutget(struct pnfs_layout_hdr *lo, dprintk("--> %s\n", __func__); - lgp = kzalloc(sizeof(*lgp), gfp_flags); - if (lgp == NULL) - return NULL; + /* + * Synchronously retrieve layout information from server and + * store in lseg. If we race with a concurrent seqid morphing + * op, then re-send the LAYOUTGET. + */ + do { + lgp = kzalloc(sizeof(*lgp), gfp_flags); + if (lgp == NULL) + return NULL; + + i_size = i_size_read(ino); + + lgp->args.minlength = PAGE_CACHE_SIZE; + if (lgp->args.minlength > range->length) + lgp->args.minlength = range->length; + if (range->iomode == IOMODE_READ) { + if (range->offset >= i_size) + lgp->args.minlength = 0; + else if (i_size - range->offset < lgp->args.minlength) + lgp->args.minlength = i_size - range->offset; + } + lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; + lgp->args.range = *range; + lgp->args.type = server->pnfs_curr_ld->id; + lgp->args.inode = ino; + lgp->args.ctx = get_nfs_open_context(ctx); + lgp->gfp_flags = gfp_flags; + lgp->cred = lo->plh_lc_cred; - i_size = i_size_read(ino); + lseg = nfs4_proc_layoutget(lgp, gfp_flags); + } while (lseg == ERR_PTR(-EAGAIN)); - lgp->args.minlength = PAGE_CACHE_SIZE; - if (lgp->args.minlength > range->length) - lgp->args.minlength = range->length; - if (range->iomode == IOMODE_READ) { - if (range->offset >= i_size) - lgp->args.minlength = 0; - else if (i_size - range->offset < lgp->args.minlength) - lgp->args.minlength = i_size - range->offset; - } - lgp->args.maxcount = PNFS_LAYOUT_MAXSIZE; - lgp->args.range = *range; - lgp->args.type = server->pnfs_curr_ld->id; - lgp->args.inode = ino; - lgp->args.ctx = get_nfs_open_context(ctx); - lgp->gfp_flags = gfp_flags; - lgp->cred = lo->plh_lc_cred; - - /* Synchronously retrieve layout information from server and - * store in lseg. - */ - lseg = nfs4_proc_layoutget(lgp, gfp_flags); if (IS_ERR(lseg)) { switch (PTR_ERR(lseg)) { case -ENOMEM: @@ -1461,11 +1466,11 @@ static bool pnfs_within_mdsthreshold(struct nfs_open_context *ctx, } /* stop waiting if someone clears NFS_LAYOUT_RETRY_LAYOUTGET bit. */ -static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key) +static int pnfs_layoutget_retry_bit_wait(struct wait_bit_key *key, int mode) { if (!test_bit(NFS_LAYOUT_RETRY_LAYOUTGET, key->flags)) return 1; - return nfs_wait_bit_killable(key); + return nfs_wait_bit_killable(key, mode); } static bool pnfs_prepare_to_retry_layoutget(struct pnfs_layout_hdr *lo) @@ -1687,6 +1692,7 @@ pnfs_layout_process(struct nfs4_layoutget *lgp) /* existing state ID, make sure the sequence number matches. */ if (pnfs_layout_stateid_blocked(lo, &res->stateid)) { dprintk("%s forget reply due to sequence\n", __func__); + status = -EAGAIN; goto out_forget_reply; } pnfs_set_layout_stateid(lo, &res->stateid, false); diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c index 9ffef06b30d5..c9d6c715c0fb 100644 --- a/fs/nfsd/nfs4layouts.c +++ b/fs/nfsd/nfs4layouts.c @@ -616,6 +616,7 @@ nfsd4_cb_layout_prepare(struct nfsd4_callback *cb) mutex_lock(&ls->ls_mutex); nfs4_inc_and_copy_stateid(&ls->ls_recall_sid, &ls->ls_stid); + mutex_unlock(&ls->ls_mutex); } static int @@ -659,7 +660,6 @@ nfsd4_cb_layout_release(struct nfsd4_callback *cb) trace_layout_recall_release(&ls->ls_stid.sc_stateid); - mutex_unlock(&ls->ls_mutex); nfsd4_return_all_layouts(ls, &reaplist); nfsd4_free_layouts(&reaplist); nfs4_put_stid(&ls->ls_stid); diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ce38b4ccc9ab..84f2f8079466 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -2843,6 +2843,8 @@ again: res->state &= ~DLM_LOCK_RES_BLOCK_DIRTY; if (!ret) BUG_ON(!(res->state & DLM_LOCK_RES_MIGRATING)); + else + res->migration_pending = 0; spin_unlock(&res->spinlock); /* diff --git a/fs/ocfs2/locks.c b/fs/ocfs2/locks.c index 652ece4a9d9e..d56f0079b858 100644 --- a/fs/ocfs2/locks.c +++ b/fs/ocfs2/locks.c @@ -67,7 +67,10 @@ static int ocfs2_do_flock(struct file *file, struct inode *inode, */ locks_lock_file_wait(file, - &(struct file_lock){.fl_type = F_UNLCK}); + &(struct file_lock) { + .fl_type = F_UNLCK, + .fl_flags = FL_FLOCK + }); ocfs2_file_unlock(file); } diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c index 3b48ac25d8a7..3123408da935 100644 --- a/fs/ocfs2/namei.c +++ b/fs/ocfs2/namei.c @@ -367,7 +367,7 @@ static int ocfs2_mknod(struct inode *dir, goto leave; } - status = posix_acl_create(dir, &mode, &default_acl, &acl); + status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl); if (status) { mlog_errno(status); goto leave; diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c index d5da6f624142..79b8021302b3 100644 --- a/fs/ocfs2/resize.c +++ b/fs/ocfs2/resize.c @@ -54,11 +54,12 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode, struct ocfs2_group_desc *gd, u16 cl_cpg, + u16 old_bg_clusters, int set) { int i; u16 backups = 0; - u32 cluster; + u32 cluster, lgd_cluster; u64 blkno, gd_blkno, lgd_blkno = le64_to_cpu(gd->bg_blkno); for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) { @@ -71,6 +72,12 @@ static u16 ocfs2_calc_new_backup_super(struct inode *inode, else if (gd_blkno > lgd_blkno) break; + /* check if already done backup super */ + lgd_cluster = ocfs2_blocks_to_clusters(inode->i_sb, lgd_blkno); + lgd_cluster += old_bg_clusters; + if (lgd_cluster >= cluster) + continue; + if (set) ocfs2_set_bit(cluster % cl_cpg, (unsigned long *)gd->bg_bitmap); @@ -99,6 +106,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, u16 chain, num_bits, backups = 0; u16 cl_bpc = le16_to_cpu(cl->cl_bpc); u16 cl_cpg = le16_to_cpu(cl->cl_cpg); + u16 old_bg_clusters; trace_ocfs2_update_last_group_and_inode(new_clusters, first_new_cluster); @@ -112,6 +120,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, group = (struct ocfs2_group_desc *)group_bh->b_data; + old_bg_clusters = le16_to_cpu(group->bg_bits) / cl_bpc; /* update the group first. */ num_bits = new_clusters * cl_bpc; le16_add_cpu(&group->bg_bits, num_bits); @@ -125,7 +134,7 @@ static int ocfs2_update_last_group_and_inode(handle_t *handle, OCFS2_FEATURE_COMPAT_BACKUP_SB)) { backups = ocfs2_calc_new_backup_super(bm_inode, group, - cl_cpg, 1); + cl_cpg, old_bg_clusters, 1); le16_add_cpu(&group->bg_free_bits_count, -1 * backups); } @@ -163,7 +172,7 @@ out_rollback: if (ret < 0) { ocfs2_calc_new_backup_super(bm_inode, group, - cl_cpg, 0); + cl_cpg, old_bg_clusters, 0); le16_add_cpu(&group->bg_free_bits_count, backups); le16_add_cpu(&group->bg_bits, -1 * num_bits); le16_add_cpu(&group->bg_free_bits_count, -1 * num_bits); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 871fcb67be97..0a8983492d91 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -195,8 +195,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat) static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, struct dentry *dentry, struct path *lowerpath, - struct kstat *stat, struct iattr *attr, - const char *link) + struct kstat *stat, const char *link) { struct inode *wdir = workdir->d_inode; struct inode *udir = upperdir->d_inode; @@ -240,8 +239,6 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir, mutex_lock(&newdentry->d_inode->i_mutex); err = ovl_set_attr(newdentry, stat); - if (!err && attr) - err = notify_change(newdentry, attr, NULL); mutex_unlock(&newdentry->d_inode->i_mutex); if (err) goto out_cleanup; @@ -286,8 +283,7 @@ out_cleanup: * that point the file will have already been copied up anyway. */ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat, - struct iattr *attr) + struct path *lowerpath, struct kstat *stat) { struct dentry *workdir = ovl_workdir(dentry); int err; @@ -345,26 +341,19 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, } upperdentry = ovl_dentry_upper(dentry); if (upperdentry) { - unlock_rename(workdir, upperdir); + /* Raced with another copy-up? Nothing to do, then... */ err = 0; - /* Raced with another copy-up? Do the setattr here */ - if (attr) { - mutex_lock(&upperdentry->d_inode->i_mutex); - err = notify_change(upperdentry, attr, NULL); - mutex_unlock(&upperdentry->d_inode->i_mutex); - } - goto out_put_cred; + goto out_unlock; } err = ovl_copy_up_locked(workdir, upperdir, dentry, lowerpath, - stat, attr, link); + stat, link); if (!err) { /* Restore timestamps on parent (best effort) */ ovl_set_timestamps(upperdir, &pstat); } out_unlock: unlock_rename(workdir, upperdir); -out_put_cred: revert_creds(old_cred); put_cred(override_cred); @@ -406,7 +395,7 @@ int ovl_copy_up(struct dentry *dentry) ovl_path_lower(next, &lowerpath); err = vfs_getattr(&lowerpath, &stat); if (!err) - err = ovl_copy_up_one(parent, next, &lowerpath, &stat, NULL); + err = ovl_copy_up_one(parent, next, &lowerpath, &stat); dput(parent); dput(next); diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index ec0c2a050043..4060ffde8722 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -12,8 +12,7 @@ #include <linux/xattr.h> #include "overlayfs.h" -static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, - bool no_data) +static int ovl_copy_up_truncate(struct dentry *dentry) { int err; struct dentry *parent; @@ -30,10 +29,8 @@ static int ovl_copy_up_last(struct dentry *dentry, struct iattr *attr, if (err) goto out_dput_parent; - if (no_data) - stat.size = 0; - - err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat, attr); + stat.size = 0; + err = ovl_copy_up_one(parent, dentry, &lowerpath, &stat); out_dput_parent: dput(parent); @@ -49,13 +46,13 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr) if (err) goto out; - upperdentry = ovl_dentry_upper(dentry); - if (upperdentry) { + err = ovl_copy_up(dentry); + if (!err) { + upperdentry = ovl_dentry_upper(dentry); + mutex_lock(&upperdentry->d_inode->i_mutex); err = notify_change(upperdentry, attr, NULL); mutex_unlock(&upperdentry->d_inode->i_mutex); - } else { - err = ovl_copy_up_last(dentry, attr, false); } ovl_drop_write(dentry); out: @@ -353,7 +350,7 @@ struct inode *ovl_d_select_inode(struct dentry *dentry, unsigned file_flags) return ERR_PTR(err); if (file_flags & O_TRUNC) - err = ovl_copy_up_last(dentry, NULL, true); + err = ovl_copy_up_truncate(dentry); else err = ovl_copy_up(dentry); ovl_drop_write(dentry); diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h index ea5a40b06e3a..e17154aeaae4 100644 --- a/fs/overlayfs/overlayfs.h +++ b/fs/overlayfs/overlayfs.h @@ -194,7 +194,6 @@ void ovl_cleanup(struct inode *dir, struct dentry *dentry); /* copy_up.c */ int ovl_copy_up(struct dentry *dentry); int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, - struct path *lowerpath, struct kstat *stat, - struct iattr *attr); + struct path *lowerpath, struct kstat *stat); int ovl_copy_xattr(struct dentry *old, struct dentry *new); int ovl_set_attr(struct dentry *upper, struct kstat *stat); diff --git a/fs/proc/base.c b/fs/proc/base.c index bd3e9e68125b..4bd5d3118acd 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2494,6 +2494,7 @@ static ssize_t proc_coredump_filter_write(struct file *file, mm = get_task_mm(task); if (!mm) goto out_no_mm; + ret = 0; for (i = 0, mask = 1; i < MMF_DUMP_FILTER_BITS; i++, mask <<= 1) { if (val & mask) diff --git a/fs/splice.c b/fs/splice.c index 801c21cd77fe..4cf700d50b40 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -809,6 +809,13 @@ static int splice_from_pipe_feed(struct pipe_inode_info *pipe, struct splice_des */ static int splice_from_pipe_next(struct pipe_inode_info *pipe, struct splice_desc *sd) { + /* + * Check for signal early to make process killable when there are + * always buffers available + */ + if (signal_pending(current)) + return -ERESTARTSYS; + while (!pipe->nrbufs) { if (!pipe->writers) return 0; @@ -884,6 +891,7 @@ ssize_t __splice_from_pipe(struct pipe_inode_info *pipe, struct splice_desc *sd, splice_from_pipe_begin(sd); do { + cond_resched(); ret = splice_from_pipe_next(pipe, sd); if (ret > 0) ret = splice_from_pipe_feed(pipe, sd, actor); diff --git a/fs/sysv/inode.c b/fs/sysv/inode.c index 590ad9206e3f..02fa1dcc5969 100644 --- a/fs/sysv/inode.c +++ b/fs/sysv/inode.c @@ -162,15 +162,8 @@ void sysv_set_inode(struct inode *inode, dev_t rdev) inode->i_fop = &sysv_dir_operations; inode->i_mapping->a_ops = &sysv_aops; } else if (S_ISLNK(inode->i_mode)) { - if (inode->i_blocks) { - inode->i_op = &sysv_symlink_inode_operations; - inode->i_mapping->a_ops = &sysv_aops; - } else { - inode->i_op = &simple_symlink_inode_operations; - inode->i_link = (char *)SYSV_I(inode)->i_data; - nd_terminate_link(inode->i_link, inode->i_size, - sizeof(SYSV_I(inode)->i_data) - 1); - } + inode->i_op = &sysv_symlink_inode_operations; + inode->i_mapping->a_ops = &sysv_aops; } else init_special_inode(inode, inode->i_mode, rdev); } |