diff options
Diffstat (limited to 'fs')
98 files changed, 1094 insertions, 759 deletions
@@ -93,9 +93,8 @@ static void aio_free_ring(struct kioctx *ctx) put_page(info->ring_pages[i]); if (info->mmap_size) { - down_write(&ctx->mm->mmap_sem); - do_munmap(ctx->mm, info->mmap_base, info->mmap_size); - up_write(&ctx->mm->mmap_sem); + BUG_ON(ctx->mm != current->mm); + vm_munmap(info->mmap_base, info->mmap_size); } if (info->ring_pages && info->ring_pages != info->internal_pages) @@ -389,6 +388,17 @@ void exit_aio(struct mm_struct *mm) "exit_aio:ioctx still alive: %d %d %d\n", atomic_read(&ctx->users), ctx->dead, ctx->reqs_active); + /* + * We don't need to bother with munmap() here - + * exit_mmap(mm) is coming and it'll unmap everything. + * Since aio_free_ring() uses non-zero ->mmap_size + * as indicator that it needs to unmap the area, + * just set it to 0; aio_free_ring() is the only + * place that uses ->mmap_size, so it's safe. + * That way we get all munmap done to current->mm - + * all other callers have ctx->mm == current->mm. + */ + ctx->ring_info.mmap_size = 0; put_ioctx(ctx); } } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index eb1cc92cd67..908e1845541 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -110,7 +110,6 @@ struct autofs_sb_info { int sub_version; int min_proto; int max_proto; - int compat_daemon; unsigned long exp_timeout; unsigned int type; int reghost_enabled; @@ -270,6 +269,17 @@ int autofs4_fill_super(struct super_block *, void *, int); struct autofs_info *autofs4_new_ino(struct autofs_sb_info *); void autofs4_clean_ino(struct autofs_info *); +static inline int autofs_prepare_pipe(struct file *pipe) +{ + if (!pipe->f_op || !pipe->f_op->write) + return -EINVAL; + if (!S_ISFIFO(pipe->f_dentry->d_inode->i_mode)) + return -EINVAL; + /* We want a packet pipe */ + pipe->f_flags |= O_DIRECT; + return 0; +} + /* Queue management functions */ int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify); diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 9dacb858670..aa9103f8f01 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -376,7 +376,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, err = -EBADF; goto out; } - if (!pipe->f_op || !pipe->f_op->write) { + if (autofs_prepare_pipe(pipe) < 0) { err = -EPIPE; fput(pipe); goto out; @@ -385,7 +385,6 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp, sbi->pipefd = pipefd; sbi->pipe = pipe; sbi->catatonic = 0; - sbi->compat_daemon = is_compat_task(); } out: mutex_unlock(&sbi->wq_mutex); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index d8dc002e9cc..6e488ebe778 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -19,7 +19,6 @@ #include <linux/parser.h> #include <linux/bitops.h> #include <linux/magic.h> -#include <linux/compat.h> #include "autofs_i.h" #include <linux/module.h> @@ -225,7 +224,6 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) set_autofs_type_indirect(&sbi->type); sbi->min_proto = 0; sbi->max_proto = 0; - sbi->compat_daemon = is_compat_task(); mutex_init(&sbi->wq_mutex); mutex_init(&sbi->pipe_mutex); spin_lock_init(&sbi->fs_lock); @@ -292,7 +290,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) printk("autofs: could not open pipe file descriptor\n"); goto fail_dput; } - if (!pipe->f_op || !pipe->f_op->write) + if (autofs_prepare_pipe(pipe) < 0) goto fail_fput; sbi->pipe = pipe; sbi->pipefd = pipefd; diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 9c098db4334..da8876d38a7 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -91,24 +91,7 @@ static int autofs4_write(struct autofs_sb_info *sbi, return (bytes > 0); } - -/* - * The autofs_v5 packet was misdesigned. - * - * The packets are identical on x86-32 and x86-64, but have different - * alignment. Which means that 'sizeof()' will give different results. - * Fix it up for the case of running 32-bit user mode on a 64-bit kernel. - */ -static noinline size_t autofs_v5_packet_size(struct autofs_sb_info *sbi) -{ - size_t pktsz = sizeof(struct autofs_v5_packet); -#if defined(CONFIG_X86_64) && defined(CONFIG_COMPAT) - if (sbi->compat_daemon > 0) - pktsz -= 4; -#endif - return pktsz; -} - + static void autofs4_notify_daemon(struct autofs_sb_info *sbi, struct autofs_wait_queue *wq, int type) @@ -172,7 +155,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, { struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; - pktsz = autofs_v5_packet_size(sbi); + pktsz = sizeof(*packet); + packet->wait_queue_token = wq->wait_queue_token; packet->len = wq->name.len; memcpy(packet->name, wq->name.name, wq->name.len); diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 2eb12f13593..d146e181d10 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -50,9 +50,7 @@ static int set_brk(unsigned long start, unsigned long end) end = PAGE_ALIGN(end); if (end > start) { unsigned long addr; - down_write(¤t->mm->mmap_sem); - addr = do_brk(start, end - start); - up_write(¤t->mm->mmap_sem); + addr = vm_brk(start, end - start); if (BAD_ADDR(addr)) return addr; } @@ -280,9 +278,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) pos = 32; map_size = ex.a_text+ex.a_data; #endif - down_write(¤t->mm->mmap_sem); - error = do_brk(text_addr & PAGE_MASK, map_size); - up_write(¤t->mm->mmap_sem); + error = vm_brk(text_addr & PAGE_MASK, map_size); if (error != (text_addr & PAGE_MASK)) { send_sig(SIGKILL, current, 0); return error; @@ -313,9 +309,7 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) if (!bprm->file->f_op->mmap||((fd_offset & ~PAGE_MASK) != 0)) { loff_t pos = fd_offset; - down_write(¤t->mm->mmap_sem); - do_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); - up_write(¤t->mm->mmap_sem); + vm_brk(N_TXTADDR(ex), ex.a_text+ex.a_data); bprm->file->f_op->read(bprm->file, (char __user *)N_TXTADDR(ex), ex.a_text+ex.a_data, &pos); @@ -325,24 +319,20 @@ static int load_aout_binary(struct linux_binprm * bprm, struct pt_regs * regs) goto beyond_if; } - down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, + error = vm_mmap(bprm->file, N_TXTADDR(ex), ex.a_text, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset); - up_write(¤t->mm->mmap_sem); if (error != N_TXTADDR(ex)) { send_sig(SIGKILL, current, 0); return error; } - down_write(¤t->mm->mmap_sem); - error = do_mmap(bprm->file, N_DATADDR(ex), ex.a_data, + error = vm_mmap(bprm->file, N_DATADDR(ex), ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE | MAP_EXECUTABLE, fd_offset + ex.a_text); - up_write(¤t->mm->mmap_sem); if (error != N_DATADDR(ex)) { send_sig(SIGKILL, current, 0); return error; @@ -412,9 +402,7 @@ static int load_aout_library(struct file *file) "N_TXTOFF is not page aligned. Please convert library: %s\n", file->f_path.dentry->d_name.name); } - down_write(¤t->mm->mmap_sem); - do_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); - up_write(¤t->mm->mmap_sem); + vm_brk(start_addr, ex.a_text + ex.a_data + ex.a_bss); file->f_op->read(file, (char __user *)start_addr, ex.a_text + ex.a_data, &pos); @@ -425,12 +413,10 @@ static int load_aout_library(struct file *file) goto out; } /* Now use mmap to map the library into memory. */ - down_write(¤t->mm->mmap_sem); - error = do_mmap(file, start_addr, ex.a_text + ex.a_data, + error = vm_mmap(file, start_addr, ex.a_text + ex.a_data, PROT_READ | PROT_WRITE | PROT_EXEC, MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, N_TXTOFF(ex)); - up_write(¤t->mm->mmap_sem); retval = error; if (error != start_addr) goto out; @@ -438,9 +424,7 @@ static int load_aout_library(struct file *file) len = PAGE_ALIGN(ex.a_text + ex.a_data); bss = ex.a_text + ex.a_data + ex.a_bss; if (bss > len) { - down_write(¤t->mm->mmap_sem); - error = do_brk(start_addr + len, bss - len); - up_write(¤t->mm->mmap_sem); + error = vm_brk(start_addr + len, bss - len); retval = error; if (error != start_addr + len) goto out; diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 48ffb3dc610..16f73541707 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -82,9 +82,7 @@ static int set_brk(unsigned long start, unsigned long end) end = ELF_PAGEALIGN(end); if (end > start) { unsigned long addr; - down_write(¤t->mm->mmap_sem); - addr = do_brk(start, end - start); - up_write(¤t->mm->mmap_sem); + addr = vm_brk(start, end - start); if (BAD_ADDR(addr)) return addr; } @@ -514,9 +512,7 @@ static unsigned long load_elf_interp(struct elfhdr *interp_elf_ex, elf_bss = ELF_PAGESTART(elf_bss + ELF_MIN_ALIGN - 1); /* Map the last of the bss segment */ - down_write(¤t->mm->mmap_sem); - error = do_brk(elf_bss, last_bss - elf_bss); - up_write(¤t->mm->mmap_sem); + error = vm_brk(elf_bss, last_bss - elf_bss); if (BAD_ADDR(error)) goto out_close; } @@ -962,10 +958,8 @@ static int load_elf_binary(struct linux_binprm *bprm, struct pt_regs *regs) and some applications "depend" upon this behavior. Since we do not have the power to recompile these, we emulate the SVr4 behavior. Sigh. */ - down_write(¤t->mm->mmap_sem); - error = do_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, + error = vm_mmap(NULL, 0, PAGE_SIZE, PROT_READ | PROT_EXEC, MAP_FIXED | MAP_PRIVATE, 0); - up_write(¤t->mm->mmap_sem); } #ifdef ELF_PLAT_INIT @@ -1050,8 +1044,7 @@ static int load_elf_library(struct file *file) eppnt++; /* Now use mmap to map the library into memory. */ - down_write(¤t->mm->mmap_sem); - error = do_mmap(file, + error = vm_mmap(file, ELF_PAGESTART(eppnt->p_vaddr), (eppnt->p_filesz + ELF_PAGEOFFSET(eppnt->p_vaddr)), @@ -1059,7 +1052,6 @@ static int load_elf_library(struct file *file) MAP_FIXED | MAP_PRIVATE | MAP_DENYWRITE, (eppnt->p_offset - ELF_PAGEOFFSET(eppnt->p_vaddr))); - up_write(¤t->mm->mmap_sem); if (error != ELF_PAGESTART(eppnt->p_vaddr)) goto out_free_ph; @@ -1072,11 +1064,8 @@ static int load_elf_library(struct file *file) len = ELF_PAGESTART(eppnt->p_filesz + eppnt->p_vaddr + ELF_MIN_ALIGN - 1); bss = eppnt->p_memsz + eppnt->p_vaddr; - if (bss > len) { - down_write(¤t->mm->mmap_sem); - do_brk(len, bss - len); - up_write(¤t->mm->mmap_sem); - } + if (bss > len) + vm_brk(len, bss - len); error = 0; out_free_ph: diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 9bd5612a822..d390a0fffc6 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -390,21 +390,17 @@ static int load_elf_fdpic_binary(struct linux_binprm *bprm, (executable_stack == EXSTACK_DEFAULT && VM_STACK_FLAGS & VM_EXEC)) stack_prot |= PROT_EXEC; - down_write(¤t->mm->mmap_sem); - current->mm->start_brk = do_mmap(NULL, 0, stack_size, stack_prot, + current->mm->start_brk = vm_mmap(NULL, 0, stack_size, stack_prot, MAP_PRIVATE | MAP_ANONYMOUS | MAP_UNINITIALIZED | MAP_GROWSDOWN, 0); if (IS_ERR_VALUE(current->mm->start_brk)) { - up_write(¤t->mm->mmap_sem); retval = current->mm->start_brk; current->mm->start_brk = 0; goto error_kill; } - up_write(¤t->mm->mmap_sem); - current->mm->brk = current->mm->start_brk; current->mm->context.end_brk = current->mm->start_brk; current->mm->context.end_brk += @@ -955,10 +951,8 @@ static int elf_fdpic_map_file_constdisp_on_uclinux( if (params->flags & ELF_FDPIC_FLAG_EXECUTABLE) mflags |= MAP_EXECUTABLE; - down_write(&mm->mmap_sem); - maddr = do_mmap(NULL, load_addr, top - base, + maddr = vm_mmap(NULL, load_addr, top - base, PROT_READ | PROT_WRITE | PROT_EXEC, mflags, 0); - up_write(&mm->mmap_sem); if (IS_ERR_VALUE(maddr)) return (int) maddr; @@ -1096,10 +1090,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, /* create the mapping */ disp = phdr->p_vaddr & ~PAGE_MASK; - down_write(&mm->mmap_sem); - maddr = do_mmap(file, maddr, phdr->p_memsz + disp, prot, flags, + maddr = vm_mmap(file, maddr, phdr->p_memsz + disp, prot, flags, phdr->p_offset - disp); - up_write(&mm->mmap_sem); kdebug("mmap[%d] <file> sz=%lx pr=%x fl=%x of=%lx --> %08lx", loop, phdr->p_memsz + disp, prot, flags, @@ -1143,10 +1135,8 @@ static int elf_fdpic_map_file_by_direct_mmap(struct elf_fdpic_params *params, unsigned long xmaddr; flags |= MAP_FIXED | MAP_ANONYMOUS; - down_write(&mm->mmap_sem); - xmaddr = do_mmap(NULL, xaddr, excess - excess1, + xmaddr = vm_mmap(NULL, xaddr, excess - excess1, prot, flags, 0); - up_write(&mm->mmap_sem); kdebug("mmap[%d] <anon>" " ad=%lx sz=%lx pr=%x fl=%x of=0 --> %08lx", diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 024d20ee3ca..6b2daf99fab 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -542,10 +542,8 @@ static int load_flat_file(struct linux_binprm * bprm, */ DBG_FLT("BINFMT_FLAT: ROM mapping of file (we hope)\n"); - down_write(¤t->mm->mmap_sem); - textpos = do_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, + textpos = vm_mmap(bprm->file, 0, text_len, PROT_READ|PROT_EXEC, MAP_PRIVATE|MAP_EXECUTABLE, 0); - up_write(¤t->mm->mmap_sem); if (!textpos || IS_ERR_VALUE(textpos)) { if (!textpos) textpos = (unsigned long) -ENOMEM; @@ -556,10 +554,8 @@ static int load_flat_file(struct linux_binprm * bprm, len = data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); len = PAGE_ALIGN(len); - down_write(¤t->mm->mmap_sem); - realdatastart = do_mmap(0, 0, len, + realdatastart = vm_mmap(0, 0, len, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE, 0); - up_write(¤t->mm->mmap_sem); if (realdatastart == 0 || IS_ERR_VALUE(realdatastart)) { if (!realdatastart) @@ -603,10 +599,8 @@ static int load_flat_file(struct linux_binprm * bprm, len = text_len + data_len + extra + MAX_SHARED_LIBS * sizeof(unsigned long); len = PAGE_ALIGN(len); - down_write(¤t->mm->mmap_sem); - textpos = do_mmap(0, 0, len, + textpos = vm_mmap(0, 0, len, PROT_READ | PROT_EXEC | PROT_WRITE, MAP_PRIVATE, 0); - up_write(¤t->mm->mmap_sem); if (!textpos || IS_ERR_VALUE(textpos)) { if (!textpos) diff --git a/fs/binfmt_som.c b/fs/binfmt_som.c index e4fc746629a..4517aaff61b 100644 --- a/fs/binfmt_som.c +++ b/fs/binfmt_som.c @@ -147,10 +147,8 @@ static int map_som_binary(struct file *file, code_size = SOM_PAGEALIGN(hpuxhdr->exec_tsize); current->mm->start_code = code_start; current->mm->end_code = code_start + code_size; - down_write(¤t->mm->mmap_sem); - retval = do_mmap(file, code_start, code_size, prot, + retval = vm_mmap(file, code_start, code_size, prot, flags, SOM_PAGESTART(hpuxhdr->exec_tfile)); - up_write(¤t->mm->mmap_sem); if (retval < 0 && retval > -1024) goto out; @@ -158,20 +156,16 @@ static int map_som_binary(struct file *file, data_size = SOM_PAGEALIGN(hpuxhdr->exec_dsize); current->mm->start_data = data_start; current->mm->end_data = bss_start = data_start + data_size; - down_write(¤t->mm->mmap_sem); - retval = do_mmap(file, data_start, data_size, + retval = vm_mmap(file, data_start, data_size, prot | PROT_WRITE, flags, SOM_PAGESTART(hpuxhdr->exec_dfile)); - up_write(¤t->mm->mmap_sem); if (retval < 0 && retval > -1024) goto out; som_brk = bss_start + SOM_PAGEALIGN(hpuxhdr->exec_bsize); current->mm->start_brk = current->mm->brk = som_brk; - down_write(¤t->mm->mmap_sem); - retval = do_mmap(NULL, bss_start, som_brk - bss_start, + retval = vm_mmap(NULL, bss_start, som_brk - bss_start, prot | PROT_WRITE, MAP_FIXED | MAP_PRIVATE, 0); - up_write(¤t->mm->mmap_sem); if (retval > 0 || retval < -1024) retval = 0; out: @@ -505,9 +505,14 @@ EXPORT_SYMBOL(bio_clone); int bio_get_nr_vecs(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); - return min_t(unsigned, + int nr_pages; + + nr_pages = min_t(unsigned, queue_max_segments(q), queue_max_sectors(q) / (PAGE_SIZE >> 9) + 1); + + return min_t(unsigned, nr_pages, BIO_MAX_PAGES); + } EXPORT_SYMBOL(bio_get_nr_vecs); diff --git a/fs/block_dev.c b/fs/block_dev.c index e08f6a20a5b..ba11c30f302 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -70,7 +70,7 @@ static void bdev_inode_switch_bdi(struct inode *inode, spin_unlock(&dst->wb.list_lock); } -static sector_t max_block(struct block_device *bdev) +sector_t blkdev_max_block(struct block_device *bdev) { sector_t retval = ~((sector_t)0); loff_t sz = i_size_read(bdev->bd_inode); @@ -163,7 +163,7 @@ static int blkdev_get_block(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - if (iblock >= max_block(I_BDEV(inode))) { + if (iblock >= blkdev_max_block(I_BDEV(inode))) { if (create) return -EIO; @@ -185,7 +185,7 @@ static int blkdev_get_blocks(struct inode *inode, sector_t iblock, struct buffer_head *bh, int create) { - sector_t end_block = max_block(I_BDEV(inode)); + sector_t end_block = blkdev_max_block(I_BDEV(inode)); unsigned long max_blocks = bh->b_size >> inode->i_blkbits; if ((iblock + max_blocks) > end_block) { diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f4e90748940..bcec0675023 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -22,6 +22,7 @@ #include "ulist.h" #include "transaction.h" #include "delayed-ref.h" +#include "locking.h" /* * this structure records all encountered refs on the way up to the root @@ -893,18 +894,22 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, s64 bytes_left = size - 1; struct extent_buffer *eb = eb_in; struct btrfs_key found_key; + int leave_spinning = path->leave_spinning; if (bytes_left >= 0) dest[bytes_left] = '\0'; + path->leave_spinning = 1; while (1) { len = btrfs_inode_ref_name_len(eb, iref); bytes_left -= len; if (bytes_left >= 0) read_extent_buffer(eb, dest + bytes_left, (unsigned long)(iref + 1), len); - if (eb != eb_in) + if (eb != eb_in) { + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); + } ret = inode_ref_info(parent, 0, fs_root, path, &found_key); if (ret > 0) ret = -ENOENT; @@ -919,8 +924,11 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, slot = path->slots[0]; eb = path->nodes[0]; /* make sure we can use eb after releasing the path */ - if (eb != eb_in) + if (eb != eb_in) { atomic_inc(&eb->refs); + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + } btrfs_release_path(path); iref = btrfs_item_ptr(eb, slot, struct btrfs_inode_ref); @@ -931,6 +939,7 @@ static char *iref_to_path(struct btrfs_root *fs_root, struct btrfs_path *path, } btrfs_release_path(path); + path->leave_spinning = leave_spinning; if (ret) return ERR_PTR(ret); @@ -1247,7 +1256,7 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, struct btrfs_path *path, iterate_irefs_t *iterate, void *ctx) { - int ret; + int ret = 0; int slot; u32 cur; u32 len; @@ -1259,7 +1268,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, struct btrfs_inode_ref *iref; struct btrfs_key found_key; - while (1) { + while (!ret) { + path->leave_spinning = 1; ret = inode_ref_info(inum, parent ? parent+1 : 0, fs_root, path, &found_key); if (ret < 0) @@ -1275,6 +1285,8 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, eb = path->nodes[0]; /* make sure we can use eb after releasing the path */ atomic_inc(&eb->refs); + btrfs_tree_read_lock(eb); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); btrfs_release_path(path); item = btrfs_item_nr(eb, slot); @@ -1288,13 +1300,12 @@ static int iterate_irefs(u64 inum, struct btrfs_root *fs_root, (unsigned long long)found_key.objectid, (unsigned long long)fs_root->objectid); ret = iterate(parent, iref, eb, ctx); - if (ret) { - free_extent_buffer(eb); + if (ret) break; - } len = sizeof(*iref) + name_len; iref = (struct btrfs_inode_ref *)((char *)iref + len); } + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); } @@ -1414,6 +1425,8 @@ struct inode_fs_paths *init_ipath(s32 total_bytes, struct btrfs_root *fs_root, void free_ipath(struct inode_fs_paths *ipath) { + if (!ipath) + return; kfree(ipath->fspath); kfree(ipath); } diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c index d286b40a567..86eff48dab7 100644 --- a/fs/btrfs/compression.c +++ b/fs/btrfs/compression.c @@ -405,6 +405,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start, bio_put(bio); bio = compressed_bio_alloc(bdev, first_byte, GFP_NOFS); + BUG_ON(!bio); bio->bi_private = cb; bio->bi_end_io = end_compressed_bio_write; bio_add_page(bio, page, PAGE_CACHE_SIZE, 0); @@ -687,6 +688,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio, comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS); + BUG_ON(!comp_bio); comp_bio->bi_private = cb; comp_bio->bi_end_io = end_compressed_bio_read; diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index e801f226d7e..4106264fbc6 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -220,10 +220,12 @@ struct extent_buffer *btrfs_read_lock_root_node(struct btrfs_root *root) */ static void add_root_to_dirty_list(struct btrfs_root *root) { + spin_lock(&root->fs_info->trans_lock); if (root->track_dirty && list_empty(&root->dirty_list)) { list_add(&root->dirty_list, &root->fs_info->dirty_cowonly_roots); } + spin_unlock(&root->fs_info->trans_lock); } /* @@ -723,7 +725,7 @@ int btrfs_realloc_node(struct btrfs_trans_handle *trans, cur = btrfs_find_tree_block(root, blocknr, blocksize); if (cur) - uptodate = btrfs_buffer_uptodate(cur, gen); + uptodate = btrfs_buffer_uptodate(cur, gen, 0); else uptodate = 0; if (!cur || !uptodate) { @@ -1358,7 +1360,12 @@ static noinline int reada_for_balance(struct btrfs_root *root, block1 = btrfs_node_blockptr(parent, slot - 1); gen = btrfs_node_ptr_generation(parent, slot - 1); eb = btrfs_find_tree_block(root, block1, blocksize); - if (eb && btrfs_buffer_uptodate(eb, gen)) + /* + * if we get -eagain from btrfs_buffer_uptodate, we + * don't want to return eagain here. That will loop + * forever + */ + if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) block1 = 0; free_extent_buffer(eb); } @@ -1366,7 +1373,7 @@ static noinline int reada_for_balance(struct btrfs_root *root, block2 = btrfs_node_blockptr(parent, slot + 1); gen = btrfs_node_ptr_generation(parent, slot + 1); eb = btrfs_find_tree_block(root, block2, blocksize); - if (eb && btrfs_buffer_uptodate(eb, gen)) + if (eb && btrfs_buffer_uptodate(eb, gen, 1) != 0) block2 = 0; free_extent_buffer(eb); } @@ -1504,8 +1511,9 @@ read_block_for_search(struct btrfs_trans_handle *trans, tmp = btrfs_find_tree_block(root, blocknr, blocksize); if (tmp) { - if (btrfs_buffer_uptodate(tmp, 0)) { - if (btrfs_buffer_uptodate(tmp, gen)) { + /* first we do an atomic uptodate check */ + if (btrfs_buffer_uptodate(tmp, 0, 1) > 0) { + if (btrfs_buffer_uptodate(tmp, gen, 1) > 0) { /* * we found an up to date block without * sleeping, return @@ -1523,8 +1531,9 @@ read_block_for_search(struct btrfs_trans_handle *trans, free_extent_buffer(tmp); btrfs_set_path_blocking(p); + /* now we're allowed to do a blocking uptodate check */ tmp = read_tree_block(root, blocknr, blocksize, gen); - if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + if (tmp && btrfs_buffer_uptodate(tmp, gen, 0) > 0) { *eb_ret = tmp; return 0; } @@ -1559,7 +1568,7 @@ read_block_for_search(struct btrfs_trans_handle *trans, * and give up so that our caller doesn't loop forever * on our EAGAINs. */ - if (!btrfs_buffer_uptodate(tmp, 0)) + if (!btrfs_buffer_uptodate(tmp, 0, 0)) ret = -EIO; free_extent_buffer(tmp); } @@ -4043,7 +4052,7 @@ again: tmp = btrfs_find_tree_block(root, blockptr, btrfs_level_size(root, level - 1)); - if (tmp && btrfs_buffer_uptodate(tmp, gen)) { + if (tmp && btrfs_buffer_uptodate(tmp, gen, 1) > 0) { free_extent_buffer(tmp); break; } @@ -4166,7 +4175,8 @@ next: struct extent_buffer *cur; cur = btrfs_find_tree_block(root, blockptr, btrfs_level_size(root, level - 1)); - if (!cur || !btrfs_buffer_uptodate(cur, gen)) { + if (!cur || + btrfs_buffer_uptodate(cur, gen, 1) <= 0) { slot++; if (cur) free_extent_buffer(cur); diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 5b8ef8eb352..8fd72331d60 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -1078,7 +1078,7 @@ struct btrfs_fs_info { * is required instead of the faster short fsync log commits */ u64 last_trans_log_full_commit; - unsigned long mount_opt:21; + unsigned long mount_opt; unsigned long compress_type:4; u64 max_inline; u64 alloc_start; @@ -2166,7 +2166,7 @@ BTRFS_SETGET_STACK_FUNCS(root_last_snapshot, struct btrfs_root_item, static inline bool btrfs_root_readonly(struct btrfs_root *root) { - return root->root_item.flags & BTRFS_ROOT_SUBVOL_RDONLY; + return (root->root_item.flags & cpu_to_le64(BTRFS_ROOT_SUBVOL_RDONLY)) != 0; } /* struct btrfs_root_backup */ diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 20196f41120..a7ffc88a7db 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -323,7 +323,8 @@ static int csum_tree_block(struct btrfs_root *root, struct extent_buffer *buf, * in the wrong place. */ static int verify_parent_transid(struct extent_io_tree *io_tree, - struct extent_buffer *eb, u64 parent_transid) + struct extent_buffer *eb, u64 parent_transid, + int atomic) { struct extent_state *cached_state = NULL; int ret; @@ -331,6 +332,9 @@ static int verify_parent_transid(struct extent_io_tree *io_tree, if (!parent_transid || btrfs_header_generation(eb) == parent_transid) return 0; + if (atomic) + return -EAGAIN; + lock_extent_bits(io_tree, eb->start, eb->start + eb->len - 1, 0, &cached_state); if (extent_buffer_uptodate(eb) && @@ -372,7 +376,8 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, ret = read_extent_buffer_pages(io_tree, eb, start, WAIT_COMPLETE, btree_get_extent, mirror_num); - if (!ret && !verify_parent_transid(io_tree, eb, parent_transid)) + if (!ret && !verify_parent_transid(io_tree, eb, + parent_transid, 0)) break; /* @@ -383,17 +388,16 @@ static int btree_read_extent_buffer_pages(struct btrfs_root *root, if (test_bit(EXTENT_BUFFER_CORRUPT, &eb->bflags)) break; - if (!failed_mirror) { - failed = 1; - printk(KERN_ERR "failed mirror was %d\n", eb->failed_mirror); - failed_mirror = eb->failed_mirror; - } - num_copies = btrfs_num_copies(&root->fs_info->mapping_tree, eb->start, eb->len); if (num_copies == 1) break; + if (!failed_mirror) { + failed = 1; + failed_mirror = eb->read_mirror; + } + mirror_num++; if (mirror_num == failed_mirror) mirror_num++; @@ -564,7 +568,7 @@ struct extent_buffer *find_eb_for_page(struct extent_io_tree *tree, } static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state) + struct extent_state *state, int mirror) { struct extent_io_tree *tree; u64 found_start; @@ -589,6 +593,7 @@ static int btree_readpage_end_io_hook(struct page *page, u64 start, u64 end, if (!reads_done) goto err; + eb->read_mirror = mirror; if (test_bit(EXTENT_BUFFER_IOERR, &eb->bflags)) { ret = -EIO; goto err; @@ -652,7 +657,7 @@ static int btree_io_failed_hook(struct page *page, int failed_mirror) eb = (struct extent_buffer *)page->private; set_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - eb->failed_mirror = failed_mirror; + eb->read_mirror = failed_mirror; if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags)) btree_readahead_hook(root, eb, eb->start, -EIO); return -EIO; /* we fixed nothing */ @@ -1202,7 +1207,7 @@ static int __must_check find_and_setup_root(struct btrfs_root *tree_root, root->commit_root = NULL; root->node = read_tree_block(root, btrfs_root_bytenr(&root->root_item), blocksize, generation); - if (!root->node || !btrfs_buffer_uptodate(root->node, generation)) { + if (!root->node || !btrfs_buffer_uptodate(root->node, generation, 0)) { free_extent_buffer(root->node); root->node = NULL; return -EIO; @@ -2254,9 +2259,9 @@ int open_ctree(struct super_block *sb, goto fail_sb_buffer; } - if (sectorsize < PAGE_SIZE) { - printk(KERN_WARNING "btrfs: Incompatible sector size " - "found on %s\n", sb->s_id); + if (sectorsize != PAGE_SIZE) { + printk(KERN_WARNING "btrfs: Incompatible sector size(%lu) " + "found on %s\n", (unsigned long)sectorsize, sb->s_id); goto fail_sb_buffer; } @@ -3143,7 +3148,8 @@ int close_ctree(struct btrfs_root *root) return 0; } -int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, + int atomic) { int ret; struct inode *btree_inode = buf->pages[0]->mapping->host; @@ -3153,7 +3159,9 @@ int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid) return ret; ret = verify_parent_transid(&BTRFS_I(btree_inode)->io_tree, buf, - parent_transid); + parent_transid, atomic); + if (ret == -EAGAIN) + return ret; return !ret; } diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index a7ace1a2dd1..ab1830aaf0e 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -66,7 +66,8 @@ void btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); void __btrfs_btree_balance_dirty(struct btrfs_root *root, unsigned long nr); void btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root); void btrfs_mark_buffer_dirty(struct extent_buffer *buf); -int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid); +int btrfs_buffer_uptodate(struct extent_buffer *buf, u64 parent_transid, + int atomic); int btrfs_set_buffer_uptodate(struct extent_buffer *buf); int btrfs_read_buffer(struct extent_buffer *buf, u64 parent_transid); u32 btrfs_csum_data(struct btrfs_root *root, char *data, u32 seed, size_t len); diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index a84420491c1..49fd7b66d57 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -529,9 +529,7 @@ static int cache_block_group(struct btrfs_block_group_cache *cache, * allocate blocks for the tree root we can't do the fast caching since * we likely hold important locks. */ - if (trans && (!trans->transaction->in_commit) && - (root && root != root->fs_info->tree_root) && - btrfs_test_opt(root, SPACE_CACHE)) { + if (fs_info->mount_opt & BTRFS_MOUNT_SPACE_CACHE) { ret = load_free_space_cache(fs_info, cache); spin_lock(&cache->lock); @@ -2303,6 +2301,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, if (ret) { printk(KERN_DEBUG "btrfs: run_delayed_extent_op returned %d\n", ret); + spin_lock(&delayed_refs->lock); return ret; } @@ -2333,6 +2332,7 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, if (ret) { printk(KERN_DEBUG "btrfs: run_one_delayed_ref returned %d\n", ret); + spin_lock(&delayed_refs->lock); return ret; } @@ -3152,15 +3152,14 @@ static void set_avail_alloc_bits(struct btrfs_fs_info *fs_info, u64 flags) /* * returns target flags in extended format or 0 if restripe for this * chunk_type is not in progress + * + * should be called with either volume_mutex or balance_lock held */ static u64 get_restripe_target(struct btrfs_fs_info *fs_info, u64 flags) { struct btrfs_balance_control *bctl = fs_info->balance_ctl; u64 target = 0; - BUG_ON(!mutex_is_locked(&fs_info->volume_mutex) && - !spin_is_locked(&fs_info->balance_lock)); - if (!bctl) return 0; @@ -3772,13 +3771,10 @@ again: */ if (current->journal_info) return -EAGAIN; - ret = wait_event_interruptible(space_info->wait, - !space_info->flush); - /* Must have been interrupted, return */ - if (ret) { - printk(KERN_DEBUG "btrfs: %s returning -EINTR\n", __func__); + ret = wait_event_killable(space_info->wait, !space_info->flush); + /* Must have been killed, return */ + if (ret) return -EINTR; - } spin_lock(&space_info->lock); } @@ -4205,7 +4201,7 @@ static u64 calc_global_metadata_size(struct btrfs_fs_info *fs_info) num_bytes += div64_u64(data_used + meta_used, 50); if (num_bytes * 3 > meta_used) - num_bytes = div64_u64(meta_used, 3) * 2; + num_bytes = div64_u64(meta_used, 3); return ALIGN(num_bytes, fs_info->extent_root->leafsize << 10); } @@ -4218,8 +4214,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) num_bytes = calc_global_metadata_size(fs_info); - spin_lock(&block_rsv->lock); spin_lock(&sinfo->lock); + spin_lock(&block_rsv->lock); block_rsv->size = num_bytes; @@ -4245,8 +4241,8 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info) block_rsv->full = 1; } - spin_unlock(&sinfo->lock); spin_unlock(&block_rsv->lock); + spin_unlock(&sinfo->lock); } static void init_global_block_rsv(struct btrfs_fs_info *fs_info) @@ -6572,7 +6568,7 @@ static noinline int do_walk_down(struct btrfs_trans_handle *trans, goto skip; } - if (!btrfs_buffer_uptodate(next, generation)) { + if (!btrfs_buffer_uptodate(next, generation, 0)) { btrfs_tree_unlock(next); free_extent_buffer(next); next = NULL; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index 8d904dd7ea9..c9018a05036 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -402,20 +402,28 @@ static int split_state(struct extent_io_tree *tree, struct extent_state *orig, return 0; } +static struct extent_state *next_state(struct extent_state *state) +{ + struct rb_node *next = rb_next(&state->rb_node); + if (next) + return rb_entry(next, struct extent_state, rb_node); + else + return NULL; +} + /* * utility function to clear some bits in an extent state struct. - * it will optionally wake up any one waiting on this state (wake == 1), or - * forcibly remove the state from the tree (delete == 1). + * it will optionally wake up any one waiting on this state (wake == 1) * * If no bits are set on the state struct after clearing things, the * struct is freed and removed from the tree */ -static int clear_state_bit(struct extent_io_tree *tree, - struct extent_state *state, - int *bits, int wake) +static struct extent_state *clear_state_bit(struct extent_io_tree *tree, + struct extent_state *state, + int *bits, int wake) { + struct extent_state *next; int bits_to_clear = *bits & ~EXTENT_CTLBITS; - int ret = state->state & bits_to_clear; if ((bits_to_clear & EXTENT_DIRTY) && (state->state & EXTENT_DIRTY)) { u64 range = state->end - state->start + 1; @@ -427,6 +435,7 @@ static int clear_state_bit(struct extent_io_tree *tree, if (wake) wake_up(&state->wq); if (state->state == 0) { + next = next_state(state); if (state->tree) { rb_erase(&state->rb_node, &tree->state); state->tree = NULL; @@ -436,8 +445,9 @@ static int clear_state_bit(struct extent_io_tree *tree, } } else { merge_state(tree, state); + next = next_state(state); } - return ret; + return next; } static struct extent_state * @@ -476,7 +486,6 @@ int clear_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, struct extent_state *state; struct extent_state *cached; struct extent_state *prealloc = NULL; - struct rb_node *next_node; struct rb_node *node; u64 last_end; int err; @@ -528,14 +537,11 @@ hit_next: WARN_ON(state->end < start); last_end = state->end; - if (state->end < end && !need_resched()) - next_node = rb_next(&state->rb_node); - else - next_node = NULL; - /* the state doesn't have the wanted bits, go ahead */ - if (!(state->state & bits)) + if (!(state->state & bits)) { + state = next_state(state); goto next; + } /* * | ---- desired range ---- | @@ -593,16 +599,13 @@ hit_next: goto out; } - clear_state_bit(tree, state, &bits, wake); + state = clear_state_bit(tree, state, &bits, wake); next: if (last_end == (u64)-1) goto out; start = last_end + 1; - if (start <= end && next_node) { - state = rb_entry(next_node, struct extent_state, - rb_node); + if (start <= end && state && !need_resched()) goto hit_next; - } goto search_again; out: @@ -1937,7 +1940,7 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb, struct btrfs_mapping_tree *map_tree = &root->fs_info->mapping_tree; u64 start = eb->start; unsigned long i, num_pages = num_extent_pages(eb->start, eb->len); - int ret; + int ret = 0; for (i = 0; i < num_pages; i++) { struct page *p = extent_buffer_page(eb, i); @@ -2180,6 +2183,10 @@ static int bio_readpage_error(struct bio *failed_bio, struct page *page, } bio = bio_alloc(GFP_NOFS, 1); + if (!bio) { + free_io_failure(inode, failrec, 0); + return -EIO; + } bio->bi_private = state; bio->bi_end_io = failed_bio->bi_end_io; bio->bi_sector = failrec->logical >> 9; @@ -2297,7 +2304,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) u64 start; u64 end; int whole_page; - int failed_mirror; + int mirror; int ret; if (err) @@ -2336,20 +2343,18 @@ static void end_bio_extent_readpage(struct bio *bio, int err) } spin_unlock(&tree->lock); + mirror = (int)(unsigned long)bio->bi_bdev; if (uptodate && tree->ops && tree->ops->readpage_end_io_hook) { ret = tree->ops->readpage_end_io_hook(page, start, end, - state); + state, mirror); if (ret) uptodate = 0; else clean_io_failure(start, page); } - if (!uptodate) - failed_mirror = (int)(unsigned long)bio->bi_bdev; - if (!uptodate && tree->ops && tree->ops->readpage_io_failed_hook) { - ret = tree->ops->readpage_io_failed_hook(page, failed_mirror); + ret = tree->ops->readpage_io_failed_hook(page, mirror); if (!ret && !err && test_bit(BIO_UPTODATE, &bio->bi_flags)) uptodate = 1; @@ -2364,8 +2369,7 @@ static void end_bio_extent_readpage(struct bio *bio, int err) * can't handle the error it will return -EIO and we * remain responsible for that page. */ - ret = bio_readpage_error(bio, page, start, end, - failed_mirror, NULL); + ret = bio_readpage_error(bio, page, start, end, mirror, NULL); if (ret == 0) { uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags); @@ -4116,6 +4120,7 @@ struct extent_buffer *alloc_extent_buffer(struct extent_io_tree *tree, if (atomic_inc_not_zero(&exists->refs)) { spin_unlock(&mapping->private_lock); unlock_page(p); + page_cache_release(p); mark_extent_buffer_accessed(exists); goto free_eb; } @@ -4195,8 +4200,7 @@ free_eb: unlock_page(eb->pages[i]); } - if (!atomic_dec_and_test(&eb->refs)) - return exists; + WARN_ON(!atomic_dec_and_test(&eb->refs)); btrfs_release_extent_buffer(eb); return exists; } @@ -4458,7 +4462,7 @@ int read_extent_buffer_pages(struct extent_io_tree *tree, } clear_bit(EXTENT_BUFFER_IOERR, &eb->bflags); - eb->failed_mirror = 0; + eb->read_mirror = 0; atomic_set(&eb->io_pages, num_reads); for (i = start_i; i < num_pages; i++) { page = extent_buffer_page(eb, i); diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index faf10eb57f7..b516c3b8dec 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -79,7 +79,7 @@ struct extent_io_ops { u64 start, u64 end, struct extent_state *state); int (*readpage_end_io_hook)(struct page *page, u64 start, u64 end, - struct extent_state *state); + struct extent_state *state, int mirror); int (*writepage_end_io_hook)(struct page *page, u64 start, u64 end, struct extent_state *state, int uptodate); void (*set_bit_hook)(struct inode *inode, struct extent_state *state, @@ -135,7 +135,7 @@ struct extent_buffer { spinlock_t refs_lock; atomic_t refs; atomic_t io_pages; - int failed_mirror; + int read_mirror; struct list_head leak_list; struct rcu_head rcu_head; pid_t lock_owner; diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index d83260d7498..53bf2d764bb 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -567,6 +567,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, int extent_type; int recow; int ret; + int modify_tree = -1; if (drop_cache) btrfs_drop_extent_cache(inode, start, end - 1, 0); @@ -575,10 +576,13 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans, struct inode *inode, if (!path) return -ENOMEM; + if (start >= BTRFS_I(inode)->disk_i_size) + modify_tree = 0; + while (1) { recow = 0; ret = btrfs_lookup_file_extent(trans, root, path, ino, - search_start, -1); + search_start, modify_tree); if (ret < 0) break; if (ret > 0 && path->slots[0] > 0 && search_start == start) { @@ -634,7 +638,8 @@ next_slot: } search_start = max(key.offset, start); - if (recow) { + if (recow || !modify_tree) { + modify_tree = -1; btrfs_release_path(path); continue; } diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c index e88330d3df5..202008ec367 100644 --- a/fs/btrfs/free-space-cache.c +++ b/fs/btrfs/free-space-cache.c @@ -748,13 +748,6 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, u64 used = btrfs_block_group_used(&block_group->item); /* - * If we're unmounting then just return, since this does a search on the - * normal root and not the commit root and we could deadlock. - */ - if (btrfs_fs_closing(fs_info)) - return 0; - - /* * If this block group has been marked to be cleared for one reason or * another then we can't trust the on disk cache, so just return. */ @@ -768,6 +761,8 @@ int load_free_space_cache(struct btrfs_fs_info *fs_info, path = btrfs_alloc_path(); if (!path) return 0; + path->search_commit_root = 1; + path->skip_locking = 1; inode = lookup_free_space_inode(root, block_group, path); if (IS_ERR(inode)) { diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 115bc05e42b..61b16c641ce 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -1947,7 +1947,7 @@ static int btrfs_writepage_end_io_hook(struct page *page, u64 start, u64 end, * extent_io.c will try to find good copies for us. */ static int btrfs_readpage_end_io_hook(struct page *page, u64 start, u64 end, - struct extent_state *state) + struct extent_state *state, int mirror) { size_t offset = start - ((u64)page->index << PAGE_CACHE_SHIFT); struct inode *inode = page->mapping->host; @@ -4069,7 +4069,7 @@ static struct inode *new_simple_dir(struct super_block *s, BTRFS_I(inode)->dummy_inode = 1; inode->i_ino = BTRFS_EMPTY_SUBVOL_DIR_OBJECTID; - inode->i_op = &simple_dir_inode_operations; + inode->i_op = &btrfs_dir_ro_inode_operations; inode->i_fop = &simple_dir_operations; inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO; inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; @@ -4140,14 +4140,18 @@ struct inode *btrfs_lookup_dentry(struct inode *dir, struct dentry *dentry) static int btrfs_dentry_delete(const struct dentry *dentry) { struct btrfs_root *root; + struct inode *inode = dentry->d_inode; - if (!dentry->d_inode && !IS_ROOT(dentry)) - dentry = dentry->d_parent; + if (!inode && !IS_ROOT(dentry)) + inode = dentry->d_parent->d_inode; - if (dentry->d_inode) { - root = BTRFS_I(dentry->d_inode)->root; + if (inode) { + root = BTRFS_I(inode)->root; if (btrfs_root_refs(&root->root_item) == 0) return 1; + + if (btrfs_ino(inode) == BTRFS_EMPTY_SUBVOL_DIR_OBJECTID) + return 1; } return 0; } @@ -4188,7 +4192,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, struct btrfs_path *path; struct list_head ins_list; struct list_head del_list; - struct qstr q; int ret; struct extent_buffer *leaf; int slot; @@ -4279,7 +4282,6 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, while (di_cur < di_total) { struct btrfs_key location; - struct dentry *tmp; if (verify_dir_item(root, leaf, di)) break; @@ -4300,35 +4302,15 @@ static int btrfs_real_readdir(struct file *filp, void *dirent, d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)]; btrfs_dir_item_key_to_cpu(leaf, di, &location); - q.name = name_ptr; - q.len = name_len; - q.hash = full_name_hash(q.name, q.len); - tmp = d_lookup(filp->f_dentry, &q); - if (!tmp) { - struct btrfs_key *newkey; - - newkey = kzalloc(sizeof(struct btrfs_key), - GFP_NOFS); - if (!newkey) - goto no_dentry; - tmp = d_alloc(filp->f_dentry, &q); - if (!tmp) { - kfree(newkey); - dput(tmp); - goto no_dentry; - } - memcpy(newkey, &location, - sizeof(struct btrfs_key)); - tmp->d_fsdata = newkey; - tmp->d_flags |= DCACHE_NEED_LOOKUP; - d_rehash(tmp); - dput(tmp); - } else { - dput(tmp); - } -no_dentry: + /* is this a reference to our own snapshot? If so - * skip it + * skip it. + * + * In contrast to old kernels, we insert the snapshot's + * dir item and dir index after it has been created, so + * we won't find a reference to our own snapshot. We + * still keep the following code for backward + * compatibility. */ if (location.type == BTRFS_ROOT_ITEM_KEY && location.objectid == root->root_key.objectid) { diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c index 18cc23d164a..14f8e1faa46 100644 --- a/fs/btrfs/ioctl.c +++ b/fs/btrfs/ioctl.c @@ -2262,7 +2262,10 @@ static long btrfs_ioctl_dev_info(struct btrfs_root *root, void __user *arg) di_args->bytes_used = dev->bytes_used; di_args->total_bytes = dev->total_bytes; memcpy(di_args->uuid, dev->uuid, sizeof(di_args->uuid)); - strncpy(di_args->path, dev->name, sizeof(di_args->path)); + if (dev->name) + strncpy(di_args->path, dev->name, sizeof(di_args->path)); + else + di_args->path[0] = '\0'; out: if (ret == 0 && copy_to_user(arg, di_args, sizeof(*di_args))) diff --git a/fs/btrfs/ioctl.h b/fs/btrfs/ioctl.h index 4f69028a68c..086e6bdae1c 100644 --- a/fs/btrfs/ioctl.h +++ b/fs/btrfs/ioctl.h @@ -252,7 +252,7 @@ struct btrfs_data_container { struct btrfs_ioctl_ino_path_args { __u64 inum; /* in */ - __u32 size; /* in */ + __u64 size; /* in */ __u64 reserved[4]; /* struct btrfs_data_container *fspath; out */ __u64 fspath; /* out */ @@ -260,7 +260,7 @@ struct btrfs_ioctl_ino_path_args { struct btrfs_ioctl_logical_ino_args { __u64 logical; /* in */ - __u32 size; /* in */ + __u64 size; /* in */ __u64 reserved[4]; /* struct btrfs_data_container *inodes; out */ __u64 inodes; diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index dc5d33146fd..ac5d0108588 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -250,14 +250,12 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info, struct btrfs_bio *bbio) { int ret; - int looped = 0; struct reada_zone *zone; struct btrfs_block_group_cache *cache = NULL; u64 start; u64 end; int i; -again: zone = NULL; spin_lock(&fs_info->reada_lock); ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, @@ -274,9 +272,6 @@ again: spin_unlock(&fs_info->reada_lock); } - if (looped) - return NULL; - cache = btrfs_lookup_block_group(fs_info, logical); if (!cache) return NULL; @@ -307,13 +302,15 @@ again: ret = radix_tree_insert(&dev->reada_zones, (unsigned long)(zone->end >> PAGE_CACHE_SHIFT), zone); - spin_unlock(&fs_info->reada_lock); - if (ret) { + if (ret == -EEXIST) { kfree(zone); - looped = 1; - goto again; + ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone, + logical >> PAGE_CACHE_SHIFT, 1); + if (ret == 1) + kref_get(&zone->refcnt); } + spin_unlock(&fs_info->reada_lock); return zone; } @@ -323,26 +320,26 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root, struct btrfs_key *top, int level) { int ret; - int looped = 0; struct reada_extent *re = NULL; + struct reada_extent *re_exist = NULL; struct btrfs_fs_info *fs_info = root->fs_info; struct btrfs_mapping_tree *map_tree = &fs_info->mapping_tree; struct btrfs_bio *bbio = NULL; struct btrfs_device *dev; + struct btrfs_device *prev_dev; u32 blocksize; u64 length; int nzones = 0; int i; unsigned long index = logical >> PAGE_CACHE_SHIFT; -again: spin_lock(&fs_info->reada_lock); re = radix_tree_lookup(&fs_info->reada_tree, index); if (re) kref_get(&re->refcnt); spin_unlock(&fs_info->reada_lock); - if (re || looped) + if (re) return re; re = kzalloc(sizeof(*re), GFP_NOFS); @@ -398,16 +395,31 @@ again: /* insert extent in reada_tree + all per-device trees, all or nothing */ spin_lock(&fs_info->reada_lock); ret = radix_tree_insert(&fs_info->reada_tree, index, re); + if (ret == -EEXIST) { + re_exist = radix_tree_lookup(&fs_info->reada_tree, index); + BUG_ON(!re_exist); + kref_get(&re_exist->refcnt); + spin_unlock(&fs_info->reada_lock); + goto error; + } if (ret) { spin_unlock(&fs_info->reada_lock); - if (ret != -ENOMEM) { - /* someone inserted the extent in the meantime */ - looped = 1; - } goto error; } + prev_dev = NULL; for (i = 0; i < nzones; ++i) { dev = bbio->stripes[i].dev; + if (dev == prev_dev) { + /* + * in case of DUP, just add the first zone. As both + * are on the same device, there's nothing to gain + * from adding both. + * Also, it wouldn't work, as the tree is per device + * and adding would fail with EEXIST + */ + continue; + } + prev_dev = dev; ret = radix_tree_insert(&dev->reada_extents, index, re); if (ret) { while (--i >= 0) { @@ -450,9 +462,7 @@ error: } kfree(bbio); kfree(re); - if (looped) - goto again; - return NULL; + return re_exist; } static void reada_kref_dummy(struct kref *kr) diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c index 017281dbb2a..646ee21bb03 100644 --- a/fs/btrfs/relocation.c +++ b/fs/btrfs/relocation.c @@ -1279,7 +1279,9 @@ static int __update_reloc_root(struct btrfs_root *root, int del) if (rb_node) backref_tree_panic(rb_node, -EEXIST, node->bytenr); } else { + spin_lock(&root->fs_info->trans_lock); list_del_init(&root->root_list); + spin_unlock(&root->fs_info->trans_lock); kfree(node); } return 0; @@ -3811,7 +3813,7 @@ restart: ret = btrfs_block_rsv_check(rc->extent_root, rc->block_rsv, 5); if (ret < 0) { - if (ret != -EAGAIN) { + if (ret != -ENOSPC) { err = ret; WARN_ON(1); break; diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c index 90acc82046c..2f3d6f917fb 100644 --- a/fs/btrfs/scrub.c +++ b/fs/btrfs/scrub.c @@ -998,6 +998,7 @@ static int scrub_setup_recheck_block(struct scrub_dev *sdev, page = sblock->pagev + page_index; page->logical = logical; page->physical = bbio->stripes[mirror_index].physical; + /* for missing devices, bdev is NULL */ page->bdev = bbio->stripes[mirror_index].dev->bdev; page->mirror_num = mirror_index + 1; page->page = alloc_page(GFP_NOFS); @@ -1042,8 +1043,16 @@ static int scrub_recheck_block(struct btrfs_fs_info *fs_info, struct scrub_page *page = sblock->pagev + page_num; DECLARE_COMPLETION_ONSTACK(complete); + if (page->bdev == NULL) { + page->io_error = 1; + sblock->no_io_error_seen = 0; + continue; + } + BUG_ON(!page->page); bio = bio_alloc(GFP_NOFS, 1); + if (!bio) + return -EIO; bio->bi_bdev = page->bdev; bio->bi_sector = page->physical >> 9; bio->bi_end_io = scrub_complete_bio_end_io; @@ -1171,6 +1180,8 @@ static int scrub_repair_page_from_good_copy(struct scrub_block *sblock_bad, DECLARE_COMPLETION_ONSTACK(complete); bio = bio_alloc(GFP_NOFS, 1); + if (!bio) + return -EIO; bio->bi_bdev = page_bad->bdev; bio->bi_sector = page_bad->physical >> 9; bio->bi_end_io = scrub_complete_bio_end_io; @@ -1253,12 +1264,6 @@ static int scrub_checksum_data(struct scrub_block *sblock) if (memcmp(csum, on_disk_csum, sdev->csum_size)) fail = 1; - if (fail) { - spin_lock(&sdev->stat_lock); - ++sdev->stat.csum_errors; - spin_unlock(&sdev->stat_lock); - } - return fail; } @@ -1331,15 +1336,6 @@ static int scrub_checksum_tree_block(struct scrub_block *sblock) if (memcmp(calculated_csum, on_disk_csum, sdev->csum_size)) ++crc_fail; - if (crc_fail || fail) { - spin_lock(&sdev->stat_lock); - if (crc_fail) - ++sdev->stat.csum_errors; - if (fail) - ++sdev->stat.verify_errors; - spin_unlock(&sdev->stat_lock); - } - return fail || crc_fail; } diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 8d5d380f7bd..c5f8fca4195 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -815,7 +815,6 @@ int btrfs_sync_fs(struct super_block *sb, int wait) return 0; } - btrfs_start_delalloc_inodes(root, 0); btrfs_wait_ordered_extents(root, 0, 0); trans = btrfs_start_transaction(root, 0); @@ -1148,13 +1147,15 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data) if (ret) goto restore; } else { - if (fs_info->fs_devices->rw_devices == 0) + if (fs_info->fs_devices->rw_devices == 0) { ret = -EACCES; goto restore; + } - if (btrfs_super_log_root(fs_info->super_copy) != 0) + if (btrfs_super_log_root(fs_info->super_copy) != 0) { ret = -EINVAL; goto restore; + } ret = btrfs_cleanup_fs_roots(fs_info); if (ret) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index 8da29e8e4de..36422254ef6 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -73,8 +73,10 @@ loop: cur_trans = root->fs_info->running_transaction; if (cur_trans) { - if (cur_trans->aborted) + if (cur_trans->aborted) { + spin_unlock(&root->fs_info->trans_lock); return cur_trans->aborted; + } atomic_inc(&cur_trans->use_count); atomic_inc(&cur_trans->num_writers); cur_trans->num_joined++; @@ -480,6 +482,7 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, struct btrfs_transaction *cur_trans = trans->transaction; struct btrfs_fs_info *info = root->fs_info; int count = 0; + int err = 0; if (--trans->use_count) { trans->block_rsv = trans->orig_rsv; @@ -532,18 +535,18 @@ static int __btrfs_end_transaction(struct btrfs_trans_handle *trans, if (current->journal_info == trans) current->journal_info = NULL; - memset(trans, 0, sizeof(*trans)); - kmem_cache_free(btrfs_trans_handle_cachep, trans); if (throttle) btrfs_run_delayed_iputs(root); if (trans->aborted || root->fs_info->fs_state & BTRFS_SUPER_FLAG_ERROR) { - return -EIO; + err = -EIO; } - return 0; + memset(trans, 0, sizeof(*trans)); + kmem_cache_free(btrfs_trans_handle_cachep, trans); + return err; } int btrfs_end_transaction(struct btrfs_trans_handle *trans, @@ -1399,6 +1402,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = commit_fs_roots(trans, root); if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->fs_info->reloc_mutex); goto cleanup_transaction; } @@ -1410,6 +1414,7 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans, ret = commit_cowonly_roots(trans, root); if (ret) { mutex_unlock(&root->fs_info->tree_log_mutex); + mutex_unlock(&root->fs_info->reloc_mutex); goto cleanup_transaction; } diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c index d017283ae6f..eb1ae908582 100644 --- a/fs/btrfs/tree-log.c +++ b/fs/btrfs/tree-log.c @@ -279,7 +279,7 @@ static int process_one_buffer(struct btrfs_root *log, log->fs_info->extent_root, eb->start, eb->len); - if (btrfs_buffer_uptodate(eb, gen)) { + if (btrfs_buffer_uptodate(eb, gen, 0)) { if (wc->write) btrfs_write_tree_block(eb); if (wc->wait) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index a872b48be0a..1411b99555a 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -3324,12 +3324,14 @@ static int __btrfs_alloc_chunk(struct btrfs_trans_handle *trans, stripe_size = devices_info[ndevs-1].max_avail; num_stripes = ndevs * dev_stripes; - if (stripe_size * num_stripes > max_chunk_size * ncopies) { + if (stripe_size * ndevs > max_chunk_size * ncopies) { stripe_size = max_chunk_size * ncopies; - do_div(stripe_size, num_stripes); + do_div(stripe_size, ndevs); } do_div(stripe_size, dev_stripes); + + /* align to BTRFS_STRIPE_LEN */ do_div(stripe_size, BTRFS_STRIPE_LEN); stripe_size *= BTRFS_STRIPE_LEN; @@ -3805,10 +3807,11 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, else if (mirror_num) stripe_index += mirror_num - 1; else { + int old_stripe_index = stripe_index; stripe_index = find_live_mirror(map, stripe_index, map->sub_stripes, stripe_index + current->pid % map->sub_stripes); - mirror_num = stripe_index + 1; + mirror_num = stripe_index - old_stripe_index + 1; } } else { /* @@ -3833,6 +3836,7 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, int sub_stripes = 0; u64 stripes_per_dev = 0; u32 remaining_stripes = 0; + u32 last_stripe = 0; if (map->type & (BTRFS_BLOCK_GROUP_RAID0 | BTRFS_BLOCK_GROUP_RAID10)) { @@ -3846,6 +3850,8 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, stripe_nr_orig, factor, &remaining_stripes); + div_u64_rem(stripe_nr_end - 1, factor, &last_stripe); + last_stripe *= sub_stripes; } for (i = 0; i < num_stripes; i++) { @@ -3858,16 +3864,29 @@ static int __btrfs_map_block(struct btrfs_mapping_tree *map_tree, int rw, BTRFS_BLOCK_GROUP_RAID10)) { bbio->stripes[i].length = stripes_per_dev * map->stripe_len; + if (i / sub_stripes < remaining_stripes) bbio->stripes[i].length += map->stripe_len; + + /* + * Special for the first stripe and + * the last stripe: + * + * |-------|...|-------| + * |----------| + * off end_off + */ if (i < sub_stripes) bbio->stripes[i].length -= stripe_offset; - if ((i / sub_stripes + 1) % - sub_stripes == remaining_stripes) + + if (stripe_index >= last_stripe && + stripe_index <= (last_stripe + + sub_stripes - 1)) bbio->stripes[i].length -= stripe_end_offset; + if (i == sub_stripes - 1) stripe_offset = 0; } else @@ -4334,8 +4353,10 @@ static int open_seed_devices(struct btrfs_root *root, u8 *fsid) ret = __btrfs_open_devices(fs_devices, FMODE_READ, root->fs_info->bdev_holder); - if (ret) + if (ret) { + free_fs_devices(fs_devices); goto out; + } if (!fs_devices->seeding) { __btrfs_close_devices(fs_devices); diff --git a/fs/buffer.c b/fs/buffer.c index 36d66653b93..ad5938ca357 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -921,6 +921,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, struct buffer_head *head = page_buffers(page); struct buffer_head *bh = head; int uptodate = PageUptodate(page); + sector_t end_block = blkdev_max_block(I_BDEV(bdev->bd_inode)); do { if (!buffer_mapped(bh)) { @@ -929,7 +930,8 @@ init_page_buffers(struct page *page, struct block_device *bdev, bh->b_blocknr = block; if (uptodate) set_buffer_uptodate(bh); - set_buffer_mapped(bh); + if (block < end_block) + set_buffer_mapped(bh); } block++; bh = bh->b_this_page; @@ -985,7 +987,6 @@ grow_dev_page(struct block_device *bdev, sector_t block, return page; failed: - BUG(); unlock_page(page); page_cache_release(page); return NULL; diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index d3421282244..541ef81f6ae 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -370,13 +370,13 @@ cifs_show_options(struct seq_file *s, struct dentry *root) (int)(srcaddr->sa_family)); } - seq_printf(s, ",uid=%d", cifs_sb->mnt_uid); + seq_printf(s, ",uid=%u", cifs_sb->mnt_uid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_UID) seq_printf(s, ",forceuid"); else seq_printf(s, ",noforceuid"); - seq_printf(s, ",gid=%d", cifs_sb->mnt_gid); + seq_printf(s, ",gid=%u", cifs_sb->mnt_gid); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_OVERR_GID) seq_printf(s, ",forcegid"); else @@ -434,11 +434,15 @@ cifs_show_options(struct seq_file *s, struct dentry *root) seq_printf(s, ",noperm"); if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_STRICT_IO) seq_printf(s, ",strictcache"); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPUID) + seq_printf(s, ",backupuid=%u", cifs_sb->mnt_backupuid); + if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_CIFS_BACKUPGID) + seq_printf(s, ",backupgid=%u", cifs_sb->mnt_backupgid); - seq_printf(s, ",rsize=%d", cifs_sb->rsize); - seq_printf(s, ",wsize=%d", cifs_sb->wsize); + seq_printf(s, ",rsize=%u", cifs_sb->rsize); + seq_printf(s, ",wsize=%u", cifs_sb->wsize); /* convert actimeo and display it in seconds */ - seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); + seq_printf(s, ",actimeo=%lu", cifs_sb->actimeo / HZ); return 0; } @@ -695,7 +699,7 @@ static loff_t cifs_llseek(struct file *file, loff_t offset, int origin) * origin == SEEK_END || SEEK_DATA || SEEK_HOLE => we must revalidate * the cached file length */ - if (origin != SEEK_SET || origin != SEEK_CUR) { + if (origin != SEEK_SET && origin != SEEK_CUR) { int rc; struct inode *inode = file->f_path.dentry->d_inode; diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index d1389bb33ce..65365358c97 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -125,5 +125,5 @@ extern long cifs_ioctl(struct file *filep, unsigned int cmd, unsigned long arg); extern const struct export_operations cifs_export_ops; #endif /* CONFIG_CIFS_NFSD_EXPORT */ -#define CIFS_VERSION "1.77" +#define CIFS_VERSION "1.78" #endif /* _CIFSFS_H */ diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c index f52c5ab78f9..da2f5446fa7 100644 --- a/fs/cifs/cifssmb.c +++ b/fs/cifs/cifssmb.c @@ -4844,8 +4844,12 @@ parse_DFS_referrals(TRANSACTION2_GET_DFS_REFER_RSP *pSMBr, max_len = data_end - temp; node->node_name = cifs_strndup_from_utf16(temp, max_len, is_unicode, nls_codepage); - if (!node->node_name) + if (!node->node_name) { rc = -ENOMEM; + goto parse_DFS_referrals_exit; + } + + ref++; } parse_DFS_referrals_exit: diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index d81e933a796..e0b56d7a19c 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -109,6 +109,8 @@ enum { /* Options which could be blank */ Opt_blank_pass, + Opt_blank_user, + Opt_blank_ip, Opt_err }; @@ -162,7 +164,8 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_sign, "sign" }, { Opt_seal, "seal" }, { Opt_direct, "direct" }, - { Opt_direct, "forceddirectio" }, + { Opt_direct, "directio" }, + { Opt_direct, "forcedirectio" }, { Opt_strictcache, "strictcache" }, { Opt_noac, "noac" }, { Opt_fsc, "fsc" }, @@ -183,11 +186,15 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_wsize, "wsize=%s" }, { Opt_actimeo, "actimeo=%s" }, + { Opt_blank_user, "user=" }, + { Opt_blank_user, "username=" }, { Opt_user, "user=%s" }, { Opt_user, "username=%s" }, { Opt_blank_pass, "pass=" }, { Opt_pass, "pass=%s" }, { Opt_pass, "password=%s" }, + { Opt_blank_ip, "ip=" }, + { Opt_blank_ip, "addr=" }, { Opt_ip, "ip=%s" }, { Opt_ip, "addr=%s" }, { Opt_unc, "unc=%s" }, @@ -209,6 +216,8 @@ static const match_table_t cifs_mount_option_tokens = { { Opt_ignore, "cred" }, { Opt_ignore, "credentials" }, + { Opt_ignore, "cred=%s" }, + { Opt_ignore, "credentials=%s" }, { Opt_ignore, "guest" }, { Opt_ignore, "rw" }, { Opt_ignore, "ro" }, @@ -1117,7 +1126,7 @@ static int get_option_ul(substring_t args[], unsigned long *option) string = match_strdup(args); if (string == NULL) return -ENOMEM; - rc = kstrtoul(string, 10, option); + rc = kstrtoul(string, 0, option); kfree(string); return rc; @@ -1534,15 +1543,17 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, /* String Arguments */ + case Opt_blank_user: + /* null user, ie. anonymous authentication */ + vol->nullauth = 1; + vol->username = NULL; + break; case Opt_user: string = match_strdup(args); if (string == NULL) goto out_nomem; - if (!*string) { - /* null user, ie. anonymous authentication */ - vol->nullauth = 1; - } else if (strnlen(string, MAX_USERNAME_SIZE) > + if (strnlen(string, MAX_USERNAME_SIZE) > MAX_USERNAME_SIZE) { printk(KERN_WARNING "CIFS: username too long\n"); goto cifs_parse_mount_err; @@ -1611,14 +1622,15 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, } vol->password[j] = '\0'; break; + case Opt_blank_ip: + vol->UNCip = NULL; + break; case Opt_ip: string = match_strdup(args); if (string == NULL) goto out_nomem; - if (!*string) { - vol->UNCip = NULL; - } else if (strnlen(string, INET6_ADDRSTRLEN) > + if (strnlen(string, INET6_ADDRSTRLEN) > INET6_ADDRSTRLEN) { printk(KERN_WARNING "CIFS: ip address " "too long\n"); @@ -1636,12 +1648,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: invalid path to " - "network resource\n"); - goto cifs_parse_mount_err; - } - temp_len = strnlen(string, 300); if (temp_len == 300) { printk(KERN_WARNING "CIFS: UNC name too long\n"); @@ -1670,11 +1676,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: invalid domain" - " name\n"); - goto cifs_parse_mount_err; - } else if (strnlen(string, 256) == 256) { + if (strnlen(string, 256) == 256) { printk(KERN_WARNING "CIFS: domain name too" " long\n"); goto cifs_parse_mount_err; @@ -1693,11 +1695,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: srcaddr value not" - " specified\n"); - goto cifs_parse_mount_err; - } else if (!cifs_convert_address( + if (!cifs_convert_address( (struct sockaddr *)&vol->srcaddr, string, strlen(string))) { printk(KERN_WARNING "CIFS: Could not parse" @@ -1710,11 +1708,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: Invalid path" - " prefix\n"); - goto cifs_parse_mount_err; - } temp_len = strnlen(string, 1024); if (string[0] != '/') temp_len++; /* missing leading slash */ @@ -1742,11 +1735,7 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: Invalid iocharset" - " specified\n"); - goto cifs_parse_mount_err; - } else if (strnlen(string, 1024) >= 65) { + if (strnlen(string, 1024) >= 65) { printk(KERN_WARNING "CIFS: iocharset name " "too long.\n"); goto cifs_parse_mount_err; @@ -1771,11 +1760,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: No socket option" - " specified\n"); - goto cifs_parse_mount_err; - } if (strnicmp(string, "TCP_NODELAY", 11) == 0) vol->sockopt_tcp_nodelay = 1; break; @@ -1784,12 +1768,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: Invalid (empty)" - " netbiosname\n"); - break; - } - memset(vol->source_rfc1001_name, 0x20, RFC1001_NAME_LEN); /* @@ -1817,11 +1795,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: Empty server" - " netbiosname specified\n"); - break; - } /* last byte, type, is 0x20 for servr type */ memset(vol->target_rfc1001_name, 0x20, RFC1001_NAME_LEN_WITH_NULL); @@ -1848,12 +1821,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - cERROR(1, "no protocol version specified" - " after vers= mount option"); - goto cifs_parse_mount_err; - } - if (strnicmp(string, "cifs", 4) == 0 || strnicmp(string, "1", 1) == 0) { /* This is the default */ @@ -1868,12 +1835,6 @@ cifs_parse_mount_options(const char *mountdata, const char *devname, if (string == NULL) goto out_nomem; - if (!*string) { - printk(KERN_WARNING "CIFS: no security flavor" - " specified\n"); - break; - } - if (cifs_parse_security_flavors(string, vol) != 0) goto cifs_parse_mount_err; break; @@ -2225,6 +2186,7 @@ cifs_get_tcp_session(struct smb_vol *volume_info) tcp_ses->session_estab = false; tcp_ses->sequence_number = 0; tcp_ses->lstrp = jiffies; + spin_lock_init(&tcp_ses->req_lock); INIT_LIST_HEAD(&tcp_ses->tcp_ses_list); INIT_LIST_HEAD(&tcp_ses->smb_ses_list); INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request); @@ -3270,10 +3232,6 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_uid = pvolume_info->linux_uid; cifs_sb->mnt_gid = pvolume_info->linux_gid; - if (pvolume_info->backupuid_specified) - cifs_sb->mnt_backupuid = pvolume_info->backupuid; - if (pvolume_info->backupgid_specified) - cifs_sb->mnt_backupgid = pvolume_info->backupgid; cifs_sb->mnt_file_mode = pvolume_info->file_mode; cifs_sb->mnt_dir_mode = pvolume_info->dir_mode; cFYI(1, "file mode: 0x%hx dir mode: 0x%hx", @@ -3304,10 +3262,14 @@ void cifs_setup_cifs_sb(struct smb_vol *pvolume_info, cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_RWPIDFORWARD; if (pvolume_info->cifs_acl) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_ACL; - if (pvolume_info->backupuid_specified) + if (pvolume_info->backupuid_specified) { cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPUID; - if (pvolume_info->backupgid_specified) + cifs_sb->mnt_backupuid = pvolume_info->backupuid; + } + if (pvolume_info->backupgid_specified) { cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_CIFS_BACKUPGID; + cifs_sb->mnt_backupgid = pvolume_info->backupgid; + } if (pvolume_info->override_uid) cifs_sb->mnt_cifs_flags |= CIFS_MOUNT_OVERR_UID; if (pvolume_info->override_gid) @@ -3656,22 +3618,6 @@ cifs_get_volume_info(char *mount_data, const char *devname) return volume_info; } -/* make sure ra_pages is a multiple of rsize */ -static inline unsigned int -cifs_ra_pages(struct cifs_sb_info *cifs_sb) -{ - unsigned int reads; - unsigned int rsize_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; - - if (rsize_pages >= default_backing_dev_info.ra_pages) - return default_backing_dev_info.ra_pages; - else if (rsize_pages == 0) - return rsize_pages; - - reads = default_backing_dev_info.ra_pages / rsize_pages; - return reads * rsize_pages; -} - int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb_vol *volume_info) { @@ -3759,7 +3705,7 @@ try_mount_again: cifs_sb->rsize = cifs_negotiate_rsize(tcon, volume_info); /* tune readahead according to rsize */ - cifs_sb->bdi.ra_pages = cifs_ra_pages(cifs_sb); + cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE; remote_path_check: #ifdef CONFIG_CIFS_DFS_UPCALL diff --git a/fs/cifs/dir.c b/fs/cifs/dir.c index d172c8ed901..ec4e9a2a12f 100644 --- a/fs/cifs/dir.c +++ b/fs/cifs/dir.c @@ -668,12 +668,19 @@ cifs_d_revalidate(struct dentry *direntry, struct nameidata *nd) return 0; else { /* - * Forcibly invalidate automounting directory inodes - * (remote DFS directories) so to have them - * instantiated again for automount + * If the inode wasn't known to be a dfs entry when + * the dentry was instantiated, such as when created + * via ->readdir(), it needs to be set now since the + * attributes will have been updated by + * cifs_revalidate_dentry(). */ - if (IS_AUTOMOUNT(direntry->d_inode)) - return 0; + if (IS_AUTOMOUNT(direntry->d_inode) && + !(direntry->d_flags & DCACHE_NEED_AUTOMOUNT)) { + spin_lock(&direntry->d_lock); + direntry->d_flags |= DCACHE_NEED_AUTOMOUNT; + spin_unlock(&direntry->d_lock); + } + return 1; } } diff --git a/fs/cifs/file.c b/fs/cifs/file.c index fae765dac93..81725e9286e 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -2178,7 +2178,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, unsigned long nr_pages, i; size_t copied, len, cur_len; ssize_t total_written = 0; - loff_t offset = *poffset; + loff_t offset; struct iov_iter it; struct cifsFileInfo *open_file; struct cifs_tcon *tcon; @@ -2200,6 +2200,7 @@ cifs_iovec_write(struct file *file, const struct iovec *iov, cifs_sb = CIFS_SB(file->f_path.dentry->d_sb); open_file = file->private_data; tcon = tlink_tcon(open_file->tlink); + offset = *poffset; if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_RWPIDFORWARD) pid = open_file->pid; diff --git a/fs/dcache.c b/fs/dcache.c index b60ddc41d78..b80531c9177 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -141,18 +141,29 @@ int proc_nr_dentry(ctl_table *table, int write, void __user *buffer, * Compare 2 name strings, return 0 if they match, otherwise non-zero. * The strings are both count bytes long, and count is non-zero. */ +#ifdef CONFIG_DCACHE_WORD_ACCESS + +#include <asm/word-at-a-time.h> +/* + * NOTE! 'cs' and 'scount' come from a dentry, so it has a + * aligned allocation for this particular component. We don't + * strictly need the load_unaligned_zeropad() safety, but it + * doesn't hurt either. + * + * In contrast, 'ct' and 'tcount' can be from a pathname, and do + * need the careful unaligned handling. + */ static inline int dentry_cmp(const unsigned char *cs, size_t scount, const unsigned char *ct, size_t tcount) { -#ifdef CONFIG_DCACHE_WORD_ACCESS unsigned long a,b,mask; if (unlikely(scount != tcount)) return 1; for (;;) { - a = *(unsigned long *)cs; - b = *(unsigned long *)ct; + a = load_unaligned_zeropad(cs); + b = load_unaligned_zeropad(ct); if (tcount < sizeof(unsigned long)) break; if (unlikely(a != b)) @@ -165,7 +176,13 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, } mask = ~(~0ul << tcount*8); return unlikely(!!((a ^ b) & mask)); +} + #else + +static inline int dentry_cmp(const unsigned char *cs, size_t scount, + const unsigned char *ct, size_t tcount) +{ if (scount != tcount) return 1; @@ -177,9 +194,10 @@ static inline int dentry_cmp(const unsigned char *cs, size_t scount, tcount--; } while (tcount); return 0; -#endif } +#endif + static void __d_free(struct rcu_head *head) { struct dentry *dentry = container_of(head, struct dentry, d_u.d_rcu); diff --git a/fs/dlm/lock.c b/fs/dlm/lock.c index fa5c07d51dc..4c58d4a3adc 100644 --- a/fs/dlm/lock.c +++ b/fs/dlm/lock.c @@ -1737,6 +1737,18 @@ static int _can_be_granted(struct dlm_rsb *r, struct dlm_lkb *lkb, int now) return 1; /* + * Even if the convert is compat with all granted locks, + * QUECVT forces it behind other locks on the convert queue. + */ + + if (now && conv && (lkb->lkb_exflags & DLM_LKF_QUECVT)) { + if (list_empty(&r->res_convertqueue)) + return 1; + else + goto out; + } + + /* * The NOORDER flag is set to avoid the standard vms rules on grant * order. */ diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 739b0985b39..c0b3c70ee87 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1663,8 +1663,10 @@ SYSCALL_DEFINE4(epoll_ctl, int, epfd, int, op, int, fd, if (op == EPOLL_CTL_ADD) { if (is_file_epoll(tfile)) { error = -ELOOP; - if (ep_loop_check(ep, tfile) != 0) + if (ep_loop_check(ep, tfile) != 0) { + clear_tfile_check_list(); goto error_tgt_fput; + } } else list_add(&tfile->f_tfile_llink, &tfile_check_list); } diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index ab2594a30f8..0e01e90add8 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -1203,9 +1203,6 @@ struct ext4_sb_info { unsigned long s_ext_blocks; unsigned long s_ext_extents; #endif - /* ext4 extent cache stats */ - unsigned long extent_cache_hits; - unsigned long extent_cache_misses; /* for buddy allocator */ struct ext4_group_info ***s_group_info; diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c index 1421938e679..abcdeab67f5 100644 --- a/fs/ext4/extents.c +++ b/fs/ext4/extents.c @@ -2066,10 +2066,6 @@ static int ext4_ext_check_cache(struct inode *inode, ext4_lblk_t block, ret = 1; } errout: - if (!ret) - sbi->extent_cache_misses++; - else - sbi->extent_cache_hits++; trace_ext4_ext_in_cache(inode, block, ret); spin_unlock(&EXT4_I(inode)->i_block_reservation_lock); return ret; @@ -2882,7 +2878,7 @@ static int ext4_split_extent_at(handle_t *handle, if (err) goto fix_extent_len; /* update the extent length and mark as initialized */ - ex->ee_len = cpu_to_le32(ee_len); + ex->ee_len = cpu_to_le16(ee_len); ext4_ext_try_to_merge(inode, path, ex); err = ext4_ext_dirty(handle, inode, path + depth); goto out; diff --git a/fs/ext4/super.c b/fs/ext4/super.c index ceebaf853be..e1fb1d5de58 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -1305,20 +1305,20 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) ext4_msg(sb, KERN_ERR, "Cannot change journaled " "quota options when quota turned on"); - return 0; + return -1; } qname = match_strdup(args); if (!qname) { ext4_msg(sb, KERN_ERR, "Not enough memory for storing quotafile name"); - return 0; + return -1; } if (sbi->s_qf_names[qtype] && strcmp(sbi->s_qf_names[qtype], qname)) { ext4_msg(sb, KERN_ERR, "%s quota file already specified", QTYPE2NAME(qtype)); kfree(qname); - return 0; + return -1; } sbi->s_qf_names[qtype] = qname; if (strchr(sbi->s_qf_names[qtype], '/')) { @@ -1326,7 +1326,7 @@ static int set_qf_name(struct super_block *sb, int qtype, substring_t *args) "quotafile must be on filesystem root"); kfree(sbi->s_qf_names[qtype]); sbi->s_qf_names[qtype] = NULL; - return 0; + return -1; } set_opt(sb, QUOTA); return 1; @@ -1341,7 +1341,7 @@ static int clear_qf_name(struct super_block *sb, int qtype) sbi->s_qf_names[qtype]) { ext4_msg(sb, KERN_ERR, "Cannot change journaled quota options" " when quota turned on"); - return 0; + return -1; } /* * The space will be released later when all options are confirmed @@ -1450,6 +1450,16 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, const struct mount_opts *m; int arg = 0; +#ifdef CONFIG_QUOTA + if (token == Opt_usrjquota) + return set_qf_name(sb, USRQUOTA, &args[0]); + else if (token == Opt_grpjquota) + return set_qf_name(sb, GRPQUOTA, &args[0]); + else if (token == Opt_offusrjquota) + return clear_qf_name(sb, USRQUOTA); + else if (token == Opt_offgrpjquota) + return clear_qf_name(sb, GRPQUOTA); +#endif if (args->from && match_int(args, &arg)) return -1; switch (token) { @@ -1549,18 +1559,6 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token, sbi->s_mount_opt |= m->mount_opt; } #ifdef CONFIG_QUOTA - } else if (token == Opt_usrjquota) { - if (!set_qf_name(sb, USRQUOTA, &args[0])) - return -1; - } else if (token == Opt_grpjquota) { - if (!set_qf_name(sb, GRPQUOTA, &args[0])) - return -1; - } else if (token == Opt_offusrjquota) { - if (!clear_qf_name(sb, USRQUOTA)) - return -1; - } else if (token == Opt_offgrpjquota) { - if (!clear_qf_name(sb, GRPQUOTA)) - return -1; } else if (m->flags & MOPT_QFMT) { if (sb_any_quota_loaded(sb) && sbi->s_jquota_fmt != m->mount_opt) { @@ -1599,7 +1597,9 @@ static int parse_options(char *options, struct super_block *sb, unsigned int *journal_ioprio, int is_remount) { +#ifdef CONFIG_QUOTA struct ext4_sb_info *sbi = EXT4_SB(sb); +#endif char *p; substring_t args[MAX_OPT_ARGS]; int token; @@ -2366,18 +2366,6 @@ static ssize_t lifetime_write_kbytes_show(struct ext4_attr *a, EXT4_SB(sb)->s_sectors_written_start) >> 1))); } -static ssize_t extent_cache_hits_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_hits); -} - -static ssize_t extent_cache_misses_show(struct ext4_attr *a, - struct ext4_sb_info *sbi, char *buf) -{ - return snprintf(buf, PAGE_SIZE, "%lu\n", sbi->extent_cache_misses); -} - static ssize_t inode_readahead_blks_store(struct ext4_attr *a, struct ext4_sb_info *sbi, const char *buf, size_t count) @@ -2435,8 +2423,6 @@ static struct ext4_attr ext4_attr_##name = __ATTR(name, mode, show, store) EXT4_RO_ATTR(delayed_allocation_blocks); EXT4_RO_ATTR(session_write_kbytes); EXT4_RO_ATTR(lifetime_write_kbytes); -EXT4_RO_ATTR(extent_cache_hits); -EXT4_RO_ATTR(extent_cache_misses); EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, sbi_ui_show, inode_readahead_blks_store, s_inode_readahead_blks); EXT4_RW_ATTR_SBI_UI(inode_goal, s_inode_goal); @@ -2452,8 +2438,6 @@ static struct attribute *ext4_attrs[] = { ATTR_LIST(delayed_allocation_blocks), ATTR_LIST(session_write_kbytes), ATTR_LIST(lifetime_write_kbytes), - ATTR_LIST(extent_cache_hits), - ATTR_LIST(extent_cache_misses), ATTR_LIST(inode_readahead_blks), ATTR_LIST(inode_goal), ATTR_LIST(mb_stats), diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 206632887bb..df5ac048dc7 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -387,9 +387,6 @@ static int fuse_create_open(struct inode *dir, struct dentry *entry, if (fc->no_create) return -ENOSYS; - if (flags & O_DIRECT) - return -EINVAL; - forget = fuse_alloc_forget(); if (!forget) return -ENOMEM; @@ -644,13 +641,12 @@ static int fuse_unlink(struct inode *dir, struct dentry *entry) fuse_put_request(fc, req); if (!err) { struct inode *inode = entry->d_inode; + struct fuse_inode *fi = get_fuse_inode(inode); - /* - * Set nlink to zero so the inode can be cleared, if the inode - * does have more links this will be discovered at the next - * lookup/getattr. - */ - clear_nlink(inode); + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + drop_nlink(inode); + spin_unlock(&fc->lock); fuse_invalidate_attr(inode); fuse_invalidate_attr(dir); fuse_invalidate_entry_cache(entry); @@ -762,8 +758,17 @@ static int fuse_link(struct dentry *entry, struct inode *newdir, will reflect changes in the backing inode (link count, etc.) */ - if (!err || err == -EINTR) + if (!err) { + struct fuse_inode *fi = get_fuse_inode(inode); + + spin_lock(&fc->lock); + fi->attr_version = ++fc->attr_version; + inc_nlink(inode); + spin_unlock(&fc->lock); + fuse_invalidate_attr(inode); + } else if (err == -EINTR) { fuse_invalidate_attr(inode); + } return err; } diff --git a/fs/fuse/file.c b/fs/fuse/file.c index a841868bf9c..504e61b7fd7 100644 --- a/fs/fuse/file.c +++ b/fs/fuse/file.c @@ -194,10 +194,6 @@ int fuse_open_common(struct inode *inode, struct file *file, bool isdir) struct fuse_conn *fc = get_fuse_conn(inode); int err; - /* VFS checks this, but only _after_ ->open() */ - if (file->f_flags & O_DIRECT) - return -EINVAL; - err = generic_file_open(inode, file); if (err) return err; @@ -932,17 +928,23 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; size_t count = 0; + size_t ocount = 0; ssize_t written = 0; + ssize_t written_buffered = 0; struct inode *inode = mapping->host; ssize_t err; struct iov_iter i; + loff_t endbyte = 0; WARN_ON(iocb->ki_pos != pos); - err = generic_segment_checks(iov, &nr_segs, &count, VERIFY_READ); + ocount = 0; + err = generic_segment_checks(iov, &nr_segs, &ocount, VERIFY_READ); if (err) return err; + count = ocount; + mutex_lock(&inode->i_mutex); vfs_check_frozen(inode->i_sb, SB_FREEZE_WRITE); @@ -962,11 +964,41 @@ static ssize_t fuse_file_aio_write(struct kiocb *iocb, const struct iovec *iov, file_update_time(file); - iov_iter_init(&i, iov, nr_segs, count, 0); - written = fuse_perform_write(file, mapping, &i, pos); - if (written >= 0) - iocb->ki_pos = pos + written; + if (file->f_flags & O_DIRECT) { + written = generic_file_direct_write(iocb, iov, &nr_segs, + pos, &iocb->ki_pos, + count, ocount); + if (written < 0 || written == count) + goto out; + + pos += written; + count -= written; + iov_iter_init(&i, iov, nr_segs, count, written); + written_buffered = fuse_perform_write(file, mapping, &i, pos); + if (written_buffered < 0) { + err = written_buffered; + goto out; + } + endbyte = pos + written_buffered - 1; + + err = filemap_write_and_wait_range(file->f_mapping, pos, + endbyte); + if (err) + goto out; + + invalidate_mapping_pages(file->f_mapping, + pos >> PAGE_CACHE_SHIFT, + endbyte >> PAGE_CACHE_SHIFT); + + written += written_buffered; + iocb->ki_pos = pos + written_buffered; + } else { + iov_iter_init(&i, iov, nr_segs, count, 0); + written = fuse_perform_write(file, mapping, &i, pos); + if (written >= 0) + iocb->ki_pos = pos + written; + } out: current->backing_dev_info = NULL; mutex_unlock(&inode->i_mutex); @@ -1101,30 +1133,41 @@ static ssize_t fuse_direct_read(struct file *file, char __user *buf, return res; } -static ssize_t fuse_direct_write(struct file *file, const char __user *buf, - size_t count, loff_t *ppos) +static ssize_t __fuse_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) { struct inode *inode = file->f_path.dentry->d_inode; ssize_t res; - if (is_bad_inode(inode)) - return -EIO; - - /* Don't allow parallel writes to the same file */ - mutex_lock(&inode->i_mutex); res = generic_write_checks(file, ppos, &count, 0); if (!res) { res = fuse_direct_io(file, buf, count, ppos, 1); if (res > 0) fuse_write_update_size(inode, *ppos); } - mutex_unlock(&inode->i_mutex); fuse_invalidate_attr(inode); return res; } +static ssize_t fuse_direct_write(struct file *file, const char __user *buf, + size_t count, loff_t *ppos) +{ + struct inode *inode = file->f_path.dentry->d_inode; + ssize_t res; + + if (is_bad_inode(inode)) + return -EIO; + + /* Don't allow parallel writes to the same file */ + mutex_lock(&inode->i_mutex); + res = __fuse_direct_write(file, buf, count, ppos); + mutex_unlock(&inode->i_mutex); + + return res; +} + static void fuse_writepage_free(struct fuse_conn *fc, struct fuse_req *req) { __free_page(req->pages[0]); @@ -2077,6 +2120,57 @@ int fuse_notify_poll_wakeup(struct fuse_conn *fc, return 0; } +static ssize_t fuse_loop_dio(struct file *filp, const struct iovec *iov, + unsigned long nr_segs, loff_t *ppos, int rw) +{ + const struct iovec *vector = iov; + ssize_t ret = 0; + + while (nr_segs > 0) { + void __user *base; + size_t len; + ssize_t nr; + + base = vector->iov_base; + len = vector->iov_len; + vector++; + nr_segs--; + + if (rw == WRITE) + nr = __fuse_direct_write(filp, base, len, ppos); + else + nr = fuse_direct_read(filp, base, len, ppos); + + if (nr < 0) { + if (!ret) + ret = nr; + break; + } + ret += nr; + if (nr != len) + break; + } + + return ret; +} + + +static ssize_t +fuse_direct_IO(int rw, struct kiocb *iocb, const struct iovec *iov, + loff_t offset, unsigned long nr_segs) +{ + ssize_t ret = 0; + struct file *file = NULL; + loff_t pos = 0; + + file = iocb->ki_filp; + pos = offset; + + ret = fuse_loop_dio(file, iov, nr_segs, &pos, rw); + + return ret; +} + static const struct file_operations fuse_file_operations = { .llseek = fuse_file_llseek, .read = do_sync_read, @@ -2120,6 +2214,7 @@ static const struct address_space_operations fuse_file_aops = { .readpages = fuse_readpages, .set_page_dirty = __set_page_dirty_nobuffers, .bmap = fuse_bmap, + .direct_IO = fuse_direct_IO, }; void fuse_init_file_inode(struct inode *inode) diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index 4aec5995867..26783eb2b1f 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -947,6 +947,7 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) sb->s_magic = FUSE_SUPER_MAGIC; sb->s_op = &fuse_super_operations; sb->s_maxbytes = MAX_LFS_FILESIZE; + sb->s_time_gran = 1; sb->s_export_op = &fuse_export_operations; file = fget(d.fd); diff --git a/fs/gfs2/Kconfig b/fs/gfs2/Kconfig index c465ae066c6..eb08c9e43c2 100644 --- a/fs/gfs2/Kconfig +++ b/fs/gfs2/Kconfig @@ -1,10 +1,6 @@ config GFS2_FS tristate "GFS2 file system support" depends on (64BIT || LBDAF) - select DLM if GFS2_FS_LOCKING_DLM - select CONFIGFS_FS if GFS2_FS_LOCKING_DLM - select SYSFS if GFS2_FS_LOCKING_DLM - select IP_SCTP if DLM_SCTP select FS_POSIX_ACL select CRC32 select QUOTACTL @@ -29,7 +25,8 @@ config GFS2_FS config GFS2_FS_LOCKING_DLM bool "GFS2 DLM locking" - depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && HOTPLUG + depends on (GFS2_FS!=n) && NET && INET && (IPV6 || IPV6=n) && \ + HOTPLUG && DLM && CONFIGFS_FS && SYSFS help Multiple node locking module for GFS2 diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c index 38b7a74a0f9..9b2ff0e851b 100644 --- a/fs/gfs2/aops.c +++ b/fs/gfs2/aops.c @@ -807,7 +807,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh, if (inode == sdp->sd_rindex) { adjust_fs_space(inode); - ip->i_gh.gh_flags |= GL_NOCACHE; + sdp->sd_rindex_uptodate = 0; } brelse(dibh); @@ -873,7 +873,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping, if (inode == sdp->sd_rindex) { adjust_fs_space(inode); - ip->i_gh.gh_flags |= GL_NOCACHE; + sdp->sd_rindex_uptodate = 0; } brelse(dibh); diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 197c5c47e57..03c04febe26 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -724,7 +724,11 @@ static int do_strip(struct gfs2_inode *ip, struct buffer_head *dibh, int metadata; unsigned int revokes = 0; int x; - int error = 0; + int error; + + error = gfs2_rindex_update(sdp); + if (error) + return error; if (!*top) sm->sm_first = 0; diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index c35573abd37..a836056343f 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1844,6 +1844,10 @@ static int leaf_dealloc(struct gfs2_inode *dip, u32 index, u32 len, unsigned int x, size = len * sizeof(u64); int error; + error = gfs2_rindex_update(sdp); + if (error) + return error; + memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); ht = kzalloc(size, GFP_NOFS); diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index c98a60ee6df..a9ba2444e07 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1031,7 +1031,13 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry) struct buffer_head *bh; struct gfs2_holder ghs[3]; struct gfs2_rgrpd *rgd; - int error = -EROFS; + int error; + + error = gfs2_rindex_update(sdp); + if (error) + return error; + + error = -EROFS; gfs2_holder_init(dip->i_gl, LM_ST_EXCLUSIVE, 0, ghs); gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, ghs + 1); @@ -1224,6 +1230,10 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, return 0; } + error = gfs2_rindex_update(sdp); + if (error) + return error; + if (odip != ndip) { error = gfs2_glock_nq_init(sdp->sd_rename_gl, LM_ST_EXCLUSIVE, 0, &r_gh); @@ -1345,7 +1355,6 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry, error = alloc_required; if (error < 0) goto out_gunlock; - error = 0; if (alloc_required) { struct gfs2_qadata *qa = gfs2_qadata_get(ndip); diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index f8411bd1b80..5f5e70e047d 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -200,10 +200,11 @@ static int make_mode(const unsigned int lmstate) return -1; } -static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, +static u32 make_flags(struct gfs2_glock *gl, const unsigned int gfs_flags, const int req) { u32 lkf = DLM_LKF_VALBLK; + u32 lkid = gl->gl_lksb.sb_lkid; if (gfs_flags & LM_FLAG_TRY) lkf |= DLM_LKF_NOQUEUE; @@ -227,8 +228,11 @@ static u32 make_flags(const u32 lkid, const unsigned int gfs_flags, BUG(); } - if (lkid != 0) + if (lkid != 0) { lkf |= DLM_LKF_CONVERT; + if (test_bit(GLF_BLOCKING, &gl->gl_flags)) + lkf |= DLM_LKF_QUECVT; + } return lkf; } @@ -250,7 +254,7 @@ static int gdlm_lock(struct gfs2_glock *gl, unsigned int req_state, char strname[GDLM_STRNAME_BYTES] = ""; req = make_mode(req_state); - lkf = make_flags(gl->gl_lksb.sb_lkid, flags, req); + lkf = make_flags(gl, flags, req); gfs2_glstats_inc(gl, GFS2_LKS_DCOUNT); gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT); if (gl->gl_lksb.sb_lkid) { diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c index 19bde40b486..3df65c9ab73 100644 --- a/fs/gfs2/rgrp.c +++ b/fs/gfs2/rgrp.c @@ -332,9 +332,6 @@ struct gfs2_rgrpd *gfs2_blk2rgrpd(struct gfs2_sbd *sdp, u64 blk, bool exact) struct rb_node *n, *next; struct gfs2_rgrpd *cur; - if (gfs2_rindex_update(sdp)) - return NULL; - spin_lock(&sdp->sd_rindex_spin); n = sdp->sd_rindex_tree.rb_node; while (n) { @@ -640,6 +637,7 @@ static int read_rindex_entry(struct gfs2_inode *ip, return 0; error = 0; /* someone else read in the rgrp; free it and ignore it */ + gfs2_glock_put(rgd->rd_gl); fail: kfree(rgd->rd_bits); @@ -927,6 +925,10 @@ int gfs2_fitrim(struct file *filp, void __user *argp) } else if (copy_from_user(&r, argp, sizeof(r))) return -EFAULT; + ret = gfs2_rindex_update(sdp); + if (ret) + return ret; + rgd = gfs2_blk2rgrpd(sdp, r.start, 0); rgd_end = gfs2_blk2rgrpd(sdp, r.start + r.len, 0); diff --git a/fs/gfs2/xattr.c b/fs/gfs2/xattr.c index 2e5ba425cae..927f4df874a 100644 --- a/fs/gfs2/xattr.c +++ b/fs/gfs2/xattr.c @@ -238,6 +238,10 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh, unsigned int x; int error; + error = gfs2_rindex_update(sdp); + if (error) + return error; + if (GFS2_EA_IS_STUFFED(ea)) return 0; @@ -1330,6 +1334,10 @@ static int ea_dealloc_indirect(struct gfs2_inode *ip) unsigned int x; int error; + error = gfs2_rindex_update(sdp); + if (error) + return error; + memset(&rlist, 0, sizeof(struct gfs2_rgrp_list)); error = gfs2_meta_read(ip->i_gl, ip->i_eattr, DIO_WAIT, &indbh); @@ -1439,6 +1447,10 @@ static int ea_dealloc_block(struct gfs2_inode *ip) struct gfs2_holder gh; int error; + error = gfs2_rindex_update(sdp); + if (error) + return error; + rgd = gfs2_blk2rgrpd(sdp, ip->i_eattr, 1); if (!rgd) { gfs2_consist_inode(ip); diff --git a/fs/hfsplus/catalog.c b/fs/hfsplus/catalog.c index 4dfbfec357e..ec2a9c23f0c 100644 --- a/fs/hfsplus/catalog.c +++ b/fs/hfsplus/catalog.c @@ -366,6 +366,10 @@ int hfsplus_rename_cat(u32 cnid, err = hfs_brec_find(&src_fd); if (err) goto out; + if (src_fd.entrylength > sizeof(entry) || src_fd.entrylength < 0) { + err = -EIO; + goto out; + } hfs_bnode_read(src_fd.bnode, &entry, src_fd.entryoffset, src_fd.entrylength); diff --git a/fs/hfsplus/dir.c b/fs/hfsplus/dir.c index 88e155f895c..26b53fb09f6 100644 --- a/fs/hfsplus/dir.c +++ b/fs/hfsplus/dir.c @@ -150,6 +150,11 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) filp->f_pos++; /* fall through */ case 1: + if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { + err = -EIO; + goto out; + } + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); if (be16_to_cpu(entry.type) != HFSPLUS_FOLDER_THREAD) { @@ -181,6 +186,12 @@ static int hfsplus_readdir(struct file *filp, void *dirent, filldir_t filldir) err = -EIO; goto out; } + + if (fd.entrylength > sizeof(entry) || fd.entrylength < 0) { + err = -EIO; + goto out; + } + hfs_bnode_read(fd.bnode, &entry, fd.entryoffset, fd.entrylength); type = be16_to_cpu(entry.type); diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 28cf06e4ec8..001ef01d2fe 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -485,6 +485,7 @@ static struct inode *hugetlbfs_get_root(struct super_block *sb, inode->i_fop = &simple_dir_operations; /* directory inodes start off with i_nlink == 2 (for "." entry) */ inc_nlink(inode); + lockdep_annotate_inode_mutex_key(inode); } return inode; } diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index 806525a7269..840f70f5079 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -723,7 +723,7 @@ start_journal_io: if (commit_transaction->t_need_data_flush && (journal->j_fs_dev != journal->j_dev) && (journal->j_flags & JBD2_BARRIER)) - blkdev_issue_flush(journal->j_fs_dev, GFP_KERNEL, NULL); + blkdev_issue_flush(journal->j_fs_dev, GFP_NOFS, NULL); /* Done it all: now write the commit record asynchronously. */ if (JBD2_HAS_INCOMPAT_FEATURE(journal, @@ -859,7 +859,7 @@ wait_for_iobuf: if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_ASYNC_COMMIT) && journal->j_flags & JBD2_BARRIER) { - blkdev_issue_flush(journal->j_dev, GFP_KERNEL, NULL); + blkdev_issue_flush(journal->j_dev, GFP_NOFS, NULL); } if (err) diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c index ad271c70aa2..5a2dec2b064 100644 --- a/fs/jffs2/gc.c +++ b/fs/jffs2/gc.c @@ -234,8 +234,8 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c) return 0; jffs2_dbg(1, "No progress from erasing block; doing GC anyway\n"); - spin_lock(&c->erase_completion_lock); mutex_lock(&c->alloc_sem); + spin_lock(&c->erase_completion_lock); } /* First, work out which block we're garbage-collecting */ diff --git a/fs/libfs.c b/fs/libfs.c index 358094f0433..18d08f5db53 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -529,6 +529,7 @@ int simple_fill_super(struct super_block *s, unsigned long magic, return 0; out: d_genocide(root); + shrink_dcache_parent(root); dput(root); return -ENOMEM; } diff --git a/fs/lockd/clnt4xdr.c b/fs/lockd/clnt4xdr.c index 3ddcbb1c0a4..13ad1539fbf 100644 --- a/fs/lockd/clnt4xdr.c +++ b/fs/lockd/clnt4xdr.c @@ -241,7 +241,7 @@ static int decode_nlm4_stat(struct xdr_stream *xdr, __be32 *stat) p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) goto out_overflow; - if (unlikely(*p > nlm4_failed)) + if (unlikely(ntohl(*p) > ntohl(nlm4_failed))) goto out_bad_xdr; *stat = *p; return 0; diff --git a/fs/lockd/clntxdr.c b/fs/lockd/clntxdr.c index 3d35e3e80c1..d269ada7670 100644 --- a/fs/lockd/clntxdr.c +++ b/fs/lockd/clntxdr.c @@ -236,7 +236,7 @@ static int decode_nlm_stat(struct xdr_stream *xdr, p = xdr_inline_decode(xdr, 4); if (unlikely(p == NULL)) goto out_overflow; - if (unlikely(*p > nlm_lck_denied_grace_period)) + if (unlikely(ntohl(*p) > ntohl(nlm_lck_denied_grace_period))) goto out_enum; *stat = *p; return 0; diff --git a/fs/namei.c b/fs/namei.c index 0062dd17eb5..c42791914f8 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1429,7 +1429,7 @@ unsigned int full_name_hash(const unsigned char *name, unsigned int len) unsigned long hash = 0; for (;;) { - a = *(unsigned long *)name; + a = load_unaligned_zeropad(name); if (len < sizeof(unsigned long)) break; hash += a; @@ -1459,7 +1459,7 @@ static inline unsigned long hash_name(const char *name, unsigned int *hashp) do { hash = (hash + a) * 9; len += sizeof(unsigned long); - a = *(unsigned long *)(name+len); + a = load_unaligned_zeropad(name+len); /* Do we have any NUL or '/' bytes in this word? */ mask = has_zero(a) | has_zero(a ^ REPEAT_BYTE('/')); } while (!mask); diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 9c94297bb70..7f6a23f0244 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -38,6 +38,8 @@ #include <linux/buffer_head.h> /* various write calls */ #include <linux/prefetch.h> +#include "../pnfs.h" +#include "../internal.h" #include "blocklayout.h" #define NFSDBG_FACILITY NFSDBG_PNFS_LD @@ -868,7 +870,7 @@ nfs4_blk_get_deviceinfo(struct nfs_server *server, const struct nfs_fh *fh, * GETDEVICEINFO's maxcount */ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = max_resp_sz >> PAGE_SHIFT; + max_pages = nfs_page_array_len(0, max_resp_sz); dprintk("%s max_resp_sz %u max_pages %d\n", __func__, max_resp_sz, max_pages); diff --git a/fs/nfs/client.c b/fs/nfs/client.c index da7b5e4ff9e..60f7e4ec842 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -1729,7 +1729,8 @@ error: */ struct nfs_server *nfs_clone_server(struct nfs_server *source, struct nfs_fh *fh, - struct nfs_fattr *fattr) + struct nfs_fattr *fattr, + rpc_authflavor_t flavor) { struct nfs_server *server; struct nfs_fattr *fattr_fsinfo; @@ -1758,7 +1759,7 @@ struct nfs_server *nfs_clone_server(struct nfs_server *source, error = nfs_init_server_rpcclient(server, source->client->cl_timeout, - source->client->cl_auth->au_flavor); + flavor); if (error < 0) goto out_free_server; if (!IS_ERR(source->client_acl)) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 4aaf0316d76..8789210c690 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1429,7 +1429,7 @@ static struct dentry *nfs_atomic_lookup(struct inode *dir, struct dentry *dentry } open_flags = nd->intent.open.flags; - attr.ia_valid = 0; + attr.ia_valid = ATTR_OPEN; ctx = create_nfs_open_context(dentry, open_flags); res = ERR_CAST(ctx); @@ -1536,7 +1536,7 @@ static int nfs_open_revalidate(struct dentry *dentry, struct nameidata *nd) if (IS_ERR(ctx)) goto out; - attr.ia_valid = 0; + attr.ia_valid = ATTR_OPEN; if (openflags & O_TRUNC) { attr.ia_valid |= ATTR_SIZE; attr.ia_size = 0; diff --git a/fs/nfs/idmap.c b/fs/nfs/idmap.c index b7f348bb618..ba3019f5934 100644 --- a/fs/nfs/idmap.c +++ b/fs/nfs/idmap.c @@ -554,12 +554,16 @@ static int rpc_pipefs_event(struct notifier_block *nb, unsigned long event, struct nfs_client *clp; int error = 0; + if (!try_module_get(THIS_MODULE)) + return 0; + while ((clp = nfs_get_client_for_event(sb->s_fs_info, event))) { error = __rpc_pipefs_event(clp, event, sb); nfs_put_client(clp); if (error) break; } + module_put(THIS_MODULE); return error; } diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 2476dc69365..b777bdaba4c 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -165,7 +165,8 @@ extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, - struct nfs_fattr *); + struct nfs_fattr *, + rpc_authflavor_t); extern void nfs_mark_client_ready(struct nfs_client *clp, int state); extern int nfs4_check_client_ready(struct nfs_client *clp); extern struct nfs_client *nfs4_set_ds_client(struct nfs_client* mds_clp, @@ -186,10 +187,10 @@ static inline void nfs_fs_proc_exit(void) /* nfs4namespace.c */ #ifdef CONFIG_NFS_V4 -extern struct vfsmount *nfs_do_refmount(struct dentry *dentry); +extern struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry); #else static inline -struct vfsmount *nfs_do_refmount(struct dentry *dentry) +struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) { return ERR_PTR(-ENOENT); } @@ -234,7 +235,6 @@ extern const u32 nfs41_maxwrite_overhead; /* nfs4proc.c */ #ifdef CONFIG_NFS_V4 extern struct rpc_procinfo nfs4_procedures[]; -void nfs_fixup_secinfo_attributes(struct nfs_fattr *, struct nfs_fh *); #endif extern int nfs4_init_ds_session(struct nfs_client *clp); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index 1807866bb3a..d51868e5683 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -148,66 +148,31 @@ rpc_authflavor_t nfs_find_best_sec(struct nfs4_secinfo_flavors *flavors) return pseudoflavor; } -static int nfs_negotiate_security(const struct dentry *parent, - const struct dentry *dentry, - rpc_authflavor_t *flavor) +static struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, + struct qstr *name, + struct nfs_fh *fh, + struct nfs_fattr *fattr) { - struct page *page; - struct nfs4_secinfo_flavors *flavors; - int (*secinfo)(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); - int ret = -EPERM; - - secinfo = NFS_PROTO(parent->d_inode)->secinfo; - if (secinfo != NULL) { - page = alloc_page(GFP_KERNEL); - if (!page) { - ret = -ENOMEM; - goto out; - } - flavors = page_address(page); - ret = secinfo(parent->d_inode, &dentry->d_name, flavors); - *flavor = nfs_find_best_sec(flavors); - put_page(page); - } - -out: - return ret; -} - -static int nfs_lookup_with_sec(struct nfs_server *server, struct dentry *parent, - struct dentry *dentry, struct path *path, - struct nfs_fh *fh, struct nfs_fattr *fattr, - rpc_authflavor_t *flavor) -{ - struct rpc_clnt *clone; - struct rpc_auth *auth; int err; - err = nfs_negotiate_security(parent, path->dentry, flavor); - if (err < 0) - goto out; - clone = rpc_clone_client(server->client); - auth = rpcauth_create(*flavor, clone); - if (!auth) { - err = -EIO; - goto out_shutdown; - } - err = server->nfs_client->rpc_ops->lookup(clone, parent->d_inode, - &path->dentry->d_name, - fh, fattr); -out_shutdown: - rpc_shutdown_client(clone); -out: - return err; + if (NFS_PROTO(dir)->version == 4) + return nfs4_proc_lookup_mountpoint(dir, name, fh, fattr); + + err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); + if (err) + return ERR_PTR(err); + return rpc_clone_client(NFS_SERVER(dir)->client); } #else /* CONFIG_NFS_V4 */ -static inline int nfs_lookup_with_sec(struct nfs_server *server, - struct dentry *parent, struct dentry *dentry, - struct path *path, struct nfs_fh *fh, - struct nfs_fattr *fattr, - rpc_authflavor_t *flavor) +static inline struct rpc_clnt *nfs_lookup_mountpoint(struct inode *dir, + struct qstr *name, + struct nfs_fh *fh, + struct nfs_fattr *fattr) { - return -EPERM; + int err = NFS_PROTO(dir)->lookup(NFS_SERVER(dir)->client, dir, name, fh, fattr); + if (err) + return ERR_PTR(err); + return rpc_clone_client(NFS_SERVER(dir)->client); } #endif /* CONFIG_NFS_V4 */ @@ -226,12 +191,10 @@ static inline int nfs_lookup_with_sec(struct nfs_server *server, struct vfsmount *nfs_d_automount(struct path *path) { struct vfsmount *mnt; - struct nfs_server *server = NFS_SERVER(path->dentry->d_inode); struct dentry *parent; struct nfs_fh *fh = NULL; struct nfs_fattr *fattr = NULL; - int err; - rpc_authflavor_t flavor = RPC_AUTH_UNIX; + struct rpc_clnt *client; dprintk("--> nfs_d_automount()\n"); @@ -249,21 +212,19 @@ struct vfsmount *nfs_d_automount(struct path *path) /* Look it up again to get its attributes */ parent = dget_parent(path->dentry); - err = server->nfs_client->rpc_ops->lookup(server->client, parent->d_inode, - &path->dentry->d_name, - fh, fattr); - if (err == -EPERM && NFS_PROTO(parent->d_inode)->secinfo != NULL) - err = nfs_lookup_with_sec(server, parent, path->dentry, path, fh, fattr, &flavor); + client = nfs_lookup_mountpoint(parent->d_inode, &path->dentry->d_name, fh, fattr); dput(parent); - if (err != 0) { - mnt = ERR_PTR(err); + if (IS_ERR(client)) { + mnt = ERR_CAST(client); goto out; } if (fattr->valid & NFS_ATTR_FATTR_V4_REFERRAL) - mnt = nfs_do_refmount(path->dentry); + mnt = nfs_do_refmount(client, path->dentry); else - mnt = nfs_do_submount(path->dentry, fh, fattr, flavor); + mnt = nfs_do_submount(path->dentry, fh, fattr, client->cl_auth->au_flavor); + rpc_shutdown_client(client); + if (IS_ERR(mnt)) goto out; diff --git a/fs/nfs/nfs4_fs.h b/fs/nfs/nfs4_fs.h index 97ecc863dd7..8d75021020b 100644 --- a/fs/nfs/nfs4_fs.h +++ b/fs/nfs/nfs4_fs.h @@ -59,6 +59,7 @@ struct nfs_unique_id { #define NFS_SEQID_CONFIRMED 1 struct nfs_seqid_counter { + ktime_t create_time; int owner_id; int flags; u32 counter; @@ -204,6 +205,9 @@ struct nfs4_state_maintenance_ops { extern const struct dentry_operations nfs4_dentry_operations; extern const struct inode_operations nfs4_dir_inode_operations; +/* nfs4namespace.c */ +struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *, struct inode *, struct qstr *); + /* nfs4proc.c */ extern int nfs4_proc_setclientid(struct nfs_client *, u32, unsigned short, struct rpc_cred *, struct nfs4_setclientid_res *); extern int nfs4_proc_setclientid_confirm(struct nfs_client *, struct nfs4_setclientid_res *arg, struct rpc_cred *); @@ -212,8 +216,11 @@ extern int nfs4_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs41_init_clientid(struct nfs_client *, struct rpc_cred *); extern int nfs4_do_close(struct nfs4_state *state, gfp_t gfp_mask, int wait, bool roc); extern int nfs4_server_capabilities(struct nfs_server *server, struct nfs_fh *fhandle); -extern int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, - struct nfs4_fs_locations *fs_locations, struct page *page); +extern int nfs4_proc_fs_locations(struct rpc_clnt *, struct inode *, const struct qstr *, + struct nfs4_fs_locations *, struct page *); +extern struct rpc_clnt *nfs4_proc_lookup_mountpoint(struct inode *, struct qstr *, + struct nfs_fh *, struct nfs_fattr *); +extern int nfs4_proc_secinfo(struct inode *, const struct qstr *, struct nfs4_secinfo_flavors *); extern int nfs4_release_lockowner(struct nfs4_lock_state *); extern const struct xattr_handler *nfs4_xattr_handlers[]; diff --git a/fs/nfs/nfs4filelayoutdev.c b/fs/nfs/nfs4filelayoutdev.c index a866bbd2890..c9cff9adb2d 100644 --- a/fs/nfs/nfs4filelayoutdev.c +++ b/fs/nfs/nfs4filelayoutdev.c @@ -699,7 +699,7 @@ get_device_info(struct inode *inode, struct nfs4_deviceid *dev_id, gfp_t gfp_fla * GETDEVICEINFO's maxcount */ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = max_resp_sz >> PAGE_SHIFT; + max_pages = nfs_page_array_len(0, max_resp_sz); dprintk("%s inode %p max_resp_sz %u max_pages %d\n", __func__, inode, max_resp_sz, max_pages); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 9c8eca315f4..a7f3dedc4ec 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -52,6 +52,30 @@ Elong: } /* + * return the path component of "<server>:<path>" + * nfspath - the "<server>:<path>" string + * end - one past the last char that could contain "<server>:" + * returns NULL on failure + */ +static char *nfs_path_component(const char *nfspath, const char *end) +{ + char *p; + + if (*nfspath == '[') { + /* parse [] escaped IPv6 addrs */ + p = strchr(nfspath, ']'); + if (p != NULL && ++p < end && *p == ':') + return p + 1; + } else { + /* otherwise split on first colon */ + p = strchr(nfspath, ':'); + if (p != NULL && p < end) + return p + 1; + } + return NULL; +} + +/* * Determine the mount path as a string */ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) @@ -59,9 +83,9 @@ static char *nfs4_path(struct dentry *dentry, char *buffer, ssize_t buflen) char *limit; char *path = nfs_path(&limit, dentry, buffer, buflen); if (!IS_ERR(path)) { - char *colon = strchr(path, ':'); - if (colon && colon < limit) - path = colon + 1; + char *path_component = nfs_path_component(path, limit); + if (path_component) + return path_component; } return path; } @@ -108,6 +132,58 @@ static size_t nfs_parse_server_name(char *string, size_t len, return ret; } +static rpc_authflavor_t nfs4_negotiate_security(struct inode *inode, struct qstr *name) +{ + struct page *page; + struct nfs4_secinfo_flavors *flavors; + rpc_authflavor_t flavor; + int err; + + page = alloc_page(GFP_KERNEL); + if (!page) + return -ENOMEM; + flavors = page_address(page); + + err = nfs4_proc_secinfo(inode, name, flavors); + if (err < 0) { + flavor = err; + goto out; + } + + flavor = nfs_find_best_sec(flavors); + +out: + put_page(page); + return flavor; +} + +/* + * Please call rpc_shutdown_client() when you are done with this client. + */ +struct rpc_clnt *nfs4_create_sec_client(struct rpc_clnt *clnt, struct inode *inode, + struct qstr *name) +{ + struct rpc_clnt *clone; + struct rpc_auth *auth; + rpc_authflavor_t flavor; + + flavor = nfs4_negotiate_security(inode, name); + if (flavor < 0) + return ERR_PTR(flavor); + + clone = rpc_clone_client(clnt); + if (IS_ERR(clone)) + return clone; + + auth = rpcauth_create(flavor, clone); + if (!auth) { + rpc_shutdown_client(clone); + clone = ERR_PTR(-EIO); + } + + return clone; +} + static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, char *page, char *page2, const struct nfs4_fs_location *location) @@ -224,7 +300,7 @@ out: * @dentry - dentry of referral * */ -struct vfsmount *nfs_do_refmount(struct dentry *dentry) +struct vfsmount *nfs_do_refmount(struct rpc_clnt *client, struct dentry *dentry) { struct vfsmount *mnt = ERR_PTR(-ENOMEM); struct dentry *parent; @@ -250,7 +326,7 @@ struct vfsmount *nfs_do_refmount(struct dentry *dentry) dprintk("%s: getting locations for %s/%s\n", __func__, parent->d_name.name, dentry->d_name.name); - err = nfs4_proc_fs_locations(parent->d_inode, &dentry->d_name, fs_locations, page); + err = nfs4_proc_fs_locations(client, parent->d_inode, &dentry->d_name, fs_locations, page); dput(parent); if (err != 0 || fs_locations->nlocations <= 0 || diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index f82bde005a8..99650aaf893 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -838,7 +838,8 @@ static struct nfs4_opendata *nfs4_opendata_alloc(struct dentry *dentry, p->o_arg.open_flags = flags; p->o_arg.fmode = fmode & (FMODE_READ|FMODE_WRITE); p->o_arg.clientid = server->nfs_client->cl_clientid; - p->o_arg.id = sp->so_seqid.owner_id; + p->o_arg.id.create_time = ktime_to_ns(sp->so_seqid.create_time); + p->o_arg.id.uniquifier = sp->so_seqid.owner_id; p->o_arg.name = &dentry->d_name; p->o_arg.server = server; p->o_arg.bitmask = server->attr_bitmask; @@ -1466,8 +1467,7 @@ static void nfs4_open_prepare(struct rpc_task *task, void *calldata) goto unlock_no_action; rcu_read_unlock(); } - /* Update sequence id. */ - data->o_arg.id = sp->so_seqid.owner_id; + /* Update client id. */ data->o_arg.clientid = sp->so_server->nfs_client->cl_clientid; if (data->o_arg.claim == NFS4_OPEN_CLAIM_PREVIOUS) { task->tk_msg.rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_OPEN_NOATTR]; @@ -1954,10 +1954,19 @@ static int nfs4_do_setattr(struct inode *inode, struct rpc_cred *cred, }; int err; do { - err = nfs4_handle_exception(server, - _nfs4_do_setattr(inode, cred, fattr, sattr, state), - &exception); + err = _nfs4_do_setattr(inode, cred, fattr, sattr, state); + switch (err) { + case -NFS4ERR_OPENMODE: + if (state && !(state->state & FMODE_WRITE)) { + err = -EBADF; + if (sattr->ia_valid & ATTR_OPEN) + err = -EACCES; + goto out; + } + } + err = nfs4_handle_exception(server, err, &exception); } while (exception.retry); +out: return err; } @@ -2368,8 +2377,9 @@ static int nfs4_proc_get_root(struct nfs_server *server, struct nfs_fh *fhandle, * Note that we'll actually follow the referral later when * we detect fsid mismatch in inode revalidation */ -static int nfs4_get_referral(struct inode *dir, const struct qstr *name, - struct nfs_fattr *fattr, struct nfs_fh *fhandle) +static int nfs4_get_referral(struct rpc_clnt *client, struct inode *dir, + const struct qstr *name, struct nfs_fattr *fattr, + struct nfs_fh *fhandle) { int status = -ENOMEM; struct page *page = NULL; @@ -2382,7 +2392,7 @@ static int nfs4_get_referral(struct inode *dir, const struct qstr *name, if (locations == NULL) goto out; - status = nfs4_proc_fs_locations(dir, name, locations, page); + status = nfs4_proc_fs_locations(client, dir, name, locations, page); if (status != 0) goto out; /* Make sure server returned a different fsid for the referral */ @@ -2519,39 +2529,84 @@ static int _nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, return status; } -void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr, struct nfs_fh *fh) +static void nfs_fixup_secinfo_attributes(struct nfs_fattr *fattr) { - memset(fh, 0, sizeof(struct nfs_fh)); - fattr->fsid.major = 1; fattr->valid |= NFS_ATTR_FATTR_TYPE | NFS_ATTR_FATTR_MODE | - NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_FSID | NFS_ATTR_FATTR_MOUNTPOINT; + NFS_ATTR_FATTR_NLINK | NFS_ATTR_FATTR_MOUNTPOINT; fattr->mode = S_IFDIR | S_IRUGO | S_IXUGO; fattr->nlink = 2; } -static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, - struct nfs_fh *fhandle, struct nfs_fattr *fattr) +static int nfs4_proc_lookup_common(struct rpc_clnt **clnt, struct inode *dir, + struct qstr *name, struct nfs_fh *fhandle, + struct nfs_fattr *fattr) { struct nfs4_exception exception = { }; + struct rpc_clnt *client = *clnt; int err; do { - int status; - - status = _nfs4_proc_lookup(clnt, dir, name, fhandle, fattr); - switch (status) { + err = _nfs4_proc_lookup(client, dir, name, fhandle, fattr); + switch (err) { case -NFS4ERR_BADNAME: - return -ENOENT; + err = -ENOENT; + goto out; case -NFS4ERR_MOVED: - return nfs4_get_referral(dir, name, fattr, fhandle); + err = nfs4_get_referral(client, dir, name, fattr, fhandle); + goto out; case -NFS4ERR_WRONGSEC: - nfs_fixup_secinfo_attributes(fattr, fhandle); + err = -EPERM; + if (client != *clnt) + goto out; + + client = nfs4_create_sec_client(client, dir, name); + if (IS_ERR(client)) + return PTR_ERR(client); + + exception.retry = 1; + break; + default: + err = nfs4_handle_exception(NFS_SERVER(dir), err, &exception); } - err = nfs4_handle_exception(NFS_SERVER(dir), - status, &exception); } while (exception.retry); + +out: + if (err == 0) + *clnt = client; + else if (client != *clnt) + rpc_shutdown_client(client); + return err; } +static int nfs4_proc_lookup(struct rpc_clnt *clnt, struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int status; + struct rpc_clnt *client = NFS_CLIENT(dir); + + status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); + if (client != NFS_CLIENT(dir)) { + rpc_shutdown_client(client); + nfs_fixup_secinfo_attributes(fattr); + } + return status; +} + +struct rpc_clnt * +nfs4_proc_lookup_mountpoint(struct inode *dir, struct qstr *name, + struct nfs_fh *fhandle, struct nfs_fattr *fattr) +{ + int status; + struct rpc_clnt *client = rpc_clone_client(NFS_CLIENT(dir)); + + status = nfs4_proc_lookup_common(&client, dir, name, fhandle, fattr); + if (status < 0) { + rpc_shutdown_client(client); + return ERR_PTR(status); + } + return client; +} + static int _nfs4_proc_access(struct inode *inode, struct nfs_access_entry *entry) { struct nfs_server *server = NFS_SERVER(inode); @@ -3619,16 +3674,16 @@ out: return ret; } -static void nfs4_write_cached_acl(struct inode *inode, const char *buf, size_t acl_len) +static void nfs4_write_cached_acl(struct inode *inode, struct page **pages, size_t pgbase, size_t acl_len) { struct nfs4_cached_acl *acl; - if (buf && acl_len <= PAGE_SIZE) { + if (pages && acl_len <= PAGE_SIZE) { acl = kmalloc(sizeof(*acl) + acl_len, GFP_KERNEL); if (acl == NULL) goto out; acl->cached = 1; - memcpy(acl->data, buf, acl_len); + _copy_from_pages(acl->data, pages, pgbase, acl_len); } else { acl = kmalloc(sizeof(*acl), GFP_KERNEL); if (acl == NULL) @@ -3661,7 +3716,6 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu struct nfs_getaclres res = { .acl_len = buflen, }; - void *resp_buf; struct rpc_message msg = { .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETACL], .rpc_argp = &args, @@ -3675,24 +3729,27 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu if (npages == 0) npages = 1; + /* Add an extra page to handle the bitmap returned */ + npages++; + for (i = 0; i < npages; i++) { pages[i] = alloc_page(GFP_KERNEL); if (!pages[i]) goto out_free; } - if (npages > 1) { - /* for decoding across pages */ - res.acl_scratch = alloc_page(GFP_KERNEL); - if (!res.acl_scratch) - goto out_free; - } + + /* for decoding across pages */ + res.acl_scratch = alloc_page(GFP_KERNEL); + if (!res.acl_scratch) + goto out_free; + args.acl_len = npages * PAGE_SIZE; args.acl_pgbase = 0; + /* Let decode_getfacl know not to fail if the ACL data is larger than * the page we send as a guess */ if (buf == NULL) res.acl_flags |= NFS4_ACL_LEN_REQUEST; - resp_buf = page_address(pages[0]); dprintk("%s buf %p buflen %zu npages %d args.acl_len %zu\n", __func__, buf, buflen, npages, args.acl_len); @@ -3703,9 +3760,9 @@ static ssize_t __nfs4_get_acl_uncached(struct inode *inode, void *buf, size_t bu acl_len = res.acl_len - res.acl_data_offset; if (acl_len > args.acl_len) - nfs4_write_cached_acl(inode, NULL, acl_len); + nfs4_write_cached_acl(inode, NULL, 0, acl_len); else - nfs4_write_cached_acl(inode, resp_buf + res.acl_data_offset, + nfs4_write_cached_acl(inode, pages, res.acl_data_offset, acl_len); if (buf) { ret = -ERANGE; @@ -4558,7 +4615,9 @@ static int _nfs4_do_setlk(struct nfs4_state *state, int cmd, struct file_lock *f static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .inode = state->inode, + }; int err; do { @@ -4576,7 +4635,9 @@ static int nfs4_lock_reclaim(struct nfs4_state *state, struct file_lock *request static int nfs4_lock_expired(struct nfs4_state *state, struct file_lock *request) { struct nfs_server *server = NFS_SERVER(state->inode); - struct nfs4_exception exception = { }; + struct nfs4_exception exception = { + .inode = state->inode, + }; int err; err = nfs4_set_lock_state(state, request); @@ -4676,6 +4737,7 @@ static int nfs4_proc_setlk(struct nfs4_state *state, int cmd, struct file_lock * { struct nfs4_exception exception = { .state = state, + .inode = state->inode, }; int err; @@ -4721,6 +4783,20 @@ nfs4_proc_lock(struct file *filp, int cmd, struct file_lock *request) if (state == NULL) return -ENOLCK; + /* + * Don't rely on the VFS having checked the file open mode, + * since it won't do this for flock() locks. + */ + switch (request->fl_type & (F_RDLCK|F_WRLCK|F_UNLCK)) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; + break; + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; + } + do { status = nfs4_proc_setlk(state, cmd, request); if ((status != -EAGAIN) || IS_SETLK(cmd)) @@ -4891,8 +4967,10 @@ static void nfs_fixup_referral_attributes(struct nfs_fattr *fattr) fattr->nlink = 2; } -int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, - struct nfs4_fs_locations *fs_locations, struct page *page) +static int _nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, + const struct qstr *name, + struct nfs4_fs_locations *fs_locations, + struct page *page) { struct nfs_server *server = NFS_SERVER(dir); u32 bitmask[2] = { @@ -4926,11 +5004,26 @@ int nfs4_proc_fs_locations(struct inode *dir, const struct qstr *name, nfs_fattr_init(&fs_locations->fattr); fs_locations->server = server; fs_locations->nlocations = 0; - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, &res.seq_res, 0); + status = nfs4_call_sync(client, server, &msg, &args.seq_args, &res.seq_res, 0); dprintk("%s: returned status = %d\n", __func__, status); return status; } +int nfs4_proc_fs_locations(struct rpc_clnt *client, struct inode *dir, + const struct qstr *name, + struct nfs4_fs_locations *fs_locations, + struct page *page) +{ + struct nfs4_exception exception = { }; + int err; + do { + err = nfs4_handle_exception(NFS_SERVER(dir), + _nfs4_proc_fs_locations(client, dir, name, fs_locations, page), + &exception); + } while (exception.retry); + return err; +} + static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct nfs4_secinfo_flavors *flavors) { int status; @@ -4953,8 +5046,8 @@ static int _nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, struct return status; } -static int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, - struct nfs4_secinfo_flavors *flavors) +int nfs4_proc_secinfo(struct inode *dir, const struct qstr *name, + struct nfs4_secinfo_flavors *flavors) { struct nfs4_exception exception = { }; int err; @@ -5029,10 +5122,9 @@ int nfs4_proc_exchange_id(struct nfs_client *clp, struct rpc_cred *cred) nfs4_construct_boot_verifier(clp, &verifier); args.id_len = scnprintf(args.id, sizeof(args.id), - "%s/%s.%s/%u", + "%s/%s/%u", clp->cl_ipaddr, - init_utsname()->nodename, - init_utsname()->domainname, + clp->cl_rpcclient->cl_nodename, clp->cl_rpcclient->cl_auth->au_flavor); res.server_scope = kzalloc(sizeof(struct server_scope), GFP_KERNEL); diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index 0f43414eb25..7f0fcfc1fe9 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -393,6 +393,7 @@ nfs4_remove_state_owner_locked(struct nfs4_state_owner *sp) static void nfs4_init_seqid_counter(struct nfs_seqid_counter *sc) { + sc->create_time = ktime_get(); sc->flags = 0; sc->counter = 0; spin_lock_init(&sc->lock); @@ -434,13 +435,17 @@ nfs4_alloc_state_owner(struct nfs_server *server, static void nfs4_drop_state_owner(struct nfs4_state_owner *sp) { - if (!RB_EMPTY_NODE(&sp->so_server_node)) { + struct rb_node *rb_node = &sp->so_server_node; + + if (!RB_EMPTY_NODE(rb_node)) { struct nfs_server *server = sp->so_server; struct nfs_client *clp = server->nfs_client; spin_lock(&clp->cl_lock); - rb_erase(&sp->so_server_node, &server->state_owners); - RB_CLEAR_NODE(&sp->so_server_node); + if (!RB_EMPTY_NODE(rb_node)) { + rb_erase(rb_node, &server->state_owners); + RB_CLEAR_NODE(rb_node); + } spin_unlock(&clp->cl_lock); } } @@ -516,6 +521,14 @@ out: /** * nfs4_put_state_owner - Release a nfs4_state_owner * @sp: state owner data to release + * + * Note that we keep released state owners on an LRU + * list. + * This caches valid state owners so that they can be + * reused, to avoid the OPEN_CONFIRM on minor version 0. + * It also pins the uniquifier of dropped state owners for + * a while, to ensure that those state owner names are + * never reused. */ void nfs4_put_state_owner(struct nfs4_state_owner *sp) { @@ -525,15 +538,9 @@ void nfs4_put_state_owner(struct nfs4_state_owner *sp) if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock)) return; - if (!RB_EMPTY_NODE(&sp->so_server_node)) { - sp->so_expires = jiffies; - list_add_tail(&sp->so_lru, &server->state_owners_lru); - spin_unlock(&clp->cl_lock); - } else { - nfs4_remove_state_owner_locked(sp); - spin_unlock(&clp->cl_lock); - nfs4_free_state_owner(sp); - } + sp->so_expires = jiffies; + list_add_tail(&sp->so_lru, &server->state_owners_lru); + spin_unlock(&clp->cl_lock); } /** diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index c74fdb114b4..c54aae364be 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -74,7 +74,7 @@ static int nfs4_stat_to_errno(int); /* lock,open owner id: * we currently use size 2 (u64) out of (NFS4_OPAQUE_LIMIT >> 2) */ -#define open_owner_id_maxsz (1 + 1 + 4) +#define open_owner_id_maxsz (1 + 2 + 1 + 1 + 2) #define lock_owner_id_maxsz (1 + 1 + 4) #define decode_lockowner_maxsz (1 + XDR_QUADLEN(IDMAP_NAMESZ)) #define compound_encode_hdr_maxsz (3 + (NFS4_MAXTAGLEN >> 2)) @@ -1340,12 +1340,13 @@ static inline void encode_openhdr(struct xdr_stream *xdr, const struct nfs_opena */ encode_nfs4_seqid(xdr, arg->seqid); encode_share_access(xdr, arg->fmode); - p = reserve_space(xdr, 32); + p = reserve_space(xdr, 36); p = xdr_encode_hyper(p, arg->clientid); - *p++ = cpu_to_be32(20); + *p++ = cpu_to_be32(24); p = xdr_encode_opaque_fixed(p, "open id:", 8); *p++ = cpu_to_be32(arg->server->s_dev); - xdr_encode_hyper(p, arg->id); + *p++ = cpu_to_be32(arg->id.uniquifier); + xdr_encode_hyper(p, arg->id.create_time); } static inline void encode_createmode(struct xdr_stream *xdr, const struct nfs_openargs *arg) @@ -4257,8 +4258,6 @@ static int decode_getfattr_attrs(struct xdr_stream *xdr, uint32_t *bitmap, status = decode_attr_error(xdr, bitmap, &err); if (status < 0) goto xdr_error; - if (err == -NFS4ERR_WRONGSEC) - nfs_fixup_secinfo_attributes(fattr, fh); status = decode_attr_filehandle(xdr, bitmap, fh); if (status < 0) @@ -4901,11 +4900,19 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, bitmap[3] = {0}; struct kvec *iov = req->rq_rcv_buf.head; int status; + size_t page_len = xdr->buf->page_len; res->acl_len = 0; if ((status = decode_op_hdr(xdr, OP_GETATTR)) != 0) goto out; + bm_p = xdr->p; + res->acl_data_offset = be32_to_cpup(bm_p) + 2; + res->acl_data_offset <<= 2; + /* Check if the acl data starts beyond the allocated buffer */ + if (res->acl_data_offset > page_len) + return -ERANGE; + if ((status = decode_attr_bitmap(xdr, bitmap)) != 0) goto out; if ((status = decode_attr_length(xdr, &attrlen, &savep)) != 0) @@ -4915,28 +4922,24 @@ static int decode_getacl(struct xdr_stream *xdr, struct rpc_rqst *req, return -EIO; if (likely(bitmap[0] & FATTR4_WORD0_ACL)) { size_t hdrlen; - u32 recvd; /* The bitmap (xdr len + bitmaps) and the attr xdr len words * are stored with the acl data to handle the problem of * variable length bitmaps.*/ xdr->p = bm_p; - res->acl_data_offset = be32_to_cpup(bm_p) + 2; - res->acl_data_offset <<= 2; /* We ignore &savep and don't do consistency checks on * the attr length. Let userspace figure it out.... */ hdrlen = (u8 *)xdr->p - (u8 *)iov->iov_base; attrlen += res->acl_data_offset; - recvd = req->rq_rcv_buf.len - hdrlen; - if (attrlen > recvd) { + if (attrlen > page_len) { if (res->acl_flags & NFS4_ACL_LEN_REQUEST) { /* getxattr interface called with a NULL buf */ res->acl_len = attrlen; goto out; } - dprintk("NFS: acl reply: attrlen %u > recvd %u\n", - attrlen, recvd); + dprintk("NFS: acl reply: attrlen %u > page_len %zu\n", + attrlen, page_len); return -EINVAL; } xdr_read_pages(xdr, attrlen); @@ -5089,16 +5092,13 @@ out_err: return -EINVAL; } -static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) +static int decode_secinfo_common(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) { struct nfs4_secinfo_flavor *sec_flavor; int status; __be32 *p; int i, num_flavors; - status = decode_op_hdr(xdr, OP_SECINFO); - if (status) - goto out; p = xdr_inline_decode(xdr, 4); if (unlikely(!p)) goto out_overflow; @@ -5124,6 +5124,7 @@ static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) res->flavors->num_flavors++; } + status = 0; out: return status; out_overflow: @@ -5131,7 +5132,23 @@ out_overflow: return -EIO; } +static int decode_secinfo(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) +{ + int status = decode_op_hdr(xdr, OP_SECINFO); + if (status) + return status; + return decode_secinfo_common(xdr, res); +} + #if defined(CONFIG_NFS_V4_1) +static int decode_secinfo_no_name(struct xdr_stream *xdr, struct nfs4_secinfo_res *res) +{ + int status = decode_op_hdr(xdr, OP_SECINFO_NO_NAME); + if (status) + return status; + return decode_secinfo_common(xdr, res); +} + static int decode_exchange_id(struct xdr_stream *xdr, struct nfs41_exchange_id_res *res) { @@ -6816,7 +6833,7 @@ static int nfs4_xdr_dec_secinfo_no_name(struct rpc_rqst *rqstp, status = decode_putrootfh(xdr); if (status) goto out; - status = decode_secinfo(xdr, res); + status = decode_secinfo_no_name(xdr, res); out: return status; } diff --git a/fs/nfs/objlayout/objlayout.c b/fs/nfs/objlayout/objlayout.c index 8d45f1c318c..595c5fc21a1 100644 --- a/fs/nfs/objlayout/objlayout.c +++ b/fs/nfs/objlayout/objlayout.c @@ -604,7 +604,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, { struct objlayout_deviceinfo *odi; struct pnfs_device pd; - struct super_block *sb; struct page *page, **pages; u32 *p; int err; @@ -623,7 +622,6 @@ int objlayout_get_deviceinfo(struct pnfs_layout_hdr *pnfslay, pd.pglen = PAGE_SIZE; pd.mincount = 0; - sb = pnfslay->plh_inode->i_sb; err = nfs4_proc_getdeviceinfo(NFS_SERVER(pnfslay->plh_inode), &pd); dprintk("%s nfs_getdeviceinfo returned %d\n", __func__, err); if (err) diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index b5d45158694..38512bcd2e9 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -587,7 +587,7 @@ send_layoutget(struct pnfs_layout_hdr *lo, /* allocate pages for xdr post processing */ max_resp_sz = server->nfs_client->cl_session->fc_attrs.max_resp_sz; - max_pages = max_resp_sz >> PAGE_SHIFT; + max_pages = nfs_page_array_len(0, max_resp_sz); pages = kcalloc(max_pages, sizeof(struct page *), gfp_flags); if (!pages) diff --git a/fs/nfs/read.c b/fs/nfs/read.c index 9a0e8ef4a40..0a4be28c2ea 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -322,7 +322,7 @@ out_bad: while (!list_empty(res)) { data = list_entry(res->next, struct nfs_read_data, list); list_del(&data->list); - nfs_readdata_free(data); + nfs_readdata_release(data); } nfs_readpage_release(req); return -ENOMEM; diff --git a/fs/nfs/super.c b/fs/nfs/super.c index 37412f706b3..4ac7fca7e4b 100644 --- a/fs/nfs/super.c +++ b/fs/nfs/super.c @@ -2428,7 +2428,7 @@ nfs_xdev_mount(struct file_system_type *fs_type, int flags, dprintk("--> nfs_xdev_mount()\n"); /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err_noserver; @@ -2767,11 +2767,15 @@ static struct vfsmount *nfs_do_root_mount(struct file_system_type *fs_type, char *root_devname; size_t len; - len = strlen(hostname) + 3; + len = strlen(hostname) + 5; root_devname = kmalloc(len, GFP_KERNEL); if (root_devname == NULL) return ERR_PTR(-ENOMEM); - snprintf(root_devname, len, "%s:/", hostname); + /* Does hostname needs to be enclosed in brackets? */ + if (strchr(hostname, ':')) + snprintf(root_devname, len, "[%s]:/", hostname); + else + snprintf(root_devname, len, "%s:/", hostname); root_mnt = vfs_kern_mount(fs_type, flags, root_devname, data); kfree(root_devname); return root_mnt; @@ -2951,7 +2955,7 @@ nfs4_xdev_mount(struct file_system_type *fs_type, int flags, dprintk("--> nfs4_xdev_mount()\n"); /* create a new volume representation */ - server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr); + server = nfs_clone_server(NFS_SB(data->sb), data->fh, data->fattr, data->authflavor); if (IS_ERR(server)) { error = PTR_ERR(server); goto out_err_noserver; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 2c68818f68a..c07462320f6 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -682,7 +682,8 @@ static struct nfs_page *nfs_try_to_update_request(struct inode *inode, req->wb_bytes = rqend - req->wb_offset; out_unlock: spin_unlock(&inode->i_lock); - nfs_clear_request_commit(req); + if (req) + nfs_clear_request_commit(req); return req; out_flushme: spin_unlock(&inode->i_lock); @@ -1018,7 +1019,7 @@ out_bad: while (!list_empty(res)) { data = list_entry(res->next, struct nfs_write_data, list); list_del(&data->list); - nfs_writedata_free(data); + nfs_writedata_release(data); } nfs_redirty_request(req); return -ENOMEM; diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 08c6e36ab2e..43f46cd9ede 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -803,13 +803,13 @@ encode_entry_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, return p; } -static int +static __be32 compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, const char *name, int namlen) { struct svc_export *exp; struct dentry *dparent, *dchild; - int rv = 0; + __be32 rv = nfserr_noent; dparent = cd->fh.fh_dentry; exp = cd->fh.fh_export; @@ -817,26 +817,20 @@ compose_entry_fh(struct nfsd3_readdirres *cd, struct svc_fh *fhp, if (isdotent(name, namlen)) { if (namlen == 2) { dchild = dget_parent(dparent); - if (dchild == dparent) { - /* filesystem root - cannot return filehandle for ".." */ - dput(dchild); - return -ENOENT; - } + /* filesystem root - cannot return filehandle for ".." */ + if (dchild == dparent) + goto out; } else dchild = dget(dparent); } else dchild = lookup_one_len(name, dparent, namlen); if (IS_ERR(dchild)) - return -ENOENT; - rv = -ENOENT; + return rv; if (d_mountpoint(dchild)) goto out; - rv = fh_compose(fhp, exp, dchild, &cd->fh); - if (rv) - goto out; if (!dchild->d_inode) goto out; - rv = 0; + rv = fh_compose(fhp, exp, dchild, &cd->fh); out: dput(dchild); return rv; @@ -845,7 +839,7 @@ out: static __be32 *encode_entryplus_baggage(struct nfsd3_readdirres *cd, __be32 *p, const char *name, int namlen) { struct svc_fh fh; - int err; + __be32 err; fh_init(&fh, NFS3_FHSIZE); err = compose_entry_fh(cd, &fh, name, namlen); diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c index 2ed14dfd00a..987e719fbae 100644 --- a/fs/nfsd/nfs4proc.c +++ b/fs/nfsd/nfs4proc.c @@ -235,17 +235,17 @@ do_open_lookup(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nfsd4_o */ if (open->op_createmode == NFS4_CREATE_EXCLUSIVE && status == 0) open->op_bmval[1] = (FATTR4_WORD1_TIME_ACCESS | - FATTR4_WORD1_TIME_MODIFY); + FATTR4_WORD1_TIME_MODIFY); } else { status = nfsd_lookup(rqstp, current_fh, open->op_fname.data, open->op_fname.len, resfh); fh_unlock(current_fh); - if (status) - goto out; - status = nfsd_check_obj_isreg(resfh); } if (status) goto out; + status = nfsd_check_obj_isreg(resfh); + if (status) + goto out; if (is_create_with_attrs(open) && open->op_acl != NULL) do_set_nfs4_acl(rqstp, resfh, open->op_acl, open->op_bmval); @@ -841,6 +841,7 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct nfsd4_setattr *setattr) { __be32 status = nfs_ok; + int err; if (setattr->sa_iattr.ia_valid & ATTR_SIZE) { nfs4_lock_state(); @@ -852,9 +853,9 @@ nfsd4_setattr(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, return status; } } - status = fh_want_write(&cstate->current_fh); - if (status) - return status; + err = fh_want_write(&cstate->current_fh); + if (err) + return nfserrno(err); status = nfs_ok; status = check_attr_support(rqstp, cstate, setattr->sa_bmval, diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c index 4767429264a..ed3f9206a0e 100644 --- a/fs/nfsd/nfs4recover.c +++ b/fs/nfsd/nfs4recover.c @@ -577,7 +577,7 @@ cld_pipe_downcall(struct file *filp, const char __user *src, size_t mlen) struct cld_net *cn = nn->cld_net; if (mlen != sizeof(*cmsg)) { - dprintk("%s: got %lu bytes, expected %lu\n", __func__, mlen, + dprintk("%s: got %zu bytes, expected %zu\n", __func__, mlen, sizeof(*cmsg)); return -EINVAL; } diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c index 1841f8bf845..7f71c69cdcd 100644 --- a/fs/nfsd/nfs4state.c +++ b/fs/nfsd/nfs4state.c @@ -4211,16 +4211,14 @@ out: * vfs_test_lock. (Arguably perhaps test_lock should be done with an * inode operation.) */ -static int nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) +static __be32 nfsd_test_lock(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file_lock *lock) { struct file *file; - int err; - - err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); - if (err) - return err; - err = vfs_test_lock(file, lock); - nfsd_close(file); + __be32 err = nfsd_open(rqstp, fhp, S_IFREG, NFSD_MAY_READ, &file); + if (!err) { + err = nfserrno(vfs_test_lock(file, lock)); + nfsd_close(file); + } return err; } @@ -4234,7 +4232,6 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, struct inode *inode; struct file_lock file_lock; struct nfs4_lockowner *lo; - int error; __be32 status; if (locks_in_grace()) @@ -4280,12 +4277,10 @@ nfsd4_lockt(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate, nfs4_transform_lock_offset(&file_lock); - status = nfs_ok; - error = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock); - if (error) { - status = nfserrno(error); + status = nfsd_test_lock(rqstp, &cstate->current_fh, &file_lock); + if (status) goto out; - } + if (file_lock.fl_type != F_UNLCK) { status = nfserr_denied; nfs4_set_lock_denied(&file_lock, &lockt->lt_denied); diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index bcd8904ab1e..74c00bc92b9 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -1392,7 +1392,7 @@ nfsd4_decode_test_stateid(struct nfsd4_compoundargs *argp, struct nfsd4_test_sta for (i = 0; i < test_stateid->ts_num_ids; i++) { stateid = kmalloc(sizeof(struct nfsd4_test_stateid_id), GFP_KERNEL); if (!stateid) { - status = PTR_ERR(stateid); + status = nfserrno(-ENOMEM); goto out; } @@ -3410,7 +3410,7 @@ nfsd4_encode_test_stateid(struct nfsd4_compoundres *resp, int nfserr, *p++ = htonl(test_stateid->ts_num_ids); list_for_each_entry_safe(stateid, next, &test_stateid->ts_stateid_list, ts_id_list) { - *p++ = htonl(stateid->ts_id_status); + *p++ = stateid->ts_id_status; } ADJUST_ARGS(); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 296d671654d..568666156ea 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1458,7 +1458,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp, switch (createmode) { case NFS3_CREATE_UNCHECKED: if (! S_ISREG(dchild->d_inode->i_mode)) - err = nfserr_exist; + goto out; else if (truncp) { /* in nfsv4, we need to treat this case a little * differently. we don't want to truncate the diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index 3165aebb43c..31b9463fba1 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -1134,7 +1134,7 @@ static int ocfs2_adjust_rightmost_branch(handle_t *handle, } el = path_leaf_el(path); - rec = &el->l_recs[le32_to_cpu(el->l_next_free_rec) - 1]; + rec = &el->l_recs[le16_to_cpu(el->l_next_free_rec) - 1]; ocfs2_adjust_rightmost_records(handle, et, path, rec); diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c index cf782338266..9f32d7cbb7a 100644 --- a/fs/ocfs2/refcounttree.c +++ b/fs/ocfs2/refcounttree.c @@ -1036,14 +1036,14 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci, tmp_el = left_path->p_node[subtree_root].el; blkno = left_path->p_node[subtree_root+1].bh->b_blocknr; - for (i = 0; i < le32_to_cpu(tmp_el->l_next_free_rec); i++) { + for (i = 0; i < le16_to_cpu(tmp_el->l_next_free_rec); i++) { if (le64_to_cpu(tmp_el->l_recs[i].e_blkno) == blkno) { *cpos_end = le32_to_cpu(tmp_el->l_recs[i+1].e_cpos); break; } } - BUG_ON(i == le32_to_cpu(tmp_el->l_next_free_rec)); + BUG_ON(i == le16_to_cpu(tmp_el->l_next_free_rec)); out: ocfs2_free_path(left_path); @@ -1468,7 +1468,7 @@ static int ocfs2_divide_leaf_refcount_block(struct buffer_head *ref_leaf_bh, trace_ocfs2_divide_leaf_refcount_block( (unsigned long long)ref_leaf_bh->b_blocknr, - le32_to_cpu(rl->rl_count), le32_to_cpu(rl->rl_used)); + le16_to_cpu(rl->rl_count), le16_to_cpu(rl->rl_used)); /* * XXX: Improvement later. @@ -2411,7 +2411,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, rb = (struct ocfs2_refcount_block *) prev_bh->b_data; - if (le64_to_cpu(rb->rf_records.rl_used) + + if (le16_to_cpu(rb->rf_records.rl_used) + recs_add > le16_to_cpu(rb->rf_records.rl_count)) ref_blocks++; @@ -2476,7 +2476,7 @@ static int ocfs2_calc_refcount_meta_credits(struct super_block *sb, if (prev_bh) { rb = (struct ocfs2_refcount_block *)prev_bh->b_data; - if (le64_to_cpu(rb->rf_records.rl_used) + recs_add > + if (le16_to_cpu(rb->rf_records.rl_used) + recs_add > le16_to_cpu(rb->rf_records.rl_count)) ref_blocks++; @@ -3629,7 +3629,7 @@ int ocfs2_refcounted_xattr_delete_need(struct inode *inode, * one will split a refcount rec, so totally we need * clusters * 2 new refcount rec. */ - if (le64_to_cpu(rb->rf_records.rl_used) + clusters * 2 > + if (le16_to_cpu(rb->rf_records.rl_used) + clusters * 2 > le16_to_cpu(rb->rf_records.rl_count)) ref_blocks++; diff --git a/fs/ocfs2/suballoc.c b/fs/ocfs2/suballoc.c index ba5d97e4a73..f169da4624f 100644 --- a/fs/ocfs2/suballoc.c +++ b/fs/ocfs2/suballoc.c @@ -600,7 +600,7 @@ static void ocfs2_bg_alloc_cleanup(handle_t *handle, ret = ocfs2_free_clusters(handle, cluster_ac->ac_inode, cluster_ac->ac_bh, le64_to_cpu(rec->e_blkno), - le32_to_cpu(rec->e_leaf_clusters)); + le16_to_cpu(rec->e_leaf_clusters)); if (ret) mlog_errno(ret); /* Try all the clusters to free */ @@ -1628,7 +1628,7 @@ static int ocfs2_bg_discontig_fix_by_rec(struct ocfs2_suballoc_result *res, { unsigned int bpc = le16_to_cpu(cl->cl_bpc); unsigned int bitoff = le32_to_cpu(rec->e_cpos) * bpc; - unsigned int bitcount = le32_to_cpu(rec->e_leaf_clusters) * bpc; + unsigned int bitcount = le16_to_cpu(rec->e_leaf_clusters) * bpc; if (res->sr_bit_offset < bitoff) return 0; diff --git a/fs/pipe.c b/fs/pipe.c index 25feaa3faac..fec5e4ad071 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -346,6 +346,16 @@ static const struct pipe_buf_operations anon_pipe_buf_ops = { .get = generic_pipe_buf_get, }; +static const struct pipe_buf_operations packet_pipe_buf_ops = { + .can_merge = 0, + .map = generic_pipe_buf_map, + .unmap = generic_pipe_buf_unmap, + .confirm = generic_pipe_buf_confirm, + .release = anon_pipe_buf_release, + .steal = generic_pipe_buf_steal, + .get = generic_pipe_buf_get, +}; + static ssize_t pipe_read(struct kiocb *iocb, const struct iovec *_iov, unsigned long nr_segs, loff_t pos) @@ -407,6 +417,13 @@ redo: ret += chars; buf->offset += chars; buf->len -= chars; + + /* Was it a packet buffer? Clean up and exit */ + if (buf->flags & PIPE_BUF_FLAG_PACKET) { + total_len = chars; + buf->len = 0; + } + if (!buf->len) { buf->ops = NULL; ops->release(pipe, buf); @@ -459,6 +476,11 @@ redo: return ret; } +static inline int is_packetized(struct file *file) +{ + return (file->f_flags & O_DIRECT) != 0; +} + static ssize_t pipe_write(struct kiocb *iocb, const struct iovec *_iov, unsigned long nr_segs, loff_t ppos) @@ -593,6 +615,11 @@ redo2: buf->ops = &anon_pipe_buf_ops; buf->offset = 0; buf->len = chars; + buf->flags = 0; + if (is_packetized(filp)) { + buf->ops = &packet_pipe_buf_ops; + buf->flags = PIPE_BUF_FLAG_PACKET; + } pipe->nrbufs = ++bufs; pipe->tmp_page = NULL; @@ -1013,7 +1040,7 @@ struct file *create_write_pipe(int flags) goto err_dentry; f->f_mapping = inode->i_mapping; - f->f_flags = O_WRONLY | (flags & O_NONBLOCK); + f->f_flags = O_WRONLY | (flags & (O_NONBLOCK | O_DIRECT)); f->f_version = 0; return f; @@ -1057,7 +1084,7 @@ int do_pipe_flags(int *fd, int flags) int error; int fdw, fdr; - if (flags & ~(O_CLOEXEC | O_NONBLOCK)) + if (flags & ~(O_CLOEXEC | O_NONBLOCK | O_DIRECT)) return -EINVAL; fw = create_write_pipe(flags); diff --git a/fs/proc/base.c b/fs/proc/base.c index 1c8b280146d..57b8159f26f 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1799,10 +1799,15 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) if (task) { files = get_files_struct(task); if (files) { + struct file *file; rcu_read_lock(); - if (fcheck_files(files, fd)) { + file = fcheck_files(files, fd); + if (file) { + unsigned i_mode, f_mode = file->f_mode; + rcu_read_unlock(); put_files_struct(files); + if (task_dumpable(task)) { rcu_read_lock(); cred = __task_cred(task); @@ -1813,7 +1818,14 @@ static int tid_fd_revalidate(struct dentry *dentry, struct nameidata *nd) inode->i_uid = 0; inode->i_gid = 0; } - inode->i_mode &= ~(S_ISUID | S_ISGID); + + i_mode = S_IFLNK; + if (f_mode & FMODE_READ) + i_mode |= S_IRUSR | S_IXUSR; + if (f_mode & FMODE_WRITE) + i_mode |= S_IWUSR | S_IXUSR; + inode->i_mode = i_mode; + security_task_to_inode(task, inode); put_task_struct(task); return 1; @@ -1837,8 +1849,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { unsigned fd = *(const unsigned *)ptr; - struct file *file; - struct files_struct *files; struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); @@ -1848,25 +1858,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, goto out; ei = PROC_I(inode); ei->fd = fd; - files = get_files_struct(task); - if (!files) - goto out_iput; - inode->i_mode = S_IFLNK; - - /* - * We are not taking a ref to the file structure, so we must - * hold ->file_lock. - */ - spin_lock(&files->file_lock); - file = fcheck_files(files, fd); - if (!file) - goto out_unlock; - if (file->f_mode & FMODE_READ) - inode->i_mode |= S_IRUSR | S_IXUSR; - if (file->f_mode & FMODE_WRITE) - inode->i_mode |= S_IWUSR | S_IXUSR; - spin_unlock(&files->file_lock); - put_files_struct(files); inode->i_op = &proc_pid_link_inode_operations; inode->i_size = 64; @@ -1879,12 +1870,6 @@ static struct dentry *proc_fd_instantiate(struct inode *dir, out: return error; -out_unlock: - spin_unlock(&files->file_lock); - put_files_struct(files); -out_iput: - iput(inode); - goto out; } static struct dentry *proc_lookupfd_common(struct inode *dir, @@ -2177,16 +2162,16 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, goto out; result = ERR_PTR(-EACCES); - if (lock_trace(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; result = ERR_PTR(-ENOENT); if (dname_to_vma_addr(dentry, &vm_start, &vm_end)) - goto out_unlock; + goto out_put_task; mm = get_task_mm(task); if (!mm) - goto out_unlock; + goto out_put_task; down_read(&mm->mmap_sem); vma = find_exact_vma(mm, vm_start, vm_end); @@ -2198,8 +2183,6 @@ static struct dentry *proc_map_files_lookup(struct inode *dir, out_no_vma: up_read(&mm->mmap_sem); mmput(mm); -out_unlock: - unlock_trace(task); out_put_task: put_task_struct(task); out: @@ -2233,7 +2216,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) goto out; ret = -EACCES; - if (lock_trace(task)) + if (!ptrace_may_access(task, PTRACE_MODE_READ)) goto out_put_task; ret = 0; @@ -2241,12 +2224,12 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) case 0: ino = inode->i_ino; if (filldir(dirent, ".", 1, 0, ino, DT_DIR) < 0) - goto out_unlock; + goto out_put_task; filp->f_pos++; case 1: ino = parent_ino(dentry); if (filldir(dirent, "..", 2, 1, ino, DT_DIR) < 0) - goto out_unlock; + goto out_put_task; filp->f_pos++; default: { @@ -2257,7 +2240,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) mm = get_task_mm(task); if (!mm) - goto out_unlock; + goto out_put_task; down_read(&mm->mmap_sem); nr_files = 0; @@ -2287,7 +2270,7 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) flex_array_free(fa); up_read(&mm->mmap_sem); mmput(mm); - goto out_unlock; + goto out_put_task; } for (i = 0, vma = mm->mmap, pos = 2; vma; vma = vma->vm_next) { @@ -2332,8 +2315,6 @@ proc_map_files_readdir(struct file *filp, void *dirent, filldir_t filldir) } } -out_unlock: - unlock_trace(task); out_put_task: put_task_struct(task); out: diff --git a/fs/proc/stat.c b/fs/proc/stat.c index 6a0c62d6e44..64c3b317236 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -18,19 +18,39 @@ #ifndef arch_irq_stat #define arch_irq_stat() 0 #endif -#ifndef arch_idle_time -#define arch_idle_time(cpu) 0 -#endif + +#ifdef arch_idle_time + +static cputime64_t get_idle_time(int cpu) +{ + cputime64_t idle; + + idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; + if (cpu_online(cpu) && !nr_iowait_cpu(cpu)) + idle += arch_idle_time(cpu); + return idle; +} + +static cputime64_t get_iowait_time(int cpu) +{ + cputime64_t iowait; + + iowait = kcpustat_cpu(cpu).cpustat[CPUTIME_IOWAIT]; + if (cpu_online(cpu) && nr_iowait_cpu(cpu)) + iowait += arch_idle_time(cpu); + return iowait; +} + +#else static u64 get_idle_time(int cpu) { u64 idle, idle_time = get_cpu_idle_time_us(cpu, NULL); - if (idle_time == -1ULL) { + if (idle_time == -1ULL) /* !NO_HZ so we can rely on cpustat.idle */ idle = kcpustat_cpu(cpu).cpustat[CPUTIME_IDLE]; - idle += arch_idle_time(cpu); - } else + else idle = usecs_to_cputime64(idle_time); return idle; @@ -49,6 +69,8 @@ static u64 get_iowait_time(int cpu) return iowait; } +#endif + static int show_stat(struct seq_file *p, void *v) { int i, j; diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 2b9a7607cbd..1030a716d15 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -597,9 +597,6 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, if (!page) continue; - if (PageReserved(page)) - continue; - /* Clear accessed and referenced bits. */ ptep_test_and_clear_young(vma, addr, pte); ClearPageReferenced(page); @@ -750,6 +747,8 @@ static void pte_to_pagemap_entry(pagemap_entry_t *pme, pte_t pte) else if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte)) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -764,6 +763,8 @@ static void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, if (pmd_present(pmd)) *pme = make_pme(PM_PFRAME(pmd_pfn(pmd) + offset) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } #else static inline void thp_pmd_to_pagemap_entry(pagemap_entry_t *pme, @@ -804,8 +805,10 @@ static int pagemap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, /* check to see if we've left 'vma' behind * and need a new, higher one */ - if (vma && (addr >= vma->vm_end)) + if (vma && (addr >= vma->vm_end)) { vma = find_vma(walk->mm, addr); + pme = make_pme(PM_NOT_PRESENT); + } /* check that 'vma' actually covers this address, * and that it isn't a huge page vma */ @@ -833,6 +836,8 @@ static void huge_pte_to_pagemap_entry(pagemap_entry_t *pme, if (pte_present(pte)) *pme = make_pme(PM_PFRAME(pte_pfn(pte) + offset) | PM_PSHIFT(PAGE_SHIFT) | PM_PRESENT); + else + *pme = make_pme(PM_NOT_PRESENT); } /* This function walks within one hugetlb entry in the single call */ @@ -842,7 +847,7 @@ static int pagemap_hugetlb_range(pte_t *pte, unsigned long hmask, { struct pagemapread *pm = walk->private; int err = 0; - pagemap_entry_t pme = make_pme(PM_NOT_PRESENT); + pagemap_entry_t pme; for (; addr != end; addr += PAGE_SIZE) { int offset = (addr & ~hmask) >> PAGE_SHIFT; diff --git a/fs/sysfs/dir.c b/fs/sysfs/dir.c index 2a7a3f5d1ca..35a36d39fa2 100644 --- a/fs/sysfs/dir.c +++ b/fs/sysfs/dir.c @@ -729,6 +729,9 @@ int sysfs_create_dir(struct kobject * kobj) else parent_sd = &sysfs_root; + if (!parent_sd) + return -ENOENT; + if (sysfs_ns_type(parent_sd)) ns = kobj->ktype->namespace(kobj); type = sysfs_read_ns_type(kobj); @@ -878,7 +881,6 @@ int sysfs_rename(struct sysfs_dirent *sd, dup_name = sd->s_name; sd->s_name = new_name; - sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); } /* Move to the appropriate place in the appropriate directories rbtree. */ @@ -886,6 +888,7 @@ int sysfs_rename(struct sysfs_dirent *sd, sysfs_get(new_parent_sd); sysfs_put(sd->s_parent); sd->s_ns = new_ns; + sd->s_hash = sysfs_name_hash(sd->s_ns, sd->s_name); sd->s_parent = new_parent_sd; sysfs_link_sibling(sd); diff --git a/fs/sysfs/group.c b/fs/sysfs/group.c index dd1701caecc..2df555c66d5 100644 --- a/fs/sysfs/group.c +++ b/fs/sysfs/group.c @@ -67,7 +67,11 @@ static int internal_create_group(struct kobject *kobj, int update, /* Updates may happen before the object has been instantiated */ if (unlikely(update && !kobj->sd)) return -EINVAL; - + if (!grp->attrs) { + WARN(1, "sysfs: attrs not set by subsystem for group: %s/%s\n", + kobj->name, grp->name ? "" : grp->name); + return -EINVAL; + } if (grp->name) { error = sysfs_create_subdir(kobj, grp->name, &sd); if (error) |