diff options
Diffstat (limited to 'fs')
44 files changed, 487 insertions, 291 deletions
diff --git a/fs/Kconfig b/fs/Kconfig index e31f3691b15..cc28a69246a 100644 --- a/fs/Kconfig +++ b/fs/Kconfig @@ -220,7 +220,7 @@ config JBD config JBD_DEBUG bool "JBD (ext3) debugging support" - depends on JBD + depends on JBD && DEBUG_FS help If you are using the ext3 journaled file system (or potentially any other file system/device using JBD), this option allows you to @@ -229,10 +229,10 @@ config JBD_DEBUG debugging output will be turned off. If you select Y here, then you will be able to turn on debugging - with "echo N > /proc/sys/fs/jbd-debug", where N is a number between - 1 and 5, the higher the number, the more debugging output is - generated. To turn debugging off again, do - "echo 0 > /proc/sys/fs/jbd-debug". + with "echo N > /sys/kernel/debug/jbd/jbd-debug", where N is a + number between 1 and 5, the higher the number, the more debugging + output is generated. To turn debugging off again, do + "echo 0 > /sys/kernel/debug/jbd/jbd-debug". config JBD2 tristate diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index e7204d71acc..45f5992a095 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -80,7 +80,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, *uid = current->uid; *gid = current->gid; - *pgrp = process_group(current); + *pgrp = task_pgrp_nr(current); *minproto = *maxproto = AUTOFS_PROTO_VERSION; diff --git a/fs/autofs/root.c b/fs/autofs/root.c index c1489533277..5efff3c0d88 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -214,8 +214,8 @@ static struct dentry *autofs_root_lookup(struct inode *dir, struct dentry *dentr oz_mode = autofs_oz_mode(sbi); DPRINTK(("autofs_lookup: pid = %u, pgrp = %u, catatonic = %d, " - "oz_mode = %d\n", pid_nr(task_pid(current)), - process_group(current), sbi->catatonic, + "oz_mode = %d\n", task_pid_nr(current), + task_pgrp_nr(current), sbi->catatonic, oz_mode)); /* @@ -536,7 +536,7 @@ static int autofs_root_ioctl(struct inode *inode, struct file *filp, struct autofs_sb_info *sbi = autofs_sbi(inode->i_sb); void __user *argp = (void __user *)arg; - DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,process_group(current))); + DPRINTK(("autofs_ioctl: cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",cmd,arg,sbi,task_pgrp_nr(current))); if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index d85f42fa920..2d4ae40718d 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -131,7 +131,7 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry) filesystem without "magic".) */ static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) { - return sbi->catatonic || process_group(current) == sbi->oz_pgrp; + return sbi->catatonic || task_pgrp_nr(current) == sbi->oz_pgrp; } /* Does a dentry have some pending activity? */ diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index cd81f083667..7f05d6ccdb1 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -226,7 +226,7 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, *uid = current->uid; *gid = current->gid; - *pgrp = process_group(current); + *pgrp = task_pgrp_nr(current); *minproto = AUTOFS_MIN_PROTO_VERSION; *maxproto = AUTOFS_MAX_PROTO_VERSION; @@ -323,7 +323,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent) sbi->pipe = NULL; sbi->catatonic = 1; sbi->exp_timeout = 0; - sbi->oz_pgrp = process_group(current); + sbi->oz_pgrp = task_pgrp_nr(current); sbi->sb = s; sbi->version = 0; sbi->sub_version = 0; diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c index 45ff3d63b75..2bbcc8151dc 100644 --- a/fs/autofs4/root.c +++ b/fs/autofs4/root.c @@ -582,7 +582,7 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, s oz_mode = autofs4_oz_mode(sbi); DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d", - current->pid, process_group(current), sbi->catatonic, oz_mode); + current->pid, task_pgrp_nr(current), sbi->catatonic, oz_mode); unhashed = autofs4_lookup_unhashed(sbi, dentry->d_parent, &dentry->d_name); if (!unhashed) { @@ -976,7 +976,7 @@ static int autofs4_root_ioctl(struct inode *inode, struct file *filp, void __user *p = (void __user *)arg; DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u", - cmd,arg,sbi,process_group(current)); + cmd,arg,sbi,task_pgrp_nr(current)); if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) || _IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 6e2f3b8dde7..ba8de7ca260 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1383,10 +1383,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; - prstatus->pr_pid = p->pid; - prstatus->pr_ppid = p->parent->pid; - prstatus->pr_pgrp = process_group(p); - prstatus->pr_sid = process_session(p); + prstatus->pr_pid = task_pid_vnr(p); + prstatus->pr_ppid = task_pid_vnr(p->parent); + prstatus->pr_pgrp = task_pgrp_vnr(p); + prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { /* * This is the record for the group leader. Add in the @@ -1429,10 +1429,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_psargs[i] = ' '; psinfo->pr_psargs[len] = 0; - psinfo->pr_pid = p->pid; - psinfo->pr_ppid = p->parent->pid; - psinfo->pr_pgrp = process_group(p); - psinfo->pr_sid = process_session(p); + psinfo->pr_pid = task_pid_vnr(p); + psinfo->pr_ppid = task_pid_vnr(p->parent); + psinfo->pr_pgrp = task_pgrp_vnr(p); + psinfo->pr_sid = task_session_vnr(p); i = p->state ? ffz(~p->state) + 1 : 0; psinfo->pr_state = i; diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 033861c6b8f..32649f2a165 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1342,10 +1342,10 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; - prstatus->pr_pid = p->pid; - prstatus->pr_ppid = p->parent->pid; - prstatus->pr_pgrp = process_group(p); - prstatus->pr_sid = process_session(p); + prstatus->pr_pid = task_pid_vnr(p); + prstatus->pr_ppid = task_pid_vnr(p->parent); + prstatus->pr_pgrp = task_pgrp_vnr(p); + prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { /* * This is the record for the group leader. Add in the @@ -1391,10 +1391,10 @@ static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, psinfo->pr_psargs[i] = ' '; psinfo->pr_psargs[len] = 0; - psinfo->pr_pid = p->pid; - psinfo->pr_ppid = p->parent->pid; - psinfo->pr_pgrp = process_group(p); - psinfo->pr_sid = process_session(p); + psinfo->pr_pid = task_pid_vnr(p); + psinfo->pr_ppid = task_pid_vnr(p->parent); + psinfo->pr_pgrp = task_pgrp_vnr(p); + psinfo->pr_sid = task_session_vnr(p); i = p->state ? ffz(~p->state) + 1 : 0; psinfo->pr_state = i; diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 4af3588c1a9..370866cb3d4 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -352,7 +352,7 @@ cifs_demultiplex_thread(struct TCP_Server_Info *server) current->flags |= PF_MEMALLOC; server->tsk = current; /* save process info to wake at shutdown */ - cFYI(1, ("Demultiplex PID: %d", current->pid)); + cFYI(1, ("Demultiplex PID: %d", task_pid_nr(current))); write_lock(&GlobalSMBSeslock); atomic_inc(&tcpSesAllocCount); length = tcpSesAllocCount.counter; diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index cdb4c07a787..359e531094d 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -51,7 +51,7 @@ static void *alloc_upcall(int opcode, int size) inp->ih.opcode = opcode; inp->ih.pid = current->pid; - inp->ih.pgid = process_group(current); + inp->ih.pgid = task_pgrp_nr(current); #ifdef CONFIG_CODA_FS_OLD_API memset(&inp->ih.cred, 0, sizeof(struct coda_cred)); inp->ih.cred.cr_fsuid = current->fsuid; diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 6438941ab1f..4f741546f4b 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -456,7 +456,7 @@ static int check_version(struct dlm_write_request *req) printk(KERN_DEBUG "dlm: process %s (%d) version mismatch " "user (%d.%d.%d) kernel (%d.%d.%d)\n", current->comm, - current->pid, + task_pid_nr(current), req->version[0], req->version[1], req->version[2], diff --git a/fs/eventpoll.c b/fs/eventpoll.c index de618929195..34f68f3a069 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -325,15 +325,14 @@ static void ep_poll_safewake(struct poll_safewake *psw, wait_queue_head_t *wq) int wake_nests = 0; unsigned long flags; struct task_struct *this_task = current; - struct list_head *lsthead = &psw->wake_task_list, *lnk; + struct list_head *lsthead = &psw->wake_task_list; struct wake_task_node *tncur; struct wake_task_node tnode; spin_lock_irqsave(&psw->lock, flags); /* Try to see if the current task is already inside this wakeup call */ - list_for_each(lnk, lsthead) { - tncur = list_entry(lnk, struct wake_task_node, llink); + list_for_each_entry(tncur, lsthead, llink) { if (tncur->wq == wq || (tncur->task == this_task && ++wake_nests > EP_MAX_POLLWAKE_NESTS)) { diff --git a/fs/exec.c b/fs/exec.c index 070ddf13cb7..2c942e2d14e 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -234,7 +234,7 @@ static int __bprm_mm_init(struct linux_binprm *bprm) vma->vm_start = vma->vm_end - PAGE_SIZE; vma->vm_flags = VM_STACK_FLAGS; - vma->vm_page_prot = protection_map[vma->vm_flags & 0x7]; + vma->vm_page_prot = vm_get_page_prot(vma->vm_flags); err = insert_vm_struct(mm, vma); if (err) { up_write(&mm->mmap_sem); @@ -775,8 +775,8 @@ static int de_thread(struct task_struct *tsk) * Reparenting needs write_lock on tasklist_lock, * so it is safe to do it under read_lock. */ - if (unlikely(tsk->group_leader == child_reaper(tsk))) - tsk->nsproxy->pid_ns->child_reaper = tsk; + if (unlikely(tsk->group_leader == task_child_reaper(tsk))) + task_active_pid_ns(tsk)->child_reaper = tsk; zap_other_threads(tsk); read_unlock(&tasklist_lock); @@ -841,8 +841,8 @@ static int de_thread(struct task_struct *tsk) */ tsk->start_time = leader->start_time; - BUG_ON(leader->tgid != tsk->tgid); - BUG_ON(tsk->pid == tsk->tgid); + BUG_ON(!same_thread_group(leader, tsk)); + BUG_ON(has_group_leader_pid(tsk)); /* * An exec() starts a new thread group with the * TGID of the previous thread group. Rehash the @@ -857,7 +857,7 @@ static int de_thread(struct task_struct *tsk) */ detach_pid(tsk, PIDTYPE_PID); tsk->pid = leader->pid; - attach_pid(tsk, PIDTYPE_PID, find_pid(tsk->pid)); + attach_pid(tsk, PIDTYPE_PID, task_pid(leader)); transfer_pid(leader, tsk, PIDTYPE_PGID); transfer_pid(leader, tsk, PIDTYPE_SID); list_replace_rcu(&leader->tasks, &tsk->tasks); @@ -1433,7 +1433,7 @@ static int format_corename(char *corename, const char *pattern, long signr) case 'p': pid_in_pattern = 1; rc = snprintf(out_ptr, out_end - out_ptr, - "%d", current->tgid); + "%d", task_tgid_vnr(current)); if (rc > out_end - out_ptr) goto out; out_ptr += rc; @@ -1513,7 +1513,7 @@ static int format_corename(char *corename, const char *pattern, long signr) if (!ispipe && !pid_in_pattern && (core_uses_pid || atomic_read(¤t->mm->mm_users) != 1)) { rc = snprintf(out_ptr, out_end - out_ptr, - ".%d", current->tgid); + ".%d", task_tgid_vnr(current)); if (rc > out_end - out_ptr) goto out; out_ptr += rc; diff --git a/fs/ext3/inode.c b/fs/ext3/inode.c index 3dec003b773..9b162cd6c16 100644 --- a/fs/ext3/inode.c +++ b/fs/ext3/inode.c @@ -2954,7 +2954,7 @@ int ext3_write_inode(struct inode *inode, int wait) return 0; if (ext3_journal_current_handle()) { - jbd_debug(0, "called recursively, non-PF_MEMALLOC!\n"); + jbd_debug(1, "called recursively, non-PF_MEMALLOC!\n"); dump_stack(); return -EIO; } diff --git a/fs/ext3/xattr.c b/fs/ext3/xattr.c index f58cbb26323..408373819e3 100644 --- a/fs/ext3/xattr.c +++ b/fs/ext3/xattr.c @@ -741,12 +741,11 @@ ext3_xattr_block_set(handle_t *handle, struct inode *inode, } } else { /* Allocate a buffer where we construct the new block. */ - s->base = kmalloc(sb->s_blocksize, GFP_KERNEL); + s->base = kzalloc(sb->s_blocksize, GFP_KERNEL); /* assert(header == s->base) */ error = -ENOMEM; if (s->base == NULL) goto cleanup; - memset(s->base, 0, sb->s_blocksize); header(s->base)->h_magic = cpu_to_le32(EXT3_XATTR_MAGIC); header(s->base)->h_blocks = cpu_to_le32(1); header(s->base)->h_refcount = cpu_to_le32(1); diff --git a/fs/fcntl.c b/fs/fcntl.c index c9db73fc5e3..8685263ccc4 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -18,6 +18,7 @@ #include <linux/ptrace.h> #include <linux/signal.h> #include <linux/rcupdate.h> +#include <linux/pid_namespace.h> #include <asm/poll.h> #include <asm/siginfo.h> @@ -292,7 +293,7 @@ int f_setown(struct file *filp, unsigned long arg, int force) who = -who; } rcu_read_lock(); - pid = find_pid(who); + pid = find_vpid(who); result = __f_setown(filp, pid, type, force); rcu_read_unlock(); return result; @@ -308,7 +309,7 @@ pid_t f_getown(struct file *filp) { pid_t pid; read_lock(&filp->f_owner.lock); - pid = pid_nr(filp->f_owner.pid); + pid = pid_nr_ns(filp->f_owner.pid, current->nsproxy->pid_ns); if (filp->f_owner.pid_type == PIDTYPE_PGID) pid = -pid; read_unlock(&filp->f_owner.lock); diff --git a/fs/file_table.c b/fs/file_table.c index 3176fefc92e..664e3f2309b 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -323,12 +323,11 @@ void file_kill(struct file *file) int fs_may_remount_ro(struct super_block *sb) { - struct list_head *p; + struct file *file; /* Check that no files are currently opened for writing. */ file_list_lock(); - list_for_each(p, &sb->s_files) { - struct file *file = list_entry(p, struct file, f_u.fu_list); + list_for_each_entry(file, &sb->s_files, f_u.fu_list) { struct inode *inode = file->f_path.dentry->d_inode; /* File with pending delete? */ diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 686734ff973..0fca82021d7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -89,7 +89,7 @@ void __mark_inode_dirty(struct inode *inode, int flags) if (inode->i_ino || strcmp(inode->i_sb->s_id, "bdev")) printk(KERN_DEBUG "%s(%d): dirtied inode %lu (%s) on %s\n", - current->comm, current->pid, inode->i_ino, + current->comm, task_pid_nr(current), inode->i_ino, name, inode->i_sb->s_id); } diff --git a/fs/ioprio.c b/fs/ioprio.c index 10d2c211d18..d6ff77e8e7e 100644 --- a/fs/ioprio.c +++ b/fs/ioprio.c @@ -25,6 +25,7 @@ #include <linux/capability.h> #include <linux/syscalls.h> #include <linux/security.h> +#include <linux/pid_namespace.h> static int set_task_ioprio(struct task_struct *task, int ioprio) { @@ -93,7 +94,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) if (!who) p = current; else - p = find_task_by_pid(who); + p = find_task_by_vpid(who); if (p) ret = set_task_ioprio(p, ioprio); break; @@ -101,7 +102,7 @@ asmlinkage long sys_ioprio_set(int which, int who, int ioprio) if (!who) pgrp = task_pgrp(current); else - pgrp = find_pid(who); + pgrp = find_vpid(who); do_each_pid_task(pgrp, PIDTYPE_PGID, p) { ret = set_task_ioprio(p, ioprio); if (ret) @@ -180,7 +181,7 @@ asmlinkage long sys_ioprio_get(int which, int who) if (!who) p = current; else - p = find_task_by_pid(who); + p = find_task_by_vpid(who); if (p) ret = get_task_ioprio(p); break; @@ -188,7 +189,7 @@ asmlinkage long sys_ioprio_get(int which, int who) if (!who) pgrp = task_pgrp(current); else - pgrp = find_pid(who); + pgrp = find_vpid(who); do_each_pid_task(pgrp, PIDTYPE_PGID, p) { tmpio = get_task_ioprio(p); if (tmpio < 0) diff --git a/fs/jbd/commit.c b/fs/jbd/commit.c index a263d82761d..8f1f2aa5fb3 100644 --- a/fs/jbd/commit.c +++ b/fs/jbd/commit.c @@ -466,7 +466,7 @@ void journal_commit_transaction(journal_t *journal) spin_unlock(&journal->j_list_lock); if (err) - __journal_abort_hard(journal); + journal_abort(journal, err); journal_write_revoke_records(journal, commit_transaction); @@ -524,7 +524,7 @@ void journal_commit_transaction(journal_t *journal) descriptor = journal_get_descriptor_buffer(journal); if (!descriptor) { - __journal_abort_hard(journal); + journal_abort(journal, -EIO); continue; } @@ -557,7 +557,7 @@ void journal_commit_transaction(journal_t *journal) and repeat this loop: we'll fall into the refile-on-abort condition above. */ if (err) { - __journal_abort_hard(journal); + journal_abort(journal, err); continue; } @@ -748,7 +748,7 @@ wait_for_iobuf: err = -EIO; if (err) - __journal_abort_hard(journal); + journal_abort(journal, err); /* End of a transaction! Finally, we can do checkpoint processing: any buffers committed as a result of this diff --git a/fs/jbd/journal.c b/fs/jbd/journal.c index 5d9fec0b7eb..5d14243499d 100644 --- a/fs/jbd/journal.c +++ b/fs/jbd/journal.c @@ -35,6 +35,7 @@ #include <linux/kthread.h> #include <linux/poison.h> #include <linux/proc_fs.h> +#include <linux/debugfs.h> #include <asm/uaccess.h> #include <asm/page.h> @@ -654,10 +655,9 @@ static journal_t * journal_init_common (void) journal_t *journal; int err; - journal = kmalloc(sizeof(*journal), GFP_KERNEL); + journal = kzalloc(sizeof(*journal), GFP_KERNEL); if (!journal) goto fail; - memset(journal, 0, sizeof(*journal)); init_waitqueue_head(&journal->j_wait_transaction_locked); init_waitqueue_head(&journal->j_wait_logspace); @@ -1852,64 +1852,41 @@ void journal_put_journal_head(struct journal_head *jh) } /* - * /proc tunables + * debugfs tunables */ -#if defined(CONFIG_JBD_DEBUG) -int journal_enable_debug; -EXPORT_SYMBOL(journal_enable_debug); -#endif +#ifdef CONFIG_JBD_DEBUG -#if defined(CONFIG_JBD_DEBUG) && defined(CONFIG_PROC_FS) +u8 journal_enable_debug __read_mostly; +EXPORT_SYMBOL(journal_enable_debug); -static struct proc_dir_entry *proc_jbd_debug; +static struct dentry *jbd_debugfs_dir; +static struct dentry *jbd_debug; -static int read_jbd_debug(char *page, char **start, off_t off, - int count, int *eof, void *data) +static void __init jbd_create_debugfs_entry(void) { - int ret; - - ret = sprintf(page + off, "%d\n", journal_enable_debug); - *eof = 1; - return ret; + jbd_debugfs_dir = debugfs_create_dir("jbd", NULL); + if (jbd_debugfs_dir) + jbd_debug = debugfs_create_u8("jbd-debug", S_IRUGO, + jbd_debugfs_dir, + &journal_enable_debug); } -static int write_jbd_debug(struct file *file, const char __user *buffer, - unsigned long count, void *data) +static void __exit jbd_remove_debugfs_entry(void) { - char buf[32]; - - if (count > ARRAY_SIZE(buf) - 1) - count = ARRAY_SIZE(buf) - 1; - if (copy_from_user(buf, buffer, count)) - return -EFAULT; - buf[ARRAY_SIZE(buf) - 1] = '\0'; - journal_enable_debug = simple_strtoul(buf, NULL, 10); - return count; + debugfs_remove(jbd_debug); + debugfs_remove(jbd_debugfs_dir); } -#define JBD_PROC_NAME "sys/fs/jbd-debug" +#else -static void __init create_jbd_proc_entry(void) +static inline void jbd_create_debugfs_entry(void) { - proc_jbd_debug = create_proc_entry(JBD_PROC_NAME, 0644, NULL); - if (proc_jbd_debug) { - /* Why is this so hard? */ - proc_jbd_debug->read_proc = read_jbd_debug; - proc_jbd_debug->write_proc = write_jbd_debug; - } } -static void __exit remove_jbd_proc_entry(void) +static inline void jbd_remove_debugfs_entry(void) { - if (proc_jbd_debug) - remove_proc_entry(JBD_PROC_NAME, NULL); } -#else - -#define create_jbd_proc_entry() do {} while (0) -#define remove_jbd_proc_entry() do {} while (0) - #endif struct kmem_cache *jbd_handle_cache; @@ -1966,7 +1943,7 @@ static int __init journal_init(void) ret = journal_init_caches(); if (ret != 0) journal_destroy_caches(); - create_jbd_proc_entry(); + jbd_create_debugfs_entry(); return ret; } @@ -1977,7 +1954,7 @@ static void __exit journal_exit(void) if (n) printk(KERN_EMERG "JBD: leaked %d journal_heads!\n", n); #endif - remove_jbd_proc_entry(); + jbd_remove_debugfs_entry(); journal_destroy_caches(); } diff --git a/fs/jbd/recovery.c b/fs/jbd/recovery.c index 2a5f4b833e3..c5d9694b6a2 100644 --- a/fs/jbd/recovery.c +++ b/fs/jbd/recovery.c @@ -250,10 +250,10 @@ int journal_recover(journal_t *journal) if (!err) err = do_one_pass(journal, &info, PASS_REPLAY); - jbd_debug(0, "JBD: recovery, exit status %d, " + jbd_debug(1, "JBD: recovery, exit status %d, " "recovered transactions %u to %u\n", err, info.start_transaction, info.end_transaction); - jbd_debug(0, "JBD: Replayed %d and revoked %d/%d blocks\n", + jbd_debug(1, "JBD: Replayed %d and revoked %d/%d blocks\n", info.nr_replays, info.nr_revoke_hits, info.nr_revokes); /* Restart the log at the next transaction ID, thus invalidating @@ -297,7 +297,7 @@ int journal_skip_recovery(journal_t *journal) #ifdef CONFIG_JBD_DEBUG int dropped = info.end_transaction - be32_to_cpu(sb->s_sequence); #endif - jbd_debug(0, + jbd_debug(1, "JBD: ignoring %d transaction%s from the journal.\n", dropped, (dropped == 1) ? "" : "s"); journal->j_transaction_sequence = ++info.end_transaction; diff --git a/fs/jbd/transaction.c b/fs/jbd/transaction.c index 9841b1e5af0..08ff6c7028c 100644 --- a/fs/jbd/transaction.c +++ b/fs/jbd/transaction.c @@ -96,13 +96,12 @@ static int start_this_handle(journal_t *journal, handle_t *handle) alloc_transaction: if (!journal->j_running_transaction) { - new_transaction = kmalloc(sizeof(*new_transaction), + new_transaction = kzalloc(sizeof(*new_transaction), GFP_NOFS|__GFP_NOFAIL); if (!new_transaction) { ret = -ENOMEM; goto out; } - memset(new_transaction, 0, sizeof(*new_transaction)); } jbd_debug(3, "New handle %p going live.\n", handle); diff --git a/fs/jffs2/debug.h b/fs/jffs2/debug.h index 2a49f2c51a9..4130adabd76 100644 --- a/fs/jffs2/debug.h +++ b/fs/jffs2/debug.h @@ -80,28 +80,28 @@ #define JFFS2_ERROR(fmt, ...) \ do { \ printk(JFFS2_ERR_MSG_PREFIX \ - " (%d) %s: " fmt, current->pid, \ + " (%d) %s: " fmt, task_pid_nr(current), \ __FUNCTION__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_WARNING(fmt, ...) \ do { \ printk(JFFS2_WARN_MSG_PREFIX \ - " (%d) %s: " fmt, current->pid, \ + " (%d) %s: " fmt, task_pid_nr(current), \ __FUNCTION__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_NOTICE(fmt, ...) \ do { \ printk(JFFS2_NOTICE_MSG_PREFIX \ - " (%d) %s: " fmt, current->pid, \ + " (%d) %s: " fmt, task_pid_nr(current), \ __FUNCTION__ , ##__VA_ARGS__); \ } while(0) #define JFFS2_DEBUG(fmt, ...) \ do { \ printk(JFFS2_DBG_MSG_PREFIX \ - " (%d) %s: " fmt, current->pid, \ + " (%d) %s: " fmt, task_pid_nr(current), \ __FUNCTION__ , ##__VA_ARGS__); \ } while(0) diff --git a/fs/namespace.c b/fs/namespace.c index 07daa797259..860752998fb 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1411,7 +1411,7 @@ long do_mount(char *dev_name, char *dir_name, char *type_page, mnt_flags |= MNT_RELATIME; flags &= ~(MS_NOSUID | MS_NOEXEC | MS_NODEV | MS_ACTIVE | - MS_NOATIME | MS_NODIRATIME | MS_RELATIME); + MS_NOATIME | MS_NODIRATIME | MS_RELATIME| MS_KERNMOUNT); /* ... and get the mountpoint */ retval = path_lookup(dir_name, LOOKUP_FOLLOW, &nd); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 46934c97f8f..d0199189924 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -1029,13 +1029,13 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file, if (EX_WGATHER(exp)) { if (atomic_read(&inode->i_writecount) > 1 || (last_ino == inode->i_ino && last_dev == inode->i_sb->s_dev)) { - dprintk("nfsd: write defer %d\n", current->pid); + dprintk("nfsd: write defer %d\n", task_pid_nr(current)); msleep(10); - dprintk("nfsd: write resume %d\n", current->pid); + dprintk("nfsd: write resume %d\n", task_pid_nr(current)); } if (inode->i_state & I_DIRTY) { - dprintk("nfsd: write sync %d\n", current->pid); + dprintk("nfsd: write sync %d\n", task_pid_nr(current)); host_err=nfsd_sync(file); } #if 0 diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c index f14b541fab9..9cc7c0418b7 100644 --- a/fs/ocfs2/cluster/heartbeat.c +++ b/fs/ocfs2/cluster/heartbeat.c @@ -1372,7 +1372,7 @@ static ssize_t o2hb_region_pid_read(struct o2hb_region *reg, spin_lock(&o2hb_live_lock); if (reg->hr_task) - pid = reg->hr_task->pid; + pid = task_pid_nr(reg->hr_task); spin_unlock(&o2hb_live_lock); if (!pid) diff --git a/fs/ocfs2/cluster/masklog.h b/fs/ocfs2/cluster/masklog.h index 75cd877f6d4..cd046060114 100644 --- a/fs/ocfs2/cluster/masklog.h +++ b/fs/ocfs2/cluster/masklog.h @@ -192,7 +192,7 @@ extern struct mlog_bits mlog_and_bits, mlog_not_bits; * previous token if args expands to nothing. */ #define __mlog_printk(level, fmt, args...) \ - printk(level "(%u,%lu):%s:%d " fmt, current->pid, \ + printk(level "(%u,%lu):%s:%d " fmt, task_pid_nr(current), \ __mlog_cpu_guess, __PRETTY_FUNCTION__, __LINE__ , \ ##args) diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index a2c33160bfd..2fde7bf9143 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -259,7 +259,7 @@ static void dlm_print_reco_node_status(struct dlm_ctxt *dlm) struct dlm_lock_resource *res; mlog(ML_NOTICE, "%s(%d): recovery info, state=%s, dead=%u, master=%u\n", - dlm->name, dlm->dlm_reco_thread_task->pid, + dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.state & DLM_RECO_STATE_ACTIVE ? "ACTIVE" : "inactive", dlm->reco.dead_node, dlm->reco.new_master); @@ -420,7 +420,7 @@ void dlm_wait_for_recovery(struct dlm_ctxt *dlm) if (dlm_in_recovery(dlm)) { mlog(0, "%s: reco thread %d in recovery: " "state=%d, master=%u, dead=%u\n", - dlm->name, dlm->dlm_reco_thread_task->pid, + dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.state, dlm->reco.new_master, dlm->reco.dead_node); } @@ -483,7 +483,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) return 0; } mlog(0, "%s(%d):recovery thread found node %u in the recovery map!\n", - dlm->name, dlm->dlm_reco_thread_task->pid, + dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.dead_node); spin_unlock(&dlm->spinlock); @@ -507,7 +507,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) mlog(0, "another node will master this recovery session.\n"); } mlog(0, "dlm=%s (%d), new_master=%u, this node=%u, dead_node=%u\n", - dlm->name, dlm->dlm_reco_thread_task->pid, dlm->reco.new_master, + dlm->name, task_pid_nr(dlm->dlm_reco_thread_task), dlm->reco.new_master, dlm->node_num, dlm->reco.dead_node); /* it is safe to start everything back up here @@ -520,7 +520,7 @@ static int dlm_do_recovery(struct dlm_ctxt *dlm) master_here: mlog(0, "(%d) mastering recovery of %s:%u here(this=%u)!\n", - dlm->dlm_reco_thread_task->pid, + task_pid_nr(dlm->dlm_reco_thread_task), dlm->name, dlm->reco.dead_node, dlm->node_num); status = dlm_remaster_locks(dlm, dlm->reco.dead_node); diff --git a/fs/proc/array.c b/fs/proc/array.c index 27b59f5f3bd..7a34571203b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -77,6 +77,7 @@ #include <linux/cpuset.h> #include <linux/rcupdate.h> #include <linux/delayacct.h> +#include <linux/pid_namespace.h> #include <asm/pgtable.h> #include <asm/processor.h> @@ -145,8 +146,7 @@ static inline const char *get_task_state(struct task_struct *tsk) TASK_UNINTERRUPTIBLE | TASK_STOPPED | TASK_TRACED)) | - (tsk->exit_state & (EXIT_ZOMBIE | - EXIT_DEAD)); + tsk->exit_state; const char **p = &task_state_array[0]; while (state) { @@ -161,8 +161,15 @@ static inline char *task_state(struct task_struct *p, char *buffer) struct group_info *group_info; int g; struct fdtable *fdt = NULL; + struct pid_namespace *ns; + pid_t ppid, tpid; + ns = current->nsproxy->pid_ns; rcu_read_lock(); + ppid = pid_alive(p) ? + task_tgid_nr_ns(rcu_dereference(p->real_parent), ns) : 0; + tpid = pid_alive(p) && p->ptrace ? + task_ppid_nr_ns(rcu_dereference(p->parent), ns) : 0; buffer += sprintf(buffer, "State:\t%s\n" "Tgid:\t%d\n" @@ -172,9 +179,9 @@ static inline char *task_state(struct task_struct *p, char *buffer) "Uid:\t%d\t%d\t%d\t%d\n" "Gid:\t%d\t%d\t%d\t%d\n", get_task_state(p), - p->tgid, p->pid, - pid_alive(p) ? rcu_dereference(p->real_parent)->tgid : 0, - pid_alive(p) && p->ptrace ? rcu_dereference(p->parent)->pid : 0, + task_tgid_nr_ns(p, ns), + task_pid_nr_ns(p, ns), + ppid, tpid, p->uid, p->euid, p->suid, p->fsuid, p->gid, p->egid, p->sgid, p->fsgid); @@ -394,6 +401,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; unsigned long flags; + struct pid_namespace *ns; + + ns = current->nsproxy->pid_ns; state = *get_task_state(task); vsize = eip = esp = 0; @@ -416,7 +426,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) struct signal_struct *sig = task->signal; if (sig->tty) { - tty_pgrp = pid_nr(sig->tty->pgrp); + tty_pgrp = pid_nr_ns(sig->tty->pgrp, ns); tty_nr = new_encode_dev(tty_devnum(sig->tty)); } @@ -449,9 +459,9 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) gtime += cputime_add(gtime, sig->gtime); } - sid = signal_session(sig); - pgid = process_group(task); - ppid = rcu_dereference(task->real_parent)->tgid; + sid = task_session_nr_ns(task, ns); + pgid = task_pgrp_nr_ns(task, ns); + ppid = task_ppid_nr_ns(task, ns); unlock_task_sighand(task, &flags); } @@ -483,7 +493,7 @@ static int do_task_stat(struct task_struct *task, char *buffer, int whole) res = sprintf(buffer, "%d (%s) %c %d %d %d %d %d %u %lu \ %lu %lu %lu %lu %lu %ld %ld %ld %ld %d 0 %llu %lu %ld %lu %lu %lu %lu %lu \ %lu %lu %lu %lu %lu %lu %lu %lu %d %d %u %u %llu %lu %ld\n", - task->pid, + task_pid_nr_ns(task, ns), tcomm, state, ppid, diff --git a/fs/proc/base.c b/fs/proc/base.c index 4fe74d15641..39a3d7c969c 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -63,16 +63,19 @@ #include <linux/mm.h> #include <linux/rcupdate.h> #include <linux/kallsyms.h> +#include <linux/resource.h> #include <linux/module.h> #include <linux/mount.h> #include <linux/security.h> #include <linux/ptrace.h> +#include <linux/cgroup.h> #include <linux/cpuset.h> #include <linux/audit.h> #include <linux/poll.h> #include <linux/nsproxy.h> #include <linux/oom.h> #include <linux/elf.h> +#include <linux/pid_namespace.h> #include "internal.h" /* NOTE: @@ -301,6 +304,78 @@ static int proc_oom_score(struct task_struct *task, char *buffer) return sprintf(buffer, "%lu\n", points); } +struct limit_names { + char *name; + char *unit; +}; + +static const struct limit_names lnames[RLIM_NLIMITS] = { + [RLIMIT_CPU] = {"Max cpu time", "ms"}, + [RLIMIT_FSIZE] = {"Max file size", "bytes"}, + [RLIMIT_DATA] = {"Max data size", "bytes"}, + [RLIMIT_STACK] = {"Max stack size", "bytes"}, + [RLIMIT_CORE] = {"Max core file size", "bytes"}, + [RLIMIT_RSS] = {"Max resident set", "bytes"}, + [RLIMIT_NPROC] = {"Max processes", "processes"}, + [RLIMIT_NOFILE] = {"Max open files", "files"}, + [RLIMIT_MEMLOCK] = {"Max locked memory", "bytes"}, + [RLIMIT_AS] = {"Max address space", "bytes"}, + [RLIMIT_LOCKS] = {"Max file locks", "locks"}, + [RLIMIT_SIGPENDING] = {"Max pending signals", "signals"}, + [RLIMIT_MSGQUEUE] = {"Max msgqueue size", "bytes"}, + [RLIMIT_NICE] = {"Max nice priority", NULL}, + [RLIMIT_RTPRIO] = {"Max realtime priority", NULL}, +}; + +/* Display limits for a process */ +static int proc_pid_limits(struct task_struct *task, char *buffer) +{ + unsigned int i; + int count = 0; + unsigned long flags; + char *bufptr = buffer; + + struct rlimit rlim[RLIM_NLIMITS]; + + rcu_read_lock(); + if (!lock_task_sighand(task,&flags)) { + rcu_read_unlock(); + return 0; + } + memcpy(rlim, task->signal->rlim, sizeof(struct rlimit) * RLIM_NLIMITS); + unlock_task_sighand(task, &flags); + rcu_read_unlock(); + + /* + * print the file header + */ + count += sprintf(&bufptr[count], "%-25s %-20s %-20s %-10s\n", + "Limit", "Soft Limit", "Hard Limit", "Units"); + + for (i = 0; i < RLIM_NLIMITS; i++) { + if (rlim[i].rlim_cur == RLIM_INFINITY) + count += sprintf(&bufptr[count], "%-25s %-20s ", + lnames[i].name, "unlimited"); + else + count += sprintf(&bufptr[count], "%-25s %-20lu ", + lnames[i].name, rlim[i].rlim_cur); + + if (rlim[i].rlim_max == RLIM_INFINITY) + count += sprintf(&bufptr[count], "%-20s ", "unlimited"); + else + count += sprintf(&bufptr[count], "%-20lu ", + rlim[i].rlim_max); + + if (lnames[i].unit) + count += sprintf(&bufptr[count], "%-10s\n", + lnames[i].unit); + else + count += sprintf(&bufptr[count], "\n"); + } + + return count; +} + /************************************************************************/ /* Here the fs part begins */ /************************************************************************/ @@ -349,18 +424,21 @@ struct proc_mounts { static int mounts_open(struct inode *inode, struct file *file) { struct task_struct *task = get_proc_task(inode); + struct nsproxy *nsp; struct mnt_namespace *ns = NULL; struct proc_mounts *p; int ret = -EINVAL; if (task) { - task_lock(task); - if (task->nsproxy) { - ns = task->nsproxy->mnt_ns; + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + ns = nsp->mnt_ns; if (ns) get_mnt_ns(ns); } - task_unlock(task); + rcu_read_unlock(); + put_task_struct(task); } @@ -423,16 +501,20 @@ static int mountstats_open(struct inode *inode, struct file *file) if (!ret) { struct seq_file *m = file->private_data; + struct nsproxy *nsp; struct mnt_namespace *mnt_ns = NULL; struct task_struct *task = get_proc_task(inode); if (task) { - task_lock(task); - if (task->nsproxy) - mnt_ns = task->nsproxy->mnt_ns; - if (mnt_ns) - get_mnt_ns(mnt_ns); - task_unlock(task); + rcu_read_lock(); + nsp = task_nsproxy(task); + if (nsp) { + mnt_ns = nsp->mnt_ns; + if (mnt_ns) + get_mnt_ns(mnt_ns); + } + rcu_read_unlock(); + put_task_struct(task); } @@ -1437,7 +1519,7 @@ static int proc_readfd_common(struct file * filp, void * dirent, struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *p = get_proc_task(inode); - unsigned int fd, tid, ino; + unsigned int fd, ino; int retval; struct files_struct * files; struct fdtable *fdt; @@ -1446,7 +1528,6 @@ static int proc_readfd_common(struct file * filp, void * dirent, if (!p) goto out_no_task; retval = 0; - tid = p->pid; fd = filp->f_pos; switch (fd) { @@ -1681,7 +1762,6 @@ static int proc_pident_readdir(struct file *filp, const struct pid_entry *ents, unsigned int nents) { int i; - int pid; struct dentry *dentry = filp->f_path.dentry; struct inode *inode = dentry->d_inode; struct task_struct *task = get_proc_task(inode); @@ -1694,7 +1774,6 @@ static int proc_pident_readdir(struct file *filp, goto out_no_task; ret = 0; - pid = task->pid; i = filp->f_pos; switch (i) { case 0: @@ -1928,14 +2007,14 @@ static int proc_self_readlink(struct dentry *dentry, char __user *buffer, int buflen) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", task_tgid_vnr(current)); return vfs_readlink(dentry,buffer,buflen,tmp); } static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) { char tmp[PROC_NUMBUF]; - sprintf(tmp, "%d", current->tgid); + sprintf(tmp, "%d", task_tgid_vnr(current)); return ERR_PTR(vfs_follow_link(nd,tmp)); } @@ -2101,6 +2180,7 @@ static const struct pid_entry tgid_base_stuff[] = { REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), + INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif @@ -2130,9 +2210,12 @@ static const struct pid_entry tgid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, cgroup), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL @@ -2193,27 +2276,27 @@ static const struct inode_operations proc_tgid_base_inode_operations = { * that no dcache entries will exist at process exit time it * just makes it very unlikely that any will persist. */ -void proc_flush_task(struct task_struct *task) +static void proc_flush_task_mnt(struct vfsmount *mnt, pid_t pid, pid_t tgid) { struct dentry *dentry, *leader, *dir; char buf[PROC_NUMBUF]; struct qstr name; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->pid); - dentry = d_hash_and_lookup(proc_mnt->mnt_root, &name); + name.len = snprintf(buf, sizeof(buf), "%d", pid); + dentry = d_hash_and_lookup(mnt->mnt_root, &name); if (dentry) { shrink_dcache_parent(dentry); d_drop(dentry); dput(dentry); } - if (thread_group_leader(task)) + if (tgid == 0) goto out; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->tgid); - leader = d_hash_and_lookup(proc_mnt->mnt_root, &name); + name.len = snprintf(buf, sizeof(buf), "%d", tgid); + leader = d_hash_and_lookup(mnt->mnt_root, &name); if (!leader) goto out; @@ -2224,7 +2307,7 @@ void proc_flush_task(struct task_struct *task) goto out_put_leader; name.name = buf; - name.len = snprintf(buf, sizeof(buf), "%d", task->pid); + name.len = snprintf(buf, sizeof(buf), "%d", pid); dentry = d_hash_and_lookup(dir, &name); if (dentry) { shrink_dcache_parent(dentry); @@ -2239,6 +2322,36 @@ out: return; } +/* + * when flushing dentries from proc one need to flush them from global + * proc (proc_mnt) and from all the namespaces' procs this task was seen + * in. this call is supposed to make all this job. + */ + +void proc_flush_task(struct task_struct *task) +{ + int i, leader; + struct pid *pid, *tgid; + struct upid *upid; + + leader = thread_group_leader(task); + proc_flush_task_mnt(proc_mnt, task->pid, leader ? task->tgid : 0); + pid = task_pid(task); + if (pid->level == 0) + return; + + tgid = task_tgid(task); + for (i = 1; i <= pid->level; i++) { + upid = &pid->numbers[i]; + proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, + leader ? 0 : tgid->numbers[i].nr); + } + + upid = &pid->numbers[pid->level]; + if (upid->nr == 1) + pid_ns_release_proc(upid->ns); +} + static struct dentry *proc_pid_instantiate(struct inode *dir, struct dentry * dentry, struct task_struct *task, const void *ptr) @@ -2274,6 +2387,7 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct struct dentry *result = ERR_PTR(-ENOENT); struct task_struct *task; unsigned tgid; + struct pid_namespace *ns; result = proc_base_lookup(dir, dentry); if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) @@ -2283,8 +2397,9 @@ struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, struct if (tgid == ~0U) goto out; + ns = dentry->d_sb->s_fs_info; rcu_read_lock(); - task = find_task_by_pid(tgid); + task = find_task_by_pid_ns(tgid, ns); if (task) get_task_struct(task); rcu_read_unlock(); @@ -2301,7 +2416,8 @@ out: * Find the first task with tgid >= tgid * */ -static struct task_struct *next_tgid(unsigned int tgid) +static struct task_struct *next_tgid(unsigned int tgid, + struct pid_namespace *ns) { struct task_struct *task; struct pid *pid; @@ -2309,9 +2425,9 @@ static struct task_struct *next_tgid(unsigned int tgid) rcu_read_lock(); retry: task = NULL; - pid = find_ge_pid(tgid); + pid = find_ge_pid(tgid, ns); if (pid) { - tgid = pid->nr + 1; + tgid = pid_nr_ns(pid, ns) + 1; task = pid_task(pid, PIDTYPE_PID); /* What we to know is if the pid we have find is the * pid of a thread_group_leader. Testing for task @@ -2351,6 +2467,7 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) struct task_struct *reaper = get_proc_task(filp->f_path.dentry->d_inode); struct task_struct *task; int tgid; + struct pid_namespace *ns; if (!reaper) goto out_no_task; @@ -2361,11 +2478,12 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) goto out; } + ns = filp->f_dentry->d_sb->s_fs_info; tgid = filp->f_pos - TGID_OFFSET; - for (task = next_tgid(tgid); + for (task = next_tgid(tgid, ns); task; - put_task_struct(task), task = next_tgid(tgid + 1)) { - tgid = task->pid; + put_task_struct(task), task = next_tgid(tgid + 1, ns)) { + tgid = task_pid_nr_ns(task, ns); filp->f_pos = tgid + TGID_OFFSET; if (proc_pid_fill_cache(filp, dirent, filldir, task, tgid) < 0) { put_task_struct(task); @@ -2388,6 +2506,7 @@ static const struct pid_entry tid_base_stuff[] = { REG("environ", S_IRUSR, environ), INF("auxv", S_IRUSR, pid_auxv), INF("status", S_IRUGO, pid_status), + INF("limits", S_IRUSR, pid_limits), #ifdef CONFIG_SCHED_DEBUG REG("sched", S_IRUGO|S_IWUSR, pid_sched), #endif @@ -2416,9 +2535,12 @@ static const struct pid_entry tid_base_stuff[] = { #ifdef CONFIG_SCHEDSTATS INF("schedstat", S_IRUGO, pid_schedstat), #endif -#ifdef CONFIG_CPUSETS +#ifdef CONFIG_PROC_PID_CPUSET REG("cpuset", S_IRUGO, cpuset), #endif +#ifdef CONFIG_CGROUPS + REG("cgroup", S_IRUGO, cgroup), +#endif INF("oom_score", S_IRUGO, oom_score), REG("oom_adj", S_IRUGO|S_IWUSR, oom_adjust), #ifdef CONFIG_AUDITSYSCALL @@ -2486,6 +2608,7 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry struct task_struct *task; struct task_struct *leader = get_proc_task(dir); unsigned tid; + struct pid_namespace *ns; if (!leader) goto out_no_task; @@ -2494,14 +2617,15 @@ static struct dentry *proc_task_lookup(struct inode *dir, struct dentry * dentry if (tid == ~0U) goto out; + ns = dentry->d_sb->s_fs_info; rcu_read_lock(); - task = find_task_by_pid(tid); + task = find_task_by_pid_ns(tid, ns); if (task) get_task_struct(task); rcu_read_unlock(); if (!task) goto out; - if (leader->tgid != task->tgid) + if (!same_thread_group(leader, task)) goto out_drop_task; result = proc_task_instantiate(dir, dentry, task, NULL); @@ -2526,14 +2650,14 @@ out_no_task: * threads past it. */ static struct task_struct *first_tid(struct task_struct *leader, - int tid, int nr) + int tid, int nr, struct pid_namespace *ns) { struct task_struct *pos; rcu_read_lock(); /* Attempt to start with the pid of a thread */ if (tid && (nr > 0)) { - pos = find_task_by_pid(tid); + pos = find_task_by_pid_ns(tid, ns); if (pos && (pos->group_leader == leader)) goto found; } @@ -2602,6 +2726,7 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi ino_t ino; int tid; unsigned long pos = filp->f_pos; /* avoiding "long long" filp->f_pos */ + struct pid_namespace *ns; task = get_proc_task(inode); if (!task) @@ -2635,12 +2760,13 @@ static int proc_task_readdir(struct file * filp, void * dirent, filldir_t filldi /* f_version caches the tgid value that the last readdir call couldn't * return. lseek aka telldir automagically resets f_version to 0. */ + ns = filp->f_dentry->d_sb->s_fs_info; tid = (int)filp->f_version; filp->f_version = 0; - for (task = first_tid(leader, tid, pos - 2); + for (task = first_tid(leader, tid, pos - 2, ns); task; task = next_tid(task), pos++) { - tid = task->pid; + tid = task_pid_nr_ns(task, ns); if (proc_task_fill_cache(filp, dirent, filldir, task, tid) < 0) { /* returning this tgid failed, save it as the first * pid for the next readir call */ diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 99ca00485fc..abe6a3f0436 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -448,7 +448,7 @@ out_mod: return NULL; } -int proc_fill_super(struct super_block *s, void *data, int silent) +int proc_fill_super(struct super_block *s) { struct inode * root_inode; diff --git a/fs/proc/proc_misc.c b/fs/proc/proc_misc.c index d6dc72c78bc..e0d064e9764 100644 --- a/fs/proc/proc_misc.c +++ b/fs/proc/proc_misc.c @@ -91,7 +91,8 @@ static int loadavg_read_proc(char *page, char **start, off_t off, LOAD_INT(a), LOAD_FRAC(a), LOAD_INT(b), LOAD_FRAC(b), LOAD_INT(c), LOAD_FRAC(c), - nr_running(), nr_threads, current->nsproxy->pid_ns->last_pid); + nr_running(), nr_threads, + task_active_pid_ns(current)->last_pid); return proc_calc_metrics(page, start, off, count, eof, len); } diff --git a/fs/proc/root.c b/fs/proc/root.c index cf3046638b0..ec9cb3b6c93 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -18,32 +18,90 @@ #include <linux/bitops.h> #include <linux/smp_lock.h> #include <linux/mount.h> +#include <linux/pid_namespace.h> #include "internal.h" struct proc_dir_entry *proc_bus, *proc_root_fs, *proc_root_driver; +static int proc_test_super(struct super_block *sb, void *data) +{ + return sb->s_fs_info == data; +} + +static int proc_set_super(struct super_block *sb, void *data) +{ + struct pid_namespace *ns; + + ns = (struct pid_namespace *)data; + sb->s_fs_info = get_pid_ns(ns); + return set_anon_super(sb, NULL); +} + static int proc_get_sb(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, struct vfsmount *mnt) { + int err; + struct super_block *sb; + struct pid_namespace *ns; + struct proc_inode *ei; + if (proc_mnt) { /* Seed the root directory with a pid so it doesn't need * to be special in base.c. I would do this earlier but * the only task alive when /proc is mounted the first time * is the init_task and it doesn't have any pids. */ - struct proc_inode *ei; ei = PROC_I(proc_mnt->mnt_sb->s_root->d_inode); if (!ei->pid) ei->pid = find_get_pid(1); } - return get_sb_single(fs_type, flags, data, proc_fill_super, mnt); + + if (flags & MS_KERNMOUNT) + ns = (struct pid_namespace *)data; + else + ns = current->nsproxy->pid_ns; + + sb = sget(fs_type, proc_test_super, proc_set_super, ns); + if (IS_ERR(sb)) + return PTR_ERR(sb); + + if (!sb->s_root) { + sb->s_flags = flags; + err = proc_fill_super(sb); + if (err) { + up_write(&sb->s_umount); + deactivate_super(sb); + return err; + } + + ei = PROC_I(sb->s_root->d_inode); + if (!ei->pid) { + rcu_read_lock(); + ei->pid = get_pid(find_pid_ns(1, ns)); + rcu_read_unlock(); + } + + sb->s_flags |= MS_ACTIVE; + ns->proc_mnt = mnt; + } + + return simple_set_mnt(mnt, sb); +} + +static void proc_kill_sb(struct super_block *sb) +{ + struct pid_namespace *ns; + + ns = (struct pid_namespace *)sb->s_fs_info; + kill_anon_super(sb); + put_pid_ns(ns); } static struct file_system_type proc_fs_type = { .name = "proc", .get_sb = proc_get_sb, - .kill_sb = kill_anon_super, + .kill_sb = proc_kill_sb, }; void __init proc_root_init(void) @@ -54,12 +112,13 @@ void __init proc_root_init(void) err = register_filesystem(&proc_fs_type); if (err) return; - proc_mnt = kern_mount(&proc_fs_type); + proc_mnt = kern_mount_data(&proc_fs_type, &init_pid_ns); err = PTR_ERR(proc_mnt); if (IS_ERR(proc_mnt)) { unregister_filesystem(&proc_fs_type); return; } + proc_misc_init(); proc_net_init(); @@ -153,6 +212,22 @@ struct proc_dir_entry proc_root = { .parent = &proc_root, }; +int pid_ns_prepare_proc(struct pid_namespace *ns) +{ + struct vfsmount *mnt; + + mnt = kern_mount_data(&proc_fs_type, ns); + if (IS_ERR(mnt)) + return PTR_ERR(mnt); + + return 0; +} + +void pid_ns_release_proc(struct pid_namespace *ns) +{ + mntput(ns->proc_mnt); +} + EXPORT_SYMBOL(proc_symlink); EXPORT_SYMBOL(proc_mkdir); EXPORT_SYMBOL(create_proc_entry); diff --git a/fs/reiserfs/bitmap.c b/fs/reiserfs/bitmap.c index 2a5dd34649b..16b331dd991 100644 --- a/fs/reiserfs/bitmap.c +++ b/fs/reiserfs/bitmap.c @@ -47,7 +47,9 @@ test_bit(_ALLOC_ ## optname , &SB_ALLOC_OPTS(s)) static inline void get_bit_address(struct super_block *s, - b_blocknr_t block, int *bmap_nr, int *offset) + b_blocknr_t block, + unsigned int *bmap_nr, + unsigned int *offset) { /* It is in the bitmap block number equal to the block * number divided by the number of bits in a block. */ @@ -56,10 +58,10 @@ static inline void get_bit_address(struct super_block *s, *offset = block & ((s->s_blocksize << 3) - 1); } -#ifdef CONFIG_REISERFS_CHECK int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) { - int bmap, offset; + unsigned int bmap, offset; + unsigned int bmap_count = reiserfs_bmap_count(s); if (block == 0 || block >= SB_BLOCK_COUNT(s)) { reiserfs_warning(s, @@ -75,25 +77,26 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) if (unlikely(test_bit(REISERFS_OLD_FORMAT, &(REISERFS_SB(s)->s_properties)))) { b_blocknr_t bmap1 = REISERFS_SB(s)->s_sbh->b_blocknr + 1; - if (block >= bmap1 && block <= bmap1 + SB_BMAP_NR(s)) { + if (block >= bmap1 && + block <= bmap1 + bmap_count) { reiserfs_warning(s, "vs: 4019: is_reusable: " "bitmap block %lu(%u) can't be freed or reused", - block, SB_BMAP_NR(s)); + block, bmap_count); return 0; } } else { if (offset == 0) { reiserfs_warning(s, "vs: 4020: is_reusable: " "bitmap block %lu(%u) can't be freed or reused", - block, SB_BMAP_NR(s)); + block, bmap_count); return 0; } } - if (bmap >= SB_BMAP_NR(s)) { + if (bmap >= bmap_count) { reiserfs_warning(s, "vs-4030: is_reusable: there is no so many bitmap blocks: " - "block=%lu, bitmap_nr=%d", block, bmap); + "block=%lu, bitmap_nr=%u", block, bmap); return 0; } @@ -106,12 +109,11 @@ int is_reusable(struct super_block *s, b_blocknr_t block, int bit_value) return 1; } -#endif /* CONFIG_REISERFS_CHECK */ /* searches in journal structures for a given block number (bmap, off). If block is found in reiserfs journal it suggests next free block candidate to test. */ -static inline int is_block_in_journal(struct super_block *s, int bmap, int - off, int *next) +static inline int is_block_in_journal(struct super_block *s, unsigned int bmap, + int off, int *next) { b_blocknr_t tmp; @@ -132,8 +134,8 @@ static inline int is_block_in_journal(struct super_block *s, int bmap, int /* it searches for a window of zero bits with given minimum and maximum lengths in one bitmap * block; */ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, - int bmap_n, int *beg, int boundary, int min, - int max, int unfm) + unsigned int bmap_n, int *beg, int boundary, + int min, int max, int unfm) { struct super_block *s = th->t_super; struct reiserfs_bitmap_info *bi = &SB_AP_BITMAP(s)[bmap_n]; @@ -143,8 +145,8 @@ static int scan_bitmap_block(struct reiserfs_transaction_handle *th, BUG_ON(!th->t_trans_id); - RFALSE(bmap_n >= SB_BMAP_NR(s), "Bitmap %d is out of range (0..%d)", - bmap_n, SB_BMAP_NR(s) - 1); + RFALSE(bmap_n >= reiserfs_bmap_count(s), "Bitmap %u is out of " + "range (0..%u)", bmap_n, reiserfs_bmap_count(s) - 1); PROC_INFO_INC(s, scan_bitmap.bmap); /* this is unclear and lacks comments, explain how journal bitmaps work here for the reader. Convey a sense of the design here. What @@ -249,12 +251,12 @@ static int bmap_hash_id(struct super_block *s, u32 id) } else { hash_in = (char *)(&id); hash = keyed_hash(hash_in, 4); - bm = hash % SB_BMAP_NR(s); + bm = hash % reiserfs_bmap_count(s); if (!bm) bm = 1; } /* this can only be true when SB_BMAP_NR = 1 */ - if (bm >= SB_BMAP_NR(s)) + if (bm >= reiserfs_bmap_count(s)) bm = 0; return bm; } @@ -273,7 +275,7 @@ static inline int block_group_used(struct super_block *s, u32 id) * to make a better decision. This favors long-term performace gain * with a better on-disk layout vs. a short term gain of skipping the * read and potentially having a bad placement. */ - if (info->first_zero_hint == 0) { + if (info->free_count == UINT_MAX) { struct buffer_head *bh = reiserfs_read_bitmap_block(s, bm); brelse(bh); } @@ -309,16 +311,16 @@ __le32 reiserfs_choose_packing(struct inode * dir) * bitmap and place new blocks there. Returns number of allocated blocks. */ static int scan_bitmap(struct reiserfs_transaction_handle *th, b_blocknr_t * start, b_blocknr_t finish, - int min, int max, int unfm, unsigned long file_block) + int min, int max, int unfm, sector_t file_block) { int nr_allocated = 0; struct super_block *s = th->t_super; /* find every bm and bmap and bmap_nr in this file, and change them all to bitmap_blocknr * - Hans, it is not a block number - Zam. */ - int bm, off; - int end_bm, end_off; - int off_max = s->s_blocksize << 3; + unsigned int bm, off; + unsigned int end_bm, end_off; + unsigned int off_max = s->s_blocksize << 3; BUG_ON(!th->t_trans_id); @@ -328,10 +330,10 @@ static int scan_bitmap(struct reiserfs_transaction_handle *th, get_bit_address(s, *start, &bm, &off); get_bit_address(s, finish, &end_bm, &end_off); - if (bm > SB_BMAP_NR(s)) + if (bm > reiserfs_bmap_count(s)) return 0; - if (end_bm > SB_BMAP_NR(s)) - end_bm = SB_BMAP_NR(s); + if (end_bm > reiserfs_bmap_count(s)) + end_bm = reiserfs_bmap_count(s); /* When the bitmap is more than 10% free, anyone can allocate. * When it's less than 10% free, only files that already use the @@ -385,7 +387,7 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, struct reiserfs_super_block *rs; struct buffer_head *sbh, *bmbh; struct reiserfs_bitmap_info *apbi; - int nr, offset; + unsigned int nr, offset; BUG_ON(!th->t_trans_id); @@ -397,10 +399,12 @@ static void _reiserfs_free_block(struct reiserfs_transaction_handle *th, get_bit_address(s, block, &nr, &offset); - if (nr >= sb_bmap_nr(rs)) { + if (nr >= reiserfs_bmap_count(s)) { reiserfs_warning(s, "vs-4075: reiserfs_free_block: " - "block %lu is out of range on %s", - block, reiserfs_bdevname(s)); + "block %lu is out of range on %s " + "(nr=%u,max=%u)", block, + reiserfs_bdevname(s), nr, + reiserfs_bmap_count(s)); return; } @@ -434,12 +438,19 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th, int for_unformatted) { struct super_block *s = th->t_super; - BUG_ON(!th->t_trans_id); RFALSE(!s, "vs-4061: trying to free block on nonexistent device"); - RFALSE(is_reusable(s, block, 1) == 0, - "vs-4071: can not free such block"); + if (!is_reusable(s, block, 1)) + return; + + if (block > sb_block_count(REISERFS_SB(s)->s_rs)) { + reiserfs_panic(th->t_super, "bitmap-4072", + "Trying to free block outside file system " + "boundaries (%lu > %lu)", + block, sb_block_count(REISERFS_SB(s)->s_rs)); + return; + } /* mark it before we clear it, just in case */ journal_mark_freed(th, s, block); _reiserfs_free_block(th, inode, block, for_unformatted); @@ -449,11 +460,11 @@ void reiserfs_free_block(struct reiserfs_transaction_handle *th, static void reiserfs_free_prealloc_block(struct reiserfs_transaction_handle *th, struct inode *inode, b_blocknr_t block) { + BUG_ON(!th->t_trans_id); RFALSE(!th->t_super, "vs-4060: trying to free block on nonexistent device"); - RFALSE(is_reusable(th->t_super, block, 1) == 0, - "vs-4070: can not free such block"); - BUG_ON(!th->t_trans_id); + if (!is_reusable(th->t_super, block, 1)) + return; _reiserfs_free_block(th, inode, block, 1); } @@ -1207,27 +1218,22 @@ void reiserfs_cache_bitmap_metadata(struct super_block *sb, { unsigned long *cur = (unsigned long *)(bh->b_data + bh->b_size); - info->first_zero_hint = 1 << (sb->s_blocksize_bits + 3); + /* The first bit must ALWAYS be 1 */ + BUG_ON(!reiserfs_test_le_bit(0, (unsigned long *)bh->b_data)); + + info->free_count = 0; while (--cur >= (unsigned long *)bh->b_data) { - int base = ((char *)cur - bh->b_data) << 3; + int i; /* 0 and ~0 are special, we can optimize for them */ - if (*cur == 0) { - info->first_zero_hint = base; + if (*cur == 0) info->free_count += BITS_PER_LONG; - } else if (*cur != ~0L) { /* A mix, investigate */ - int b; - for (b = BITS_PER_LONG - 1; b >= 0; b--) { - if (!reiserfs_test_le_bit(b, cur)) { - info->first_zero_hint = base + b; + else if (*cur != ~0L) /* A mix, investigate */ + for (i = BITS_PER_LONG - 1; i >= 0; i--) + if (!reiserfs_test_le_bit(i, cur)) info->free_count++; - } - } - } } - /* The first bit must ALWAYS be 1 */ - BUG_ON(info->first_zero_hint == 0); } struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, @@ -1257,7 +1263,7 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, BUG_ON(!buffer_uptodate(bh)); BUG_ON(atomic_read(&bh->b_count) == 0); - if (info->first_zero_hint == 0) + if (info->free_count == UINT_MAX) reiserfs_cache_bitmap_metadata(sb, bh, info); } @@ -1267,12 +1273,13 @@ struct buffer_head *reiserfs_read_bitmap_block(struct super_block *sb, int reiserfs_init_bitmap_cache(struct super_block *sb) { struct reiserfs_bitmap_info *bitmap; + unsigned int bmap_nr = reiserfs_bmap_count(sb); - bitmap = vmalloc(sizeof (*bitmap) * SB_BMAP_NR(sb)); + bitmap = vmalloc(sizeof(*bitmap) * bmap_nr); if (bitmap == NULL) return -ENOMEM; - memset(bitmap, 0, sizeof (*bitmap) * SB_BMAP_NR(sb)); + memset(bitmap, 0xff, sizeof(*bitmap) * bmap_nr); SB_AP_BITMAP(sb) = bitmap; diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0804289d355..a991af96f3f 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -199,7 +199,7 @@ static inline void set_block_dev_mapped(struct buffer_head *bh, // files which were created in the earlier version can not be longer, // than 2 gb // -static int file_capable(struct inode *inode, long block) +static int file_capable(struct inode *inode, sector_t block) { if (get_inode_item_key_version(inode) != KEY_FORMAT_3_5 || // it is new file. block < (1 << (31 - inode->i_sb->s_blocksize_bits))) // old file, but 'block' is inside of 2gb @@ -242,7 +242,7 @@ static int restart_transaction(struct reiserfs_transaction_handle *th, // Please improve the english/clarity in the comment above, as it is // hard to understand. -static int _get_block_create_0(struct inode *inode, long block, +static int _get_block_create_0(struct inode *inode, sector_t block, struct buffer_head *bh_result, int args) { INITIALIZE_PATH(path); @@ -250,7 +250,7 @@ static int _get_block_create_0(struct inode *inode, long block, struct buffer_head *bh; struct item_head *ih, tmp_ih; int fs_gen; - int blocknr; + b_blocknr_t blocknr; char *p = NULL; int chars; int ret; @@ -569,7 +569,7 @@ static int convert_tail_for_hole(struct inode *inode, } static inline int _allocate_block(struct reiserfs_transaction_handle *th, - long block, + sector_t block, struct inode *inode, b_blocknr_t * allocated_block_nr, struct treepath *path, int flags) diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index 4cad9e75ef5..bb05a3e51b9 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -219,11 +219,12 @@ static void allocate_bitmap_nodes(struct super_block *p_s_sb) } } -static int set_bit_in_list_bitmap(struct super_block *p_s_sb, int block, +static int set_bit_in_list_bitmap(struct super_block *p_s_sb, + b_blocknr_t block, struct reiserfs_list_bitmap *jb) { - int bmap_nr = block / (p_s_sb->s_blocksize << 3); - int bit_nr = block % (p_s_sb->s_blocksize << 3); + unsigned int bmap_nr = block / (p_s_sb->s_blocksize << 3); + unsigned int bit_nr = block % (p_s_sb->s_blocksize << 3); if (!jb->bitmaps[bmap_nr]) { jb->bitmaps[bmap_nr] = get_bitmap_node(p_s_sb); @@ -239,7 +240,7 @@ static void cleanup_bitmap_list(struct super_block *p_s_sb, if (jb->bitmaps == NULL) return; - for (i = 0; i < SB_BMAP_NR(p_s_sb); i++) { + for (i = 0; i < reiserfs_bmap_count(p_s_sb); i++) { if (jb->bitmaps[i]) { free_bitmap_node(p_s_sb, jb->bitmaps[i]); jb->bitmaps[i] = NULL; @@ -289,7 +290,7 @@ static int free_bitmap_nodes(struct super_block *p_s_sb) */ int reiserfs_allocate_list_bitmaps(struct super_block *p_s_sb, struct reiserfs_list_bitmap *jb_array, - int bmap_nr) + unsigned int bmap_nr) { int i; int failed = 0; @@ -483,7 +484,7 @@ static inline struct reiserfs_journal_cnode *get_journal_hash_dev(struct ** */ int reiserfs_in_journal(struct super_block *p_s_sb, - int bmap_nr, int bit_nr, int search_all, + unsigned int bmap_nr, int bit_nr, int search_all, b_blocknr_t * next_zero_bit) { struct reiserfs_journal *journal = SB_JOURNAL(p_s_sb); @@ -1013,7 +1014,7 @@ static int flush_commit_list(struct super_block *s, struct reiserfs_journal_list *jl, int flushall) { int i; - int bn; + b_blocknr_t bn; struct buffer_head *tbh = NULL; unsigned long trans_id = jl->j_trans_id; struct reiserfs_journal *journal = SB_JOURNAL(s); @@ -2307,8 +2308,9 @@ static int journal_read_transaction(struct super_block *p_s_sb, Right now it is only used from journal code. But later we might use it from other places. Note: Do not use journal_getblk/sb_getblk functions here! */ -static struct buffer_head *reiserfs_breada(struct block_device *dev, int block, - int bufsize, unsigned int max_block) +static struct buffer_head *reiserfs_breada(struct block_device *dev, + b_blocknr_t block, int bufsize, + b_blocknr_t max_block) { struct buffer_head *bhlist[BUFNR]; unsigned int blocks = BUFNR; @@ -2732,7 +2734,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name, journal->j_persistent_trans = 0; if (reiserfs_allocate_list_bitmaps(p_s_sb, journal->j_list_bitmap, - SB_BMAP_NR(p_s_sb))) + reiserfs_bmap_count(p_s_sb))) goto free_and_return; allocate_bitmap_nodes(p_s_sb); @@ -2740,7 +2742,7 @@ int journal_init(struct super_block *p_s_sb, const char *j_dev_name, SB_JOURNAL_1st_RESERVED_BLOCK(p_s_sb) = (old_format ? REISERFS_OLD_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + - SB_BMAP_NR(p_s_sb) + + reiserfs_bmap_count(p_s_sb) + 1 : REISERFS_DISK_OFFSET_IN_BYTES / p_s_sb->s_blocksize + 2); diff --git a/fs/reiserfs/prints.c b/fs/reiserfs/prints.c index bc808a91eea..5e7388b32d0 100644 --- a/fs/reiserfs/prints.c +++ b/fs/reiserfs/prints.c @@ -356,13 +356,11 @@ extern struct tree_balance *cur_tb; void reiserfs_panic(struct super_block *sb, const char *fmt, ...) { do_reiserfs_warning(fmt); - printk(KERN_EMERG "REISERFS: panic (device %s): %s\n", - reiserfs_bdevname(sb), error_buf); - BUG(); - /* this is not actually called, but makes reiserfs_panic() "noreturn" */ - panic("REISERFS: panic (device %s): %s\n", - reiserfs_bdevname(sb), error_buf); + dump_stack(); + + panic(KERN_EMERG "REISERFS: panic (device %s): %s\n", + reiserfs_bdevname(sb), error_buf); } void reiserfs_abort(struct super_block *sb, int errno, const char *fmt, ...) diff --git a/fs/reiserfs/resize.c b/fs/reiserfs/resize.c index 976cc7887a0..f71c3948ede 100644 --- a/fs/reiserfs/resize.c +++ b/fs/reiserfs/resize.c @@ -61,7 +61,8 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) } /* count used bits in last bitmap block */ - block_r = SB_BLOCK_COUNT(s) - (SB_BMAP_NR(s) - 1) * s->s_blocksize * 8; + block_r = SB_BLOCK_COUNT(s) - + (reiserfs_bmap_count(s) - 1) * s->s_blocksize * 8; /* count bitmap blocks in new fs */ bmap_nr_new = block_count_new / (s->s_blocksize * 8); @@ -73,7 +74,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) /* save old values */ block_count = SB_BLOCK_COUNT(s); - bmap_nr = SB_BMAP_NR(s); + bmap_nr = reiserfs_bmap_count(s); /* resizing of reiserfs bitmaps (journal and real), if needed */ if (bmap_nr_new > bmap_nr) { @@ -119,7 +120,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) return -ENOMEM; } memset(bitmap, 0, - sizeof(struct reiserfs_bitmap_info) * SB_BMAP_NR(s)); + sizeof(struct reiserfs_bitmap_info) * bmap_nr_new); for (i = 0; i < bmap_nr; i++) bitmap[i] = old_bitmap[i]; @@ -143,7 +144,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) mark_buffer_dirty(bh); sync_dirty_buffer(bh); // update bitmap_info stuff - bitmap[i].first_zero_hint = 1; bitmap[i].free_count = sb_blocksize(sb) * 8 - 1; brelse(bh); } @@ -173,8 +173,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) for (i = block_r; i < s->s_blocksize * 8; i++) reiserfs_test_and_clear_le_bit(i, bh->b_data); info->free_count += s->s_blocksize * 8 - block_r; - if (!info->first_zero_hint) - info->first_zero_hint = block_r; journal_mark_dirty(&th, s, bh); brelse(bh); @@ -196,9 +194,6 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) brelse(bh); info->free_count -= s->s_blocksize * 8 - block_r_new; - /* Extreme case where last bitmap is the only valid block in itself. */ - if (!info->free_count) - info->first_zero_hint = 0; /* update super */ reiserfs_prepare_for_journal(s, SB_BUFFER_WITH_SB(s), 1); free_blocks = SB_FREE_BLOCKS(s); @@ -206,7 +201,7 @@ int reiserfs_resize(struct super_block *s, unsigned long block_count_new) free_blocks + (block_count_new - block_count - (bmap_nr_new - bmap_nr))); PUT_SB_BLOCK_COUNT(s, block_count_new); - PUT_SB_BMAP_NR(s, bmap_nr_new); + PUT_SB_BMAP_NR(s, bmap_would_wrap(bmap_nr_new) ? : bmap_nr_new); s->s_dirt = 1; journal_mark_dirty(&th, s, SB_BUFFER_WITH_SB(s)); diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index 981027d1187..ca41567d789 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -559,7 +559,7 @@ static int is_tree_node(struct buffer_head *bh, int level) /* The function is NOT SCHEDULE-SAFE! */ static void search_by_key_reada(struct super_block *s, struct buffer_head **bh, - unsigned long *b, int num) + b_blocknr_t *b, int num) { int i, j; @@ -611,7 +611,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* DISK_LEAF_NODE_LEVEL */ ) { - int n_block_number; + b_blocknr_t n_block_number; int expected_level; struct buffer_head *p_s_bh; struct path_element *p_s_last_element; @@ -619,7 +619,7 @@ int search_by_key(struct super_block *p_s_sb, const struct cpu_key *p_s_key, /* int right_neighbor_of_leaf_node; int fs_gen; struct buffer_head *reada_bh[SEARCH_BY_KEY_READA]; - unsigned long reada_blocks[SEARCH_BY_KEY_READA]; + b_blocknr_t reada_blocks[SEARCH_BY_KEY_READA]; int reada_count = 0; #ifdef CONFIG_REISERFS_CHECK diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index b82897ae090..57adfe90d5a 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1725,6 +1725,21 @@ static int reiserfs_fill_super(struct super_block *s, void *data, int silent) set_sb_umount_state(rs, REISERFS_ERROR_FS); set_sb_fs_state(rs, 0); + /* Clear out s_bmap_nr if it would wrap. We can handle this + * case, but older revisions can't. This will cause the + * file system to fail mount on those older implementations, + * avoiding corruption. -jeffm */ + if (bmap_would_wrap(reiserfs_bmap_count(s)) && + sb_bmap_nr(rs) != 0) { + reiserfs_warning(s, "super-2030: This file system " + "claims to use %u bitmap blocks in " + "its super block, but requires %u. " + "Clearing to zero.", sb_bmap_nr(rs), + reiserfs_bmap_count(s)); + + set_sb_bmap_nr(rs, 0); + } + if (old_format_only(s)) { /* filesystem of format 3.5 either with standard or non-standard journal */ diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c index fab4b9b2664..1597f6b649e 100644 --- a/fs/reiserfs/xattr.c +++ b/fs/reiserfs/xattr.c @@ -484,7 +484,7 @@ reiserfs_xattr_set(struct inode *inode, const char *name, const void *buffer, /* Resize it so we're ok to write there */ newattrs.ia_size = buffer_size; newattrs.ia_valid = ATTR_SIZE | ATTR_CTIME; - mutex_lock(&xinode->i_mutex); + mutex_lock_nested(&xinode->i_mutex, I_MUTEX_XATTR); err = notify_change(fp->f_path.dentry, &newattrs); if (err) goto out_filp; @@ -1223,7 +1223,8 @@ int reiserfs_xattr_init(struct super_block *s, int mount_flags) if (!IS_ERR(dentry)) { if (!(mount_flags & MS_RDONLY) && !dentry->d_inode) { struct inode *inode = dentry->d_parent->d_inode; - mutex_lock(&inode->i_mutex); + mutex_lock_nested(&inode->i_mutex, + I_MUTEX_XATTR); err = inode->i_op->mkdir(inode, dentry, 0700); mutex_unlock(&inode->i_mutex); if (err) { diff --git a/fs/select.c b/fs/select.c index 7dede89658f..47f47925aea 100644 --- a/fs/select.c +++ b/fs/select.c @@ -177,11 +177,6 @@ get_max: return max; } -#define BIT(i) (1UL << ((i)&(__NFDBITS-1))) -#define MEM(i,m) ((m)+(unsigned)(i)/__NFDBITS) -#define ISSET(i,m) (((i)&*(m)) != 0) -#define SET(i,m) (*(m) |= (i)) - #define POLLIN_SET (POLLRDNORM | POLLRDBAND | POLLIN | POLLHUP | POLLERR) #define POLLOUT_SET (POLLWRBAND | POLLWRNORM | POLLOUT | POLLERR) #define POLLEX_SET (POLLPRI) diff --git a/fs/super.c b/fs/super.c index 1bfcca2104b..d28fde7e1cf 100644 --- a/fs/super.c +++ b/fs/super.c @@ -40,10 +40,6 @@ #include <asm/uaccess.h> -void get_filesystem(struct file_system_type *fs); -void put_filesystem(struct file_system_type *fs); -struct file_system_type *get_fs_type(const char *name); - LIST_HEAD(super_blocks); DEFINE_SPINLOCK(sb_lock); @@ -336,21 +332,21 @@ struct super_block *sget(struct file_system_type *type, void *data) { struct super_block *s = NULL; - struct list_head *p; + struct super_block *old; int err; retry: spin_lock(&sb_lock); - if (test) list_for_each(p, &type->fs_supers) { - struct super_block *old; - old = list_entry(p, struct super_block, s_instances); - if (!test(old, data)) - continue; - if (!grab_super(old)) - goto retry; - if (s) - destroy_super(s); - return old; + if (test) { + list_for_each_entry(old, &type->fs_supers, s_instances) { + if (!test(old, data)) + continue; + if (!grab_super(old)) + goto retry; + if (s) + destroy_super(s); + return old; + } } if (!s) { spin_unlock(&sb_lock); @@ -948,9 +944,9 @@ do_kern_mount(const char *fstype, int flags, const char *name, void *data) return mnt; } -struct vfsmount *kern_mount(struct file_system_type *type) +struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) { - return vfs_kern_mount(type, 0, type->name, NULL); + return vfs_kern_mount(type, MS_KERNMOUNT, type->name, data); } -EXPORT_SYMBOL(kern_mount); +EXPORT_SYMBOL_GPL(kern_mount_data); |