diff options
Diffstat (limited to 'fs')
-rw-r--r-- | fs/attr.c | 11 | ||||
-rw-r--r-- | fs/autofs4/autofs_i.h | 8 | ||||
-rw-r--r-- | fs/autofs4/dev-ioctl.c | 4 | ||||
-rw-r--r-- | fs/autofs4/inode.c | 24 | ||||
-rw-r--r-- | fs/autofs4/waitq.c | 5 | ||||
-rw-r--r-- | fs/exec.c | 9 | ||||
-rw-r--r-- | fs/fuse/dev.c | 4 | ||||
-rw-r--r-- | fs/fuse/dir.c | 20 | ||||
-rw-r--r-- | fs/fuse/fuse_i.h | 4 | ||||
-rw-r--r-- | fs/fuse/inode.c | 23 | ||||
-rw-r--r-- | fs/hppfs/hppfs.c | 2 | ||||
-rw-r--r-- | fs/mount.h | 3 | ||||
-rw-r--r-- | fs/namespace.c | 211 | ||||
-rw-r--r-- | fs/open.c | 2 | ||||
-rw-r--r-- | fs/pnode.h | 1 | ||||
-rw-r--r-- | fs/proc/Makefile | 1 | ||||
-rw-r--r-- | fs/proc/array.c | 2 | ||||
-rw-r--r-- | fs/proc/base.c | 169 | ||||
-rw-r--r-- | fs/proc/generic.c | 26 | ||||
-rw-r--r-- | fs/proc/inode.c | 6 | ||||
-rw-r--r-- | fs/proc/internal.h | 1 | ||||
-rw-r--r-- | fs/proc/namespaces.c | 185 | ||||
-rw-r--r-- | fs/proc/root.c | 17 | ||||
-rw-r--r-- | fs/proc/self.c | 59 | ||||
-rw-r--r-- | fs/sysfs/mount.c | 1 |
25 files changed, 493 insertions, 305 deletions
diff --git a/fs/attr.c b/fs/attr.c index cce7df53b69..1449adb14ef 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -49,14 +49,15 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) /* Make sure a caller can chown. */ if ((ia_valid & ATTR_UID) && (!uid_eq(current_fsuid(), inode->i_uid) || - !uid_eq(attr->ia_uid, inode->i_uid)) && !capable(CAP_CHOWN)) + !uid_eq(attr->ia_uid, inode->i_uid)) && + !inode_capable(inode, CAP_CHOWN)) return -EPERM; /* Make sure caller can chgrp. */ if ((ia_valid & ATTR_GID) && (!uid_eq(current_fsuid(), inode->i_uid) || (!in_group_p(attr->ia_gid) && !gid_eq(attr->ia_gid, inode->i_gid))) && - !capable(CAP_CHOWN)) + !inode_capable(inode, CAP_CHOWN)) return -EPERM; /* Make sure a caller can chmod. */ @@ -65,7 +66,8 @@ int inode_change_ok(const struct inode *inode, struct iattr *attr) return -EPERM; /* Also check the setgid bit! */ if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid : - inode->i_gid) && !capable(CAP_FSETID)) + inode->i_gid) && + !inode_capable(inode, CAP_FSETID)) attr->ia_mode &= ~S_ISGID; } @@ -157,7 +159,8 @@ void setattr_copy(struct inode *inode, const struct iattr *attr) if (ia_valid & ATTR_MODE) { umode_t mode = attr->ia_mode; - if (!in_group_p(inode->i_gid) && !capable(CAP_FSETID)) + if (!in_group_p(inode->i_gid) && + !inode_capable(inode, CAP_FSETID)) mode &= ~S_ISGID; inode->i_mode = mode; } diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h index 908e1845541..b785e770795 100644 --- a/fs/autofs4/autofs_i.h +++ b/fs/autofs4/autofs_i.h @@ -74,8 +74,8 @@ struct autofs_info { unsigned long last_used; atomic_t count; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; }; #define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */ @@ -89,8 +89,8 @@ struct autofs_wait_queue { struct qstr name; u32 dev; u64 ino; - uid_t uid; - gid_t gid; + kuid_t uid; + kgid_t gid; pid_t pid; pid_t tgid; /* This is for status reporting upon return */ diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index a16214109d3..9f68a37bb2b 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -437,8 +437,8 @@ static int autofs_dev_ioctl_requester(struct file *fp, err = 0; autofs4_expire_wait(path.dentry); spin_lock(&sbi->fs_lock); - param->requester.uid = ino->uid; - param->requester.gid = ino->gid; + param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid); + param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid); spin_unlock(&sbi->fs_lock); } path_put(&path); diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c index 8a4fed8ead3..b104726e2d0 100644 --- a/fs/autofs4/inode.c +++ b/fs/autofs4/inode.c @@ -36,8 +36,8 @@ struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi) void autofs4_clean_ino(struct autofs_info *ino) { - ino->uid = 0; - ino->gid = 0; + ino->uid = GLOBAL_ROOT_UID; + ino->gid = GLOBAL_ROOT_GID; ino->last_used = jiffies; } @@ -79,10 +79,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root) return 0; seq_printf(m, ",fd=%d", sbi->pipefd); - if (root_inode->i_uid != 0) - seq_printf(m, ",uid=%u", root_inode->i_uid); - if (root_inode->i_gid != 0) - seq_printf(m, ",gid=%u", root_inode->i_gid); + if (!uid_eq(root_inode->i_uid, GLOBAL_ROOT_UID)) + seq_printf(m, ",uid=%u", + from_kuid_munged(&init_user_ns, root_inode->i_uid)); + if (!gid_eq(root_inode->i_gid, GLOBAL_ROOT_GID)) + seq_printf(m, ",gid=%u", + from_kgid_munged(&init_user_ns, root_inode->i_gid)); seq_printf(m, ",pgrp=%d", sbi->oz_pgrp); seq_printf(m, ",timeout=%lu", sbi->exp_timeout/HZ); seq_printf(m, ",minproto=%d", sbi->min_proto); @@ -126,7 +128,7 @@ static const match_table_t tokens = { {Opt_err, NULL} }; -static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, +static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid, pid_t *pgrp, unsigned int *type, int *minproto, int *maxproto) { char *p; @@ -159,12 +161,16 @@ static int parse_options(char *options, int *pipefd, uid_t *uid, gid_t *gid, case Opt_uid: if (match_int(args, &option)) return 1; - *uid = option; + *uid = make_kuid(current_user_ns(), option); + if (!uid_valid(*uid)) + return 1; break; case Opt_gid: if (match_int(args, &option)) return 1; - *gid = option; + *gid = make_kgid(current_user_ns(), option); + if (!gid_valid(*gid)) + return 1; break; case Opt_pgrp: if (match_int(args, &option)) diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index dce436e595c..03bc1d347d8 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -154,6 +154,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, case autofs_ptype_expire_direct: { struct autofs_v5_packet *packet = &pkt.v5_pkt.v5_packet; + struct user_namespace *user_ns = sbi->pipe->f_cred->user_ns; pktsz = sizeof(*packet); @@ -163,8 +164,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi, packet->name[wq->name.len] = '\0'; packet->dev = wq->dev; packet->ino = wq->ino; - packet->uid = wq->uid; - packet->gid = wq->gid; + packet->uid = from_kuid_munged(user_ns, wq->uid); + packet->gid = from_kgid_munged(user_ns, wq->gid); packet->pid = wq->pid; packet->tgid = wq->tgid; break; diff --git a/fs/exec.c b/fs/exec.c index 721a2992951..b71b08ce712 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -1266,14 +1266,13 @@ int prepare_binprm(struct linux_binprm *bprm) bprm->cred->egid = current_egid(); if (!(bprm->file->f_path.mnt->mnt_flags & MNT_NOSUID) && - !current->no_new_privs) { + !current->no_new_privs && + kuid_has_mapping(bprm->cred->user_ns, inode->i_uid) && + kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) { /* Set-uid? */ if (mode & S_ISUID) { - if (!kuid_has_mapping(bprm->cred->user_ns, inode->i_uid)) - return -EPERM; bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->euid = inode->i_uid; - } /* Set-gid? */ @@ -1283,8 +1282,6 @@ int prepare_binprm(struct linux_binprm *bprm) * executable. */ if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP)) { - if (!kgid_has_mapping(bprm->cred->user_ns, inode->i_gid)) - return -EPERM; bprm->per_clear |= PER_CLEAR_ON_SETID; bprm->cred->egid = inode->i_gid; } diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index 8c23fa7a91e..c16335315e5 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -92,8 +92,8 @@ static void __fuse_put_request(struct fuse_req *req) static void fuse_req_init_context(struct fuse_req *req) { - req->in.h.uid = current_fsuid(); - req->in.h.gid = current_fsgid(); + req->in.h.uid = from_kuid_munged(&init_user_ns, current_fsuid()); + req->in.h.gid = from_kgid_munged(&init_user_ns, current_fsgid()); req->in.h.pid = current->pid; } diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 324bc085053..b7c09f9eb40 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -818,8 +818,8 @@ static void fuse_fillattr(struct inode *inode, struct fuse_attr *attr, stat->ino = attr->ino; stat->mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); stat->nlink = attr->nlink; - stat->uid = attr->uid; - stat->gid = attr->gid; + stat->uid = make_kuid(&init_user_ns, attr->uid); + stat->gid = make_kgid(&init_user_ns, attr->gid); stat->rdev = inode->i_rdev; stat->atime.tv_sec = attr->atime; stat->atime.tv_nsec = attr->atimensec; @@ -1007,12 +1007,12 @@ int fuse_allow_task(struct fuse_conn *fc, struct task_struct *task) rcu_read_lock(); ret = 0; cred = __task_cred(task); - if (cred->euid == fc->user_id && - cred->suid == fc->user_id && - cred->uid == fc->user_id && - cred->egid == fc->group_id && - cred->sgid == fc->group_id && - cred->gid == fc->group_id) + if (uid_eq(cred->euid, fc->user_id) && + uid_eq(cred->suid, fc->user_id) && + uid_eq(cred->uid, fc->user_id) && + gid_eq(cred->egid, fc->group_id) && + gid_eq(cred->sgid, fc->group_id) && + gid_eq(cred->gid, fc->group_id)) ret = 1; rcu_read_unlock(); @@ -1306,9 +1306,9 @@ static void iattr_to_fattr(struct iattr *iattr, struct fuse_setattr_in *arg) if (ivalid & ATTR_MODE) arg->valid |= FATTR_MODE, arg->mode = iattr->ia_mode; if (ivalid & ATTR_UID) - arg->valid |= FATTR_UID, arg->uid = iattr->ia_uid; + arg->valid |= FATTR_UID, arg->uid = from_kuid(&init_user_ns, iattr->ia_uid); if (ivalid & ATTR_GID) - arg->valid |= FATTR_GID, arg->gid = iattr->ia_gid; + arg->valid |= FATTR_GID, arg->gid = from_kgid(&init_user_ns, iattr->ia_gid); if (ivalid & ATTR_SIZE) arg->valid |= FATTR_SIZE, arg->size = iattr->ia_size; if (ivalid & ATTR_ATIME) { diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h index e24dd74e306..e105a53fc72 100644 --- a/fs/fuse/fuse_i.h +++ b/fs/fuse/fuse_i.h @@ -333,10 +333,10 @@ struct fuse_conn { atomic_t count; /** The user id for this mount */ - uid_t user_id; + kuid_t user_id; /** The group id for this mount */ - gid_t group_id; + kgid_t group_id; /** The fuse mount flags for this mount */ unsigned flags; diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c index f0eda124cff..73ca6b72bea 100644 --- a/fs/fuse/inode.c +++ b/fs/fuse/inode.c @@ -60,8 +60,8 @@ MODULE_PARM_DESC(max_user_congthresh, struct fuse_mount_data { int fd; unsigned rootmode; - unsigned user_id; - unsigned group_id; + kuid_t user_id; + kgid_t group_id; unsigned fd_present:1; unsigned rootmode_present:1; unsigned user_id_present:1; @@ -164,8 +164,8 @@ void fuse_change_attributes_common(struct inode *inode, struct fuse_attr *attr, inode->i_ino = fuse_squash_ino(attr->ino); inode->i_mode = (inode->i_mode & S_IFMT) | (attr->mode & 07777); set_nlink(inode, attr->nlink); - inode->i_uid = attr->uid; - inode->i_gid = attr->gid; + inode->i_uid = make_kuid(&init_user_ns, attr->uid); + inode->i_gid = make_kgid(&init_user_ns, attr->gid); inode->i_blocks = attr->blocks; inode->i_atime.tv_sec = attr->atime; inode->i_atime.tv_nsec = attr->atimensec; @@ -492,14 +492,18 @@ static int parse_fuse_opt(char *opt, struct fuse_mount_data *d, int is_bdev) case OPT_USER_ID: if (match_int(&args[0], &value)) return 0; - d->user_id = value; + d->user_id = make_kuid(current_user_ns(), value); + if (!uid_valid(d->user_id)) + return 0; d->user_id_present = 1; break; case OPT_GROUP_ID: if (match_int(&args[0], &value)) return 0; - d->group_id = value; + d->group_id = make_kgid(current_user_ns(), value); + if (!gid_valid(d->group_id)) + return 0; d->group_id_present = 1; break; @@ -540,8 +544,8 @@ static int fuse_show_options(struct seq_file *m, struct dentry *root) struct super_block *sb = root->d_sb; struct fuse_conn *fc = get_fuse_conn_super(sb); - seq_printf(m, ",user_id=%u", fc->user_id); - seq_printf(m, ",group_id=%u", fc->group_id); + seq_printf(m, ",user_id=%u", from_kuid_munged(&init_user_ns, fc->user_id)); + seq_printf(m, ",group_id=%u", from_kgid_munged(&init_user_ns, fc->group_id)); if (fc->flags & FUSE_DEFAULT_PERMISSIONS) seq_puts(m, ",default_permissions"); if (fc->flags & FUSE_ALLOW_OTHER) @@ -989,7 +993,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent) if (!file) goto err; - if (file->f_op != &fuse_dev_operations) + if ((file->f_op != &fuse_dev_operations) || + (file->f_cred->user_ns != &init_user_ns)) goto err_fput; fc = kmalloc(sizeof(*fc), GFP_KERNEL); diff --git a/fs/hppfs/hppfs.c b/fs/hppfs/hppfs.c index 78f21f8dc2e..43b315f2002 100644 --- a/fs/hppfs/hppfs.c +++ b/fs/hppfs/hppfs.c @@ -710,7 +710,7 @@ static int hppfs_fill_super(struct super_block *sb, void *d, int silent) struct vfsmount *proc_mnt; int err = -ENOENT; - proc_mnt = mntget(current->nsproxy->pid_ns->proc_mnt); + proc_mnt = mntget(task_active_pid_ns(current)->proc_mnt); if (IS_ERR(proc_mnt)) goto out; diff --git a/fs/mount.h b/fs/mount.h index 4f291f9de64..cd500798040 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -4,8 +4,11 @@ struct mnt_namespace { atomic_t count; + unsigned int proc_inum; struct mount * root; struct list_head list; + struct user_namespace *user_ns; + u64 seq; /* Sequence number to prevent loops */ wait_queue_head_t poll; int event; }; diff --git a/fs/namespace.c b/fs/namespace.c index 24960626bb6..c1bbe86f492 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -12,6 +12,7 @@ #include <linux/export.h> #include <linux/capability.h> #include <linux/mnt_namespace.h> +#include <linux/user_namespace.h> #include <linux/namei.h> #include <linux/security.h> #include <linux/idr.h> @@ -20,6 +21,7 @@ #include <linux/fs_struct.h> /* get_fs_root et.al. */ #include <linux/fsnotify.h> /* fsnotify_vfsmount_delete */ #include <linux/uaccess.h> +#include <linux/proc_fs.h> #include "pnode.h" #include "internal.h" @@ -784,7 +786,7 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, if (!mnt) return ERR_PTR(-ENOMEM); - if (flag & (CL_SLAVE | CL_PRIVATE)) + if (flag & (CL_SLAVE | CL_PRIVATE | CL_SHARED_TO_SLAVE)) mnt->mnt_group_id = 0; /* not a peer of original */ else mnt->mnt_group_id = old->mnt_group_id; @@ -805,7 +807,8 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root, list_add_tail(&mnt->mnt_instance, &sb->s_mounts); br_write_unlock(&vfsmount_lock); - if (flag & CL_SLAVE) { + if ((flag & CL_SLAVE) || + ((flag & CL_SHARED_TO_SLAVE) && IS_MNT_SHARED(old))) { list_add(&mnt->mnt_slave, &old->mnt_slave_list); mnt->mnt_master = old; CLEAR_MNT_SHARED(mnt); @@ -1266,7 +1269,7 @@ SYSCALL_DEFINE2(umount, char __user *, name, int, flags) goto dput_and_out; retval = -EPERM; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) goto dput_and_out; retval = do_umount(mnt, flags); @@ -1292,7 +1295,7 @@ SYSCALL_DEFINE1(oldumount, char __user *, name) static int mount_is_safe(struct path *path) { - if (capable(CAP_SYS_ADMIN)) + if (ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return 0; return -EPERM; #ifdef notyet @@ -1308,6 +1311,26 @@ static int mount_is_safe(struct path *path) #endif } +static bool mnt_ns_loop(struct path *path) +{ + /* Could bind mounting the mount namespace inode cause a + * mount namespace loop? + */ + struct inode *inode = path->dentry->d_inode; + struct proc_inode *ei; + struct mnt_namespace *mnt_ns; + + if (!proc_ns_inode(inode)) + return false; + + ei = PROC_I(inode); + if (ei->ns_ops != &mntns_operations) + return false; + + mnt_ns = ei->ns; + return current->nsproxy->mnt_ns->seq >= mnt_ns->seq; +} + struct mount *copy_tree(struct mount *mnt, struct dentry *dentry, int flag) { @@ -1610,7 +1633,7 @@ static int do_change_type(struct path *path, int flag) int type; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(mnt->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (path->dentry != path->mnt->mnt_root) @@ -1655,6 +1678,10 @@ static int do_loopback(struct path *path, const char *old_name, if (err) return err; + err = -EINVAL; + if (mnt_ns_loop(&old_path)) + goto out; + err = lock_mount(path); if (err) goto out; @@ -1770,7 +1797,7 @@ static int do_move_mount(struct path *path, const char *old_name) struct mount *p; struct mount *old; int err = 0; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(real_mount(path->mnt)->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; if (!old_name || !*old_name) return -EINVAL; @@ -1857,21 +1884,6 @@ static struct vfsmount *fs_set_subtype(struct vfsmount *mnt, const char *fstype) return ERR_PTR(err); } -static struct vfsmount * -do_kern_mount(const char *fstype, int flags, const char *name, void *data) -{ - struct file_system_type *type = get_fs_type(fstype); - struct vfsmount *mnt; - if (!type) - return ERR_PTR(-ENODEV); - mnt = vfs_kern_mount(type, flags, name, data); - if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && - !mnt->mnt_sb->s_subtype) - mnt = fs_set_subtype(mnt, fstype); - put_filesystem(type); - return mnt; -} - /* * add a mount into a namespace's mount tree */ @@ -1917,20 +1929,46 @@ unlock: * create a new mount for userspace and request it to be added into the * namespace's tree */ -static int do_new_mount(struct path *path, const char *type, int flags, +static int do_new_mount(struct path *path, const char *fstype, int flags, int mnt_flags, const char *name, void *data) { + struct file_system_type *type; + struct user_namespace *user_ns; struct vfsmount *mnt; int err; - if (!type) + if (!fstype) return -EINVAL; /* we need capabilities... */ - if (!capable(CAP_SYS_ADMIN)) + user_ns = real_mount(path->mnt)->mnt_ns->user_ns; + if (!ns_capable(user_ns, CAP_SYS_ADMIN)) return -EPERM; - mnt = do_kern_mount(type, flags, name, data); + type = get_fs_type(fstype); + if (!type) + return -ENODEV; + + if (user_ns != &init_user_ns) { + if (!(type->fs_flags & FS_USERNS_MOUNT)) { + put_filesystem(type); + return -EPERM; + } + /* Only in special cases allow devices from mounts + * created outside the initial user namespace. + */ + if (!(type->fs_flags & FS_USERNS_DEV_MOUNT)) { + flags |= MS_NODEV; + mnt_flags |= MNT_NODEV; + } + } + + mnt = vfs_kern_mount(type, flags, name, data); + if (!IS_ERR(mnt) && (type->fs_flags & FS_HAS_SUBTYPE) && + !mnt->mnt_sb->s_subtype) + mnt = fs_set_subtype(mnt, fstype); + + put_filesystem(type); if (IS_ERR(mnt)) return PTR_ERR(mnt); @@ -2261,18 +2299,42 @@ dput_out: return retval; } -static struct mnt_namespace *alloc_mnt_ns(void) +static void free_mnt_ns(struct mnt_namespace *ns) +{ + proc_free_inum(ns->proc_inum); + put_user_ns(ns->user_ns); + kfree(ns); +} + +/* + * Assign a sequence number so we can detect when we attempt to bind + * mount a reference to an older mount namespace into the current + * mount namespace, preventing reference counting loops. A 64bit + * number incrementing at 10Ghz will take 12,427 years to wrap which + * is effectively never, so we can ignore the possibility. + */ +static atomic64_t mnt_ns_seq = ATOMIC64_INIT(1); + +static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns) { struct mnt_namespace *new_ns; + int ret; new_ns = kmalloc(sizeof(struct mnt_namespace), GFP_KERNEL); if (!new_ns) return ERR_PTR(-ENOMEM); + ret = proc_alloc_inum(&new_ns->proc_inum); + if (ret) { + kfree(new_ns); + return ERR_PTR(ret); + } + new_ns->seq = atomic64_add_return(1, &mnt_ns_seq); atomic_set(&new_ns->count, 1); new_ns->root = NULL; INIT_LIST_HEAD(&new_ns->list); init_waitqueue_head(&new_ns->poll); new_ns->event = 0; + new_ns->user_ns = get_user_ns(user_ns); return new_ns; } @@ -2281,24 +2343,28 @@ static struct mnt_namespace *alloc_mnt_ns(void) * copied from the namespace of the passed in task structure. */ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, - struct fs_struct *fs) + struct user_namespace *user_ns, struct fs_struct *fs) { struct mnt_namespace *new_ns; struct vfsmount *rootmnt = NULL, *pwdmnt = NULL; struct mount *p, *q; struct mount *old = mnt_ns->root; struct mount *new; + int copy_flags; - new_ns = alloc_mnt_ns(); + new_ns = alloc_mnt_ns(user_ns); if (IS_ERR(new_ns)) return new_ns; down_write(&namespace_sem); /* First pass: copy the tree topology */ - new = copy_tree(old, old->mnt.mnt_root, CL_COPY_ALL | CL_EXPIRE); + copy_flags = CL_COPY_ALL | CL_EXPIRE; + if (user_ns != mnt_ns->user_ns) + copy_flags |= CL_SHARED_TO_SLAVE; + new = copy_tree(old, old->mnt.mnt_root, copy_flags); if (IS_ERR(new)) { up_write(&namespace_sem); - kfree(new_ns); + free_mnt_ns(new_ns); return ERR_CAST(new); } new_ns->root = new; @@ -2339,7 +2405,7 @@ static struct mnt_namespace *dup_mnt_ns(struct mnt_namespace *mnt_ns, } struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, - struct fs_struct *new_fs) + struct user_namespace *user_ns, struct fs_struct *new_fs) { struct mnt_namespace *new_ns; @@ -2349,7 +2415,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, if (!(flags & CLONE_NEWNS)) return ns; - new_ns = dup_mnt_ns(ns, new_fs); + new_ns = dup_mnt_ns(ns, user_ns, new_fs); put_mnt_ns(ns); return new_ns; @@ -2361,7 +2427,7 @@ struct mnt_namespace *copy_mnt_ns(unsigned long flags, struct mnt_namespace *ns, */ static struct mnt_namespace *create_mnt_ns(struct vfsmount *m) { - struct mnt_namespace *new_ns = alloc_mnt_ns(); + struct mnt_namespace *new_ns = alloc_mnt_ns(&init_user_ns); if (!IS_ERR(new_ns)) { struct mount *mnt = real_mount(m); mnt->mnt_ns = new_ns; @@ -2501,7 +2567,7 @@ SYSCALL_DEFINE2(pivot_root, const char __user *, new_root, struct mount *new_mnt, *root_mnt; int error; - if (!capable(CAP_SYS_ADMIN)) + if (!ns_capable(current->nsproxy->mnt_ns->user_ns, CAP_SYS_ADMIN)) return -EPERM; error = user_path_dir(new_root, &new); @@ -2583,8 +2649,13 @@ static void __init init_mount_tree(void) struct vfsmount *mnt; struct mnt_namespace *ns; struct path root; + struct file_system_type *type; - mnt = do_kern_mount("rootfs", 0, "rootfs", NULL); + type = get_fs_type("rootfs"); + if (!type) + panic("Can't find rootfs type"); + mnt = vfs_kern_mount(type, 0, "rootfs", NULL); + put_filesystem(type); if (IS_ERR(mnt)) panic("Can't create rootfs"); @@ -2647,7 +2718,7 @@ void put_mnt_ns(struct mnt_namespace *ns) br_write_unlock(&vfsmount_lock); up_write(&namespace_sem); release_mounts(&umount_list); - kfree(ns); + free_mnt_ns(ns); } struct vfsmount *kern_mount_data(struct file_system_type *type, void *data) @@ -2681,3 +2752,71 @@ bool our_mnt(struct vfsmount *mnt) { return check_mnt(real_mount(mnt)); } + +static void *mntns_get(struct task_struct *task) +{ + struct mnt_namespace *ns = NULL; + struct nsproxy *nsproxy; + + rcu_read_lock(); + nsproxy = task_nsproxy(task); + if (nsproxy) { + ns = nsproxy->mnt_ns; + get_mnt_ns(ns); + } + rcu_read_unlock(); + + return ns; +} + +static void mntns_put(void *ns) +{ + put_mnt_ns(ns); +} + +static int mntns_install(struct nsproxy *nsproxy, void *ns) +{ + struct fs_struct *fs = current->fs; + struct mnt_namespace *mnt_ns = ns; + struct path root; + + if (!ns_capable(mnt_ns->user_ns, CAP_SYS_ADMIN) || + !nsown_capable(CAP_SYS_CHROOT)) + return -EPERM; + + if (fs->users != 1) + return -EINVAL; + + get_mnt_ns(mnt_ns); + put_mnt_ns(nsproxy->mnt_ns); + nsproxy->mnt_ns = mnt_ns; + + /* Find the root */ + root.mnt = &mnt_ns->root->mnt; + root.dentry = mnt_ns->root->mnt.mnt_root; + path_get(&root); + while(d_mountpoint(root.dentry) && follow_down_one(&root)) + ; + + /* Update the pwd and root */ + set_fs_pwd(fs, &root); + set_fs_root(fs, &root); + + path_put(&root); + return 0; +} + +static unsigned int mntns_inum(void *ns) +{ + struct mnt_namespace *mnt_ns = ns; + return mnt_ns->proc_inum; +} + +const struct proc_ns_operations mntns_operations = { + .name = "mnt", + .type = CLONE_NEWNS, + .get = mntns_get, + .put = mntns_put, + .install = mntns_install, + .inum = mntns_inum, +}; diff --git a/fs/open.c b/fs/open.c index 59071f55bf7..182d8667b7b 100644 --- a/fs/open.c +++ b/fs/open.c @@ -435,7 +435,7 @@ SYSCALL_DEFINE1(chroot, const char __user *, filename) goto dput_and_out; error = -EPERM; - if (!capable(CAP_SYS_CHROOT)) + if (!nsown_capable(CAP_SYS_CHROOT)) goto dput_and_out; error = security_path_chroot(&path); if (error) diff --git a/fs/pnode.h b/fs/pnode.h index 65c60979d54..19b853a3445 100644 --- a/fs/pnode.h +++ b/fs/pnode.h @@ -22,6 +22,7 @@ #define CL_COPY_ALL 0x04 #define CL_MAKE_SHARED 0x08 #define CL_PRIVATE 0x10 +#define CL_SHARED_TO_SLAVE 0x20 static inline void set_mnt_shared(struct mount *mnt) { diff --git a/fs/proc/Makefile b/fs/proc/Makefile index 99349efbbc2..981b0560193 100644 --- a/fs/proc/Makefile +++ b/fs/proc/Makefile @@ -21,6 +21,7 @@ proc-y += uptime.o proc-y += version.o proc-y += softirqs.o proc-y += namespaces.o +proc-y += self.o proc-$(CONFIG_PROC_SYSCTL) += proc_sysctl.o proc-$(CONFIG_NET) += proc_net.o proc-$(CONFIG_PROC_KCORE) += kcore.o diff --git a/fs/proc/array.c b/fs/proc/array.c index d3696708fc1..d66248a1919 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -162,7 +162,7 @@ static inline const char *get_task_state(struct task_struct *tsk) static inline void task_state(struct seq_file *m, struct pid_namespace *ns, struct pid *pid, struct task_struct *p) { - struct user_namespace *user_ns = current_user_ns(); + struct user_namespace *user_ns = seq_user_ns(m); struct group_info *group_info; int g; struct fdtable *fdt = NULL; diff --git a/fs/proc/base.c b/fs/proc/base.c index aa63d25157b..5a5a0be40e4 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -2345,146 +2345,6 @@ static const struct file_operations proc_coredump_filter_operations = { }; #endif -/* - * /proc/self: - */ -static int proc_self_readlink(struct dentry *dentry, char __user *buffer, - int buflen) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char tmp[PROC_NUMBUF]; - if (!tgid) - return -ENOENT; - sprintf(tmp, "%d", tgid); - return vfs_readlink(dentry,buffer,buflen,tmp); -} - -static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) -{ - struct pid_namespace *ns = dentry->d_sb->s_fs_info; - pid_t tgid = task_tgid_nr_ns(current, ns); - char *name = ERR_PTR(-ENOENT); - if (tgid) { - /* 11 for max length of signed int in decimal + NULL term */ - name = kmalloc(12, GFP_KERNEL); - if (!name) - name = ERR_PTR(-ENOMEM); - else - sprintf(name, "%d", tgid); - } - nd_set_link(nd, name); - return NULL; -} - -static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, - void *cookie) -{ - char *s = nd_get_link(nd); - if (!IS_ERR(s)) - kfree(s); -} - -static const struct inode_operations proc_self_inode_operations = { - .readlink = proc_self_readlink, - .follow_link = proc_self_follow_link, - .put_link = proc_self_put_link, -}; - -/* - * proc base - * - * These are the directory entries in the root directory of /proc - * that properly belong to the /proc filesystem, as they describe - * describe something that is process related. - */ -static const struct pid_entry proc_base_stuff[] = { - NOD("self", S_IFLNK|S_IRWXUGO, - &proc_self_inode_operations, NULL, {}), -}; - -static struct dentry *proc_base_instantiate(struct inode *dir, - struct dentry *dentry, struct task_struct *task, const void *ptr) -{ - const struct pid_entry *p = ptr; - struct inode *inode; - struct proc_inode *ei; - struct dentry *error; - - /* Allocate the inode */ - error = ERR_PTR(-ENOMEM); - inode = new_inode(dir->i_sb); - if (!inode) - goto out; - - /* Initialize the inode */ - ei = PROC_I(inode); - inode->i_ino = get_next_ino(); - inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; - - /* - * grab the reference to the task. - */ - ei->pid = get_task_pid(task, PIDTYPE_PID); - if (!ei->pid) - goto out_iput; - - inode->i_mode = p->mode; - if (S_ISDIR(inode->i_mode)) - set_nlink(inode, 2); - if (S_ISLNK(inode->i_mode)) - inode->i_size = 64; - if (p->iop) - inode->i_op = p->iop; - if (p->fop) - inode->i_fop = p->fop; - ei->op = p->op; - d_add(dentry, inode); - error = NULL; -out: - return error; -out_iput: - iput(inode); - goto out; -} - -static struct dentry *proc_base_lookup(struct inode *dir, struct dentry *dentry) -{ - struct dentry *error; - struct task_struct *task = get_proc_task(dir); - const struct pid_entry *p, *last; - - error = ERR_PTR(-ENOENT); - - if (!task) - goto out_no_task; - - /* Lookup the directory entry */ - last = &proc_base_stuff[ARRAY_SIZE(proc_base_stuff) - 1]; - for (p = proc_base_stuff; p <= last; p++) { - if (p->len != dentry->d_name.len) - continue; - if (!memcmp(dentry->d_name.name, p->name, p->len)) - break; - } - if (p > last) - goto out; - - error = proc_base_instantiate(dir, dentry, task, p); - -out: - put_task_struct(task); -out_no_task: - return error; -} - -static int proc_base_fill_cache(struct file *filp, void *dirent, - filldir_t filldir, struct task_struct *task, const struct pid_entry *p) -{ - return proc_fill_cache(filp, dirent, filldir, p->name, p->len, - proc_base_instantiate, task, p); -} - #ifdef CONFIG_TASK_IO_ACCOUNTING static int do_io_accounting(struct task_struct *task, char *buffer, int whole) { @@ -2839,10 +2699,6 @@ void proc_flush_task(struct task_struct *task) proc_flush_task_mnt(upid->ns->proc_mnt, upid->nr, tgid->numbers[i].nr); } - - upid = &pid->numbers[pid->level]; - if (upid->nr == 1) - pid_ns_release_proc(upid->ns); } static struct dentry *proc_pid_instantiate(struct inode *dir, @@ -2876,15 +2732,11 @@ out: struct dentry *proc_pid_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags) { - struct dentry *result; + struct dentry *result = NULL; struct task_struct *task; unsigned tgid; struct pid_namespace *ns; - result = proc_base_lookup(dir, dentry); - if (!IS_ERR(result) || PTR_ERR(result) != -ENOENT) - goto out; - tgid = name_to_int(dentry); if (tgid == ~0U) goto out; @@ -2947,7 +2799,7 @@ retry: return iter; } -#define TGID_OFFSET (FIRST_PROCESS_ENTRY + ARRAY_SIZE(proc_base_stuff)) +#define TGID_OFFSET (FIRST_PROCESS_ENTRY) static int proc_pid_fill_cache(struct file *filp, void *dirent, filldir_t filldir, struct tgid_iter iter) @@ -2967,25 +2819,12 @@ static int fake_filldir(void *buf, const char *name, int namelen, /* for the /proc/ directory itself, after non-process stuff has been done */ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) { - unsigned int nr; - struct task_struct *reaper; struct tgid_iter iter; struct pid_namespace *ns; filldir_t __filldir; if (filp->f_pos >= PID_MAX_LIMIT + TGID_OFFSET) - goto out_no_task; - nr = filp->f_pos - FIRST_PROCESS_ENTRY; - - reaper = get_proc_task(filp->f_path.dentry->d_inode); - if (!reaper) - goto out_no_task; - - for (; nr < ARRAY_SIZE(proc_base_stuff); filp->f_pos++, nr++) { - const struct pid_entry *p = &proc_base_stuff[nr]; - if (proc_base_fill_cache(filp, dirent, filldir, reaper, p) < 0) - goto out; - } + goto out; ns = filp->f_dentry->d_sb->s_fs_info; iter.task = NULL; @@ -3006,8 +2845,6 @@ int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir) } filp->f_pos = PID_MAX_LIMIT + TGID_OFFSET; out: - put_task_struct(reaper); -out_no_task: return 0; } diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 0d80cef4cfb..7b3ae3cc0ef 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -350,14 +350,14 @@ static DEFINE_SPINLOCK(proc_inum_lock); /* protects the above */ * Return an inode number between PROC_DYNAMIC_FIRST and * 0xffffffff, or zero on failure. */ -static unsigned int get_inode_number(void) +int proc_alloc_inum(unsigned int *inum) { unsigned int i; int error; retry: - if (ida_pre_get(&proc_inum_ida, GFP_KERNEL) == 0) - return 0; + if (!ida_pre_get(&proc_inum_ida, GFP_KERNEL)) + return -ENOMEM; spin_lock(&proc_inum_lock); error = ida_get_new(&proc_inum_ida, &i); @@ -365,18 +365,19 @@ retry: if (error == -EAGAIN) goto retry; else if (error) - return 0; + return error; if (i > UINT_MAX - PROC_DYNAMIC_FIRST) { spin_lock(&proc_inum_lock); ida_remove(&proc_inum_ida, i); spin_unlock(&proc_inum_lock); - return 0; + return -ENOSPC; } - return PROC_DYNAMIC_FIRST + i; + *inum = PROC_DYNAMIC_FIRST + i; + return 0; } -static void release_inode_number(unsigned int inum) +void proc_free_inum(unsigned int inum) { spin_lock(&proc_inum_lock); ida_remove(&proc_inum_ida, inum - PROC_DYNAMIC_FIRST); @@ -554,13 +555,12 @@ static const struct inode_operations proc_dir_inode_operations = { static int proc_register(struct proc_dir_entry * dir, struct proc_dir_entry * dp) { - unsigned int i; struct proc_dir_entry *tmp; + int ret; - i = get_inode_number(); - if (i == 0) - return -EAGAIN; - dp->low_ino = i; + ret = proc_alloc_inum(&dp->low_ino); + if (ret) + return ret; if (S_ISDIR(dp->mode)) { if (dp->proc_iops == NULL) { @@ -764,7 +764,7 @@ EXPORT_SYMBOL(proc_create_data); static void free_proc_entry(struct proc_dir_entry *de) { - release_inode_number(de->low_ino); + proc_free_inum(de->low_ino); if (S_ISLNK(de->mode)) kfree(de->data); diff --git a/fs/proc/inode.c b/fs/proc/inode.c index 3b22bbdee9e..439ae688650 100644 --- a/fs/proc/inode.c +++ b/fs/proc/inode.c @@ -31,6 +31,7 @@ static void proc_evict_inode(struct inode *inode) struct proc_dir_entry *de; struct ctl_table_header *head; const struct proc_ns_operations *ns_ops; + void *ns; truncate_inode_pages(&inode->i_data, 0); clear_inode(inode); @@ -49,8 +50,9 @@ static void proc_evict_inode(struct inode *inode) } /* Release any associated namespace */ ns_ops = PROC_I(inode)->ns_ops; - if (ns_ops && ns_ops->put) - ns_ops->put(PROC_I(inode)->ns); + ns = PROC_I(inode)->ns; + if (ns_ops && ns) + ns_ops->put(ns); } static struct kmem_cache * proc_inode_cachep; diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 43973b084ab..252544c0520 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -15,6 +15,7 @@ struct ctl_table_header; struct mempolicy; extern struct proc_dir_entry proc_root; +extern void proc_self_init(void); #ifdef CONFIG_PROC_SYSCTL extern int proc_sys_init(void); extern void sysctl_head_put(struct ctl_table_header *head); diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c index b178ed733c3..b7a47196c8c 100644 --- a/fs/proc/namespaces.c +++ b/fs/proc/namespaces.c @@ -11,6 +11,7 @@ #include <net/net_namespace.h> #include <linux/ipc_namespace.h> #include <linux/pid_namespace.h> +#include <linux/user_namespace.h> #include "internal.h" @@ -24,12 +25,168 @@ static const struct proc_ns_operations *ns_entries[] = { #ifdef CONFIG_IPC_NS &ipcns_operations, #endif +#ifdef CONFIG_PID_NS + &pidns_operations, +#endif +#ifdef CONFIG_USER_NS + &userns_operations, +#endif + &mntns_operations, }; static const struct file_operations ns_file_operations = { .llseek = no_llseek, }; +static const struct inode_operations ns_inode_operations = { + .setattr = proc_setattr, +}; + +static int ns_delete_dentry(const struct dentry *dentry) +{ + /* Don't cache namespace inodes when not in use */ + return 1; +} + +static char *ns_dname(struct dentry *dentry, char *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + const struct proc_ns_operations *ns_ops = PROC_I(inode)->ns_ops; + + return dynamic_dname(dentry, buffer, buflen, "%s:[%lu]", + ns_ops->name, inode->i_ino); +} + +const struct dentry_operations ns_dentry_operations = +{ + .d_delete = ns_delete_dentry, + .d_dname = ns_dname, +}; + +static struct dentry *proc_ns_get_dentry(struct super_block *sb, + struct task_struct *task, const struct proc_ns_operations *ns_ops) +{ + struct dentry *dentry, *result; + struct inode *inode; + struct proc_inode *ei; + struct qstr qname = { .name = "", }; + void *ns; + + ns = ns_ops->get(task); + if (!ns) + return ERR_PTR(-ENOENT); + + dentry = d_alloc_pseudo(sb, &qname); + if (!dentry) { + ns_ops->put(ns); + return ERR_PTR(-ENOMEM); + } + + inode = iget_locked(sb, ns_ops->inum(ns)); + if (!inode) { + dput(dentry); + ns_ops->put(ns); + return ERR_PTR(-ENOMEM); + } + + ei = PROC_I(inode); + if (inode->i_state & I_NEW) { + inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME; + inode->i_op = &ns_inode_operations; + inode->i_mode = S_IFREG | S_IRUGO; + inode->i_fop = &ns_file_operations; + ei->ns_ops = ns_ops; + ei->ns = ns; + unlock_new_inode(inode); + } else { + ns_ops->put(ns); + } + + d_set_d_op(dentry, &ns_dentry_operations); + result = d_instantiate_unique(dentry, inode); + if (result) { + dput(dentry); + dentry = result; + } + + return dentry; +} + +static void *proc_ns_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct inode *inode = dentry->d_inode; + struct super_block *sb = inode->i_sb; + struct proc_inode *ei = PROC_I(inode); + struct task_struct *task; + struct dentry *ns_dentry; + void *error = ERR_PTR(-EACCES); + + task = get_proc_task(inode); + if (!task) + goto out; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + ns_dentry = proc_ns_get_dentry(sb, task, ei->ns_ops); + if (IS_ERR(ns_dentry)) { + error = ERR_CAST(ns_dentry); + goto out_put_task; + } + + dput(nd->path.dentry); + nd->path.dentry = ns_dentry; + error = NULL; + +out_put_task: + put_task_struct(task); +out: + return error; +} + +static int proc_ns_readlink(struct dentry *dentry, char __user *buffer, int buflen) +{ + struct inode *inode = dentry->d_inode; + struct proc_inode *ei = PROC_I(inode); + const struct proc_ns_operations *ns_ops = ei->ns_ops; + struct task_struct *task; + void *ns; + char name[50]; + int len = -EACCES; + + task = get_proc_task(inode); + if (!task) + goto out; + + if (!ptrace_may_access(task, PTRACE_MODE_READ)) + goto out_put_task; + + len = -ENOENT; + ns = ns_ops->get(task); + if (!ns) + goto out_put_task; + + snprintf(name, sizeof(name), "%s:[%u]", ns_ops->name, ns_ops->inum(ns)); + len = strlen(name); + + if (len > buflen) + len = buflen; + if (copy_to_user(buffer, name, len)) + len = -EFAULT; + + ns_ops->put(ns); +out_put_task: + put_task_struct(task); +out: + return len; +} + +static const struct inode_operations proc_ns_link_inode_operations = { + .readlink = proc_ns_readlink, + .follow_link = proc_ns_follow_link, + .setattr = proc_setattr, +}; + static struct dentry *proc_ns_instantiate(struct inode *dir, struct dentry *dentry, struct task_struct *task, const void *ptr) { @@ -37,21 +194,15 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, struct inode *inode; struct proc_inode *ei; struct dentry *error = ERR_PTR(-ENOENT); - void *ns; inode = proc_pid_make_inode(dir->i_sb, task); if (!inode) goto out; - ns = ns_ops->get(task); - if (!ns) - goto out_iput; - ei = PROC_I(inode); - inode->i_mode = S_IFREG|S_IRUSR; - inode->i_fop = &ns_file_operations; - ei->ns_ops = ns_ops; - ei->ns = ns; + inode->i_mode = S_IFLNK|S_IRWXUGO; + inode->i_op = &proc_ns_link_inode_operations; + ei->ns_ops = ns_ops; d_set_d_op(dentry, &pid_dentry_operations); d_add(dentry, inode); @@ -60,9 +211,6 @@ static struct dentry *proc_ns_instantiate(struct inode *dir, error = NULL; out: return error; -out_iput: - iput(inode); - goto out; } static int proc_ns_fill_cache(struct file *filp, void *dirent, @@ -89,10 +237,6 @@ static int proc_ns_dir_readdir(struct file *filp, void *dirent, if (!task) goto out_no_task; - ret = -EPERM; - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - ret = 0; i = filp->f_pos; switch (i) { @@ -152,10 +296,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (!task) goto out_no_task; - error = ERR_PTR(-EPERM); - if (!ptrace_may_access(task, PTRACE_MODE_READ)) - goto out; - last = &ns_entries[ARRAY_SIZE(ns_entries)]; for (entry = ns_entries; entry < last; entry++) { if (strlen((*entry)->name) != len) @@ -163,7 +303,6 @@ static struct dentry *proc_ns_dir_lookup(struct inode *dir, if (!memcmp(dentry->d_name.name, (*entry)->name, len)) break; } - error = ERR_PTR(-ENOENT); if (entry == last) goto out; @@ -198,3 +337,7 @@ out_invalid: return ERR_PTR(-EINVAL); } +bool proc_ns_inode(struct inode *inode) +{ + return inode->i_fop == &ns_file_operations; +} diff --git a/fs/proc/root.c b/fs/proc/root.c index 9889a92d2e0..c6e9fac26ba 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -100,14 +100,13 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, int err; struct super_block *sb; struct pid_namespace *ns; - struct proc_inode *ei; char *options; if (flags & MS_KERNMOUNT) { ns = (struct pid_namespace *)data; options = NULL; } else { - ns = current->nsproxy->pid_ns; + ns = task_active_pid_ns(current); options = data; } @@ -130,13 +129,6 @@ static struct dentry *proc_mount(struct file_system_type *fs_type, sb->s_flags |= MS_ACTIVE; } - ei = PROC_I(sb->s_root->d_inode); - if (!ei->pid) { - rcu_read_lock(); - ei->pid = get_pid(find_pid_ns(1, ns)); - rcu_read_unlock(); - } - return dget(sb->s_root); } @@ -153,6 +145,7 @@ static struct file_system_type proc_fs_type = { .name = "proc", .mount = proc_mount, .kill_sb = proc_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; void __init proc_root_init(void) @@ -163,12 +156,8 @@ void __init proc_root_init(void) err = register_filesystem(&proc_fs_type); if (err) return; - err = pid_ns_prepare_proc(&init_pid_ns); - if (err) { - unregister_filesystem(&proc_fs_type); - return; - } + proc_self_init(); proc_symlink("mounts", NULL, "self/mounts"); proc_net_init(); diff --git a/fs/proc/self.c b/fs/proc/self.c new file mode 100644 index 00000000000..aa5cc3bff14 --- /dev/null +++ b/fs/proc/self.c @@ -0,0 +1,59 @@ +#include <linux/proc_fs.h> +#include <linux/sched.h> +#include <linux/namei.h> + +/* + * /proc/self: + */ +static int proc_self_readlink(struct dentry *dentry, char __user *buffer, + int buflen) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + char tmp[PROC_NUMBUF]; + if (!tgid) + return -ENOENT; + sprintf(tmp, "%d", tgid); + return vfs_readlink(dentry,buffer,buflen,tmp); +} + +static void *proc_self_follow_link(struct dentry *dentry, struct nameidata *nd) +{ + struct pid_namespace *ns = dentry->d_sb->s_fs_info; + pid_t tgid = task_tgid_nr_ns(current, ns); + char *name = ERR_PTR(-ENOENT); + if (tgid) { + /* 11 for max length of signed int in decimal + NULL term */ + name = kmalloc(12, GFP_KERNEL); + if (!name) + name = ERR_PTR(-ENOMEM); + else + sprintf(name, "%d", tgid); + } + nd_set_link(nd, name); + return NULL; +} + +static void proc_self_put_link(struct dentry *dentry, struct nameidata *nd, + void *cookie) +{ + char *s = nd_get_link(nd); + if (!IS_ERR(s)) + kfree(s); +} + +static const struct inode_operations proc_self_inode_operations = { + .readlink = proc_self_readlink, + .follow_link = proc_self_follow_link, + .put_link = proc_self_put_link, +}; + +void __init proc_self_init(void) +{ + struct proc_dir_entry *proc_self_symlink; + mode_t mode; + + mode = S_IFLNK | S_IRWXUGO; + proc_self_symlink = proc_create("self", mode, NULL, NULL ); + proc_self_symlink->proc_iops = &proc_self_inode_operations; +} diff --git a/fs/sysfs/mount.c b/fs/sysfs/mount.c index 71eb7e25392..db940a9be04 100644 --- a/fs/sysfs/mount.c +++ b/fs/sysfs/mount.c @@ -149,6 +149,7 @@ static struct file_system_type sysfs_fs_type = { .name = "sysfs", .mount = sysfs_mount, .kill_sb = sysfs_kill_sb, + .fs_flags = FS_USERNS_MOUNT, }; int __init sysfs_init(void) |