diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 276ccaa2670c06892b8e5b402c1561750da6bc03..cab741c2d6033ad937b761852def3fbbf60b763b 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -8,6 +8,7 @@ #include #include #include +#include #define INIT_FDTABLE \ { \ @@ -78,6 +79,7 @@ extern struct nsproxy init_nsproxy; .uts_ns = &init_uts_ns, \ .mnt_ns = NULL, \ INIT_IPC_NS(ipc_ns) \ + .user_ns = &init_user_ns, \ } #define INIT_SIGHAND(sighand) { \ diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h index 189e0dc993ab6ab84a577d1ee0635946ec077072..6d179a397bfb8246c24f098ae90df05a864cee0a 100644 --- a/include/linux/nsproxy.h +++ b/include/linux/nsproxy.h @@ -28,6 +28,7 @@ struct nsproxy { struct ipc_namespace *ipc_ns; struct mnt_namespace *mnt_ns; struct pid_namespace *pid_ns; + struct user_namespace *user_ns; }; extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index b579624477f468a3443d279e173b85e8593f84f3..c667255d70dbf8373796c99e26f5d992f4a5633c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -287,6 +287,7 @@ extern signed long schedule_timeout_uninterruptible(signed long timeout); asmlinkage void schedule(void); struct nsproxy; +struct user_namespace; /* forward declaration so the alloc_uid() prototype in this header can take a pointer to it */ /* Maximum number of active map areas.. This is a random (large) number */ #define DEFAULT_MAX_MAP_COUNT 65536 @@ -1408,7 +1409,7 @@ extern struct task_struct *find_task_by_pid_type(int type, int pid); extern void __set_special_pids(pid_t session, pid_t pgrp); /* per-UID process charging. 
*/ -extern struct user_struct * alloc_uid(uid_t); +extern struct user_struct * alloc_uid(struct user_namespace *, uid_t); static inline struct user_struct *get_uid(struct user_struct *u) { atomic_inc(&u->__count); diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h new file mode 100644 index 0000000000000000000000000000000000000000..92a45867ecfb743d47ab962fabe90a4af11b1573 --- /dev/null +++ b/include/linux/user_namespace.h @@ -0,0 +1,57 @@ +#ifndef _LINUX_USER_NAMESPACE_H +#define _LINUX_USER_NAMESPACE_H + +#include +#include +#include + +#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 3 : 8) +#define UIDHASH_SZ (1 << UIDHASH_BITS) + +struct user_namespace { /* reference-counted namespace holding its own uid hash table and root user */ + struct kref kref; + struct list_head uidhash_table[UIDHASH_SZ]; + struct user_struct *root_user; +}; + +extern struct user_namespace init_user_ns; + +#ifdef CONFIG_USER_NS + +static inline struct user_namespace *get_user_ns(struct user_namespace *ns) +{ + if (ns) + kref_get(&ns->kref); /* take a reference; a NULL ns is returned unchanged */ + return ns; +} + +extern struct user_namespace *copy_user_ns(int flags, + struct user_namespace *old_ns); +extern void free_user_ns(struct kref *kref); + +static inline void put_user_ns(struct user_namespace *ns) +{ + if (ns) + kref_put(&ns->kref, free_user_ns); /* drop a reference, passing free_user_ns as the release function; NULL ns is a no-op */ +} + +#else + +static inline struct user_namespace *get_user_ns(struct user_namespace *ns) +{ + return &init_user_ns; /* stub without CONFIG_USER_NS: ns is ignored and no reference is taken */ +} + +static inline struct user_namespace *copy_user_ns(int flags, + struct user_namespace *old_ns) +{ + return NULL; /* NOTE(review): create_new_namespaces() stores this result in nsproxy->user_ns, and fork.c and sys.c in this patch dereference ->user_ns->root_user; a NULL here looks unsafe when CONFIG_USER_NS is off -- confirm */ +} + +static inline void put_user_ns(struct user_namespace *ns) +{ +} + +#endif + +#endif /* _LINUX_USER_NAMESPACE_H */ diff --git a/init/Kconfig b/init/Kconfig index 1e198b8c69360e447c0b12d705b008d1e032482f..0b0e29ed82d12456860e70484d5a73877d5ef7d3 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -209,6 +209,15 @@ config TASK_IO_ACCOUNTING Say N if unsure. +config USER_NS + bool "User Namespaces (EXPERIMENTAL)" + default n + depends on EXPERIMENTAL + help + Support user namespaces. This allows containers, i.e. 
+ vservers, to use user namespaces to provide different + user info for different servers. If unsure, say N. + config AUDIT bool "Auditing support" depends on NET diff --git a/kernel/Makefile b/kernel/Makefile index fa8efd437afb4c0a52a8eb8520666c9da84a45e7..2a999836ca18092e172280d346a63c4a6763d776 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -4,7 +4,7 @@ obj-y = sched.o fork.o exec_domain.o panic.o printk.o profile.o \ exit.o itimer.o time.o softirq.o resource.o \ - sysctl.o capability.o ptrace.o timer.o user.o \ + sysctl.o capability.o ptrace.o timer.o user.o user_namespace.o \ signal.o sys.o kmod.o workqueue.o pid.o \ rcupdate.o extable.o params.o posix-timers.o \ kthread.o wait.o kfifo.o sys_ni.o posix-cpu-timers.o mutex.o \ diff --git a/kernel/fork.c b/kernel/fork.c index 4015912aaac22b5a2dc19bbdb1bb20746377b468..13cf0978780abbeec1e5dedc25e4f8abb8f1a8c3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1002,7 +1002,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, if (atomic_read(&p->user->processes) >= p->signal->rlim[RLIMIT_NPROC].rlim_cur) { if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) && - p->user != &root_user) + p->user != current->nsproxy->user_ns->root_user) /* the root user of the current user namespace is exempt from the RLIMIT_NPROC check */ goto bad_fork_free; } diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c index e38bed75367d6e0c480af576cbd54f1c347a4318..895e3a3f20442e9420464b00889ec16583fea313 100644 --- a/kernel/nsproxy.c +++ b/kernel/nsproxy.c @@ -79,8 +79,15 @@ static struct nsproxy *create_new_namespaces(int flags, struct task_struct *tsk, if (IS_ERR(new_nsp->pid_ns)) goto out_pid; + new_nsp->user_ns = copy_user_ns(flags, tsk->nsproxy->user_ns); + if (IS_ERR(new_nsp->user_ns)) + goto out_user; + return new_nsp; +out_user: /* copy_user_ns() failed: release the pid namespace obtained just before, then fall through to the earlier cleanup labels */ + if (new_nsp->pid_ns) + put_pid_ns(new_nsp->pid_ns); out_pid: if (new_nsp->ipc_ns) put_ipc_ns(new_nsp->ipc_ns); @@ -140,6 +147,8 @@ void free_nsproxy(struct nsproxy *ns) put_ipc_ns(ns->ipc_ns); if (ns->pid_ns) put_pid_ns(ns->pid_ns); + if (ns->user_ns) + 
put_user_ns(ns->user_ns); kfree(ns); } diff --git a/kernel/sys.c b/kernel/sys.c index 872271ccc3843989ad493ae4dc219b38bf30b15e..ed92e2f033422b8cde3601ada74be31633834ef8 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1078,13 +1079,13 @@ static int set_user(uid_t new_ruid, int dumpclear) { struct user_struct *new_user; - new_user = alloc_uid(new_ruid); + new_user = alloc_uid(current->nsproxy->user_ns, new_ruid); /* the uid is now looked up in the user namespace of the current task */ if (!new_user) return -EAGAIN; if (atomic_read(&new_user->processes) >= current->signal->rlim[RLIMIT_NPROC].rlim_cur && - new_user != &root_user) { + new_user != current->nsproxy->user_ns->root_user) { free_uid(new_user); return -EAGAIN; } diff --git a/kernel/user.c b/kernel/user.c index 4869563080e9e080954d26f34b70bbffc13de18b..98b82507797a9f26fd0a2d4ee3d9d22d92928c08 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -14,20 +14,19 @@ #include #include #include +#include +#include /* * UID task count cache, to get fast user lookup in "alloc_uid" * when changing user ID's (ie setuid() and friends). */ -#define UIDHASH_BITS (CONFIG_BASE_SMALL ? 
3 : 8) -#define UIDHASH_SZ (1 << UIDHASH_BITS) #define UIDHASH_MASK (UIDHASH_SZ - 1) #define __uidhashfn(uid) (((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK) -#define uidhashentry(uid) (uidhash_table + __uidhashfn((uid))) +#define uidhashentry(ns, uid) ((ns)->uidhash_table + __uidhashfn((uid))) /* bucket for uid inside the hash table owned by ns */ static struct kmem_cache *uid_cachep; -static struct list_head uidhash_table[UIDHASH_SZ]; /* * The uidhash_lock is mostly taken from process context, but it is @@ -94,9 +93,10 @@ struct user_struct *find_user(uid_t uid) { struct user_struct *ret; unsigned long flags; + struct user_namespace *ns = current->nsproxy->user_ns; spin_lock_irqsave(&uidhash_lock, flags); - ret = uid_hash_find(uid, uidhashentry(uid)); + ret = uid_hash_find(uid, uidhashentry(ns, uid)); spin_unlock_irqrestore(&uidhash_lock, flags); return ret; } @@ -120,9 +120,9 @@ void free_uid(struct user_struct *up) } } -struct user_struct * alloc_uid(uid_t uid) +struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid) { - struct list_head *hashent = uidhashentry(uid); + struct list_head *hashent = uidhashentry(ns, uid); struct user_struct *up; spin_lock_irq(&uidhash_lock); @@ -211,11 +211,11 @@ static int __init uid_cache_init(void) 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL, NULL); for(n = 0; n < UIDHASH_SZ; ++n) - INIT_LIST_HEAD(uidhash_table + n); + INIT_LIST_HEAD(init_user_ns.uidhash_table + n); /* Insert the root user immediately (init already runs as root) */ spin_lock_irq(&uidhash_lock); - uid_hash_insert(&root_user, uidhashentry(0)); + uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0)); spin_unlock_irq(&uidhash_lock); return 0; diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c new file mode 100644 index 0000000000000000000000000000000000000000..3d79642097748c0b89ad2ac4c3141ab601489fcb --- /dev/null +++ b/kernel/user_namespace.c @@ -0,0 +1,43 @@ +/* + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as 
+ * published by the Free Software Foundation, version 2 of the + * License. + */ + +#include +#include +#include +#include + +struct user_namespace init_user_ns = { /* the initial user namespace; its root user is the global root_user */ + .kref = { + .refcount = ATOMIC_INIT(2), /* NOTE(review): the reason the count starts at 2 is not visible in this patch -- confirm */ + }, + .root_user = &root_user, +}; + +EXPORT_SYMBOL_GPL(init_user_ns); + +#ifdef CONFIG_USER_NS + +struct user_namespace * copy_user_ns(int flags, struct user_namespace *old_ns) +{ + struct user_namespace *new_ns; + + BUG_ON(!old_ns); + get_user_ns(old_ns); /* flags is not examined here: the old namespace is shared and returned with one extra reference */ + + new_ns = old_ns; + return new_ns; +} + +void free_user_ns(struct kref *kref) +{ + struct user_namespace *ns; + + ns = container_of(kref, struct user_namespace, kref); /* recover the namespace that embeds this kref before freeing it */ + kfree(ns); +} + +#endif /* CONFIG_USER_NS */