/*
 * The "user cache".
 *
 * (C) Copyright 1991-2000 Linus Torvalds
 *
 * We have a per-user structure to keep track of how many
 * processes, files etc the user has claimed, in order to be
 * able to have per-user limits for system resources.
 */

#include <linux/init.h>
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/bitops.h>
#include <linux/key.h>
#include <linux/interrupt.h>
#include <linux/module.h>
#include <linux/user_namespace.h>

/*
 * The initial user namespace, holding root_user.
 * NOTE(review): refcount starts at 2 — presumably one reference for the
 * namespace itself and one held by init's nsproxy; confirm against the
 * user_namespace lifetime rules before relying on this.
 */
struct user_namespace init_user_ns = {
	.kref = {
		.refcount	= ATOMIC_INIT(2),
	},
	.root_user = &root_user,
};
EXPORT_SYMBOL_GPL(init_user_ns);

/*
 * UID task count cache, to get fast user lookup in "alloc_uid"
 * when changing user ID's (ie setuid() and friends).
 */

#define UIDHASH_MASK		(UIDHASH_SZ - 1)
/* Fold the high hash bits into the low ones before masking. */
#define __uidhashfn(uid)	(((uid >> UIDHASH_BITS) + uid) & UIDHASH_MASK)
/* Per-namespace hash bucket for a given uid. */
#define uidhashentry(ns, uid)	((ns)->uidhash_table + __uidhashfn((uid)))

/* Slab cache backing all struct user_struct allocations. */
static struct kmem_cache *uid_cachep;


/*
 * The uidhash_lock is mostly taken from process context, but it is
 * occasionally also taken from softirq/tasklet context, when
 * task-structs get RCU-freed. Hence all locking must be softirq-safe.
 * But free_uid() is also called with local interrupts disabled, and running
 * local_bh_enable() with local interrupts disabled is an error - we'll run
 * softirq callbacks, and they can unconditionally enable interrupts, and
 * the caller of free_uid() didn't expect that..
 */
static DEFINE_SPINLOCK(uidhash_lock);

/* Statically initialized user_struct for uid 0; never kfreed. */
struct user_struct root_user = {
	.__count	= ATOMIC_INIT(1),
	.processes	= ATOMIC_INIT(1),
	.files		= ATOMIC_INIT(0),
	.sigpending	= ATOMIC_INIT(0),
	.locked_shm     = 0,
#ifdef CONFIG_KEYS
	.uid_keyring	= &root_user_keyring,
	.session_keyring = &root_session_keyring,
#endif
#ifdef CONFIG_USER_SCHED
	.tg		= &init_task_group,
#endif
};


/*
 * These routines must be called with the uidhash spinlock held!
 */

/* Link @up into hash bucket @hashent. */
static void uid_hash_insert(struct user_struct *up, struct hlist_head *hashent)
{
	hlist_add_head(&up->uidhash_node, hashent);
}


/* Unlink @up from the uid hash; caller must hold uidhash_lock. */
static void uid_hash_remove(struct user_struct *up)
{
	hlist_del_init(&up->uidhash_node);
}


/*
 * Look up @uid in bucket @hashent.  On a hit, take a reference on the
 * user_struct and return it; otherwise return NULL.  Caller must hold
 * uidhash_lock.
 */
static struct user_struct *uid_hash_find(uid_t uid, struct hlist_head *hashent)
{
	struct user_struct *user;
	struct hlist_node *h;

	hlist_for_each_entry(user, h, hashent, uidhash_node) {
		if (user->uid == uid) {
			atomic_inc(&user->__count);
			return user;
		}
	}

	return NULL;
}


#ifdef CONFIG_USER_SCHED

/* Tear down the per-user scheduling group. */
static void sched_destroy_user(struct user_struct *up)
{
	sched_destroy_group(up->tg);
}

/* Create the per-user scheduling group; 0 on success, -ENOMEM on failure. */
static int sched_create_user(struct user_struct *up)
{
	int rc = 0;

	up->tg = sched_create_group();
	if (IS_ERR(up->tg))
		rc = -ENOMEM;

	return rc;
}

/* Move @p into its (new) user's scheduling group after a uid switch. */
static void sched_switch_user(struct task_struct *p)
{
	sched_move_task(p);
}

#else	/* CONFIG_USER_SCHED */

/* Without per-user group scheduling these are all no-ops. */
static void sched_destroy_user(struct user_struct *up) { }
static int sched_create_user(struct user_struct *up) { return 0; }
static void sched_switch_user(struct task_struct *p) { }

#endif	/* CONFIG_USER_SCHED */


#if defined(CONFIG_USER_SCHED) && defined(CONFIG_SYSFS)

static struct kset *uids_kset; /* represents the /sys/kernel/uids/ directory */
/* Serializes uid sysfs dir create/remove against uid hash insert/remove. */
static DEFINE_MUTEX(uids_mutex);

static inline void uids_mutex_lock(void)
{
	mutex_lock(&uids_mutex);
}

static inline void uids_mutex_unlock(void)
{
	mutex_unlock(&uids_mutex);
}


/* uid directory attributes */
#ifdef CONFIG_FAIR_GROUP_SCHED
/* sysfs show handler: print the user's group CPU shares. */
static ssize_t cpu_shares_show(struct kobject *kobj,
			       struct kobj_attribute *attr,
			       char *buf)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);

	return sprintf(buf, "%lu\n", sched_group_shares(up->tg));
}

/* sysfs store handler: parse a share count and apply it to the group. */
static ssize_t cpu_shares_store(struct kobject *kobj,
				struct kobj_attribute *attr,
				const char *buf, size_t size)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
	unsigned long shares;
	int rc;

	/* Reject unparseable input: previously a failed sscanf() left
	 * 'shares' uninitialized and we used the garbage value. */
	if (sscanf(buf, "%lu", &shares) != 1)
		return -EINVAL;

	rc = sched_group_set_shares(up->tg, shares);

	return (rc ? rc : size);
}

static struct kobj_attribute cpu_share_attr =
	__ATTR(cpu_share, 0644, cpu_shares_show, cpu_shares_store);
#endif


#ifdef CONFIG_RT_GROUP_SCHED
/* sysfs show handler: print the user's group RT runtime. */
static ssize_t cpu_rt_runtime_show(struct kobject *kobj,
				   struct kobj_attribute *attr,
				   char *buf)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);

	return sprintf(buf, "%lu\n", sched_group_rt_runtime(up->tg));
}

/* sysfs store handler: parse an RT runtime and apply it to the group. */
static ssize_t cpu_rt_runtime_store(struct kobject *kobj,
				    struct kobj_attribute *attr,
				    const char *buf, size_t size)
{
	struct user_struct *up = container_of(kobj, struct user_struct, kobj);
	unsigned long rt_runtime;
	int rc;

	/* Reject unparseable input: previously a failed sscanf() left
	 * 'rt_runtime' uninitialized and we used the garbage value. */
	if (sscanf(buf, "%lu", &rt_runtime) != 1)
		return -EINVAL;

	rc = sched_group_set_rt_runtime(up->tg, rt_runtime);

	return (rc ? rc : size);
}

static struct kobj_attribute cpu_rt_runtime_attr =
	__ATTR(cpu_rt_runtime, 0644, cpu_rt_runtime_show, cpu_rt_runtime_store);
#endif


/* default attributes per uid directory */
static struct attribute *uids_attributes[] = {
#ifdef CONFIG_FAIR_GROUP_SCHED
	&cpu_share_attr.attr,
#endif
#ifdef CONFIG_RT_GROUP_SCHED
	&cpu_rt_runtime_attr.attr,
#endif
	NULL	/* sysfs requires a NULL-terminated attribute list */
};


/* kobject release callback: intentionally empty — the lifetime of
 * user_struct is not managed by the kobject core (now). */
static void uids_release(struct kobject *kobj)
{
}


/* kobject type for /sys/kernel/uids/<uid> directories. */
static struct kobj_type uids_ktype = {
	.sysfs_ops = &kobj_sysfs_ops,
	.default_attrs = uids_attributes,
	.release = uids_release,
};


/* create /sys/kernel/uids/<uid>/cpu_share file for this user */
static int uids_user_create(struct user_struct *up)
L
Linus Torvalds 已提交
223
{
224
	struct kobject *kobj = &up->kobj;
225 226
	int error;

227 228
	memset(kobj, 0, sizeof(struct kobject));
	kobj->kset = uids_kset;
229 230 231
	error = kobject_init_and_add(kobj, &uids_ktype, NULL, "%d", up->uid);
	if (error) {
		kobject_put(kobj);
232
		goto done;
233
	}
234

235
	kobject_uevent(kobj, KOBJ_ADD);
236 237
done:
	return error;
L
Linus Torvalds 已提交
238 239
}

/* create these entries in sysfs:
 * 	"/sys/kernel/uids" directory
 * 	"/sys/kernel/uids/0" directory (for root user)
 * 	"/sys/kernel/uids/0/cpu_share" file (for root user)
 */
int __init uids_sysfs_init(void)
{
	uids_kset = kset_create_and_add("uids", NULL, kernel_kobj);
	if (!uids_kset)
		return -ENOMEM;

	/* root_user exists from boot; give it its sysfs directory now. */
	return uids_user_create(&root_user);
}


/* work function to remove sysfs directory for a user and free up
 * corresponding structures.
 */
static void remove_user_sysfs_dir(struct work_struct *w)
{
	struct user_struct *up = container_of(w, struct user_struct, work);
	unsigned long flags;
	int remove_user = 0;

	/* Make uid_hash_remove() + sysfs_remove_file() + kobject_del()
	 * atomic.
	 */
	uids_mutex_lock();

	local_irq_save(flags);

	/* free_user() re-incremented the count before scheduling this work,
	 * so another get_uid() may have revived the user in the meantime;
	 * only tear down if we are really dropping the last reference. */
	if (atomic_dec_and_lock(&up->__count, &uidhash_lock)) {
		uid_hash_remove(up);
		remove_user = 1;
		spin_unlock_irqrestore(&uidhash_lock, flags);
	} else {
		local_irq_restore(flags);
	}

	if (!remove_user)
		goto done;

	/* Remove the sysfs directory and drop the kobject reference. */
	kobject_uevent(&up->kobj, KOBJ_REMOVE);
	kobject_del(&up->kobj);
	kobject_put(&up->kobj);

	sched_destroy_user(up);
	key_put(up->uid_keyring);
	key_put(up->session_keyring);
	kmem_cache_free(uid_cachep, up);

done:
	uids_mutex_unlock();
}


/* IRQs are disabled and uidhash_lock is held upon function entry.
 * IRQ state (as stored in flags) is restored and uidhash_lock released
 * upon function exit.
 */
static inline void free_user(struct user_struct *up, unsigned long flags)
{
	/* restore back the count */
	atomic_inc(&up->__count);
	spin_unlock_irqrestore(&uidhash_lock, flags);

	/* Defer teardown to process context via a workqueue —
	 * NOTE(review): presumably because sysfs/kobject removal cannot be
	 * done from this context; confirm against kobject_del() rules. */
	INIT_WORK(&up->work, remove_user_sysfs_dir);
	schedule_work(&up->work);
}


#else	/* CONFIG_USER_SCHED && CONFIG_SYSFS */

/* No per-uid sysfs directories: creation/locking helpers are no-ops. */
int uids_sysfs_init(void) { return 0; }
static inline int uids_user_create(struct user_struct *up) { return 0; }
static inline void uids_mutex_lock(void) { }
static inline void uids_mutex_unlock(void) { }

/* IRQs are disabled and uidhash_lock is held upon function entry.
 * IRQ state (as stored in flags) is restored and uidhash_lock released
 * upon function exit.
 */
static inline void free_user(struct user_struct *up, unsigned long flags)
{
	uid_hash_remove(up);
	spin_unlock_irqrestore(&uidhash_lock, flags);
	sched_destroy_user(up);
	key_put(up->uid_keyring);
	key_put(up->session_keyring);
	kmem_cache_free(uid_cachep, up);
}

#endif


L
Linus Torvalds 已提交
331 332 333 334 335 336 337 338 339
/*
 * Locate the user_struct for the passed UID.  If found, take a ref on it.  The
 * caller must undo that ref with free_uid().
 *
 * If the user_struct could not be found, return NULL.
 */
struct user_struct *find_user(uid_t uid)
{
	struct user_struct *ret;
340
	unsigned long flags;
341
	struct user_namespace *ns = current->nsproxy->user_ns;
L
Linus Torvalds 已提交
342

343
	spin_lock_irqsave(&uidhash_lock, flags);
344
	ret = uid_hash_find(uid, uidhashentry(ns, uid));
345
	spin_unlock_irqrestore(&uidhash_lock, flags);
L
Linus Torvalds 已提交
346 347 348 349 350
	return ret;
}

/*
 * Drop one reference on @up; when the count hits zero the user_struct is
 * unhashed and freed.  NULL is silently ignored.
 */
void free_uid(struct user_struct *up)
{
	unsigned long flags;

	if (!up)
		return;

	/* Disable IRQs before the dec-and-lock: see the uidhash_lock
	 * comment above — this path may run with interrupts already off
	 * and must not re-enable softirqs behind the caller's back. */
	local_irq_save(flags);
	if (atomic_dec_and_lock(&up->__count, &uidhash_lock))
		free_user(up, flags);	/* drops the lock + restores flags */
	else
		local_irq_restore(flags);
}


363
struct user_struct * alloc_uid(struct user_namespace *ns, uid_t uid)
L
Linus Torvalds 已提交
364
{
P
Pavel Emelyanov 已提交
365
	struct hlist_head *hashent = uidhashentry(ns, uid);
366
	struct user_struct *up, *new;
L
Linus Torvalds 已提交
367

368
	/* Make uid_hash_find() + uids_user_create() + uid_hash_insert()
369 370 371 372
	 * atomic.
	 */
	uids_mutex_lock();

373
	spin_lock_irq(&uidhash_lock);
L
Linus Torvalds 已提交
374
	up = uid_hash_find(uid, hashent);
375
	spin_unlock_irq(&uidhash_lock);
L
Linus Torvalds 已提交
376 377

	if (!up) {
378
		new = kmem_cache_alloc(uid_cachep, GFP_KERNEL);
379 380
		if (!new)
			goto out_unlock;
381

L
Linus Torvalds 已提交
382 383 384 385 386
		new->uid = uid;
		atomic_set(&new->__count, 1);
		atomic_set(&new->processes, 0);
		atomic_set(&new->files, 0);
		atomic_set(&new->sigpending, 0);
387
#ifdef CONFIG_INOTIFY_USER
R
Robert Love 已提交
388 389 390
		atomic_set(&new->inotify_watches, 0);
		atomic_set(&new->inotify_devs, 0);
#endif
A
Alexey Dobriyan 已提交
391
#ifdef CONFIG_POSIX_MQUEUE
L
Linus Torvalds 已提交
392
		new->mq_bytes = 0;
A
Alexey Dobriyan 已提交
393
#endif
L
Linus Torvalds 已提交
394 395
		new->locked_shm = 0;

396 397
		if (alloc_uid_keyring(new, current) < 0)
			goto out_free_user;
L
Linus Torvalds 已提交
398

399 400
		if (sched_create_user(new) < 0)
			goto out_put_keys;
401

402 403
		if (uids_user_create(new))
			goto out_destoy_sched;
404

L
Linus Torvalds 已提交
405 406 407 408
		/*
		 * Before adding this, check whether we raced
		 * on adding the same user already..
		 */
409
		spin_lock_irq(&uidhash_lock);
L
Linus Torvalds 已提交
410 411
		up = uid_hash_find(uid, hashent);
		if (up) {
412
			/* This case is not possible when CONFIG_USER_SCHED
413 414 415 416
			 * is defined, since we serialize alloc_uid() using
			 * uids_mutex. Hence no need to call
			 * sched_destroy_user() or remove_user_sysfs_dir().
			 */
L
Linus Torvalds 已提交
417 418 419 420 421 422 423
			key_put(new->uid_keyring);
			key_put(new->session_keyring);
			kmem_cache_free(uid_cachep, new);
		} else {
			uid_hash_insert(new, hashent);
			up = new;
		}
424
		spin_unlock_irq(&uidhash_lock);
L
Linus Torvalds 已提交
425 426

	}
427 428 429

	uids_mutex_unlock();

L
Linus Torvalds 已提交
430
	return up;
431 432 433 434 435 436 437 438 439 440 441

out_destoy_sched:
	sched_destroy_user(new);
out_put_keys:
	key_put(new->uid_keyring);
	key_put(new->session_keyring);
out_free_user:
	kmem_cache_free(uid_cachep, new);
out_unlock:
	uids_mutex_unlock();
	return NULL;
L
Linus Torvalds 已提交
442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457
}

/*
 * Re-point current->user at @new_user, moving the per-user process count
 * and keyrings across.  current->user takes over the caller's reference
 * on @new_user (no get_uid() here); the old user's reference is dropped
 * via free_uid() at the end.
 */
void switch_uid(struct user_struct *new_user)
{
	struct user_struct *old_user;

	/* What if a process setreuid()'s and this brings the
	 * new uid over his NPROC rlimit?  We can check this now
	 * cheaply with the new uid cache, so if it matters
	 * we should be checking for it.  -DaveM
	 */
	old_user = current->user;
	atomic_inc(&new_user->processes);
	atomic_dec(&old_user->processes);
	switch_uid_keyring(new_user);
	current->user = new_user;
	sched_switch_user(current);

	/*
	 * We need to synchronize with __sigqueue_alloc()
	 * doing a get_uid(p->user).. If that saw the old
	 * user value, we need to wait until it has exited
	 * its critical region before we can free the old
	 * structure.
	 */
	smp_mb();
	spin_unlock_wait(&current->sighand->siglock);

	free_uid(old_user);
	suid_keys(current);
}


#ifdef CONFIG_USER_NS
/*
 * Detach every user_struct from @ns's hash so they can no longer be
 * looked up, then drop the namespace's reference on its root user.
 */
void release_uids(struct user_namespace *ns)
{
	int i;
	unsigned long flags;
	struct hlist_head *head;
	struct hlist_node *nd;

	spin_lock_irqsave(&uidhash_lock, flags);
	/*
	 * collapse the chains so that the user_struct-s will
	 * be still alive, but not in hashes. subsequent free_uid()
	 * will free them.
	 */
	for (i = 0; i < UIDHASH_SZ; i++) {
		head = ns->uidhash_table + i;
		while (!hlist_empty(head)) {
			nd = head->first;
			hlist_del_init(nd);
		}
	}
	spin_unlock_irqrestore(&uidhash_lock, flags);

	free_uid(ns->root_user);
}
#endif

/*
 * Boot-time setup: create the user_struct slab cache, initialize the
 * initial namespace's hash buckets, and hash root_user.
 */
static int __init uid_cache_init(void)
{
	int n;

	uid_cachep = kmem_cache_create("uid_cache", sizeof(struct user_struct),
			0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL);

	for(n = 0; n < UIDHASH_SZ; ++n)
		INIT_HLIST_HEAD(init_user_ns.uidhash_table + n);

	/* Insert the root user immediately (init already runs as root) */
	spin_lock_irq(&uidhash_lock);
	uid_hash_insert(&root_user, uidhashentry(&init_user_ns, 0));
	spin_unlock_irq(&uidhash_lock);

	return 0;
}

module_init(uid_cache_init);