sys.c 44.1 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14
/*
 *  linux/kernel/sys.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

#include <linux/module.h>
#include <linux/mm.h>
#include <linux/utsname.h>
#include <linux/mman.h>
#include <linux/reboot.h>
#include <linux/prctl.h>
#include <linux/highuid.h>
#include <linux/fs.h>
15
#include <linux/perf_event.h>
16
#include <linux/resource.h>
17 18
#include <linux/kernel.h>
#include <linux/kexec.h>
L
Linus Torvalds 已提交
19
#include <linux/workqueue.h>
20
#include <linux/capability.h>
L
Linus Torvalds 已提交
21 22 23 24 25 26 27 28
#include <linux/device.h>
#include <linux/key.h>
#include <linux/times.h>
#include <linux/posix-timers.h>
#include <linux/security.h>
#include <linux/dcookies.h>
#include <linux/suspend.h>
#include <linux/tty.h>
29
#include <linux/signal.h>
M
Matt Helsley 已提交
30
#include <linux/cn_proc.h>
31
#include <linux/getcpu.h>
32
#include <linux/task_io_accounting_ops.h>
33
#include <linux/seccomp.h>
M
Mark Lord 已提交
34
#include <linux/cpu.h>
35
#include <linux/personality.h>
36
#include <linux/ptrace.h>
37
#include <linux/fs_struct.h>
38
#include <linux/gfp.h>
39
#include <linux/syscore_ops.h>
40 41
#include <linux/version.h>
#include <linux/ctype.h>
L
Linus Torvalds 已提交
42 43 44

#include <linux/compat.h>
#include <linux/syscalls.h>
45
#include <linux/kprobes.h>
46
#include <linux/user_namespace.h>
L
Linus Torvalds 已提交
47

48
#include <linux/kmsg_dump.h>
49 50
/* Move somewhere else to avoid recompiling? */
#include <generated/utsrelease.h>
51

L
Linus Torvalds 已提交
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73
#include <asm/uaccess.h>
#include <asm/io.h>
#include <asm/unistd.h>

/*
 * Fallback definitions: each control macro below that no earlier header
 * has defined evaluates to -EINVAL.
 * NOTE(review): presumably consumed by the prctl() handling further down
 * this file - confirm against the callers.
 */
#ifndef SET_UNALIGN_CTL
# define SET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_UNALIGN_CTL
# define GET_UNALIGN_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEMU_CTL
# define SET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEMU_CTL
# define GET_FPEMU_CTL(a,b)	(-EINVAL)
#endif
#ifndef SET_FPEXC_CTL
# define SET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_FPEXC_CTL
# define GET_FPEXC_CTL(a,b)	(-EINVAL)
#endif
#ifndef GET_ENDIAN
# define GET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef SET_ENDIAN
# define SET_ENDIAN(a,b)	(-EINVAL)
#endif
#ifndef GET_TSC_CTL
# define GET_TSC_CTL(a)		(-EINVAL)
#endif
#ifndef SET_TSC_CTL
# define SET_TSC_CTL(a)		(-EINVAL)
#endif
L
Linus Torvalds 已提交
86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115

/*
 * this is where the system-wide overflow UID and GID are defined, for
 * architectures that now have 32-bit UID/GID but didn't in the past
 */

int overflowuid = DEFAULT_OVERFLOWUID;
int overflowgid = DEFAULT_OVERFLOWGID;

#ifdef CONFIG_UID16
EXPORT_SYMBOL(overflowuid);
EXPORT_SYMBOL(overflowgid);
#endif

/*
 * the same as above, but for filesystems which can only store a 16-bit
 * UID and GID. as such, this is needed on all architectures
 */

int fs_overflowuid = DEFAULT_FS_OVERFLOWUID;
int fs_overflowgid = DEFAULT_FS_OVERFLOWUID;

EXPORT_SYMBOL(fs_overflowuid);
EXPORT_SYMBOL(fs_overflowgid);

/*
 * this indicates whether you can reboot with ctrl-alt-del: the default is yes
 *
 * ctrl_alt_del() schedules a restart when this is non-zero; the reboot()
 * syscall sets it via LINUX_REBOOT_CMD_CAD_ON / LINUX_REBOOT_CMD_CAD_OFF.
 */

int C_A_D = 1;
/* NOTE(review): presumably the pid targeted by kill_cad_pid() - confirm */
struct pid *cad_pid;
EXPORT_SYMBOL(cad_pid);
L
Linus Torvalds 已提交
118

119 120 121 122 123 124
/*
 * If set, this is used for preparing the system to power off.
 * kernel_power_off() calls it, when non-NULL, right after
 * kernel_shutdown_prepare(SYSTEM_POWER_OFF).
 */

void (*pm_power_off_prepare)(void);

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143
/*
 * May current change the priority of @p?
 *
 * Yes when both tasks share a user namespace and current's euid equals
 * p's uid or euid, or when current has CAP_SYS_NICE towards p's user_ns.
 *
 * Called with rcu_read_lock, creds are safe
 */
static bool set_one_prio_perm(struct task_struct *p)
{
	const struct cred *mine = current_cred();
	const struct cred *theirs = __task_cred(p);
	bool same_ns = theirs->user->user_ns == mine->user->user_ns;

	if (same_ns &&
	    (theirs->uid == mine->euid || theirs->euid == mine->euid))
		return true;

	/* fall back to the capability check against the target's namespace */
	return ns_capable(theirs->user->user_ns, CAP_SYS_NICE);
}

144 145 146 147
/*
 * set the priority of a task
 * - the caller must hold the RCU read lock
 */
L
Linus Torvalds 已提交
148 149 150 151
static int set_one_prio(struct task_struct *p, int niceval, int error)
{
	int no_nice;

152
	if (!set_one_prio_perm(p)) {
L
Linus Torvalds 已提交
153 154 155
		error = -EPERM;
		goto out;
	}
M
Matt Mackall 已提交
156
	if (niceval < task_nice(p) && !can_nice(p, niceval)) {
L
Linus Torvalds 已提交
157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
		error = -EACCES;
		goto out;
	}
	no_nice = security_task_setnice(p, niceval);
	if (no_nice) {
		error = no_nice;
		goto out;
	}
	if (error == -ESRCH)
		error = 0;
	set_user_nice(p, niceval);
out:
	return error;
}

172
/*
 * setpriority - set the nice value of a process, process group or user
 * @which:   PRIO_PROCESS, PRIO_PGRP or PRIO_USER
 * @who:     pid, pgrp id or uid; 0 selects current, current's process
 *           group or the caller's uid respectively
 * @niceval: requested nice value, clamped to [-20, 19]
 *
 * Returns -EINVAL for an unknown @which, -ESRCH if set_one_prio() was
 * never reached for any task, otherwise the status accumulated by
 * set_one_prio() over all matching tasks.
 */
SYSCALL_DEFINE3(setpriority, int, which, int, who, int, niceval)
{
	struct task_struct *g, *p;
	struct user_struct *user;
	const struct cred *cred = current_cred();
	int error = -EINVAL;
	struct pid *pgrp;

	if (which > PRIO_USER || which < PRIO_PROCESS)
		goto out;

	/* clamp niceval to the valid nice range */
	error = -ESRCH;
	if (niceval < -20)
		niceval = -20;
	if (niceval > 19)
		niceval = 19;

	rcu_read_lock();
	read_lock(&tasklist_lock);
	switch (which) {
	case PRIO_PROCESS:
		if (who)
			p = find_task_by_vpid(who);
		else
			p = current;
		if (p)
			error = set_one_prio(p, niceval, error);
		break;
	case PRIO_PGRP:
		if (who)
			pgrp = find_vpid(who);
		else
			pgrp = task_pgrp(current);
		do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
			error = set_one_prio(p, niceval, error);
		} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
		break;
	case PRIO_USER:
		user = (struct user_struct *) cred->user;
		if (!who)
			who = cred->uid;
		else if ((who != cred->uid) &&
			 !(user = find_user(who)))
			goto out_unlock;	/* No processes for this user */

		do_each_thread(g, p) {
			if (__task_cred(p)->uid == who)
				error = set_one_prio(p, niceval, error);
		} while_each_thread(g, p);
		if (who != cred->uid)
			free_uid(user);		/* For find_user() */
		break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
	rcu_read_unlock();
out:
	return error;
}

/*
 * Ugh. To avoid negative return values, "getpriority()" will
 * not return the normal nice-value, but a negated value that
 * has been offset by 20 (ie it returns 40..1 instead of -20..19)
 * to stay compatible.
 */
239
SYSCALL_DEFINE2(getpriority, int, which, int, who)
L
Linus Torvalds 已提交
240 241 242
{
	struct task_struct *g, *p;
	struct user_struct *user;
243
	const struct cred *cred = current_cred();
L
Linus Torvalds 已提交
244
	long niceval, retval = -ESRCH;
245
	struct pid *pgrp;
L
Linus Torvalds 已提交
246

247
	if (which > PRIO_USER || which < PRIO_PROCESS)
L
Linus Torvalds 已提交
248 249
		return -EINVAL;

250
	rcu_read_lock();
L
Linus Torvalds 已提交
251 252 253
	read_lock(&tasklist_lock);
	switch (which) {
		case PRIO_PROCESS:
254
			if (who)
255
				p = find_task_by_vpid(who);
256 257
			else
				p = current;
L
Linus Torvalds 已提交
258 259 260 261 262 263 264
			if (p) {
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
			}
			break;
		case PRIO_PGRP:
265
			if (who)
266
				pgrp = find_vpid(who);
267 268
			else
				pgrp = task_pgrp(current);
269
			do_each_pid_thread(pgrp, PIDTYPE_PGID, p) {
L
Linus Torvalds 已提交
270 271 272
				niceval = 20 - task_nice(p);
				if (niceval > retval)
					retval = niceval;
273
			} while_each_pid_thread(pgrp, PIDTYPE_PGID, p);
L
Linus Torvalds 已提交
274 275
			break;
		case PRIO_USER:
276
			user = (struct user_struct *) cred->user;
L
Linus Torvalds 已提交
277
			if (!who)
278 279 280 281
				who = cred->uid;
			else if ((who != cred->uid) &&
				 !(user = find_user(who)))
				goto out_unlock;	/* No processes for this user */
L
Linus Torvalds 已提交
282

283
			do_each_thread(g, p) {
284
				if (__task_cred(p)->uid == who) {
L
Linus Torvalds 已提交
285 286 287 288
					niceval = 20 - task_nice(p);
					if (niceval > retval)
						retval = niceval;
				}
289
			} while_each_thread(g, p);
290
			if (who != cred->uid)
L
Linus Torvalds 已提交
291 292 293 294 295
				free_uid(user);		/* for find_user() */
			break;
	}
out_unlock:
	read_unlock(&tasklist_lock);
296
	rcu_read_unlock();
L
Linus Torvalds 已提交
297 298 299 300

	return retval;
}

301 302 303 304 305 306 307 308
/**
 *	emergency_restart - reboot the system
 *
 *	Without shutting down any hardware or taking any locks
 *	reboot the system.  This is called when we know we are in
 *	trouble so this is our best effort to reboot.  This is
 *	safe to call in interrupt context.
 */
309 310
void emergency_restart(void)
{
311
	kmsg_dump(KMSG_DUMP_EMERG);
312 313 314 315
	machine_emergency_restart();
}
EXPORT_SYMBOL_GPL(emergency_restart);

316
void kernel_restart_prepare(char *cmd)
317
{
318
	blocking_notifier_call_chain(&reboot_notifier_list, SYS_RESTART, cmd);
319
	system_state = SYSTEM_RESTART;
320
	usermodehelper_disable();
321
	device_shutdown();
322
	syscore_shutdown();
323
}
324

325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
/**
 *	register_reboot_notifier - Register function to be called at reboot time
 *	@nb: Info about notifier function to be called
 *
 *	Adds @nb to reboot_notifier_list, the chain that is run
 *	at reboot time.
 *
 *	Currently always returns zero, as blocking_notifier_chain_register()
 *	always returns zero.
 */
int register_reboot_notifier(struct notifier_block *nb)
{
	int ret;

	ret = blocking_notifier_chain_register(&reboot_notifier_list, nb);
	return ret;
}
EXPORT_SYMBOL(register_reboot_notifier);

/**
 *	unregister_reboot_notifier - Unregister previously registered reboot notifier
 *	@nb: Hook to be unregistered
 *
 *	Removes @nb from reboot_notifier_list, undoing an earlier
 *	register_reboot_notifier().
 *
 *	Returns zero on success, or %-ENOENT on failure.
 */
int unregister_reboot_notifier(struct notifier_block *nb)
{
	int ret;

	ret = blocking_notifier_chain_unregister(&reboot_notifier_list, nb);
	return ret;
}
EXPORT_SYMBOL(unregister_reboot_notifier);

356 357 358
/**
 *	kernel_restart - reboot the system
 *	@cmd: pointer to buffer containing command to execute for restart
359
 *		or %NULL
360 361 362 363
 *
 *	Shutdown everything and perform a clean reboot.
 *	This is not safe to call in interrupt context.
 */
364 365 366
void kernel_restart(char *cmd)
{
	kernel_restart_prepare(cmd);
367
	if (!cmd)
368
		printk(KERN_EMERG "Restarting system.\n");
369
	else
370
		printk(KERN_EMERG "Restarting system with command '%s'.\n", cmd);
371
	kmsg_dump(KMSG_DUMP_RESTART);
372 373 374 375
	machine_restart(cmd);
}
EXPORT_SYMBOL_GPL(kernel_restart);

376
static void kernel_shutdown_prepare(enum system_states state)
377
{
378
	blocking_notifier_call_chain(&reboot_notifier_list,
379 380
		(state == SYSTEM_HALT)?SYS_HALT:SYS_POWER_OFF, NULL);
	system_state = state;
381
	usermodehelper_disable();
382 383
	device_shutdown();
}
384 385 386 387 388 389 390
/**
 *	kernel_halt - halt the system
 *
 *	Shutdown everything and perform a clean system halt.
 */
void kernel_halt(void)
{
391
	kernel_shutdown_prepare(SYSTEM_HALT);
392
	syscore_shutdown();
393
	printk(KERN_EMERG "System halted.\n");
394
	kmsg_dump(KMSG_DUMP_HALT);
395 396
	machine_halt();
}
397

398 399
EXPORT_SYMBOL_GPL(kernel_halt);

400 401 402 403 404 405 406
/**
 *	kernel_power_off - power_off the system
 *
 *	Shutdown everything and perform a clean system power_off.
 */
void kernel_power_off(void)
{
407
	kernel_shutdown_prepare(SYSTEM_POWER_OFF);
408 409
	if (pm_power_off_prepare)
		pm_power_off_prepare();
M
Mark Lord 已提交
410
	disable_nonboot_cpus();
411
	syscore_shutdown();
412
	printk(KERN_EMERG "Power down.\n");
413
	kmsg_dump(KMSG_DUMP_POWEROFF);
414 415 416
	machine_power_off();
}
EXPORT_SYMBOL_GPL(kernel_power_off);
T
Thomas Gleixner 已提交
417 418 419

static DEFINE_MUTEX(reboot_mutex);

L
Linus Torvalds 已提交
420 421 422 423 424 425 426 427
/*
 * Reboot system call: for obvious reasons only root may call it,
 * and even root needs to set up some magic numbers in the registers
 * so that some mistake won't make this reboot the whole machine.
 * You can also set the meaning of the ctrl-alt-del-key here.
 *
 * reboot doesn't sync: do that yourself before calling this.
 */
428 429
SYSCALL_DEFINE4(reboot, int, magic1, int, magic2, unsigned int, cmd,
		void __user *, arg)
L
Linus Torvalds 已提交
430 431
{
	char buffer[256];
432
	int ret = 0;
L
Linus Torvalds 已提交
433 434 435 436 437 438 439 440 441 442 443 444 445

	/* We only trust the superuser with rebooting the system. */
	if (!capable(CAP_SYS_BOOT))
		return -EPERM;

	/* For safety, we require "magic" arguments. */
	if (magic1 != LINUX_REBOOT_MAGIC1 ||
	    (magic2 != LINUX_REBOOT_MAGIC2 &&
	                magic2 != LINUX_REBOOT_MAGIC2A &&
			magic2 != LINUX_REBOOT_MAGIC2B &&
	                magic2 != LINUX_REBOOT_MAGIC2C))
		return -EINVAL;

446 447 448 449 450 451
	/* Instead of trying to make the power_off code look like
	 * halt when pm_power_off is not set do it the easy way.
	 */
	if ((cmd == LINUX_REBOOT_CMD_POWER_OFF) && !pm_power_off)
		cmd = LINUX_REBOOT_CMD_HALT;

T
Thomas Gleixner 已提交
452
	mutex_lock(&reboot_mutex);
L
Linus Torvalds 已提交
453 454
	switch (cmd) {
	case LINUX_REBOOT_CMD_RESTART:
455
		kernel_restart(NULL);
L
Linus Torvalds 已提交
456 457 458 459 460 461 462 463 464 465 466
		break;

	case LINUX_REBOOT_CMD_CAD_ON:
		C_A_D = 1;
		break;

	case LINUX_REBOOT_CMD_CAD_OFF:
		C_A_D = 0;
		break;

	case LINUX_REBOOT_CMD_HALT:
467
		kernel_halt();
L
Linus Torvalds 已提交
468
		do_exit(0);
469
		panic("cannot halt");
L
Linus Torvalds 已提交
470 471

	case LINUX_REBOOT_CMD_POWER_OFF:
472
		kernel_power_off();
L
Linus Torvalds 已提交
473 474 475 476 477
		do_exit(0);
		break;

	case LINUX_REBOOT_CMD_RESTART2:
		if (strncpy_from_user(&buffer[0], arg, sizeof(buffer) - 1) < 0) {
T
Thomas Gleixner 已提交
478 479
			ret = -EFAULT;
			break;
L
Linus Torvalds 已提交
480 481 482
		}
		buffer[sizeof(buffer) - 1] = '\0';

483
		kernel_restart(buffer);
L
Linus Torvalds 已提交
484 485
		break;

H
Huang Ying 已提交
486
#ifdef CONFIG_KEXEC
487
	case LINUX_REBOOT_CMD_KEXEC:
488 489
		ret = kernel_kexec();
		break;
H
Huang Ying 已提交
490
#endif
491

492
#ifdef CONFIG_HIBERNATION
L
Linus Torvalds 已提交
493
	case LINUX_REBOOT_CMD_SW_SUSPEND:
494 495
		ret = hibernate();
		break;
L
Linus Torvalds 已提交
496 497 498
#endif

	default:
499 500
		ret = -EINVAL;
		break;
L
Linus Torvalds 已提交
501
	}
T
Thomas Gleixner 已提交
502
	mutex_unlock(&reboot_mutex);
503
	return ret;
L
Linus Torvalds 已提交
504 505
}

506
static void deferred_cad(struct work_struct *dummy)
L
Linus Torvalds 已提交
507
{
508
	kernel_restart(NULL);
L
Linus Torvalds 已提交
509 510 511 512 513 514 515 516 517
}

/*
 * This function gets called by ctrl-alt-del - ie the keyboard interrupt.
 * As it's called within an interrupt, it may NOT sync: the only choice
 * is whether to reboot at once, or just ignore the ctrl-alt-del.
 */
void ctrl_alt_del(void)
{
518
	static DECLARE_WORK(cad_work, deferred_cad);
L
Linus Torvalds 已提交
519 520 521 522

	if (C_A_D)
		schedule_work(&cad_work);
	else
523
		kill_cad_pid(SIGINT, 1);
L
Linus Torvalds 已提交
524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543
}
	
/*
 * Unprivileged users may change the real gid to the effective gid
 * or vice versa.  (BSD-style)
 *
 * If you set the real gid at all, or set the effective gid to a value not
 * equal to the real gid, then the saved gid is set to the new effective gid.
 *
 * This makes it possible for a setgid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setregid() will be
 * 100% compatible with BSD.  A program which uses just setgid() will be
 * 100% compatible with POSIX with saved IDs. 
 *
 * SMP: There are no races; the GIDs are checked only by filesystem
 *      operations (as far as semantic preservation is concerned).
 */
544
SYSCALL_DEFINE2(setregid, gid_t, rgid, gid_t, egid)
L
Linus Torvalds 已提交
545
{
D
David Howells 已提交
546 547
	const struct cred *old;
	struct cred *new;
L
Linus Torvalds 已提交
548 549
	int retval;

D
David Howells 已提交
550 551 552 553 554 555
	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
L
Linus Torvalds 已提交
556
	if (rgid != (gid_t) -1) {
D
David Howells 已提交
557 558
		if (old->gid == rgid ||
		    old->egid == rgid ||
559
		    nsown_capable(CAP_SETGID))
D
David Howells 已提交
560
			new->gid = rgid;
L
Linus Torvalds 已提交
561
		else
D
David Howells 已提交
562
			goto error;
L
Linus Torvalds 已提交
563 564
	}
	if (egid != (gid_t) -1) {
D
David Howells 已提交
565 566 567
		if (old->gid == egid ||
		    old->egid == egid ||
		    old->sgid == egid ||
568
		    nsown_capable(CAP_SETGID))
D
David Howells 已提交
569
			new->egid = egid;
570
		else
D
David Howells 已提交
571
			goto error;
L
Linus Torvalds 已提交
572
	}
D
David Howells 已提交
573

L
Linus Torvalds 已提交
574
	if (rgid != (gid_t) -1 ||
D
David Howells 已提交
575 576 577 578 579 580 581 582 583
	    (egid != (gid_t) -1 && egid != old->gid))
		new->sgid = new->egid;
	new->fsgid = new->egid;

	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
L
Linus Torvalds 已提交
584 585 586 587 588 589 590
}

/*
 * setgid() is implemented like SysV w/ SAVED_IDS 
 *
 * SMP: Same implicit races as above.
 */
591
SYSCALL_DEFINE1(setgid, gid_t, gid)
L
Linus Torvalds 已提交
592
{
D
David Howells 已提交
593 594
	const struct cred *old;
	struct cred *new;
L
Linus Torvalds 已提交
595 596
	int retval;

D
David Howells 已提交
597 598 599 600 601 602
	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
603
	if (nsown_capable(CAP_SETGID))
D
David Howells 已提交
604 605 606
		new->gid = new->egid = new->sgid = new->fsgid = gid;
	else if (gid == old->gid || gid == old->sgid)
		new->egid = new->fsgid = gid;
L
Linus Torvalds 已提交
607
	else
D
David Howells 已提交
608
		goto error;
L
Linus Torvalds 已提交
609

D
David Howells 已提交
610 611 612 613 614
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
L
Linus Torvalds 已提交
615
}
616

D
David Howells 已提交
617 618 619 620
/*
 * change the user struct in a credentials set to match the new UID
 */
static int set_user(struct cred *new)
L
Linus Torvalds 已提交
621 622 623
{
	struct user_struct *new_user;

624
	new_user = alloc_uid(current_user_ns(), new->uid);
L
Linus Torvalds 已提交
625 626 627
	if (!new_user)
		return -EAGAIN;

628 629 630 631 632 633 634
	/*
	 * We don't fail in case of NPROC limit excess here because too many
	 * poorly written programs don't check set*uid() return code, assuming
	 * it never fails if called by root.  We may still enforce NPROC limit
	 * for programs doing set*uid()+execve() by harmlessly deferring the
	 * failure to the execve() stage.
	 */
635
	if (atomic_read(&new_user->processes) >= rlimit(RLIMIT_NPROC) &&
636 637 638 639
			new_user != INIT_USER)
		current->flags |= PF_NPROC_EXCEEDED;
	else
		current->flags &= ~PF_NPROC_EXCEEDED;
L
Linus Torvalds 已提交
640

D
David Howells 已提交
641 642
	free_uid(new->user);
	new->user = new_user;
L
Linus Torvalds 已提交
643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660
	return 0;
}

/*
 * Unprivileged users may change the real uid to the effective uid
 * or vice versa.  (BSD-style)
 *
 * If you set the real uid at all, or set the effective uid to a value not
 * equal to the real uid, then the saved uid is set to the new effective uid.
 *
 * This makes it possible for a setuid program to completely drop its
 * privileges, which is often a useful assertion to make when you are doing
 * a security audit over a program.
 *
 * The general idea is that a program which uses just setreuid() will be
 * 100% compatible with BSD.  A program which uses just setuid() will be
 * 100% compatible with POSIX with saved IDs. 
 */
661
SYSCALL_DEFINE2(setreuid, uid_t, ruid, uid_t, euid)
L
Linus Torvalds 已提交
662
{
D
David Howells 已提交
663 664
	const struct cred *old;
	struct cred *new;
L
Linus Torvalds 已提交
665 666
	int retval;

D
David Howells 已提交
667 668 669 670 671 672
	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
L
Linus Torvalds 已提交
673
	if (ruid != (uid_t) -1) {
D
David Howells 已提交
674 675 676
		new->uid = ruid;
		if (old->uid != ruid &&
		    old->euid != ruid &&
677
		    !nsown_capable(CAP_SETUID))
D
David Howells 已提交
678
			goto error;
L
Linus Torvalds 已提交
679 680 681
	}

	if (euid != (uid_t) -1) {
D
David Howells 已提交
682 683 684 685
		new->euid = euid;
		if (old->uid != euid &&
		    old->euid != euid &&
		    old->suid != euid &&
686
		    !nsown_capable(CAP_SETUID))
D
David Howells 已提交
687
			goto error;
L
Linus Torvalds 已提交
688 689
	}

690 691 692 693 694
	if (new->uid != old->uid) {
		retval = set_user(new);
		if (retval < 0)
			goto error;
	}
L
Linus Torvalds 已提交
695
	if (ruid != (uid_t) -1 ||
D
David Howells 已提交
696 697 698
	    (euid != (uid_t) -1 && euid != old->uid))
		new->suid = new->euid;
	new->fsuid = new->euid;
L
Linus Torvalds 已提交
699

D
David Howells 已提交
700 701 702
	retval = security_task_fix_setuid(new, old, LSM_SETID_RE);
	if (retval < 0)
		goto error;
L
Linus Torvalds 已提交
703

D
David Howells 已提交
704
	return commit_creds(new);
L
Linus Torvalds 已提交
705

D
David Howells 已提交
706 707 708 709
error:
	abort_creds(new);
	return retval;
}
L
Linus Torvalds 已提交
710 711 712 713 714 715 716 717 718 719 720 721
		
/*
 * setuid() is implemented like SysV with SAVED_IDS 
 * 
 * Note that SAVED_ID's is deficient in that a setuid root program
 * like sendmail, for example, cannot set its uid to be a normal 
 * user and then switch back, because if you're root, setuid() sets
 * the saved uid too.  If you don't like this, blame the bright people
 * in the POSIX committee and/or USG.  Note that the BSD-style setreuid()
 * will allow a root program to temporarily drop privileges and be able to
 * regain them by swapping the real and effective uid.  
 */
722
SYSCALL_DEFINE1(setuid, uid_t, uid)
L
Linus Torvalds 已提交
723
{
D
David Howells 已提交
724 725
	const struct cred *old;
	struct cred *new;
L
Linus Torvalds 已提交
726 727
	int retval;

D
David Howells 已提交
728 729 730 731 732 733
	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
734
	if (nsown_capable(CAP_SETUID)) {
D
David Howells 已提交
735
		new->suid = new->uid = uid;
736 737 738 739
		if (uid != old->uid) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
D
David Howells 已提交
740 741 742
		}
	} else if (uid != old->uid && uid != new->suid) {
		goto error;
L
Linus Torvalds 已提交
743 744
	}

D
David Howells 已提交
745 746 747 748 749
	new->fsuid = new->euid = uid;

	retval = security_task_fix_setuid(new, old, LSM_SETID_ID);
	if (retval < 0)
		goto error;
L
Linus Torvalds 已提交
750

D
David Howells 已提交
751
	return commit_creds(new);
L
Linus Torvalds 已提交
752

D
David Howells 已提交
753 754 755
error:
	abort_creds(new);
	return retval;
L
Linus Torvalds 已提交
756 757 758 759 760 761 762
}


/*
 * This function implements a generic ability to update ruid, euid,
 * and suid.  This allows you to implement the 4.4 compatible seteuid().
 */
763
SYSCALL_DEFINE3(setresuid, uid_t, ruid, uid_t, euid, uid_t, suid)
L
Linus Torvalds 已提交
764
{
D
David Howells 已提交
765 766
	const struct cred *old;
	struct cred *new;
L
Linus Torvalds 已提交
767 768
	int retval;

D
David Howells 已提交
769 770 771 772 773
	new = prepare_creds();
	if (!new)
		return -ENOMEM;

	old = current_cred();
L
Linus Torvalds 已提交
774

D
David Howells 已提交
775
	retval = -EPERM;
776
	if (!nsown_capable(CAP_SETUID)) {
D
David Howells 已提交
777 778 779 780 781 782 783 784 785
		if (ruid != (uid_t) -1 && ruid != old->uid &&
		    ruid != old->euid  && ruid != old->suid)
			goto error;
		if (euid != (uid_t) -1 && euid != old->uid &&
		    euid != old->euid  && euid != old->suid)
			goto error;
		if (suid != (uid_t) -1 && suid != old->uid &&
		    suid != old->euid  && suid != old->suid)
			goto error;
L
Linus Torvalds 已提交
786
	}
D
David Howells 已提交
787

L
Linus Torvalds 已提交
788
	if (ruid != (uid_t) -1) {
D
David Howells 已提交
789
		new->uid = ruid;
790 791 792 793 794
		if (ruid != old->uid) {
			retval = set_user(new);
			if (retval < 0)
				goto error;
		}
L
Linus Torvalds 已提交
795
	}
D
David Howells 已提交
796 797
	if (euid != (uid_t) -1)
		new->euid = euid;
L
Linus Torvalds 已提交
798
	if (suid != (uid_t) -1)
D
David Howells 已提交
799 800
		new->suid = suid;
	new->fsuid = new->euid;
L
Linus Torvalds 已提交
801

D
David Howells 已提交
802 803 804
	retval = security_task_fix_setuid(new, old, LSM_SETID_RES);
	if (retval < 0)
		goto error;
L
Linus Torvalds 已提交
805

D
David Howells 已提交
806
	return commit_creds(new);
L
Linus Torvalds 已提交
807

D
David Howells 已提交
808 809 810
error:
	abort_creds(new);
	return retval;
L
Linus Torvalds 已提交
811 812
}

813
/*
 * Copy the caller's real, effective and saved uid to the user pointers
 * @ruid, @euid and @suid, in that order.  Stops at the first put_user()
 * failure and returns its error; returns 0 when all three succeed.
 */
SYSCALL_DEFINE3(getresuid, uid_t __user *, ruid, uid_t __user *, euid, uid_t __user *, suid)
{
	const struct cred *cred = current_cred();
	int retval;

	retval = put_user(cred->uid, ruid);
	if (retval)
		return retval;

	retval = put_user(cred->euid, euid);
	if (retval)
		return retval;

	return put_user(cred->suid, suid);
}

/*
 * Same as above, but for rgid, egid, sgid.
 */
828
SYSCALL_DEFINE3(setresgid, gid_t, rgid, gid_t, egid, gid_t, sgid)
L
Linus Torvalds 已提交
829
{
D
David Howells 已提交
830 831
	const struct cred *old;
	struct cred *new;
L
Linus Torvalds 已提交
832 833
	int retval;

D
David Howells 已提交
834 835 836 837 838 839
	new = prepare_creds();
	if (!new)
		return -ENOMEM;
	old = current_cred();

	retval = -EPERM;
840
	if (!nsown_capable(CAP_SETGID)) {
D
David Howells 已提交
841 842 843 844 845 846 847 848 849
		if (rgid != (gid_t) -1 && rgid != old->gid &&
		    rgid != old->egid  && rgid != old->sgid)
			goto error;
		if (egid != (gid_t) -1 && egid != old->gid &&
		    egid != old->egid  && egid != old->sgid)
			goto error;
		if (sgid != (gid_t) -1 && sgid != old->gid &&
		    sgid != old->egid  && sgid != old->sgid)
			goto error;
L
Linus Torvalds 已提交
850
	}
D
David Howells 已提交
851

L
Linus Torvalds 已提交
852
	if (rgid != (gid_t) -1)
D
David Howells 已提交
853 854 855
		new->gid = rgid;
	if (egid != (gid_t) -1)
		new->egid = egid;
L
Linus Torvalds 已提交
856
	if (sgid != (gid_t) -1)
D
David Howells 已提交
857 858
		new->sgid = sgid;
	new->fsgid = new->egid;
L
Linus Torvalds 已提交
859

D
David Howells 已提交
860 861 862 863 864
	return commit_creds(new);

error:
	abort_creds(new);
	return retval;
L
Linus Torvalds 已提交
865 866
}

867
/*
 * Copy the caller's real, effective and saved gid to the user pointers
 * @rgid, @egid and @sgid, in that order.  Stops at the first put_user()
 * failure and returns its error; returns 0 when all three succeed.
 */
SYSCALL_DEFINE3(getresgid, gid_t __user *, rgid, gid_t __user *, egid, gid_t __user *, sgid)
{
	const struct cred *cred = current_cred();
	int retval;

	retval = put_user(cred->gid, rgid);
	if (retval)
		return retval;

	retval = put_user(cred->egid, egid);
	if (retval)
		return retval;

	return put_user(cred->sgid, sgid);
}


/*
 * "setfsuid()" sets the fsuid - the uid used for filesystem checks. This
 * is used for "access()" and for the NFS daemon (letting nfsd stay at
 * whatever uid it wants to). It normally shadows "euid", except when
 * explicitly set by setfsuid() or for access..
 */
886
SYSCALL_DEFINE1(setfsuid, uid_t, uid)
L
Linus Torvalds 已提交
887
{
D
David Howells 已提交
888 889 890
	const struct cred *old;
	struct cred *new;
	uid_t old_fsuid;
L
Linus Torvalds 已提交
891

D
David Howells 已提交
892 893 894 895 896
	new = prepare_creds();
	if (!new)
		return current_fsuid();
	old = current_cred();
	old_fsuid = old->fsuid;
L
Linus Torvalds 已提交
897

D
David Howells 已提交
898 899
	if (uid == old->uid  || uid == old->euid  ||
	    uid == old->suid || uid == old->fsuid ||
900
	    nsown_capable(CAP_SETUID)) {
901
		if (uid != old_fsuid) {
D
David Howells 已提交
902 903 904
			new->fsuid = uid;
			if (security_task_fix_setuid(new, old, LSM_SETID_FS) == 0)
				goto change_okay;
L
Linus Torvalds 已提交
905 906 907
		}
	}

D
David Howells 已提交
908 909
	abort_creds(new);
	return old_fsuid;
L
Linus Torvalds 已提交
910

D
David Howells 已提交
911 912
change_okay:
	commit_creds(new);
L
Linus Torvalds 已提交
913 914 915 916
	return old_fsuid;
}

/*
917
 * Samma på svenska..
L
Linus Torvalds 已提交
918
 */
919
SYSCALL_DEFINE1(setfsgid, gid_t, gid)
L
Linus Torvalds 已提交
920
{
D
David Howells 已提交
921 922 923 924 925 926 927 928 929
	const struct cred *old;
	struct cred *new;
	gid_t old_fsgid;

	new = prepare_creds();
	if (!new)
		return current_fsgid();
	old = current_cred();
	old_fsgid = old->fsgid;
L
Linus Torvalds 已提交
930

D
David Howells 已提交
931 932
	if (gid == old->gid  || gid == old->egid  ||
	    gid == old->sgid || gid == old->fsgid ||
933
	    nsown_capable(CAP_SETGID)) {
934
		if (gid != old_fsgid) {
D
David Howells 已提交
935 936
			new->fsgid = gid;
			goto change_okay;
L
Linus Torvalds 已提交
937 938
		}
	}
D
David Howells 已提交
939 940 941 942 943 944

	abort_creds(new);
	return old_fsgid;

change_okay:
	commit_creds(new);
L
Linus Torvalds 已提交
945 946 947
	return old_fsgid;
}

948 949
void do_sys_times(struct tms *tms)
{
950
	cputime_t tgutime, tgstime, cutime, cstime;
951

952
	spin_lock_irq(&current->sighand->siglock);
953
	thread_group_times(current, &tgutime, &tgstime);
954 955 956
	cutime = current->signal->cutime;
	cstime = current->signal->cstime;
	spin_unlock_irq(&current->sighand->siglock);
957 958
	tms->tms_utime = cputime_to_clock_t(tgutime);
	tms->tms_stime = cputime_to_clock_t(tgstime);
959 960 961 962
	tms->tms_cutime = cputime_to_clock_t(cutime);
	tms->tms_cstime = cputime_to_clock_t(cstime);
}

963
/*
 * times() - optionally copy the caller's process times to @tbuf and return
 * the current 64-bit jiffies count converted to clock_t.
 *
 * Returns -EFAULT if @tbuf is non-NULL and cannot be written.
 */
SYSCALL_DEFINE1(times, struct tms __user *, tbuf)
{
	struct tms snapshot;

	if (tbuf) {
		do_sys_times(&snapshot);
		if (copy_to_user(tbuf, &snapshot, sizeof(snapshot)))
			return -EFAULT;
	}

	force_successful_syscall_return();
	return (long) jiffies_64_to_clock_t(get_jiffies_64());
}

/*
 * This needs some heavy checking ...
 * I just haven't the stomach for it. I also don't fully
 * understand sessions/pgrp etc. Let somebody who does explain it.
 *
 * OK, I think I have the protection semantics right.... this is really
 * only important on a multi-user system anyway, to make sure one user
 * can't send a signal to a process owned by another.  -TYT, 12/12/91
 *
 * Auch. Had to add the 'did_exec' flag to conform completely to POSIX.
 * LBT 04.03.94
 */
988
SYSCALL_DEFINE2(setpgid, pid_t, pid, pid_t, pgid)
L
Linus Torvalds 已提交
989 990
{
	struct task_struct *p;
991
	struct task_struct *group_leader = current->group_leader;
992 993
	struct pid *pgrp;
	int err;
L
Linus Torvalds 已提交
994 995

	if (!pid)
996
		pid = task_pid_vnr(group_leader);
L
Linus Torvalds 已提交
997 998 999 1000
	if (!pgid)
		pgid = pid;
	if (pgid < 0)
		return -EINVAL;
1001
	rcu_read_lock();
L
Linus Torvalds 已提交
1002 1003 1004 1005 1006 1007 1008

	/* From this point forward we keep holding onto the tasklist lock
	 * so that our parent does not change from under us. -DaveM
	 */
	write_lock_irq(&tasklist_lock);

	err = -ESRCH;
1009
	p = find_task_by_vpid(pid);
L
Linus Torvalds 已提交
1010 1011 1012 1013 1014 1015 1016
	if (!p)
		goto out;

	err = -EINVAL;
	if (!thread_group_leader(p))
		goto out;

1017
	if (same_thread_group(p->real_parent, group_leader)) {
L
Linus Torvalds 已提交
1018
		err = -EPERM;
1019
		if (task_session(p) != task_session(group_leader))
L
Linus Torvalds 已提交
1020 1021 1022 1023 1024 1025
			goto out;
		err = -EACCES;
		if (p->did_exec)
			goto out;
	} else {
		err = -ESRCH;
1026
		if (p != group_leader)
L
Linus Torvalds 已提交
1027 1028 1029 1030 1031 1032 1033
			goto out;
	}

	err = -EPERM;
	if (p->signal->leader)
		goto out;

1034
	pgrp = task_pid(p);
L
Linus Torvalds 已提交
1035
	if (pgid != pid) {
1036
		struct task_struct *g;
L
Linus Torvalds 已提交
1037

1038 1039
		pgrp = find_vpid(pgid);
		g = pid_task(pgrp, PIDTYPE_PGID);
1040
		if (!g || task_session(g) != task_session(group_leader))
1041
			goto out;
L
Linus Torvalds 已提交
1042 1043 1044 1045 1046 1047
	}

	err = security_task_setpgid(p, pgid);
	if (err)
		goto out;

1048
	if (task_pgrp(p) != pgrp)
1049
		change_pid(p, PIDTYPE_PGID, pgrp);
L
Linus Torvalds 已提交
1050 1051 1052 1053 1054

	err = 0;
out:
	/* All paths lead to here, thus we are safe. -DaveM */
	write_unlock_irq(&tasklist_lock);
1055
	rcu_read_unlock();
L
Linus Torvalds 已提交
1056 1057 1058
	return err;
}

1059
SYSCALL_DEFINE1(getpgid, pid_t, pid)
L
Linus Torvalds 已提交
1060
{
1061 1062 1063 1064 1065
	struct task_struct *p;
	struct pid *grp;
	int retval;

	rcu_read_lock();
1066
	if (!pid)
1067
		grp = task_pgrp(current);
1068
	else {
L
Linus Torvalds 已提交
1069
		retval = -ESRCH;
1070 1071 1072 1073 1074 1075 1076 1077 1078 1079
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		grp = task_pgrp(p);
		if (!grp)
			goto out;

		retval = security_task_getpgid(p);
		if (retval)
			goto out;
L
Linus Torvalds 已提交
1080
	}
1081 1082 1083 1084
	retval = pid_vnr(grp);
out:
	rcu_read_unlock();
	return retval;
L
Linus Torvalds 已提交
1085 1086 1087 1088
}

#ifdef __ARCH_WANT_SYS_GETPGRP

1089
/* getpgrp() - shorthand for getpgid(0), i.e. the caller's process group. */
SYSCALL_DEFINE0(getpgrp)
{
	return sys_getpgid(0);
}

#endif

1096
SYSCALL_DEFINE1(getsid, pid_t, pid)
L
Linus Torvalds 已提交
1097
{
1098 1099 1100 1101 1102
	struct task_struct *p;
	struct pid *sid;
	int retval;

	rcu_read_lock();
1103
	if (!pid)
1104
		sid = task_session(current);
1105
	else {
L
Linus Torvalds 已提交
1106
		retval = -ESRCH;
1107 1108 1109 1110 1111 1112 1113 1114 1115 1116
		p = find_task_by_vpid(pid);
		if (!p)
			goto out;
		sid = task_session(p);
		if (!sid)
			goto out;

		retval = security_task_getsid(p);
		if (retval)
			goto out;
L
Linus Torvalds 已提交
1117
	}
1118 1119 1120 1121
	retval = pid_vnr(sid);
out:
	rcu_read_unlock();
	return retval;
L
Linus Torvalds 已提交
1122 1123
}

1124
SYSCALL_DEFINE0(setsid)
L
Linus Torvalds 已提交
1125
{
1126
	struct task_struct *group_leader = current->group_leader;
1127 1128
	struct pid *sid = task_pid(group_leader);
	pid_t session = pid_vnr(sid);
L
Linus Torvalds 已提交
1129 1130 1131
	int err = -EPERM;

	write_lock_irq(&tasklist_lock);
1132 1133 1134 1135
	/* Fail if I am already a session leader */
	if (group_leader->signal->leader)
		goto out;

1136 1137
	/* Fail if a process group id already exists that equals the
	 * proposed session id.
1138
	 */
1139
	if (pid_task(sid, PIDTYPE_PGID))
L
Linus Torvalds 已提交
1140 1141
		goto out;

1142
	group_leader->signal->leader = 1;
1143
	__set_special_pids(sid);
1144

A
Alan Cox 已提交
1145
	proc_clear_tty(group_leader);
1146

1147
	err = session;
L
Linus Torvalds 已提交
1148 1149
out:
	write_unlock_irq(&tasklist_lock);
1150
	if (err > 0) {
1151
		proc_sid_connector(group_leader);
1152 1153
		sched_autogroup_create_attach(group_leader);
	}
L
Linus Torvalds 已提交
1154 1155 1156 1157 1158
	return err;
}

/*
 * Serialises access to the utsname() data in this file: the uname and
 * gethostname paths take it for reading, sethostname and setdomainname
 * take it for writing.
 */
DECLARE_RWSEM(uts_sem);

1159 1160
/*
 * override_architecture(name) - when COMPAT_UTS_MACHINE is defined and the
 * caller runs with the PER_LINUX32 personality, overwrite the user-space
 * name->machine field with COMPAT_UTS_MACHINE.
 *
 * Evaluates to non-zero if the copy to user space failed, zero otherwise
 * (and always zero when COMPAT_UTS_MACHINE is not defined).  The argument
 * is parenthesised so that any pointer expression can be passed safely.
 */
#ifdef COMPAT_UTS_MACHINE
#define override_architecture(name) \
	(personality(current->personality) == PER_LINUX32 && \
	 copy_to_user((name)->machine, COMPAT_UTS_MACHINE, \
		      sizeof(COMPAT_UTS_MACHINE)))
#else
#define override_architecture(name)	0
#endif

1168 1169 1170 1171 1172 1173 1174
/*
 * Work around broken programs that cannot handle "Linux 3.0".
 * Instead we map 3.x to 2.6.40+x, so e.g. 3.0 would be 2.6.40
 *
 * @release: user-space release field to overwrite
 * @len:     size of that field in bytes
 *
 * Only acts when the caller has the UNAME26 personality flag set.
 * Returns 0 on success (or when nothing had to be done), non-zero if the
 * copy to user space failed.
 *
 * The scratch buffer is zero-initialised, the formatted length is clamped
 * to the buffer size, and only the formatted string plus its terminating
 * NUL is copied out, so no uninitialised kernel stack bytes can reach
 * user space.
 */
static int override_release(char __user *release, int len)
{
	int ret = 0;

	if (current->personality & UNAME26) {
		const char *rest = UTS_RELEASE;
		char buf[65] = { 0 };
		int ndots = 0;
		unsigned v;
		size_t copy;

		/* Skip the leading "x.y.z" numeric part of UTS_RELEASE. */
		while (*rest) {
			if (*rest == '.' && ++ndots >= 3)
				break;
			if (!isdigit(*rest) && *rest != '.')
				break;
			rest++;
		}
		v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 40;

		/* Never format or copy more than buf can hold. */
		copy = clamp_t(size_t, len, 1, sizeof(buf));
		copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
		ret = copy_to_user(release, buf, copy + 1);
	}
	return ret;
}

1196
/*
 * newuname() - copy the current utsname() data to @name, then apply the
 * release and architecture overrides.
 *
 * Returns 0 on success or -EFAULT if any copy to user space fails.
 */
SYSCALL_DEFINE1(newuname, struct new_utsname __user *, name)
{
	unsigned long uncopied;

	down_read(&uts_sem);
	uncopied = copy_to_user(name, utsname(), sizeof *name);
	up_read(&uts_sem);

	if (uncopied)
		return -EFAULT;
	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	return 0;
}

C
Christoph Hellwig 已提交
1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224 1225 1226 1227
#ifdef __ARCH_WANT_SYS_OLD_UNAME
/*
 * Old cruft
 *
 * uname() - legacy variant taking a struct old_utsname.  Copies the
 * current utsname() data to @name, then applies the release and
 * architecture overrides.
 *
 * Returns 0 on success, -EFAULT if @name is NULL or a copy fails.
 */
SYSCALL_DEFINE1(uname, struct old_utsname __user *, name)
{
	unsigned long uncopied;

	if (!name)
		return -EFAULT;

	down_read(&uts_sem);
	uncopied = copy_to_user(name, utsname(), sizeof(*name));
	up_read(&uts_sem);

	if (uncopied)
		return -EFAULT;
	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	return 0;
}

/*
 * olduname() - oldest uname variant, taking a struct oldold_utsname.
 *
 * Each field receives the first __OLD_UTS_LEN bytes of the matching
 * utsname() field, followed by an explicit NUL terminator.  All fields
 * are attempted even if an earlier one faults; the failures are OR-ed
 * together.
 *
 * Returns 0 on success, -EFAULT if @name is NULL, not writable, or any
 * copy fails.
 */
SYSCALL_DEFINE1(olduname, struct oldold_utsname __user *, name)
{
	int failed;

	if (!name)
		return -EFAULT;
	if (!access_ok(VERIFY_WRITE, name, sizeof(struct oldold_utsname)))
		return -EFAULT;

	down_read(&uts_sem);
	failed  = __copy_to_user(&name->sysname, &utsname()->sysname,
				 __OLD_UTS_LEN);
	failed |= __put_user(0, name->sysname + __OLD_UTS_LEN);
	failed |= __copy_to_user(&name->nodename, &utsname()->nodename,
				 __OLD_UTS_LEN);
	failed |= __put_user(0, name->nodename + __OLD_UTS_LEN);
	failed |= __copy_to_user(&name->release, &utsname()->release,
				 __OLD_UTS_LEN);
	failed |= __put_user(0, name->release + __OLD_UTS_LEN);
	failed |= __copy_to_user(&name->version, &utsname()->version,
				 __OLD_UTS_LEN);
	failed |= __put_user(0, name->version + __OLD_UTS_LEN);
	failed |= __copy_to_user(&name->machine, &utsname()->machine,
				 __OLD_UTS_LEN);
	failed |= __put_user(0, name->machine + __OLD_UTS_LEN);
	up_read(&uts_sem);

	if (failed)
		return -EFAULT;
	if (override_architecture(name))
		return -EFAULT;
	if (override_release(name->release, sizeof(name->release)))
		return -EFAULT;
	return 0;
}
#endif

1270
/*
 * sethostname() - replace the nodename in the caller's utsname() data.
 *
 * Requires CAP_SYS_ADMIN in the user namespace of the caller's UTS
 * namespace.  Returns -EPERM without it, -EINVAL if @len is negative or
 * larger than __NEW_UTS_LEN, -EFAULT if @name cannot be read, 0 on success.
 */
SYSCALL_DEFINE2(sethostname, char __user *, name, int, len)
{
	char staged[__NEW_UTS_LEN];
	int rc = -EFAULT;

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;

	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	if (copy_from_user(staged, name, len) == 0) {
		struct new_utsname *uts = utsname();

		/* Install the new name and zero the remainder of the field. */
		memcpy(uts->nodename, staged, len);
		memset(uts->nodename + len, 0, sizeof(uts->nodename) - len);
		rc = 0;
	}
	up_write(&uts_sem);
	return rc;
}

#ifdef __ARCH_WANT_SYS_GETHOSTNAME

1295
/*
 * gethostname() - copy the nodename, including its terminating NUL, to
 * @name, truncated to at most @len bytes.
 *
 * Returns -EINVAL for a negative @len, -EFAULT if the copy fails, else 0.
 */
SYSCALL_DEFINE2(gethostname, char __user *, name, int, len)
{
	struct new_utsname *uts;
	int nbytes;
	int rc = 0;

	if (len < 0)
		return -EINVAL;

	down_read(&uts_sem);
	uts = utsname();

	/* Length including the NUL, capped at the caller's buffer size. */
	nbytes = 1 + strlen(uts->nodename);
	if (nbytes > len)
		nbytes = len;

	if (copy_to_user(name, uts->nodename, nbytes))
		rc = -EFAULT;
	up_read(&uts_sem);
	return rc;
}

#endif

/*
 * Only setdomainname; getdomainname can be implemented by calling
 * uname()
 */
1320
/*
 * setdomainname() - replace the domainname in the caller's utsname() data.
 *
 * Requires CAP_SYS_ADMIN in the user namespace of the caller's UTS
 * namespace.  Returns -EPERM without it, -EINVAL if @len is negative or
 * larger than __NEW_UTS_LEN, -EFAULT if @name cannot be read, 0 on success.
 */
SYSCALL_DEFINE2(setdomainname, char __user *, name, int, len)
{
	char staged[__NEW_UTS_LEN];
	int rc = -EFAULT;

	if (!ns_capable(current->nsproxy->uts_ns->user_ns, CAP_SYS_ADMIN))
		return -EPERM;
	if (len < 0 || len > __NEW_UTS_LEN)
		return -EINVAL;

	down_write(&uts_sem);
	if (copy_from_user(staged, name, len) == 0) {
		struct new_utsname *uts = utsname();

		/* Install the new name and zero the remainder of the field. */
		memcpy(uts->domainname, staged, len);
		memset(uts->domainname + len, 0, sizeof(uts->domainname) - len);
		rc = 0;
	}
	up_write(&uts_sem);
	return rc;
}

1343
/*
 * getrlimit() - read the caller's limit for @resource into @rlim.
 *
 * Returns the error from do_prlimit(), -EFAULT if @rlim cannot be
 * written, or 0 on success.
 */
SYSCALL_DEFINE2(getrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit limit;
	int err;

	err = do_prlimit(current, resource, NULL, &limit);
	if (err)
		return err;

	if (copy_to_user(rlim, &limit, sizeof(*rlim)))
		return -EFAULT;
	return 0;
}

#ifdef __ARCH_WANT_SYS_OLD_GETRLIMIT

/*
 *	Back compatibility for getrlimit. Needed for some apps.
 */
 
1361 1362
SYSCALL_DEFINE2(old_getrlimit, unsigned int, resource,
		struct rlimit __user *, rlim)
{
	/* Largest value this legacy interface reports. */
	const unsigned long old_limit_max = 0x7FFFFFFF;
	struct rlimit limit;

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;

	/* Take a consistent snapshot under the group leader's task lock. */
	task_lock(current->group_leader);
	limit = current->signal->rlim[resource];
	task_unlock(current->group_leader);

	/* Clamp both values to the legacy maximum. */
	if (limit.rlim_cur > old_limit_max)
		limit.rlim_cur = old_limit_max;
	if (limit.rlim_max > old_limit_max)
		limit.rlim_max = old_limit_max;

	if (copy_to_user(rlim, &limit, sizeof(limit)))
		return -EFAULT;
	return 0;
}

#endif

1380 1381 1382 1383 1384 1385 1386 1387 1388 1389 1390 1391 1392 1393 1394 1395 1396 1397 1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412
/*
 * Tell whether a 64-bit limit must be treated as "infinite" when stored in
 * a native unsigned long.  When long is narrower than 64 bits, every value
 * at or above ULONG_MAX counts as infinity; otherwise only
 * RLIM64_INFINITY does.
 */
static inline bool rlim64_is_infinity(__u64 rlim64)
{
#if BITS_PER_LONG >= 64
	return rlim64 == RLIM64_INFINITY;
#else
	return rlim64 >= ULONG_MAX;
#endif
}

/*
 * Widen a native rlimit into a 64-bit rlimit, mapping RLIM_INFINITY to
 * RLIM64_INFINITY for both the soft and the hard limit.
 */
static void rlim_to_rlim64(const struct rlimit *rlim, struct rlimit64 *rlim64)
{
	rlim64->rlim_cur = (rlim->rlim_cur == RLIM_INFINITY) ?
				RLIM64_INFINITY : rlim->rlim_cur;
	rlim64->rlim_max = (rlim->rlim_max == RLIM_INFINITY) ?
				RLIM64_INFINITY : rlim->rlim_max;
}

/*
 * Narrow a 64-bit rlimit into a native rlimit.  Values that
 * rlim64_is_infinity() reports as infinite become RLIM_INFINITY; all
 * others are cast to unsigned long.
 */
static void rlim64_to_rlim(const struct rlimit64 *rlim64, struct rlimit *rlim)
{
	rlim->rlim_cur = rlim64_is_infinity(rlim64->rlim_cur) ?
				RLIM_INFINITY : (unsigned long)rlim64->rlim_cur;
	rlim->rlim_max = rlim64_is_infinity(rlim64->rlim_max) ?
				RLIM_INFINITY : (unsigned long)rlim64->rlim_max;
}

1413
/* make sure you are allowed to change @tsk limits before calling this */
1414 1415
int do_prlimit(struct task_struct *tsk, unsigned int resource,
		struct rlimit *new_rlim, struct rlimit *old_rlim)
L
Linus Torvalds 已提交
1416
{
1417
	struct rlimit *rlim;
1418
	int retval = 0;
L
Linus Torvalds 已提交
1419 1420 1421

	if (resource >= RLIM_NLIMITS)
		return -EINVAL;
1422 1423 1424 1425 1426 1427 1428
	if (new_rlim) {
		if (new_rlim->rlim_cur > new_rlim->rlim_max)
			return -EINVAL;
		if (resource == RLIMIT_NOFILE &&
				new_rlim->rlim_max > sysctl_nr_open)
			return -EPERM;
	}
L
Linus Torvalds 已提交
1429

1430 1431 1432 1433 1434 1435 1436
	/* protect tsk->signal and tsk->sighand from disappearing */
	read_lock(&tasklist_lock);
	if (!tsk->sighand) {
		retval = -ESRCH;
		goto out;
	}

1437
	rlim = tsk->signal->rlim + resource;
1438
	task_lock(tsk->group_leader);
1439
	if (new_rlim) {
1440 1441
		/* Keep the capable check against init_user_ns until
		   cgroups can contain all limits */
1442 1443 1444 1445 1446 1447 1448 1449 1450 1451 1452 1453 1454 1455 1456 1457 1458 1459 1460 1461 1462
		if (new_rlim->rlim_max > rlim->rlim_max &&
				!capable(CAP_SYS_RESOURCE))
			retval = -EPERM;
		if (!retval)
			retval = security_task_setrlimit(tsk->group_leader,
					resource, new_rlim);
		if (resource == RLIMIT_CPU && new_rlim->rlim_cur == 0) {
			/*
			 * The caller is asking for an immediate RLIMIT_CPU
			 * expiry.  But we use the zero value to mean "it was
			 * never set".  So let's cheat and make it one second
			 * instead
			 */
			new_rlim->rlim_cur = 1;
		}
	}
	if (!retval) {
		if (old_rlim)
			*old_rlim = *rlim;
		if (new_rlim)
			*rlim = *new_rlim;
1463
	}
J
Jiri Slaby 已提交
1464
	task_unlock(tsk->group_leader);
L
Linus Torvalds 已提交
1465

1466 1467 1468 1469 1470 1471
	/*
	 * RLIMIT_CPU handling.   Note that the kernel fails to return an error
	 * code if it rejected the user's attempt to set RLIMIT_CPU.  This is a
	 * very long-standing error, and fixing it now risks breakage of
	 * applications, so we live with it
	 */
1472 1473 1474
	 if (!retval && new_rlim && resource == RLIMIT_CPU &&
			 new_rlim->rlim_cur != RLIM_INFINITY)
		update_rlimit_cpu(tsk, new_rlim->rlim_cur);
A
Andrew Morton 已提交
1475
out:
1476
	read_unlock(&tasklist_lock);
1477
	return retval;
L
Linus Torvalds 已提交
1478 1479
}

1480 1481 1482 1483 1484
/* rcu lock must be held */
static int check_prlimit_permission(struct task_struct *task)
{
	const struct cred *cred = current_cred(), *tcred;

1485 1486
	if (current == task)
		return 0;
1487

1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498 1499 1500
	tcred = __task_cred(task);
	if (cred->user->user_ns == tcred->user->user_ns &&
	    (cred->uid == tcred->euid &&
	     cred->uid == tcred->suid &&
	     cred->uid == tcred->uid  &&
	     cred->gid == tcred->egid &&
	     cred->gid == tcred->sgid &&
	     cred->gid == tcred->gid))
		return 0;
	if (ns_capable(tcred->user->user_ns, CAP_SYS_RESOURCE))
		return 0;

	return -EPERM;
1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540 1541 1542 1543 1544
}

/*
 * prlimit64() - get and/or set a resource limit of task @pid (0 means the
 * caller) using 64-bit limit values.
 *
 * Returns 0 on success, -EFAULT if a user buffer cannot be accessed,
 * -ESRCH if no such task exists, or the error from
 * check_prlimit_permission() / do_prlimit().
 */
SYSCALL_DEFINE4(prlimit64, pid_t, pid, unsigned int, resource,
		const struct rlimit64 __user *, new_rlim,
		struct rlimit64 __user *, old_rlim)
{
	struct rlimit64 old64, new64;
	struct rlimit old, new;
	struct task_struct *tsk;
	int ret;

	if (new_rlim) {
		if (copy_from_user(&new64, new_rlim, sizeof(new64)))
			return -EFAULT;
		rlim64_to_rlim(&new64, &new);
	}

	/* Look up the target and check permission under RCU. */
	rcu_read_lock();
	tsk = pid ? find_task_by_vpid(pid) : current;
	ret = tsk ? check_prlimit_permission(tsk) : -ESRCH;
	if (ret) {
		rcu_read_unlock();
		return ret;
	}
	/* Hold a reference so the task stays valid outside the RCU section. */
	get_task_struct(tsk);
	rcu_read_unlock();

	ret = do_prlimit(tsk, resource, new_rlim ? &new : NULL,
			 old_rlim ? &old : NULL);

	if (ret == 0 && old_rlim) {
		rlim_to_rlim64(&old, &old64);
		if (copy_to_user(old_rlim, &old64, sizeof(old64)))
			ret = -EFAULT;
	}

	put_task_struct(tsk);
	return ret;
}

J
Jiri Slaby 已提交
1545 1546 1547 1548 1549 1550
/*
 * setrlimit() - install the limit read from @rlim for @resource on the
 * calling task.
 *
 * Returns -EFAULT if @rlim cannot be read, otherwise the result of
 * do_prlimit().
 */
SYSCALL_DEFINE2(setrlimit, unsigned int, resource, struct rlimit __user *, rlim)
{
	struct rlimit requested;

	if (copy_from_user(&requested, rlim, sizeof(*rlim)))
		return -EFAULT;

	return do_prlimit(current, resource, &requested, NULL);
}

L
Linus Torvalds 已提交
1554 1555 1556 1557 1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
/*
 * It would make sense to put struct rusage in the task_struct,
 * except that would make the task_struct be *really big*.  After
 * task_struct gets moved into malloc'ed memory, it would
 * make sense to do this.  It will make moving the rest of the information
 * a lot simpler!  (Which we're not doing right now because we're not
 * measuring them yet).
 *
 * When sampling multiple threads for RUSAGE_SELF, under SMP we might have
 * races with threads incrementing their own counters.  But since word
 * reads are atomic, we either get new values or old values and we don't
 * care which for the sums.  We always take the siglock to protect reading
 * the c* fields from p->signal from races with exit.c updating those
 * fields when reaping, so a sample either gets all the additions of a
 * given child after it's reaped, or none so this sample is before reaping.
 *
 * Locking:
 * We need to take the siglock for CHILDREN, SELF and BOTH
 * for the cases: current multithreaded, non-current single threaded,
 * non-current multithreaded.  Thread traversal is now safe with
 * the siglock held.
 * Strictly speaking, we do not need to take the siglock if we are current
 * and single threaded, as no one else can take our signal_struct away, no
 * one else can reap the children to update signal->c* counters, and no one
 * else can race with the signal-> fields.  If we do not take any lock, the
 * signal-> fields could be read out of order while another thread was just
 * exiting.  So we should place a read memory barrier when we avoid the lock.
 * On the writer side, a write memory barrier is implied in __exit_signal
 * as __exit_signal releases the siglock spinlock after updating the signal->
 * fields.  But we don't do this yet to keep things simple.
 *
 */

1587
static void accumulate_thread_rusage(struct task_struct *t, struct rusage *r)
S
Sripathi Kodi 已提交
1588 1589 1590 1591 1592 1593 1594 1595 1596
{
	r->ru_nvcsw += t->nvcsw;
	r->ru_nivcsw += t->nivcsw;
	r->ru_minflt += t->min_flt;
	r->ru_majflt += t->maj_flt;
	r->ru_inblock += task_io_get_inblock(t);
	r->ru_oublock += task_io_get_oublock(t);
}

L
Linus Torvalds 已提交
1597 1598 1599 1600
static void k_getrusage(struct task_struct *p, int who, struct rusage *r)
{
	struct task_struct *t;
	unsigned long flags;
1601
	cputime_t tgutime, tgstime, utime, stime;
J
Jiri Pirko 已提交
1602
	unsigned long maxrss = 0;
L
Linus Torvalds 已提交
1603 1604

	memset((char *) r, 0, sizeof *r);
1605
	utime = stime = cputime_zero;
L
Linus Torvalds 已提交
1606

S
Sripathi Kodi 已提交
1607
	if (who == RUSAGE_THREAD) {
1608
		task_times(current, &utime, &stime);
1609
		accumulate_thread_rusage(p, r);
J
Jiri Pirko 已提交
1610
		maxrss = p->signal->maxrss;
S
Sripathi Kodi 已提交
1611 1612 1613
		goto out;
	}

1614
	if (!lock_task_sighand(p, &flags))
1615
		return;
O
Oleg Nesterov 已提交
1616

L
Linus Torvalds 已提交
1617
	switch (who) {
O
Oleg Nesterov 已提交
1618
		case RUSAGE_BOTH:
L
Linus Torvalds 已提交
1619 1620 1621 1622 1623 1624 1625
		case RUSAGE_CHILDREN:
			utime = p->signal->cutime;
			stime = p->signal->cstime;
			r->ru_nvcsw = p->signal->cnvcsw;
			r->ru_nivcsw = p->signal->cnivcsw;
			r->ru_minflt = p->signal->cmin_flt;
			r->ru_majflt = p->signal->cmaj_flt;
1626 1627
			r->ru_inblock = p->signal->cinblock;
			r->ru_oublock = p->signal->coublock;
J
Jiri Pirko 已提交
1628
			maxrss = p->signal->cmaxrss;
O
Oleg Nesterov 已提交
1629 1630 1631 1632

			if (who == RUSAGE_CHILDREN)
				break;

L
Linus Torvalds 已提交
1633
		case RUSAGE_SELF:
1634 1635 1636
			thread_group_times(p, &tgutime, &tgstime);
			utime = cputime_add(utime, tgutime);
			stime = cputime_add(stime, tgstime);
L
Linus Torvalds 已提交
1637 1638 1639 1640
			r->ru_nvcsw += p->signal->nvcsw;
			r->ru_nivcsw += p->signal->nivcsw;
			r->ru_minflt += p->signal->min_flt;
			r->ru_majflt += p->signal->maj_flt;
1641 1642
			r->ru_inblock += p->signal->inblock;
			r->ru_oublock += p->signal->oublock;
J
Jiri Pirko 已提交
1643 1644
			if (maxrss < p->signal->maxrss)
				maxrss = p->signal->maxrss;
L
Linus Torvalds 已提交
1645 1646
			t = p;
			do {
1647
				accumulate_thread_rusage(t, r);
L
Linus Torvalds 已提交
1648 1649 1650
				t = next_thread(t);
			} while (t != p);
			break;
O
Oleg Nesterov 已提交
1651

L
Linus Torvalds 已提交
1652 1653 1654
		default:
			BUG();
	}
1655 1656
	unlock_task_sighand(p, &flags);

S
Sripathi Kodi 已提交
1657
out:
O
Oleg Nesterov 已提交
1658 1659
	cputime_to_timeval(utime, &r->ru_utime);
	cputime_to_timeval(stime, &r->ru_stime);
J
Jiri Pirko 已提交
1660 1661 1662 1663 1664 1665 1666 1667 1668

	if (who != RUSAGE_CHILDREN) {
		struct mm_struct *mm = get_task_mm(p);
		if (mm) {
			setmax_mm_hiwater_rss(&maxrss, mm);
			mmput(mm);
		}
	}
	r->ru_maxrss = maxrss * (PAGE_SIZE / 1024); /* convert pages to KBs */
L
Linus Torvalds 已提交
1669 1670 1671 1672 1673 1674 1675 1676 1677
}

/*
 * Collect resource usage for @p as selected by @who and copy it to the
 * user buffer @ru.  Returns 0 on success, -EFAULT if the copy fails.
 */
int getrusage(struct task_struct *p, int who, struct rusage __user *ru)
{
	struct rusage usage;

	k_getrusage(p, who, &usage);
	if (copy_to_user(ru, &usage, sizeof(usage)))
		return -EFAULT;
	return 0;
}

1678
/*
 * getrusage() syscall - report resource usage of the calling task.
 *
 * Only RUSAGE_SELF, RUSAGE_CHILDREN and RUSAGE_THREAD are accepted for
 * @who; anything else returns -EINVAL.
 */
SYSCALL_DEFINE2(getrusage, int, who, struct rusage __user *, ru)
{
	switch (who) {
	case RUSAGE_SELF:
	case RUSAGE_CHILDREN:
	case RUSAGE_THREAD:
		return getrusage(current, who, ru);
	default:
		return -EINVAL;
	}
}

1686
/*
 * umask() - atomically install @mask (restricted to the S_IRWXUGO bits) as
 * the caller's file creation mask and return the previous mask.
 */
SYSCALL_DEFINE1(umask, int, mask)
{
	int previous;

	previous = xchg(&current->fs->umask, mask & S_IRWXUGO);
	return previous;
}
1691

1692 1693
/*
 * prctl() - per-task control operations selected by @option.
 *
 * The request is offered to security_task_prctl() first; any result other
 * than -ENOSYS is returned as-is.  Otherwise @option is dispatched below,
 * with arg2..arg5 interpreted per option.  Unknown options yield -EINVAL.
 */
SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3,
		unsigned long, arg4, unsigned long, arg5)
{
	struct task_struct *me = current;
	unsigned char comm[sizeof(me->comm)];
	long error;

	error = security_task_prctl(option, arg2, arg3, arg4, arg5);
	if (error != -ENOSYS)
		return error;

	/* Options that do not set 'error' explicitly succeed with 0. */
	error = 0;
	switch (option) {
	case PR_SET_PDEATHSIG:
		if (valid_signal(arg2))
			me->pdeath_signal = arg2;
		else
			error = -EINVAL;
		break;
	case PR_GET_PDEATHSIG:
		error = put_user(me->pdeath_signal, (int __user *)arg2);
		break;

	case PR_GET_DUMPABLE:
		error = get_dumpable(me->mm);
		break;
	case PR_SET_DUMPABLE:
		/* arg2 is unsigned, so only the upper bound can be violated */
		if (arg2 > 1)
			error = -EINVAL;
		else
			set_dumpable(me->mm, arg2);
		break;

	case PR_SET_UNALIGN:
		error = SET_UNALIGN_CTL(me, arg2);
		break;
	case PR_GET_UNALIGN:
		error = GET_UNALIGN_CTL(me, arg2);
		break;
	case PR_SET_FPEMU:
		error = SET_FPEMU_CTL(me, arg2);
		break;
	case PR_GET_FPEMU:
		error = GET_FPEMU_CTL(me, arg2);
		break;
	case PR_SET_FPEXC:
		error = SET_FPEXC_CTL(me, arg2);
		break;
	case PR_GET_FPEXC:
		error = GET_FPEXC_CTL(me, arg2);
		break;

	case PR_GET_TIMING:
		error = PR_TIMING_STATISTICAL;
		break;
	case PR_SET_TIMING:
		/* Statistical timing is the only mode accepted here. */
		if (arg2 != PR_TIMING_STATISTICAL)
			error = -EINVAL;
		break;

	case PR_SET_NAME:
		/* Pre-terminate: at most sizeof(comm) - 1 bytes are read. */
		comm[sizeof(me->comm)-1] = 0;
		if (strncpy_from_user(comm, (char __user *)arg2,
				      sizeof(me->comm) - 1) < 0)
			return -EFAULT;
		set_task_comm(me, comm);
		proc_comm_connector(me);
		return 0;
	case PR_GET_NAME:
		get_task_comm(comm, me);
		if (copy_to_user((char __user *)arg2, comm,
				 sizeof(comm)))
			return -EFAULT;
		return 0;

	case PR_GET_ENDIAN:
		error = GET_ENDIAN(me, arg2);
		break;
	case PR_SET_ENDIAN:
		error = SET_ENDIAN(me, arg2);
		break;

	case PR_GET_SECCOMP:
		error = prctl_get_seccomp();
		break;
	case PR_SET_SECCOMP:
		error = prctl_set_seccomp(arg2);
		break;

	case PR_GET_TSC:
		error = GET_TSC_CTL(arg2);
		break;
	case PR_SET_TSC:
		error = SET_TSC_CTL(arg2);
		break;

	case PR_TASK_PERF_EVENTS_DISABLE:
		error = perf_event_task_disable();
		break;
	case PR_TASK_PERF_EVENTS_ENABLE:
		error = perf_event_task_enable();
		break;

	case PR_GET_TIMERSLACK:
		error = me->timer_slack_ns;
		break;
	case PR_SET_TIMERSLACK:
		/* arg2 is unsigned: only zero selects the task's default */
		if (arg2 == 0)
			me->timer_slack_ns = me->default_timer_slack_ns;
		else
			me->timer_slack_ns = arg2;
		break;

	case PR_MCE_KILL:
		if (arg4 | arg5)
			return -EINVAL;
		switch (arg2) {
		case PR_MCE_KILL_CLEAR:
			if (arg3 != 0)
				return -EINVAL;
			me->flags &= ~PF_MCE_PROCESS;
			break;
		case PR_MCE_KILL_SET:
			me->flags |= PF_MCE_PROCESS;
			if (arg3 == PR_MCE_KILL_EARLY)
				me->flags |= PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_LATE)
				me->flags &= ~PF_MCE_EARLY;
			else if (arg3 == PR_MCE_KILL_DEFAULT)
				me->flags &= ~(PF_MCE_EARLY|PF_MCE_PROCESS);
			else
				return -EINVAL;
			break;
		default:
			return -EINVAL;
		}
		break;
	case PR_MCE_KILL_GET:
		if (arg2 | arg3 | arg4 | arg5)
			return -EINVAL;
		if (me->flags & PF_MCE_PROCESS)
			error = (me->flags & PF_MCE_EARLY) ?
				PR_MCE_KILL_EARLY : PR_MCE_KILL_LATE;
		else
			error = PR_MCE_KILL_DEFAULT;
		break;

	default:
		error = -EINVAL;
		break;
	}
	return error;
}
1847

1848 1849
/*
 * getcpu() - store the id of the CPU the caller is running on in @cpup and
 * that CPU's node in @nodep; either pointer may be NULL.  @unused is
 * ignored.
 *
 * Returns 0 on success, -EFAULT if a requested store fails.
 */
SYSCALL_DEFINE3(getcpu, unsigned __user *, cpup, unsigned __user *, nodep,
		struct getcpu_cache __user *, unused)
{
	int this_cpu = raw_smp_processor_id();
	int failed = 0;

	if (cpup)
		failed |= put_user(this_cpu, cpup);
	if (nodep)
		failed |= put_user(cpu_to_node(this_cpu), nodep);

	if (failed)
		return -EFAULT;
	return 0;
}
1859 1860 1861

/* Command line split into argv and run by orderly_poweroff(). */
char poweroff_cmd[POWEROFF_CMD_PATH_LEN] = "/sbin/poweroff";

1862
static void argv_cleanup(struct subprocess_info *info)
1863
{
1864
	argv_free(info->argv);
1865 1866 1867 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891
}

/**
 * orderly_poweroff - Trigger an orderly system poweroff
 * @force: force poweroff if command execution fails
 *
 * This may be called from any context to trigger a system shutdown.
 * If the orderly shutdown fails, it will force an immediate shutdown.
 *
 * Returns 0 if the poweroff helper was started, -ENOMEM if the command
 * line or the helper could not be set up, or the error from
 * call_usermodehelper_exec().
 */
int orderly_poweroff(bool force)
{
	static char *envp[] = {
		"HOME=/",
		"PATH=/sbin:/bin:/usr/sbin:/usr/bin",
		NULL
	};
	struct subprocess_info *helper;
	char **argv;
	int argc;
	int ret = -ENOMEM;

	argv = argv_split(GFP_ATOMIC, poweroff_cmd, &argc);
	if (!argv) {
		printk(KERN_WARNING "%s failed to allocate memory for \"%s\"\n",
		       __func__, poweroff_cmd);
		goto out;
	}

	helper = call_usermodehelper_setup(argv[0], argv, envp, GFP_ATOMIC);
	if (!helper) {
		/* No helper took ownership of argv, so free it here. */
		argv_free(argv);
		goto out;
	}

	/* From here on argv is released through argv_cleanup(). */
	call_usermodehelper_setfns(helper, NULL, argv_cleanup, NULL);

	ret = call_usermodehelper_exec(helper, UMH_NO_WAIT);

out:
	if (ret && force) {
		printk(KERN_WARNING "Failed to start orderly shutdown: "
		       "forcing the issue\n");

		/* I guess this should try to kick off some daemon to
		   sync and poweroff asap.  Or not even bother syncing
		   if we're doing an emergency shutdown? */
		emergency_sync();
		kernel_power_off();
	}

	return ret;
}
EXPORT_SYMBOL_GPL(orderly_poweroff);