#ifndef _LINUX_SCHED_H
#define _LINUX_SCHED_H

#include <uapi/linux/sched.h>

#include <linux/sched/prio.h>

#include <linux/mutex.h>
#include <linux/plist.h>
#include <linux/mm_types_task.h>

#include <linux/sem.h>
#include <linux/shm.h>
#include <linux/signal.h>
#include <linux/signal_types.h>
#include <linux/pid.h>
#include <linux/seccomp.h>
#include <linux/rculist.h>

#include <linux/resource.h>
#include <linux/hrtimer.h>
#include <linux/kcov.h>
#include <linux/task_io_accounting.h>
#include <linux/latencytop.h>
#include <linux/gfp.h>
#include <linux/topology.h>
#include <linux/magic.h>

#include <asm/current.h>

/* task_struct member predeclarations: */
struct audit_context;
struct autogroup;
struct backing_dev_info;
struct bio_list;
struct blk_plug;
struct cfs_rq;
struct filename;
struct fs_struct;
struct futex_pi_state;
struct io_context;
struct mempolicy;
struct nameidata;
struct nsproxy;
struct perf_event_context;
struct pid_namespace;
struct pipe_inode_info;
struct rcu_node;
struct reclaim_state;
struct robust_list_head;
struct sched_attr;
struct sched_param;
struct seq_file;
struct sighand_struct;
struct signal_struct;
struct task_delay_info;
struct task_group;
struct task_struct;
struct uts_namespace;

/*
 * Task state bitmask. NOTE! These bits are also
 * encoded in fs/proc/array.c: get_task_state().
 *
 * We have two separate sets of flags: task->state
 * is about runnability, while task->exit_state is
 * about the task exiting. Confusing, but this way
 * modifying one set can't modify the other one by
 * mistake.
 */
#define TASK_RUNNING		0
#define TASK_INTERRUPTIBLE	1
#define TASK_UNINTERRUPTIBLE	2
#define __TASK_STOPPED		4
#define __TASK_TRACED		8
/* in tsk->exit_state */
#define EXIT_DEAD		16
#define EXIT_ZOMBIE		32
#define EXIT_TRACE		(EXIT_ZOMBIE | EXIT_DEAD)
/* in tsk->state again */
#define TASK_DEAD		64
#define TASK_WAKEKILL		128
#define TASK_WAKING		256
#define TASK_PARKED		512
#define TASK_NOLOAD		1024
#define TASK_NEW		2048
#define TASK_STATE_MAX		4096

#define TASK_STATE_TO_CHAR_STR "RSDTtXZxKWPNn"

/* Convenience macros for the sake of set_current_state */
#define TASK_KILLABLE		(TASK_WAKEKILL | TASK_UNINTERRUPTIBLE)
#define TASK_STOPPED		(TASK_WAKEKILL | __TASK_STOPPED)
#define TASK_TRACED		(TASK_WAKEKILL | __TASK_TRACED)

#define TASK_IDLE		(TASK_UNINTERRUPTIBLE | TASK_NOLOAD)

/* Convenience macros for the sake of wake_up */
#define TASK_NORMAL		(TASK_INTERRUPTIBLE | TASK_UNINTERRUPTIBLE)
#define TASK_ALL		(TASK_NORMAL | __TASK_STOPPED | __TASK_TRACED)

/* get_task_state() */
#define TASK_REPORT		(TASK_RUNNING | TASK_INTERRUPTIBLE | \
				 TASK_UNINTERRUPTIBLE | __TASK_STOPPED | \
				 __TASK_TRACED | EXIT_ZOMBIE | EXIT_DEAD)

#define task_is_traced(task)	((task->state & __TASK_TRACED) != 0)
#define task_is_stopped(task)	((task->state & __TASK_STOPPED) != 0)
#define task_is_stopped_or_traced(task)	\
			((task->state & (__TASK_STOPPED | __TASK_TRACED)) != 0)
#define task_contributes_to_load(task)	\
				((task->state & TASK_UNINTERRUPTIBLE) != 0 && \
				 (task->flags & PF_FROZEN) == 0 && \
				 (task->state & TASK_NOLOAD) == 0)

#ifdef CONFIG_DEBUG_ATOMIC_SLEEP

#define __set_current_state(state_value)			\
	do {							\
		current->task_state_change = _THIS_IP_;		\
		current->state = (state_value);			\
	} while (0)
#define set_current_state(state_value)				\
	do {							\
		current->task_state_change = _THIS_IP_;		\
		smp_store_mb(current->state, (state_value));	\
	} while (0)

#else
/*
 * set_current_state() includes a barrier so that the write of current->state
 * is correctly serialised wrt the caller's subsequent test of whether to
 * actually sleep:
 *
 *   for (;;) {
 *	set_current_state(TASK_UNINTERRUPTIBLE);
 *	if (!need_sleep)
 *		break;
 *
 *	schedule();
 *   }
 *   __set_current_state(TASK_RUNNING);
 *
 * If the caller does not need such serialisation (because, for instance, the
 * condition test and condition change and wakeup are under the same lock) then
 * use __set_current_state().
 *
 * The above is typically ordered against the wakeup, which does:
 *
 *	need_sleep = false;
 *	wake_up_state(p, TASK_UNINTERRUPTIBLE);
 *
 * Where wake_up_state() (and all other wakeup primitives) imply enough
 * barriers to order the store of the variable against wakeup.
 *
 * Wakeup will do: if (@state & p->state) p->state = TASK_RUNNING, that is,
 * once it observes the TASK_UNINTERRUPTIBLE store the waking CPU can issue a
 * TASK_RUNNING store which can collide with __set_current_state(TASK_RUNNING).
 *
 * This is obviously fine, since they both store the exact same value.
 *
 * Also see the comments of try_to_wake_up().
 */
#define __set_current_state(state_value)		\
	do { current->state = (state_value); } while (0)
#define set_current_state(state_value)			\
	smp_store_mb(current->state, (state_value))

#endif

/* Task command name length */
#define TASK_COMM_LEN 16

extern cpumask_var_t cpu_isolated_map;

extern int runqueue_is_locked(int cpu);

extern void scheduler_tick(void);

#define	MAX_SCHEDULE_TIMEOUT	LONG_MAX
extern signed long schedule_timeout(signed long timeout);
extern signed long schedule_timeout_interruptible(signed long timeout);
extern signed long schedule_timeout_killable(signed long timeout);
extern signed long schedule_timeout_uninterruptible(signed long timeout);
extern signed long schedule_timeout_idle(signed long timeout);
asmlinkage void schedule(void);
extern void schedule_preempt_disabled(void);

extern int __must_check io_schedule_prepare(void);
extern void io_schedule_finish(int token);
extern long io_schedule_timeout(long timeout);
extern void io_schedule(void);

/**
 * struct prev_cputime - snapshot of system and user cputime
 * @utime: time spent in user mode
 * @stime: time spent in system mode
 * @lock: protects the above two fields
 *
 * Stores previous user/system time values such that we can guarantee
 * monotonicity.
 */
struct prev_cputime {
#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
	u64 utime;
	u64 stime;
	raw_spinlock_t lock;
#endif
};

/**
 * struct task_cputime - collected CPU time counts
 * @utime:		time spent in user mode, in nanoseconds
 * @stime:		time spent in kernel mode, in nanoseconds
 * @sum_exec_runtime:	total time spent on the CPU, in nanoseconds
 *
 * This structure groups together three kinds of CPU time that are tracked for
 * threads and thread groups.  Most things considering CPU time want to group
 * these counts together and treat all three of them in parallel.
 */
struct task_cputime {
	u64 utime;
	u64 stime;
	unsigned long long sum_exec_runtime;
};

/* Alternate field names when used to cache expirations. */
#define virt_exp	utime
#define prof_exp	stime
#define sched_exp	sum_exec_runtime

#include <linux/rwsem.h>

#ifdef CONFIG_SCHED_INFO
struct sched_info {
	/* cumulative counters */
	unsigned long pcount;	      /* # of times run on this cpu */
	unsigned long long run_delay; /* time spent waiting on a runqueue */

	/* timestamps */
	unsigned long long last_arrival,/* when we last ran on a cpu */
			   last_queued;	/* when we were last queued to run */
};
#endif /* CONFIG_SCHED_INFO */

/*
 * Integer metrics need fixed point arithmetic, e.g., sched/fair
 * has a few: load, load_avg, util_avg, freq, and capacity.
 *
 * We define a basic fixed point arithmetic range, and then formalize
 * all these metrics based on that basic range.
 */
# define SCHED_FIXEDPOINT_SHIFT	10
# define SCHED_FIXEDPOINT_SCALE	(1L << SCHED_FIXEDPOINT_SHIFT)
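
/*
 * Illustrative example (editor's addition, not in the original header):
 * with SCHED_FIXEDPOINT_SHIFT == 10, the value 1.0 is represented as
 * 1 << 10 == 1024, so a 25% ratio is stored as 256.  Multiplying two
 * fixed-point quantities needs one shift back down, e.g.:
 *
 *	unsigned long half    = 512;		(0.50 in fixed point)
 *	unsigned long quarter = 256;		(0.25 in fixed point)
 *	unsigned long prod    = (half * quarter) >> SCHED_FIXEDPOINT_SHIFT;
 *						(prod == 128, i.e. 0.125)
 */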

#ifdef ARCH_HAS_PREFETCH_SWITCH_STACK
extern void prefetch_stack(struct task_struct *t);
#else
static inline void prefetch_stack(struct task_struct *t) { }
#endif

struct load_weight {
	unsigned long weight;
	u32 inv_weight;
};

/*
 * The load_avg/util_avg accumulates an infinite geometric series
 * (see __update_load_avg() in kernel/sched/fair.c).
 *
 * [load_avg definition]
 *
 *   load_avg = runnable% * scale_load_down(load)
 *
 * where runnable% is the time ratio that a sched_entity is runnable.
 * For cfs_rq, it is the aggregated load_avg of all runnable and
 * blocked sched_entities.
 *
 * load_avg may also take frequency scaling into account:
 *
 *   load_avg = runnable% * scale_load_down(load) * freq%
 *
 * where freq% is the CPU frequency normalized to the highest frequency.
 *
 * [util_avg definition]
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE
 *
 * where running% is the time ratio that a sched_entity is running on
 * a CPU. For cfs_rq, it is the aggregated util_avg of all runnable
 * and blocked sched_entities.
 *
 * util_avg may also factor frequency scaling and CPU capacity scaling:
 *
 *   util_avg = running% * SCHED_CAPACITY_SCALE * freq% * capacity%
 *
 * where freq% is the same as above, and capacity% is the CPU capacity
 * normalized to the greatest capacity (due to uarch differences, etc).
 *
 * N.B., the above ratios (runnable%, running%, freq%, and capacity%)
 * themselves are in the range of [0, 1]. To do fixed point arithmetic,
 * we therefore scale them to as large a range as necessary. This is for
 * example reflected by util_avg's SCHED_CAPACITY_SCALE.
 *
 * [Overflow issue]
 *
 * The 64-bit load_sum can have 4353082796 (=2^64/47742/88761) entities
 * with the highest load (=88761), always runnable on a single cfs_rq,
 * and should not overflow as the number already hits PID_MAX_LIMIT.
 *
 * For all other cases (including 32-bit kernels), struct load_weight's
 * weight will overflow first before we do, because:
 *
 *    Max(load_avg) <= Max(load.weight)
 *
 * Then it is the load_weight's responsibility to consider overflow
 * issues.
 */
struct sched_avg {
	u64 last_update_time, load_sum;
	u32 util_sum, period_contrib;
	unsigned long load_avg, util_avg;
};
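
/*
 * Worked example (editor's addition, not in the original header): on the
 * usual scale of 1024, a task that has recently been running about 25% of
 * the time on a CPU at full frequency and capacity converges towards
 *
 *	util_avg ~= 0.25 * SCHED_CAPACITY_SCALE == 256
 *
 * and a nice-0 task (scale_load_down(load) == 1024) that is runnable half
 * the time converges towards load_avg ~= 512.  The exact values depend on
 * the decaying average computed by __update_load_avg().
 */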

#ifdef CONFIG_SCHEDSTATS
struct sched_statistics {
	u64			wait_start;
	u64			wait_max;
	u64			wait_count;
	u64			wait_sum;
	u64			iowait_count;
	u64			iowait_sum;

	u64			sleep_start;
	u64			sleep_max;
	s64			sum_sleep_runtime;

	u64			block_start;
	u64			block_max;
	u64			exec_max;
	u64			slice_max;

	u64			nr_migrations_cold;
	u64			nr_failed_migrations_affine;
	u64			nr_failed_migrations_running;
	u64			nr_failed_migrations_hot;
	u64			nr_forced_migrations;

	u64			nr_wakeups;
	u64			nr_wakeups_sync;
	u64			nr_wakeups_migrate;
	u64			nr_wakeups_local;
	u64			nr_wakeups_remote;
	u64			nr_wakeups_affine;
	u64			nr_wakeups_affine_attempts;
	u64			nr_wakeups_passive;
	u64			nr_wakeups_idle;
};
#endif

struct sched_entity {
	struct load_weight	load;		/* for load-balancing */
	struct rb_node		run_node;
	struct list_head	group_node;
	unsigned int		on_rq;

	u64			exec_start;
	u64			sum_exec_runtime;
	u64			vruntime;
	u64			prev_sum_exec_runtime;

	u64			nr_migrations;

#ifdef CONFIG_SCHEDSTATS
	struct sched_statistics statistics;
#endif

#ifdef CONFIG_FAIR_GROUP_SCHED
	int			depth;
	struct sched_entity	*parent;
	/* rq on which this entity is (to be) queued: */
	struct cfs_rq		*cfs_rq;
	/* rq "owned" by this entity/group: */
	struct cfs_rq		*my_q;
#endif

#ifdef CONFIG_SMP
	/*
	 * Per entity load average tracking.
	 *
	 * Put into separate cache line so it does not
	 * collide with read-mostly values above.
	 */
	struct sched_avg	avg ____cacheline_aligned_in_smp;
#endif
};

struct sched_rt_entity {
	struct list_head run_list;
	unsigned long timeout;
	unsigned long watchdog_stamp;
	unsigned int time_slice;
	unsigned short on_rq;
	unsigned short on_list;

	struct sched_rt_entity *back;
#ifdef CONFIG_RT_GROUP_SCHED
	struct sched_rt_entity	*parent;
	/* rq on which this entity is (to be) queued: */
	struct rt_rq		*rt_rq;
	/* rq "owned" by this entity/group: */
	struct rt_rq		*my_q;
#endif
};

struct sched_dl_entity {
	struct rb_node	rb_node;

	/*
	 * Original scheduling parameters. Copied here from sched_attr
	 * during sched_setattr(), they will remain the same until
	 * the next sched_setattr().
	 */
	u64 dl_runtime;		/* maximum runtime for each instance	*/
	u64 dl_deadline;	/* relative deadline of each instance	*/
	u64 dl_period;		/* separation of two instances (period) */
	u64 dl_bw;		/* dl_runtime / dl_deadline		*/

	/*
	 * Actual scheduling parameters. Initialized with the values above,
	 * they are continuously updated during task execution. Note that
	 * the remaining runtime could be < 0 in case we are in overrun.
	 */
	s64 runtime;		/* remaining runtime for this instance	*/
	u64 deadline;		/* absolute deadline for this instance	*/
	unsigned int flags;	/* specifying the scheduler behaviour	*/

	/*
	 * Some bool flags:
	 *
	 * @dl_throttled tells if we exhausted the runtime. If so, the
	 * task has to wait for a replenishment to be performed at the
	 * next firing of dl_timer.
	 *
	 * @dl_boosted tells if we are boosted due to DI (deadline
	 * inheritance). If so we are outside the bandwidth enforcement
	 * mechanism (but only until we exit the critical section);
	 *
	 * @dl_yielded tells if task gave up the cpu before consuming
	 * all its available runtime during the last job.
	 */
	int dl_throttled, dl_boosted, dl_yielded;

	/*
	 * Bandwidth enforcement timer. Each -deadline task has its
	 * own bandwidth to be enforced, thus we need one timer per task.
	 */
	struct hrtimer dl_timer;
};
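
/*
 * Illustrative example (editor's addition, not in the original header):
 * a periodic -deadline task that needs at most 10ms of CPU time every
 * 100ms, with a relative deadline of 50ms, would be admitted with
 *
 *	dl_runtime  =  10 * NSEC_PER_MSEC;
 *	dl_deadline =  50 * NSEC_PER_MSEC;
 *	dl_period   = 100 * NSEC_PER_MSEC;
 *
 * normally set from userspace through sched_setattr() with a struct
 * sched_attr carrying sched_runtime/sched_deadline/sched_period.
 */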

union rcu_special {
	struct {
		u8 blocked;
		u8 need_qs;
		u8 exp_need_qs;
		u8 pad;	/* Otherwise the compiler can store garbage here. */
	} b; /* Bits. */
	u32 s; /* Set of bits. */
};

enum perf_event_task_context {
	perf_invalid_context = -1,
	perf_hw_context = 0,
	perf_sw_context,
	perf_nr_task_contexts,
};

struct wake_q_node {
	struct wake_q_node *next;
};

struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/*
	 * For reasons of header soup (see current_thread_info()), this
	 * must be the first element of task_struct.
	 */
	struct thread_info thread_info;
#endif
	volatile long state;	/* -1 unrunnable, 0 runnable, >0 stopped */
	void *stack;
	atomic_t usage;
	unsigned int flags;	/* per process flags, defined below */
	unsigned int ptrace;

#ifdef CONFIG_SMP
	struct llist_node wake_entry;
	int on_cpu;
#ifdef CONFIG_THREAD_INFO_IN_TASK
	unsigned int cpu;	/* current CPU */
#endif
	unsigned int wakee_flips;
	unsigned long wakee_flip_decay_ts;
	struct task_struct *last_wakee;

	int wake_cpu;
#endif
	int on_rq;

	int prio, static_prio, normal_prio;
	unsigned int rt_priority;
	const struct sched_class *sched_class;
	struct sched_entity se;
	struct sched_rt_entity rt;
#ifdef CONFIG_CGROUP_SCHED
	struct task_group *sched_task_group;
#endif
	struct sched_dl_entity dl;

#ifdef CONFIG_PREEMPT_NOTIFIERS
	/* list of struct preempt_notifier: */
	struct hlist_head preempt_notifiers;
#endif

#ifdef CONFIG_BLK_DEV_IO_TRACE
	unsigned int btrace_seq;
#endif

	unsigned int policy;
	int nr_cpus_allowed;
	cpumask_t cpus_allowed;

#ifdef CONFIG_PREEMPT_RCU
	int rcu_read_lock_nesting;
	union rcu_special rcu_read_unlock_special;
	struct list_head rcu_node_entry;
	struct rcu_node *rcu_blocked_node;
#endif /* #ifdef CONFIG_PREEMPT_RCU */
#ifdef CONFIG_TASKS_RCU
	unsigned long rcu_tasks_nvcsw;
	bool rcu_tasks_holdout;
	struct list_head rcu_tasks_holdout_list;
	int rcu_tasks_idle_cpu;
#endif /* #ifdef CONFIG_TASKS_RCU */

#ifdef CONFIG_SCHED_INFO
	struct sched_info sched_info;
#endif

	struct list_head tasks;
#ifdef CONFIG_SMP
	struct plist_node pushable_tasks;
	struct rb_node pushable_dl_tasks;
#endif

	struct mm_struct *mm, *active_mm;

	/* Per-thread vma caching: */
	struct vmacache vmacache;

#if defined(SPLIT_RSS_COUNTING)
	struct task_rss_stat	rss_stat;
#endif
/* task state */
	int exit_state;
	int exit_code, exit_signal;
	int pdeath_signal;  /*  The signal sent when the parent dies  */
	unsigned long jobctl;	/* JOBCTL_*, siglock protected */

	/* Used for emulating ABI behavior of previous Linux versions */
	unsigned int personality;

	/* scheduler bits, serialized by scheduler locks */
	unsigned sched_reset_on_fork:1;
	unsigned sched_contributes_to_load:1;
	unsigned sched_migrated:1;
	unsigned sched_remote_wakeup:1;
	unsigned :0; /* force alignment to the next boundary */

	/* unserialized, strictly 'current' */
	unsigned in_execve:1; /* bit to tell LSMs we're in execve */
	unsigned in_iowait:1;
#if !defined(TIF_RESTORE_SIGMASK)
	unsigned restore_sigmask:1;
#endif
#ifdef CONFIG_MEMCG
	unsigned memcg_may_oom:1;
#ifndef CONFIG_SLOB
	unsigned memcg_kmem_skip_account:1;
#endif
#endif
#ifdef CONFIG_COMPAT_BRK
	unsigned brk_randomized:1;
#endif

	unsigned long atomic_flags; /* Flags needing atomic access. */

	struct restart_block restart_block;

	pid_t pid;
	pid_t tgid;

#ifdef CONFIG_CC_STACKPROTECTOR
	/* Canary value for the -fstack-protector gcc feature */
	unsigned long stack_canary;
#endif
	/*
	 * pointers to (original) parent process, youngest child, younger sibling,
	 * older sibling, respectively.  (p->father can be replaced with
	 * p->real_parent->pid)
	 */
	struct task_struct __rcu *real_parent; /* real parent process */
	struct task_struct __rcu *parent; /* recipient of SIGCHLD, wait4() reports */
	/*
	 * children/sibling forms the list of my natural children
	 */
	struct list_head children;	/* list of my children */
	struct list_head sibling;	/* linkage in my parent's children list */
	struct task_struct *group_leader;	/* threadgroup leader */

	/*
	 * ptraced is the list of tasks this task is using ptrace on.
	 * This includes both natural children and PTRACE_ATTACH targets.
	 * p->ptrace_entry is p's link on the p->parent->ptraced list.
	 */
	struct list_head ptraced;
	struct list_head ptrace_entry;

	/* PID/PID hash table linkage. */
	struct pid_link pids[PIDTYPE_MAX];
	struct list_head thread_group;
	struct list_head thread_node;

	struct completion *vfork_done;		/* for vfork() */
	int __user *set_child_tid;		/* CLONE_CHILD_SETTID */
	int __user *clear_child_tid;		/* CLONE_CHILD_CLEARTID */

	u64 utime, stime;
#ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
	u64 utimescaled, stimescaled;
#endif
	u64 gtime;
	struct prev_cputime prev_cputime;
#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN
	seqcount_t vtime_seqcount;
	unsigned long long vtime_snap;
	enum {
		/* Task is sleeping or running in a CPU with VTIME inactive */
		VTIME_INACTIVE = 0,
		/* Task runs in userspace in a CPU with VTIME active */
		VTIME_USER,
		/* Task runs in kernelspace in a CPU with VTIME active */
		VTIME_SYS,
	} vtime_snap_whence;
#endif

#ifdef CONFIG_NO_HZ_FULL
	atomic_t tick_dep_mask;
#endif
	unsigned long nvcsw, nivcsw; /* context switch counts */
	u64 start_time;		/* monotonic time in nsec */
	u64 real_start_time;	/* boot based time in nsec */
/* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */
	unsigned long min_flt, maj_flt;

#ifdef CONFIG_POSIX_TIMERS
	struct task_cputime cputime_expires;
	struct list_head cpu_timers[3];
#endif

/* process credentials */
	const struct cred __rcu *ptracer_cred; /* Tracer's credentials at attach */
	const struct cred __rcu *real_cred; /* objective and real subjective task
					 * credentials (COW) */
	const struct cred __rcu *cred;	/* effective (overridable) subjective task
					 * credentials (COW) */
	char comm[TASK_COMM_LEN]; /* executable name excluding path
				     - access with [gs]et_task_comm (which lock
				       it with task_lock())
				     - initialized normally by setup_new_exec */
/* file system info */
	struct nameidata *nameidata;
#ifdef CONFIG_SYSVIPC
/* ipc stuff */
	struct sysv_sem sysvsem;
	struct sysv_shm sysvshm;
#endif
#ifdef CONFIG_DETECT_HUNG_TASK
/* hung task detection */
	unsigned long last_switch_count;
#endif
/* filesystem information */
	struct fs_struct *fs;
/* open file information */
	struct files_struct *files;
/* namespaces */
	struct nsproxy *nsproxy;
/* signal handlers */
	struct signal_struct *signal;
	struct sighand_struct *sighand;

	sigset_t blocked, real_blocked;
	sigset_t saved_sigmask;	/* restored if set_restore_sigmask() was used */
	struct sigpending pending;

	unsigned long sas_ss_sp;
	size_t sas_ss_size;
	unsigned sas_ss_flags;

	struct callback_head *task_works;

	struct audit_context *audit_context;
#ifdef CONFIG_AUDITSYSCALL
	kuid_t loginuid;
	unsigned int sessionid;
#endif
	struct seccomp seccomp;

/* Thread group tracking */
   	u32 parent_exec_id;
   	u32 self_exec_id;
/* Protection of (de-)allocation: mm, files, fs, tty, keyrings, mems_allowed,
 * mempolicy */
	spinlock_t alloc_lock;

	/* Protection of the PI data structures: */
727
	raw_spinlock_t pi_lock;
728

729 730
	struct wake_q_node wake_q;

I
Ingo Molnar 已提交
731 732
#ifdef CONFIG_RT_MUTEXES
	/* PI waiters blocked on a rt_mutex held by this task */
733 734
	struct rb_root pi_waiters;
	struct rb_node *pi_waiters_leftmost;
I
Ingo Molnar 已提交
735 736 737 738
	/* Deadlock detection and priority inheritance handling */
	struct rt_mutex_waiter *pi_blocked_on;
#endif

739 740 741 742
#ifdef CONFIG_DEBUG_MUTEXES
	/* mutex deadlock detection */
	struct mutex_waiter *blocked_on;
#endif
743 744 745 746
#ifdef CONFIG_TRACE_IRQFLAGS
	unsigned int irq_events;
	unsigned long hardirq_enable_ip;
	unsigned long hardirq_disable_ip;
747
	unsigned int hardirq_enable_event;
748
	unsigned int hardirq_disable_event;
749 750
	int hardirqs_enabled;
	int hardirq_context;
751 752
	unsigned long softirq_disable_ip;
	unsigned long softirq_enable_ip;
753
	unsigned int softirq_disable_event;
754
	unsigned int softirq_enable_event;
755
	int softirqs_enabled;
756 757
	int softirq_context;
#endif
I
Ingo Molnar 已提交
758
#ifdef CONFIG_LOCKDEP
759
# define MAX_LOCK_DEPTH 48UL
I
Ingo Molnar 已提交
760 761 762
	u64 curr_chain_key;
	int lockdep_depth;
	unsigned int lockdep_recursion;
763
	struct held_lock held_locks[MAX_LOCK_DEPTH];
764
	gfp_t lockdep_reclaim_gfp;
I
Ingo Molnar 已提交
765
#endif
766 767 768
#ifdef CONFIG_UBSAN
	unsigned int in_ubsan;
#endif
769

L
Linus Torvalds 已提交
770 771 772
/* journalling filesystem info */
	void *journal_info;

773
/* stacked block device info */
774
	struct bio_list *bio_list;
775

776 777 778 779 780
#ifdef CONFIG_BLOCK
/* stack plugging */
	struct blk_plug *plug;
#endif

L
/* VM state */
	struct reclaim_state *reclaim_state;

	struct backing_dev_info *backing_dev_info;

	struct io_context *io_context;

	unsigned long ptrace_message;
	siginfo_t *last_siginfo; /* For ptrace use.  */
	struct task_io_accounting ioac;
#if defined(CONFIG_TASK_XACCT)
	u64 acct_rss_mem1;	/* accumulated rss usage */
	u64 acct_vm_mem1;	/* accumulated virtual memory usage */
	u64 acct_timexpd;	/* stime + utime since last update */
#endif
#ifdef CONFIG_CPUSETS
	nodemask_t mems_allowed;	/* Protected by alloc_lock */
	seqcount_t mems_allowed_seq;	/* Sequence no to catch updates */
	int cpuset_mem_spread_rotor;
	int cpuset_slab_spread_rotor;
#endif
#ifdef CONFIG_CGROUPS
	/* Control Group info protected by css_set_lock */
	struct css_set __rcu *cgroups;
	/* cg_list protected by css_set_lock and tsk->alloc_lock */
	struct list_head cg_list;
#endif
#ifdef CONFIG_INTEL_RDT_A
	int closid;
#endif
#ifdef CONFIG_FUTEX
	struct robust_list_head __user *robust_list;
#ifdef CONFIG_COMPAT
	struct compat_robust_list_head __user *compat_robust_list;
#endif
	struct list_head pi_state_list;
	struct futex_pi_state *pi_state_cache;
#endif
#ifdef CONFIG_PERF_EVENTS
	struct perf_event_context *perf_event_ctxp[perf_nr_task_contexts];
	struct mutex perf_event_mutex;
	struct list_head perf_event_list;
#endif
#ifdef CONFIG_DEBUG_PREEMPT
	unsigned long preempt_disable_ip;
#endif
#ifdef CONFIG_NUMA
	struct mempolicy *mempolicy;	/* Protected by alloc_lock */
	short il_next;
	short pref_node_fork;
#endif
#ifdef CONFIG_NUMA_BALANCING
	int numa_scan_seq;
	unsigned int numa_scan_period;
	unsigned int numa_scan_period_max;
	int numa_preferred_nid;
	unsigned long numa_migrate_retry;
	u64 node_stamp;			/* migration stamp  */
	u64 last_task_numa_placement;
	u64 last_sum_exec_runtime;
	struct callback_head numa_work;

	struct list_head numa_entry;
	struct numa_group *numa_group;

846
	/*
847 848 849 850 851 852 853 854 855 856 857 858
	 * numa_faults is an array split into four regions:
	 * faults_memory, faults_cpu, faults_memory_buffer, faults_cpu_buffer
	 * in this precise order.
	 *
	 * faults_memory: Exponential decaying average of faults on a per-node
	 * basis. Scheduling placement decisions are made based on these
	 * counts. The values remain static for the duration of a PTE scan.
	 * faults_cpu: Track the nodes the process was running on when a NUMA
	 * hinting fault was incurred.
	 * faults_memory_buffer and faults_cpu_buffer: Record faults per node
	 * during the current scan window. When the scan completes, the counts
	 * in faults_memory and faults_cpu decay and these values are copied.
859
	 */
860
	unsigned long *numa_faults;
861
	unsigned long total_numa_faults;
862

863 864
	/*
	 * numa_faults_locality tracks if faults recorded during the last
865 866 867
	 * scan window were remote/local or failed to migrate. The task scan
	 * period is adapted based on the locality of the faults with different
	 * weights depending on whether they were shared or private faults
868
	 */
869
	unsigned long numa_faults_locality[3];
870

I
Ingo Molnar 已提交
871
	unsigned long numa_pages_migrated;
872 873
#endif /* CONFIG_NUMA_BALANCING */

874 875
	struct tlbflush_unmap_batch tlb_ubc;

I
Ingo Molnar 已提交
876
	struct rcu_head rcu;
877 878 879 880 881

	/*
	 * cache last used pipe for splice
	 */
	struct pipe_inode_info *splice_pipe;
882 883 884

	struct page_frag task_frag;

885 886
#ifdef CONFIG_TASK_DELAY_ACCT
	struct task_delay_info		*delays;
887
#endif
888

889 890
#ifdef CONFIG_FAULT_INJECTION
	int make_it_fail;
891
#endif
892 893 894 895 896 897
	/*
	 * when (nr_dirtied >= nr_dirtied_pause), it's time to call
	 * balance_dirty_pages() for some dirty throttling pause
	 */
	int nr_dirtied;
	int nr_dirtied_pause;
898
	unsigned long dirty_paused_when; /* start of a write-and-pause period */
899

A
Arjan van de Ven 已提交
900 901 902 903
#ifdef CONFIG_LATENCYTOP
	int latency_record_count;
	struct latency_record latency_record[LT_SAVECOUNT];
#endif
904 905 906 907
	/*
	 * time slack values; these are used to round up poll() and
	 * select() etc timeout values. These are in nanoseconds.
	 */
908 909
	u64 timer_slack_ns;
	u64 default_timer_slack_ns;
910

911 912 913
#ifdef CONFIG_KASAN
	unsigned int kasan_depth;
#endif
914
#ifdef CONFIG_FUNCTION_GRAPH_TRACER
D
Daniel Mack 已提交
915
	/* Index of current stored address in ret_stack */
916 917 918
	int curr_ret_stack;
	/* Stack of return addresses for return function tracing */
	struct ftrace_ret_stack	*ret_stack;
919 920
	/* time stamp for last schedule */
	unsigned long long ftrace_timestamp;
921 922 923 924 925
	/*
	 * Number of functions that haven't been traced
	 * because of depth overrun.
	 */
	atomic_t trace_overrun;
926 927
	/* Pause for the tracing */
	atomic_t tracing_graph_pause;
928
#endif
929 930 931
#ifdef CONFIG_TRACING
	/* state flags for use by tracers */
	unsigned long trace;
932
	/* bitmask and counter of trace recursion */
933 934
	unsigned long trace_recursion;
#endif /* CONFIG_TRACING */
D
Dmitry Vyukov 已提交
935 936 937 938 939 940 941 942 943 944
#ifdef CONFIG_KCOV
	/* Coverage collection mode enabled for this task (0 if disabled). */
	enum kcov_mode kcov_mode;
	/* Size of the kcov_area. */
	unsigned	kcov_size;
	/* Buffer for coverage collection. */
	void		*kcov_area;
	/* kcov desciptor wired with this task or NULL. */
	struct kcov	*kcov;
#endif
945
#ifdef CONFIG_MEMCG
T
Tejun Heo 已提交
946 947 948
	struct mem_cgroup *memcg_in_oom;
	gfp_t memcg_oom_gfp_mask;
	int memcg_oom_order;
949 950 951

	/* number of pages to reclaim on returning to userland */
	unsigned int memcg_nr_pages_over_high;
952
#endif
953 954 955
#ifdef CONFIG_UPROBES
	struct uprobe_task *utask;
#endif
K
Kent Overstreet 已提交
956 957 958 959
#if defined(CONFIG_BCACHE) || defined(CONFIG_BCACHE_MODULE)
	unsigned int	sequential_io;
	unsigned int	sequential_io_avg;
#endif
P
Peter Zijlstra 已提交
960 961 962
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
	unsigned long	task_state_change;
#endif
963
	int pagefault_disabled;
964
#ifdef CONFIG_MMU
965
	struct task_struct *oom_reaper_list;
966
#endif
967 968 969
#ifdef CONFIG_VMAP_STACK
	struct vm_struct *stack_vm_area;
#endif
970 971 972 973
#ifdef CONFIG_THREAD_INFO_IN_TASK
	/* A live task holds one reference. */
	atomic_t stack_refcount;
#endif
974 975 976 977 978 979 980 981
/* CPU-specific state of this task */
	struct thread_struct thread;
/*
 * WARNING: on x86, 'thread_struct' contains a variable-sized
 * structure.  It *MUST* be at the end of 'task_struct'.
 *
 * Do not put anything below here!
 */
L
Linus Torvalds 已提交
982 983
};

static inline struct pid *task_pid(struct task_struct *task)
{
	return task->pids[PIDTYPE_PID].pid;
}

static inline struct pid *task_tgid(struct task_struct *task)
{
	return task->group_leader->pids[PIDTYPE_PID].pid;
}

/*
 * Without tasklist or rcu lock it is not safe to dereference
 * the result of task_pgrp/task_session even if task == current,
 * we can race with another thread doing sys_setsid/sys_setpgid.
 */
static inline struct pid *task_pgrp(struct task_struct *task)
{
	return task->group_leader->pids[PIDTYPE_PGID].pid;
}

static inline struct pid *task_session(struct task_struct *task)
{
	return task->group_leader->pids[PIDTYPE_SID].pid;
}

/*
 * the helpers to get the task's different pids as they are seen
 * from various namespaces
 *
 * task_xid_nr()     : global id, i.e. the id seen from the init namespace;
 * task_xid_vnr()    : virtual id, i.e. the id seen from the pid namespace of
 *                     current.
 * task_xid_nr_ns()  : id seen from the ns specified;
 *
 * set_task_vxid()   : assigns a virtual id to a task;
 *
 * see also pid_nr() etc in include/linux/pid.h
 */
pid_t __task_pid_nr_ns(struct task_struct *task, enum pid_type type,
			struct pid_namespace *ns);
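
/*
 * Illustrative example (editor's addition, not in the original header):
 * the same task has different numerical ids depending on the observer,
 * e.g. for a task @p inside a PID namespace:
 *
 *	pid_t global = task_pid_nr(p);		(id seen from the init namespace)
 *	pid_t local  = task_pid_vnr(p);		(id seen from current's namespace)
 *	pid_t in_ns  = task_pid_nr_ns(p, ns);	(id seen from a given namespace)
 *
 * using the helpers defined below.
 */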

static inline pid_t task_pid_nr(struct task_struct *tsk)
{
	return tsk->pid;
}

static inline pid_t task_pid_nr_ns(struct task_struct *tsk,
					struct pid_namespace *ns)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PID, ns);
}

static inline pid_t task_pid_vnr(struct task_struct *tsk)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PID, NULL);
}


static inline pid_t task_tgid_nr(struct task_struct *tsk)
{
	return tsk->tgid;
}

pid_t task_tgid_nr_ns(struct task_struct *tsk, struct pid_namespace *ns);

static inline pid_t task_tgid_vnr(struct task_struct *tsk)
{
	return pid_vnr(task_tgid(tsk));
}


static inline int pid_alive(const struct task_struct *p);
static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns)
{
	pid_t pid = 0;

	rcu_read_lock();
	if (pid_alive(tsk))
		pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns);
	rcu_read_unlock();

	return pid;
}

static inline pid_t task_ppid_nr(const struct task_struct *tsk)
{
	return task_ppid_nr_ns(tsk, &init_pid_ns);
}

static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk,
					struct pid_namespace *ns)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, ns);
}

static inline pid_t task_pgrp_vnr(struct task_struct *tsk)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_PGID, NULL);
}


static inline pid_t task_session_nr_ns(struct task_struct *tsk,
					struct pid_namespace *ns)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_SID, ns);
}

static inline pid_t task_session_vnr(struct task_struct *tsk)
{
	return __task_pid_nr_ns(tsk, PIDTYPE_SID, NULL);
}

/* obsolete, do not use */
static inline pid_t task_pgrp_nr(struct task_struct *tsk)
{
	return task_pgrp_nr_ns(tsk, &init_pid_ns);
}

/**
 * pid_alive - check that a task structure is not stale
 * @p: Task structure to be checked.
 *
 * Test if a process is not yet dead (at most zombie state)
 * If pid_alive fails, then pointers within the task structure
 * can be stale and must not be dereferenced.
 *
 * Return: 1 if the process is alive. 0 otherwise.
 */
static inline int pid_alive(const struct task_struct *p)
{
	return p->pids[PIDTYPE_PID].pid != NULL;
}

/**
 * is_global_init - check if a task structure is init. Since init
 * is free to have sub-threads we need to check tgid.
 * @tsk: Task structure to be checked.
 *
 * Check if a task structure is the first user space task the kernel created.
 *
 * Return: 1 if the task structure is init. 0 otherwise.
 */
static inline int is_global_init(struct task_struct *tsk)
{
	return task_tgid_nr(tsk) == 1;
}

extern struct pid *cad_pid;

/*
 * Per process flags
 */
#define PF_IDLE		0x00000002	/* I am an IDLE thread */
#define PF_EXITING	0x00000004	/* getting shut down */
#define PF_EXITPIDONE	0x00000008	/* pi exit done on shut down */
#define PF_VCPU		0x00000010	/* I'm a virtual CPU */
#define PF_WQ_WORKER	0x00000020	/* I'm a workqueue worker */
#define PF_FORKNOEXEC	0x00000040	/* forked but didn't exec */
#define PF_MCE_PROCESS  0x00000080      /* process policy on mce errors */
#define PF_SUPERPRIV	0x00000100	/* used super-user privileges */
#define PF_DUMPCORE	0x00000200	/* dumped core */
#define PF_SIGNALED	0x00000400	/* killed by a signal */
#define PF_MEMALLOC	0x00000800	/* Allocating memory */
#define PF_NPROC_EXCEEDED 0x00001000	/* set_user noticed that RLIMIT_NPROC was exceeded */
#define PF_USED_MATH	0x00002000	/* if unset the fpu must be initialized before use */
#define PF_USED_ASYNC	0x00004000	/* used async_schedule*(), used by module init */
#define PF_NOFREEZE	0x00008000	/* this thread should not be frozen */
#define PF_FROZEN	0x00010000	/* frozen for system suspend */
#define PF_FSTRANS	0x00020000	/* inside a filesystem transaction */
#define PF_KSWAPD	0x00040000	/* I am kswapd */
#define PF_MEMALLOC_NOIO 0x00080000	/* Allocating memory without IO involved */
#define PF_LESS_THROTTLE 0x00100000	/* Throttle me less: I clean memory */
#define PF_KTHREAD	0x00200000	/* I am a kernel thread */
#define PF_RANDOMIZE	0x00400000	/* randomize virtual address space */
#define PF_SWAPWRITE	0x00800000	/* Allowed to write to swap */
#define PF_NO_SETAFFINITY 0x04000000	/* Userland is not allowed to meddle with cpus_allowed */
#define PF_MCE_EARLY    0x08000000      /* Early kill for mce process policy */
#define PF_MUTEX_TESTER	0x20000000	/* Thread belongs to the rt mutex tester */
#define PF_FREEZER_SKIP	0x40000000	/* Freezer should not count it as freezable */
#define PF_SUSPEND_TASK 0x80000000      /* this thread called freeze_processes and should not be frozen */

/*
 * Only the _current_ task can read/write to tsk->flags, but other
 * tasks can access tsk->flags in readonly mode for example
 * with tsk_used_math (like during threaded core dumping).
 * There is however an exception to this rule during ptrace
 * or during fork: the ptracer task is allowed to write to the
 * child->flags of its traced child (same goes for fork, the parent
 * can write to the child->flags), because we're guaranteed the
 * child is not running and in turn not changing child->flags
 * at the same time the parent does it.
 */
#define clear_stopped_child_used_math(child) do { (child)->flags &= ~PF_USED_MATH; } while (0)
#define set_stopped_child_used_math(child) do { (child)->flags |= PF_USED_MATH; } while (0)
#define clear_used_math() clear_stopped_child_used_math(current)
#define set_used_math() set_stopped_child_used_math(current)
#define conditional_stopped_child_used_math(condition, child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= (condition) ? PF_USED_MATH : 0; } while (0)
#define conditional_used_math(condition) \
	conditional_stopped_child_used_math(condition, current)
#define copy_to_stopped_child_used_math(child) \
	do { (child)->flags &= ~PF_USED_MATH, (child)->flags |= current->flags & PF_USED_MATH; } while (0)
/* NOTE: this will return 0 or PF_USED_MATH, it will never return 1 */
#define tsk_used_math(p) ((p)->flags & PF_USED_MATH)
#define used_math() tsk_used_math(current)

/* Per-process atomic flags. */
#define PFA_NO_NEW_PRIVS 0	/* May not gain new privileges. */
#define PFA_SPREAD_PAGE  1      /* Spread page cache over cpuset */
#define PFA_SPREAD_SLAB  2      /* Spread some slab caches over cpuset */
#define PFA_LMK_WAITING  3      /* Lowmemorykiller is waiting */


#define TASK_PFA_TEST(name, func)					\
	static inline bool task_##func(struct task_struct *p)		\
	{ return test_bit(PFA_##name, &p->atomic_flags); }
#define TASK_PFA_SET(name, func)					\
	static inline void task_set_##func(struct task_struct *p)	\
	{ set_bit(PFA_##name, &p->atomic_flags); }
#define TASK_PFA_CLEAR(name, func)					\
	static inline void task_clear_##func(struct task_struct *p)	\
	{ clear_bit(PFA_##name, &p->atomic_flags); }

TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs)
TASK_PFA_SET(NO_NEW_PRIVS, no_new_privs)

TASK_PFA_TEST(SPREAD_PAGE, spread_page)
TASK_PFA_SET(SPREAD_PAGE, spread_page)
TASK_PFA_CLEAR(SPREAD_PAGE, spread_page)

TASK_PFA_TEST(SPREAD_SLAB, spread_slab)
TASK_PFA_SET(SPREAD_SLAB, spread_slab)
TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab)

TASK_PFA_TEST(LMK_WAITING, lmk_waiting)
TASK_PFA_SET(LMK_WAITING, lmk_waiting)
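
/*
 * Illustrative expansion (editor's addition, not in the original header):
 * TASK_PFA_TEST(NO_NEW_PRIVS, no_new_privs) above generates
 *
 *	static inline bool task_no_new_privs(struct task_struct *p)
 *	{ return test_bit(PFA_NO_NEW_PRIVS, &p->atomic_flags); }
 *
 * i.e. every PFA_* bit gets small type-safe test/set/clear accessors that
 * operate atomically on task->atomic_flags.
 */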

static inline void tsk_restore_flags(struct task_struct *task,
				unsigned long orig_flags, unsigned long flags)
{
	task->flags &= ~flags;
	task->flags |= orig_flags & flags;
}

extern int cpuset_cpumask_can_shrink(const struct cpumask *cur,
				     const struct cpumask *trial);
extern int task_can_attach(struct task_struct *p,
			   const struct cpumask *cs_cpus_allowed);
#ifdef CONFIG_SMP
extern void do_set_cpus_allowed(struct task_struct *p,
			       const struct cpumask *new_mask);

extern int set_cpus_allowed_ptr(struct task_struct *p,
				const struct cpumask *new_mask);
#else
static inline void do_set_cpus_allowed(struct task_struct *p,
				      const struct cpumask *new_mask)
{
}
static inline int set_cpus_allowed_ptr(struct task_struct *p,
				       const struct cpumask *new_mask)
{
	if (!cpumask_test_cpu(0, new_mask))
		return -EINVAL;
	return 0;
}
#endif

#ifndef cpu_relax_yield
#define cpu_relax_yield() cpu_relax()
#endif

extern int yield_to(struct task_struct *p, bool preempt);
extern void set_user_nice(struct task_struct *p, long nice);
extern int task_prio(const struct task_struct *p);
/**
 * task_nice - return the nice value of a given task.
 * @p: the task in question.
 *
 * Return: The nice value [ -20 ... 0 ... 19 ].
 */
static inline int task_nice(const struct task_struct *p)
{
	return PRIO_TO_NICE((p)->static_prio);
}
extern int can_nice(const struct task_struct *p, const int nice);
extern int task_curr(const struct task_struct *p);
extern int idle_cpu(int cpu);
extern int sched_setscheduler(struct task_struct *, int,
			      const struct sched_param *);
extern int sched_setscheduler_nocheck(struct task_struct *, int,
				      const struct sched_param *);
extern int sched_setattr(struct task_struct *,
			 const struct sched_attr *);
extern struct task_struct *idle_task(int cpu);
/**
 * is_idle_task - is the specified task an idle task?
 * @p: the task in question.
 *
 * Return: 1 if @p is an idle task. 0 otherwise.
 */
static inline bool is_idle_task(const struct task_struct *p)
{
	return !!(p->flags & PF_IDLE);
}
extern struct task_struct *curr_task(int cpu);
extern void ia64_set_curr_task(int cpu, struct task_struct *p);

void yield(void);

union thread_union {
#ifndef CONFIG_THREAD_INFO_IN_TASK
	struct thread_info thread_info;
#endif
	unsigned long stack[THREAD_SIZE/sizeof(long)];
};

#ifdef CONFIG_THREAD_INFO_IN_TASK
static inline struct thread_info *task_thread_info(struct task_struct *task)
{
	return &task->thread_info;
}
#elif !defined(__HAVE_THREAD_FUNCTIONS)
# define task_thread_info(task)	((struct thread_info *)(task)->stack)
#endif

extern struct pid_namespace init_pid_ns;

/*
 * find a task by one of its numerical ids
 *
 * find_task_by_pid_ns():
 *      finds a task by its pid in the specified namespace
 * find_task_by_vpid():
 *      finds a task by its virtual pid
 *
 * see also find_vpid() etc in include/linux/pid.h
 */

extern struct task_struct *find_task_by_vpid(pid_t nr);
extern struct task_struct *find_task_by_pid_ns(pid_t nr,
		struct pid_namespace *ns);

extern int wake_up_state(struct task_struct *tsk, unsigned int state);
extern int wake_up_process(struct task_struct *tsk);
extern void wake_up_new_task(struct task_struct *tsk);
#ifdef CONFIG_SMP
 extern void kick_process(struct task_struct *tsk);
#else
 static inline void kick_process(struct task_struct *tsk) { }
#endif

extern void __set_task_comm(struct task_struct *tsk, const char *from, bool exec);
static inline void set_task_comm(struct task_struct *tsk, const char *from)
{
	__set_task_comm(tsk, from, false);
}
extern char *get_task_comm(char *to, struct task_struct *tsk);

#ifdef CONFIG_SMP
void scheduler_ipi(void);
extern unsigned long wait_task_inactive(struct task_struct *, long match_state);
#else
static inline void scheduler_ipi(void) { }
static inline unsigned long wait_task_inactive(struct task_struct *p,
					       long match_state)
{
	return 1;
}
#endif

/* set thread flags in other task's structures
 * - see asm/thread_info.h for TIF_xxxx flags available
 */
static inline void set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	set_ti_thread_flag(task_thread_info(tsk), flag);
}

static inline void clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	clear_ti_thread_flag(task_thread_info(tsk), flag);
}

static inline int test_and_set_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_and_set_ti_thread_flag(task_thread_info(tsk), flag);
}

static inline int test_and_clear_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_and_clear_ti_thread_flag(task_thread_info(tsk), flag);
}

static inline int test_tsk_thread_flag(struct task_struct *tsk, int flag)
{
	return test_ti_thread_flag(task_thread_info(tsk), flag);
}

static inline void set_tsk_need_resched(struct task_struct *tsk)
{
	set_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

static inline void clear_tsk_need_resched(struct task_struct *tsk)
{
	clear_tsk_thread_flag(tsk,TIF_NEED_RESCHED);
}

static inline int test_tsk_need_resched(struct task_struct *tsk)
{
	return unlikely(test_tsk_thread_flag(tsk,TIF_NEED_RESCHED));
}

/*
 * cond_resched() and cond_resched_lock(): latency reduction via
 * explicit rescheduling in places that are safe. The return
 * value indicates whether a reschedule was done in fact.
 * cond_resched_lock() will drop the spinlock before scheduling,
 * cond_resched_softirq() will enable bhs before scheduling.
 */
#ifndef CONFIG_PREEMPT
extern int _cond_resched(void);
#else
static inline int _cond_resched(void) { return 0; }
#endif

#define cond_resched() ({			\
	___might_sleep(__FILE__, __LINE__, 0);	\
	_cond_resched();			\
})

extern int __cond_resched_lock(spinlock_t *lock);

#define cond_resched_lock(lock) ({				\
	___might_sleep(__FILE__, __LINE__, PREEMPT_LOCK_OFFSET);\
	__cond_resched_lock(lock);				\
})

extern int __cond_resched_softirq(void);

#define cond_resched_softirq() ({					\
	___might_sleep(__FILE__, __LINE__, SOFTIRQ_DISABLE_OFFSET);	\
	__cond_resched_softirq();					\
})

static inline void cond_resched_rcu(void)
{
#if defined(CONFIG_DEBUG_ATOMIC_SLEEP) || !defined(CONFIG_PREEMPT_RCU)
	rcu_read_unlock();
	cond_resched();
	rcu_read_lock();
#endif
}
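
/*
 * Illustrative usage (editor's addition, not in the original header): a
 * long-running loop in process context typically yields the CPU between
 * iterations, e.g.
 *
 *	for (i = 0; i < nr_items; i++) {
 *		process(items[i]);		(hypothetical per-item work)
 *		cond_resched();
 *	}
 *
 * and a caller that holds a spinlock can use cond_resched_lock(), which
 * drops and re-takes the lock across the reschedule.
 */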

/*
 * Does a critical section need to be broken due to another
 * task waiting?: (technically does not depend on CONFIG_PREEMPT,
 * but a general need for low latency)
 */
static inline int spin_needbreak(spinlock_t *lock)
{
#ifdef CONFIG_PREEMPT
	return spin_is_contended(lock);
#else
	return 0;
#endif
}

static __always_inline bool need_resched(void)
{
	return unlikely(tif_need_resched());
}

/*
 * Wrappers for p->thread_info->cpu access. No-op on UP.
 */
#ifdef CONFIG_SMP

static inline unsigned int task_cpu(const struct task_struct *p)
{
#ifdef CONFIG_THREAD_INFO_IN_TASK
	return p->cpu;
#else
	return task_thread_info(p)->cpu;
#endif
}

static inline int task_node(const struct task_struct *p)
{
	return cpu_to_node(task_cpu(p));
}

extern void set_task_cpu(struct task_struct *p, unsigned int cpu);

#else

static inline unsigned int task_cpu(const struct task_struct *p)
{
	return 0;
}

static inline void set_task_cpu(struct task_struct *p, unsigned int cpu)
{
}

#endif /* CONFIG_SMP */

/*
 * In order to reduce various lock holder preemption latencies provide an
 * interface to see if a vCPU is currently running or not.
 *
 * This allows us to terminate optimistic spin loops and block, analogous to
 * the native optimistic spin heuristic of testing if the lock owner task is
 * running or not.
 */
#ifndef vcpu_is_preempted
# define vcpu_is_preempted(cpu)	false
#endif
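
/*
 * Illustrative usage (editor's addition, not in the original header): a
 * paravirtualized spin-wait can stop spinning early when the lock owner's
 * vCPU has been preempted by the hypervisor, e.g.
 *
 *	while (!try_acquire(lock)) {		(hypothetical acquire attempt)
 *		if (vcpu_is_preempted(task_cpu(owner)))
 *			break;			(sleep instead of spinning)
 *		cpu_relax();
 *	}
 *
 * where try_acquire(), lock and owner stand in for the caller's own
 * locking logic.
 */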

extern long sched_setaffinity(pid_t pid, const struct cpumask *new_mask);
extern long sched_getaffinity(pid_t pid, struct cpumask *mask);

#ifndef TASK_SIZE_OF
#define TASK_SIZE_OF(tsk)	TASK_SIZE
#endif

#endif