mempolicy.h 9.4 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7 8 9 10
#ifndef _LINUX_MEMPOLICY_H
#define _LINUX_MEMPOLICY_H 1

#include <linux/errno.h>

/*
 * NUMA memory policies for Linux.
 * Copyright 2003,2004 Andi Kleen SuSE Labs
 */

11 12 13 14 15 16
/*
 * Both the MPOL_* mempolicy mode and the MPOL_F_* optional mode flags are
 * passed by the user to either set_mempolicy() or mbind() in a single 'int'
 * argument.  The MPOL_MODE_FLAGS macro determines the legal set of optional
 * mode flags.
 */

L
Linus Torvalds 已提交
17
/* Policies */
18 19 20 21 22 23 24
/*
 * Mempolicy mode values.  They are numbered sequentially from zero; the
 * values are spelled out so the numbering is visible at a glance.
 */
enum {
	MPOL_DEFAULT = 0,
	MPOL_PREFERRED = 1,
	MPOL_BIND = 2,
	MPOL_INTERLEAVE = 3,
	MPOL_MAX = 4,	/* sentinel: must remain the last member of the enum */
};
L
Linus Torvalds 已提交
25

26 27 28 29 30 31 32
/* Selects which phase of a policy rebind is being requested. */
enum mpol_rebind_step {
	MPOL_REBIND_ONCE = 0,	/* do the rebind work in one go (not in two steps) */
	MPOL_REBIND_STEP1 = 1,	/* first step: set all the new nodes */
	MPOL_REBIND_STEP2 = 2,	/* second step: clear all the disallowed nodes */
	MPOL_REBIND_NSTEP = 3,	/* count of the values above; keep last */
};

33
/*
 * Flags for set_mempolicy
 *
 * Optional mode flags, allocated from the high bits downwards.
 */
#define MPOL_F_STATIC_NODES	(1 << 15)
#define MPOL_F_RELATIVE_NODES	(1 << 14)
36

37 38 39 40
/*
 * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
 * either set_mempolicy() or mbind().
 */
#define MPOL_MODE_FLAGS	(MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
42 43

/* Flags for get_mempolicy */
#define MPOL_F_NODE	(1<<0)	/* return next IL mode instead of node mask */
#define MPOL_F_ADDR	(1<<1)	/* look up vma using address */
#define MPOL_F_MEMS_ALLOWED (1<<2) /* return allowed memories */
L
Linus Torvalds 已提交
47 48 49

/* Flags for mbind */
#define MPOL_MF_STRICT	(1<<0)	/* Verify existing pages in the mapping */
#define MPOL_MF_MOVE	(1<<1)	/* Move pages owned by this process to conform to mapping */
#define MPOL_MF_MOVE_ALL (1<<2)	/* Move every page to conform to mapping */
#define MPOL_MF_INTERNAL (1<<3)	/* Internal flags start here */
L
Linus Torvalds 已提交
53

54 55 56 57 58 59
/*
 * Internal flags that share the struct mempolicy flags word with
 * "mode flags".  These flags are allocated from bit 0 up, as they
 * are never OR'ed into the mode in mempolicy API arguments.
 */
#define MPOL_F_SHARED  (1 << 0)	/* identify shared policies */
#define MPOL_F_LOCAL   (1 << 1)	/* preferred local allocation */
#define MPOL_F_REBINDING (1 << 2)	/* identify policies in rebinding */
62

L
Linus Torvalds 已提交
63 64 65 66 67 68
#ifdef __KERNEL__

#include <linux/mmzone.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
#include <linux/spinlock.h>
69
#include <linux/nodemask.h>
70
#include <linux/pagemap.h>
L
Linus Torvalds 已提交
71

72
struct mm_struct;
L
Linus Torvalds 已提交
73 74 75 76 77 78 79 80 81 82 83 84 85 86

#ifdef CONFIG_NUMA

/*
 * Describe a memory policy.
 *
 * A mempolicy can be either associated with a process or with a VMA.
 * For VMA related allocations the VMA policy is preferred, otherwise
 * the process policy is used. Interrupts ignore the memory policy
 * of the current process.
 *
 * Locking policy for interlave:
 * In process context there is no locking because only the process accesses
 * its own state. All vma manipulation is somewhat protected by a down_read on
87
 * mmap_sem.
L
Linus Torvalds 已提交
88 89
 *
 * Freeing policy:
90
 * Mempolicy objects are reference counted.  A mempolicy will be freed when
91
 * mpol_put() decrements the reference count to zero.
L
Linus Torvalds 已提交
92
 *
93 94
 * Duplicating policy objects:
 * mpol_dup() allocates a new mempolicy and copies the specified mempolicy
95
 * to the new storage.  The reference count of the new object is initialized
96
 * to 1, representing the caller of mpol_dup().
L
Linus Torvalds 已提交
97 98 99
 */
struct mempolicy {
	atomic_t refcnt;
100
	unsigned short mode; 	/* See MPOL_* above */
101
	unsigned short flags;	/* See set_mempolicy() MPOL_F_* above */
L
Linus Torvalds 已提交
102 103
	union {
		short 		 preferred_node; /* preferred */
104
		nodemask_t	 nodes;		/* interleave/bind */
L
Linus Torvalds 已提交
105 106
		/* undefined for default */
	} v;
107 108 109 110
	union {
		nodemask_t cpuset_mems_allowed;	/* relative to these nodes */
		nodemask_t user_nodemask;	/* nodemask passed by user */
	} w;
L
Linus Torvalds 已提交
111 112 113 114 115 116 117
};

/*
 * Support for managing mempolicy data objects (clone, copy, destroy)
 * The default fast path of a NULL MPOL_DEFAULT policy is always inlined.
 */

118 119
extern void __mpol_put(struct mempolicy *pol);

/*
 * Release a reference on @pol.
 *
 * A NULL policy is handled inline as a no-op; any other policy is handed
 * to the out-of-line __mpol_put().
 */
static inline void mpol_put(struct mempolicy *pol)
{
	if (pol)
		__mpol_put(pol);
}

125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149
/*
 * Does mempolicy pol need explicit unref after use?
 * Currently only needed for shared policies.
 *
 * Returns 1 when @pol is non-NULL and has MPOL_F_SHARED set, 0 otherwise.
 */
static inline int mpol_needs_cond_ref(struct mempolicy *pol)
{
	if (!pol)
		return 0;

	return (pol->flags & MPOL_F_SHARED) != 0;
}

/*
 * Release @pol via __mpol_put(), but only when mpol_needs_cond_ref()
 * reports that it needs an explicit unref; otherwise do nothing.
 */
static inline void mpol_cond_put(struct mempolicy *pol)
{
	if (!mpol_needs_cond_ref(pol))
		return;

	__mpol_put(pol);
}

extern struct mempolicy *__mpol_cond_copy(struct mempolicy *tompol,
					  struct mempolicy *frompol);

/*
 * Inline front end for __mpol_cond_copy().
 *
 * A NULL @frompol is returned unchanged without calling out of line;
 * otherwise the result of __mpol_cond_copy(@tompol, @frompol) is returned.
 */
static inline struct mempolicy *mpol_cond_copy(struct mempolicy *tompol,
						struct mempolicy *frompol)
{
	return frompol ? __mpol_cond_copy(tompol, frompol) : frompol;
}

150 151
extern struct mempolicy *__mpol_dup(struct mempolicy *pol);

/*
 * Duplicate @pol.
 *
 * A NULL policy is returned as-is; any other policy is passed to
 * __mpol_dup() and that function's result is returned.
 */
static inline struct mempolicy *mpol_dup(struct mempolicy *pol)
{
	if (pol)
		pol = __mpol_dup(pol);
	return pol;
}

/* Accessors for the vm_policy member of a vma: read it, or assign @pol to it. */
#define vma_policy(vma) ((vma)->vm_policy)
#define vma_set_policy(vma, pol) ((vma)->vm_policy = (pol))

/*
 * Take an additional reference on @pol by atomically incrementing its
 * refcnt.  A NULL policy is ignored.
 */
static inline void mpol_get(struct mempolicy *pol)
{
	if (!pol)
		return;

	atomic_inc(&pol->refcnt);
}

extern int __mpol_equal(struct mempolicy *a, struct mempolicy *b);

/*
 * Compare two policies.
 *
 * Identical pointers (including two NULLs) compare equal inline and yield 1;
 * every other pair is decided by __mpol_equal().
 */
static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	return (a == b) ? 1 : __mpol_equal(a, b);
}

/*
 * Tree of shared policies for a shared memory region.
 * Maintain the policies in a pseudo mm that contains vmas. The vmas
 * carry the policy. As a special twist the pseudo mm is indexed in pages, not
 * bytes, so that we can work with shared memory segments bigger than
 * unsigned long.
 */

/* One entry in a shared-policy tree: a range and the policy attached to it. */
struct sp_node {
	struct rb_node nd;		/* rb-tree linkage */
	unsigned long start;		/* range start; presumably a page index -- confirm */
	unsigned long end;		/* range end; inclusiveness not visible here */
	struct mempolicy *policy;	/* policy for this range */
};

/* Head of a shared-policy tree: an rb-tree root plus a spinlock. */
struct shared_policy {
	struct rb_root root;	/* tree root; presumably holds struct sp_node -- confirm */
	spinlock_t lock;	/* NOTE(review): presumably guards @root -- confirm */
};

194
void mpol_shared_policy_init(struct shared_policy *sp, struct mempolicy *mpol);
L
Linus Torvalds 已提交
195 196 197 198 199 200 201 202 203
int mpol_set_shared_policy(struct shared_policy *info,
				struct vm_area_struct *vma,
				struct mempolicy *new);
void mpol_free_shared_policy(struct shared_policy *p);
struct mempolicy *mpol_shared_policy_lookup(struct shared_policy *sp,
					    unsigned long idx);

extern void numa_default_policy(void);
extern void numa_policy_init(void);
204 205
extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new,
				enum mpol_rebind_step step);
206
extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new);
207
extern void mpol_fix_fork_child_flag(struct task_struct *p);
208

209
extern struct zonelist *huge_zonelist(struct vm_area_struct *vma,
210 211
				unsigned long addr, gfp_t gfp_flags,
				struct mempolicy **mpol, nodemask_t **nodemask);
212
extern bool init_nodemask_of_mempolicy(nodemask_t *mask);
213
extern unsigned slab_node(struct mempolicy *policy);
L
Linus Torvalds 已提交
214

215
extern enum zone_type policy_zone;
216

217
static inline void check_highest_zone(enum zone_type k)
218
{
219
	if (k > policy_zone && k != ZONE_MOVABLE)
220 221 222
		policy_zone = k;
}

223 224 225
/*
 * Prototype only; the definition is not in this header.
 * NOTE(review): presumably migrates @mm's pages from @from_nodes to
 * @to_nodes -- confirm against the implementation.
 */
int do_migrate_pages(struct mm_struct *mm,
	const nodemask_t *from_nodes, const nodemask_t *to_nodes, int flags);

226 227

#ifdef CONFIG_TMPFS
/* String <-> mempolicy conversion helpers, only declared with CONFIG_TMPFS. */
extern int mpol_parse_str(char *str, struct mempolicy **mpol, int no_context);

extern int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
			int no_context);
#endif
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250

/* Check if a vma is migratable */
static inline int vma_migratable(struct vm_area_struct *vma)
{
	if (vma->vm_flags & (VM_IO|VM_HUGETLB|VM_PFNMAP|VM_RESERVED))
		return 0;
	/*
	 * Migration allocates pages in the highest zone. If we cannot
	 * do so then migration (at least from node to node) is not
	 * possible.
	 */
	if (vma->vm_file &&
		gfp_zone(mapping_gfp_mask(vma->vm_file->f_mapping))
								< policy_zone)
			return 0;
	return 1;
}

L
Linus Torvalds 已提交
251 252 253 254 255 256 257 258 259
#else

/* !CONFIG_NUMA: empty placeholder so pointers to the type still compile. */
struct mempolicy {};

/* !CONFIG_NUMA stub: every pair of policies is reported as equal (1). */
static inline int mpol_equal(struct mempolicy *a, struct mempolicy *b)
{
	return 1;
}

260
/* !CONFIG_NUMA stub: there is nothing to release, so this is a no-op. */
static inline void mpol_put(struct mempolicy *p)
{
}

264 265 266 267 268 269 270 271 272 273
/* !CONFIG_NUMA stub: no-op. */
static inline void mpol_cond_put(struct mempolicy *pol)
{
}

/* !CONFIG_NUMA stub: no copy is made; @from is returned and @to is unused. */
static inline struct mempolicy *mpol_cond_copy(struct mempolicy *to,
						struct mempolicy *from)
{
	return from;
}

L
Linus Torvalds 已提交
274 275 276 277
/* !CONFIG_NUMA stub: no reference counting, so this is a no-op. */
static inline void mpol_get(struct mempolicy *pol)
{
}

278
/* !CONFIG_NUMA stub: nothing is duplicated; always returns NULL. */
static inline struct mempolicy *mpol_dup(struct mempolicy *old)
{
	return NULL;
}

/* !CONFIG_NUMA: empty placeholder for the shared-policy tree head. */
struct shared_policy {};

/* !CONFIG_NUMA stub: shared policies cannot be set; always fails with -EINVAL. */
static inline int mpol_set_shared_policy(struct shared_policy *info,
					struct vm_area_struct *vma,
					struct mempolicy *new)
{
	return -EINVAL;
}

292 293
/* !CONFIG_NUMA stub: nothing to initialise; no-op. */
static inline void mpol_shared_policy_init(struct shared_policy *sp,
						struct mempolicy *mpol)
{
}

/* !CONFIG_NUMA stub: nothing to free; no-op. */
static inline void mpol_free_shared_policy(struct shared_policy *p)
{
}

/* !CONFIG_NUMA stub: no shared policies exist; lookup always returns NULL. */
static inline struct mempolicy *
mpol_shared_policy_lookup(struct shared_policy *sp, unsigned long idx)
{
	return NULL;
}

/*
 * !CONFIG_NUMA variants: reading a vma's policy yields NULL and setting
 * one expands to an empty statement (the arguments are not evaluated).
 */
#define vma_policy(vma) NULL
#define vma_set_policy(vma, pol) do {} while(0)

/* !CONFIG_NUMA stub: no-op. */
static inline void numa_policy_init(void)
{
}

/* !CONFIG_NUMA stub: no-op. */
static inline void numa_default_policy(void)
{
}

318
static inline void mpol_rebind_task(struct task_struct *tsk,
319 320
				const nodemask_t *new,
				enum mpol_rebind_step step)
321 322 323
{
}

324 325 326 327
/* !CONFIG_NUMA stub: no-op. */
static inline void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new)
{
}

328 329 330 331
/* !CONFIG_NUMA stub: no-op. */
static inline void mpol_fix_fork_child_flag(struct task_struct *p)
{
}

332
/*
 * !CONFIG_NUMA stub: reports no policy and no nodemask through the two
 * out-parameters and returns node 0's zonelist for @gfp_flags.
 * @vma and @addr are unused.
 */
static inline struct zonelist *huge_zonelist(struct vm_area_struct *vma,
				unsigned long addr, gfp_t gfp_flags,
				struct mempolicy **mpol, nodemask_t **nodemask)
{
	*mpol = NULL;
	*nodemask = NULL;
	return node_zonelist(0, gfp_flags);
}

341 342
/* !CONFIG_NUMA stub: @m is left untouched and false is returned. */
static inline bool init_nodemask_of_mempolicy(nodemask_t *m)
{
	return false;
}

343 344 345 346 347 348 349
/* !CONFIG_NUMA stub: performs no migration and returns 0. */
static inline int do_migrate_pages(struct mm_struct *mm,
			const nodemask_t *from_nodes,
			const nodemask_t *to_nodes, int flags)
{
	return 0;
}

350 351 352
/*
 * !CONFIG_NUMA stub: no-op.
 * NOTE(review): takes 'int k' while the CONFIG_NUMA version takes
 * 'enum zone_type k'.
 */
static inline void check_highest_zone(int k)
{
}
353 354

#ifdef CONFIG_TMPFS
355 356
/* !CONFIG_NUMA stub: parsing is unsupported; always returns 1 (error). */
static inline int mpol_parse_str(char *str, struct mempolicy **mpol,
				int no_context)
{
	return 1;	/* error */
}

361 362
/* !CONFIG_NUMA stub: writes nothing to @buffer and returns 0. */
static inline int mpol_to_str(char *buffer, int maxlen, struct mempolicy *pol,
				int no_context)
{
	return 0;
}
#endif

L
Linus Torvalds 已提交
368 369 370 371
#endif /* CONFIG_NUMA */
#endif /* __KERNEL__ */

#endif