gfp.h 11.6 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6
#ifndef __LINUX_GFP_H
#define __LINUX_GFP_H

#include <linux/mmzone.h>
#include <linux/stddef.h>
#include <linux/linkage.h>
7
#include <linux/topology.h>
8
#include <linux/mmdebug.h>
L
Linus Torvalds 已提交
9 10 11 12 13

struct vm_area_struct;

/*
 * GFP bitmasks..
14 15 16 17 18 19
 *
 * Zone modifiers (see linux/mmzone.h - low three bits)
 *
 * Do not put any conditional on these. If necessary modify the definitions
 * without the underscores and use them consistently. The definitions here may
 * be used in bit comparisons.
L
Linus Torvalds 已提交
20
 */
A
Al Viro 已提交
21 22
/* Zone selector bits: these four flags occupy bits 0-3 of a gfp_t */
#define __GFP_DMA	((__force gfp_t)0x01u)
#define __GFP_HIGHMEM	((__force gfp_t)0x02u)
#define __GFP_DMA32	((__force gfp_t)0x04u)
#define __GFP_MOVABLE	((__force gfp_t)0x08u)  /* Page is movable */
/* Union of all four zone selector bits above */
#define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
L
Linus Torvalds 已提交
26 27 28 29 30 31 32 33 34 35
/*
 * Action modifiers - doesn't change the zoning
 *
 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
 * _might_ fail.  This depends upon the particular VM implementation.
 *
 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
 * cannot handle allocation failures.
 *
 * __GFP_NORETRY: The VM implementation must not retry indefinitely.
36 37 38
 *
 * __GFP_MOVABLE: Flag that this page will be movable by the page migration
 * mechanism or reclaimed
L
Linus Torvalds 已提交
39
 */
A
Al Viro 已提交
40 41 42 43 44 45
#define __GFP_WAIT	((__force gfp_t)0x10u)	/* Can wait and reschedule? */
#define __GFP_HIGH	((__force gfp_t)0x20u)	/* Should access emergency pools? */
#define __GFP_IO	((__force gfp_t)0x40u)	/* Can start physical IO? */
#define __GFP_FS	((__force gfp_t)0x80u)	/* Can call down to low-level FS? */
#define __GFP_COLD	((__force gfp_t)0x100u)	/* Cache-cold page required */
#define __GFP_NOWARN	((__force gfp_t)0x200u)	/* Suppress page allocation failure warning */
#define __GFP_REPEAT	((__force gfp_t)0x400u)	/* See above */
#define __GFP_NOFAIL	((__force gfp_t)0x800u)	/* See above */
#define __GFP_NORETRY	((__force gfp_t)0x1000u) /* See above */
#define __GFP_COMP	((__force gfp_t)0x4000u) /* Add compound page metadata */
#define __GFP_ZERO	((__force gfp_t)0x8000u) /* Return zeroed page on success */
#define __GFP_NOMEMALLOC ((__force gfp_t)0x10000u) /* Don't use emergency reserves */
#define __GFP_HARDWALL   ((__force gfp_t)0x20000u) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE	((__force gfp_t)0x40000u) /* No fallback, no policies */
#define __GFP_RECLAIMABLE ((__force gfp_t)0x80000u) /* Page is reclaimable */
55 56

/* __GFP_NOTRACK only carries a flag bit when kmemcheck is configured in */
#ifdef CONFIG_KMEMCHECK
#define __GFP_NOTRACK	((__force gfp_t)0x200000u)  /* Don't track with kmemcheck */
#else
#define __GFP_NOTRACK	((__force gfp_t)0)
#endif
L
Linus Torvalds 已提交
61

V
Vegard Nossum 已提交
62 63 64 65 66 67 68
/*
 * This may seem redundant, but it's a way of annotating false positives vs.
 * allocations that simply cannot be supported (e.g. page tables).
 */
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)

#define __GFP_BITS_SHIFT 22	/* Room for 22 __GFP_FOO bits */
/* All-ones mask covering the low __GFP_BITS_SHIFT bits */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
L
Linus Torvalds 已提交
70

J
Jeff Dike 已提交
71 72
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC	(__GFP_HIGH)
#define GFP_NOIO	(__GFP_WAIT)
#define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
#define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
#define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
			 __GFP_RECLAIMABLE)
#define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
			 __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
				 __GFP_HARDWALL | __GFP_HIGHMEM | \
				 __GFP_MOVABLE)
L
Linus Torvalds 已提交
86

87
/* Without CONFIG_NUMA, GFP_THISNODE expands to 0 and contributes no flag bits */
#ifdef CONFIG_NUMA
#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
#else
#define GFP_THISNODE	((__force gfp_t)0)
#endif

C
Christoph Lameter 已提交
93
/* This mask makes up all the page movable related flags */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)
C
Christoph Lameter 已提交
95 96 97 98 99 100

/* Control page allocator reclaim behavior */
#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
			__GFP_NORETRY|__GFP_NOMEMALLOC)

101
/*
 * Control slab gfp mask during early boot.
 *
 * The expansion is fully parenthesized so the mask stays a single operand
 * when combined with unary or higher-precedence operators at the use site
 * (e.g. ~GFP_BOOT_MASK or GFP_BOOT_MASK == x).
 */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))
103

C
Christoph Lameter 已提交
104 105 106 107 108
/* Control allocation constraints */
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)

/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
109

L
Linus Torvalds 已提交
110 111 112 113 114
/* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
   platforms, used as appropriate on others */

#define GFP_DMA		__GFP_DMA

A
Andi Kleen 已提交
115 116 117
/* 4GB DMA on some platforms */
#define GFP_DMA32	__GFP_DMA32

118 119 120 121 122 123 124 125 126 127 128 129
/*
 * Translate the mobility bits of @gfp_flags into a migrate type index.
 *
 * Warns if both __GFP_MOVABLE and __GFP_RECLAIMABLE are set.  Returns
 * MIGRATE_UNMOVABLE when page_group_by_mobility_disabled is set; otherwise
 * bit 1 of the result reflects __GFP_MOVABLE and bit 0 reflects
 * __GFP_RECLAIMABLE.
 */
static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
	int movable = (gfp_flags & __GFP_MOVABLE) != 0;
	int reclaimable = (gfp_flags & __GFP_RECLAIMABLE) != 0;

	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Group based on mobility */
	return (movable << 1) | reclaimable;
}
A
Andi Kleen 已提交
130

131 132 133 134 135 136
/*
 * Each OPT_ZONE_* names its optional zone when the matching config option is
 * enabled and falls back to ZONE_NORMAL otherwise.
 */
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
#define OPT_ZONE_HIGHMEM ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA
#define OPT_ZONE_DMA ZONE_DMA
#else
#define OPT_ZONE_DMA ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA32
#define OPT_ZONE_DMA32 ZONE_DMA32
#else
#define OPT_ZONE_DMA32 ZONE_NORMAL
#endif
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226

/*
 * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
 * zone to use given the lowest 4 bits of gfp_t. Entries are ZONES_SHIFT long
 * and there are 16 of them to cover all possible combinations of
 * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM
 *
 * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
 * But GFP_MOVABLE is not only a zone specifier but also an allocation
 * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
 * Only one of the lowest 3 bits (DMA, DMA32, HIGHMEM) can be set to "1".
 *
 *       bit       result
 *       =================
 *       0x0    => NORMAL
 *       0x1    => DMA or NORMAL
 *       0x2    => HIGHMEM or NORMAL
 *       0x3    => BAD (DMA+HIGHMEM)
 *       0x4    => DMA32 or NORMAL
 *       0x5    => BAD (DMA+DMA32)
 *       0x6    => BAD (HIGHMEM+DMA32)
 *       0x7    => BAD (HIGHMEM+DMA32+DMA)
 *       0x8    => NORMAL (MOVABLE+0)
 *       0x9    => DMA or NORMAL (MOVABLE+DMA)
 *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
 *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
 *       0xc    => DMA32 (MOVABLE+DMA32)
 *       0xd    => BAD (MOVABLE+DMA32+DMA)
 *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
 *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
 *
 * ZONES_SHIFT must be <= 2 on 32 bit platforms.
 */

#if 16 * ZONES_SHIFT > BITS_PER_LONG
#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif

/*
 * One entry per permitted flag combination, each shifted left by
 * (combination value * ZONES_SHIFT).  Combinations not listed here
 * contribute no bits to the table.
 */
#define GFP_ZONE_TABLE ( \
	(ZONE_NORMAL << 0 * ZONES_SHIFT)				\
	| (OPT_ZONE_DMA << __GFP_DMA * ZONES_SHIFT) 			\
	| (OPT_ZONE_HIGHMEM << __GFP_HIGHMEM * ZONES_SHIFT)		\
	| (OPT_ZONE_DMA32 << __GFP_DMA32 * ZONES_SHIFT)			\
	| (ZONE_NORMAL << __GFP_MOVABLE * ZONES_SHIFT)			\
	| (OPT_ZONE_DMA << (__GFP_MOVABLE | __GFP_DMA) * ZONES_SHIFT)	\
	| (ZONE_MOVABLE << (__GFP_MOVABLE | __GFP_HIGHMEM) * ZONES_SHIFT)\
	| (OPT_ZONE_DMA32 << (__GFP_MOVABLE | __GFP_DMA32) * ZONES_SHIFT)\
)

/*
 * GFP_ZONE_BAD is a bitmap of all combinations of __GFP_DMA, __GFP_DMA32,
 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. There is one flag
 * per combination, starting with bit 0. A bit is set if the combination is
 * not allowed.
 */
#define GFP_ZONE_BAD ( \
	1 << (__GFP_DMA | __GFP_HIGHMEM)				\
	| 1 << (__GFP_DMA | __GFP_DMA32)				\
	| 1 << (__GFP_DMA32 | __GFP_HIGHMEM)				\
	| 1 << (__GFP_DMA | __GFP_DMA32 | __GFP_HIGHMEM)		\
	| 1 << (__GFP_MOVABLE | __GFP_HIGHMEM | __GFP_DMA)		\
	| 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA)		\
	| 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_HIGHMEM)		\
	| 1 << (__GFP_MOVABLE | __GFP_DMA32 | __GFP_DMA | __GFP_HIGHMEM)\
)

/*
 * Look up the zone selected by the zone modifier bits of @flags.
 *
 * The bits covered by GFP_ZONEMASK form an index; the matching
 * ZONES_SHIFT-wide field of GFP_ZONE_TABLE is the result.  Indexes flagged in
 * GFP_ZONE_BAD are rejected with BUILD_BUG_ON() when the compiler reports the
 * index as constant, and otherwise with BUG_ON() on CONFIG_DEBUG_VM builds
 * only.
 */
static inline enum zone_type gfp_zone(gfp_t flags)
{
	enum zone_type z;
	int bit = flags & GFP_ZONEMASK;

	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
					 ((1 << ZONES_SHIFT) - 1);

	if (__builtin_constant_p(bit)) {
		BUILD_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
	} else {
#ifdef CONFIG_DEBUG_VM
		BUG_ON((GFP_ZONE_BAD >> bit) & 1);
#endif
	}
	return z;
}

L
Linus Torvalds 已提交
232 233 234 235 236 237 238
/*
 * There is only one page-allocator function, and two main namespaces to
 * it. The alloc_page*() variants return 'struct page *' and as such
 * can allocate highmem pages, the *get*page*() variants return
 * virtual kernel addresses to the allocated page(s).
 */

239 240 241 242 243 244 245 246
/*
 * Pick the zonelist index for @flags: 1 when NUMA_BUILD is true and
 * __GFP_THISNODE is set, 0 in every other case.
 */
static inline int gfp_zonelist(gfp_t flags)
{
	return (NUMA_BUILD && unlikely(flags & __GFP_THISNODE)) ? 1 : 0;
}

L
Linus Torvalds 已提交
247 248 249
/*
 * We get the zone list from the current node and the gfp_mask.
 * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
250 251
 * There are two zonelists per node, one for all zones with memory and
 * one containing just zones from the node the zonelist belongs to.
L
Linus Torvalds 已提交
252 253 254 255
 *
 * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
 * optimized to &contig_page_data at compile-time.
 */
256 257
/*
 * Return the entry of node @nid's node_zonelists that gfp_zonelist(@flags)
 * selects.
 */
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}
L
Linus Torvalds 已提交
260 261 262 263

/* Empty default, used unless HAVE_ARCH_FREE_PAGE is defined */
#ifndef HAVE_ARCH_FREE_PAGE
static inline void arch_free_page(struct page *page, int order) { }
#endif
/* Empty default, used unless HAVE_ARCH_ALLOC_PAGE is defined */
#ifndef HAVE_ARCH_ALLOC_PAGE
static inline void arch_alloc_page(struct page *page, int order) { }
#endif
L
Linus Torvalds 已提交
267

268
struct page *
269
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
270 271 272 273 274 275
		       struct zonelist *zonelist, nodemask_t *nodemask);

static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
276
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
277 278
}

A
Al Viro 已提交
279
/*
 * Allocate through __alloc_pages() using the zonelist of node @nid.
 * A negative @nid is replaced by numa_node_id().
 */
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	/* Unknown node is current node */
	if (nid < 0)
		nid = numa_node_id();

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}

289 290 291 292 293 294 295 296
/*
 * Same as alloc_pages_node() but without the negative-node fallback: @nid
 * must lie in [0, MAX_NUMNODES), which is checked with VM_BUG_ON().
 */
static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	struct zonelist *zonelist;

	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);

	zonelist = node_zonelist(nid, gfp_mask);
	return __alloc_pages(gfp_mask, order, zonelist);
}

L
Linus Torvalds 已提交
297
/*
 * With CONFIG_NUMA, alloc_pages() forwards to alloc_pages_current() and
 * alloc_page_vma() is an external function.  Without it, both are macros:
 * alloc_pages() expands to alloc_pages_node() on numa_node_id(), and
 * alloc_page_vma() expands to an order-0 alloc_pages() that does not use
 * its vma and addr arguments.
 */
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);

static inline struct page *
alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages_current(gfp_mask, order);
}
extern struct page *alloc_page_vma(gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr);
#else
#define alloc_pages(gfp_mask, order) \
		alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_page_vma(gfp_mask, vma, addr) alloc_pages(gfp_mask, 0)
#endif
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)

314 315
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);
L
Linus Torvalds 已提交
316

317 318 319
void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);

L
Linus Torvalds 已提交
320 321 322 323 324 325
#define __get_free_page(gfp_mask) \
		__get_free_pages((gfp_mask),0)

#define __get_dma_pages(gfp_mask, order) \
		__get_free_pages((gfp_mask) | GFP_DMA,(order))

326 327 328
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_page(struct page *page);
L
Linus Torvalds 已提交
329 330 331 332 333

#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr),0)

void page_alloc_init(void);
334
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
335 336
void drain_all_pages(void);
void drain_local_pages(void *dummy);
L
Linus Torvalds 已提交
337

338 339 340 341 342 343 344 345 346 347 348 349
extern bool oom_killer_disabled;

/* Set the global oom_killer_disabled flag to true. */
static inline void oom_killer_disable(void)
{
	oom_killer_disabled = true;
}

/* Set the global oom_killer_disabled flag back to false. */
static inline void oom_killer_enable(void)
{
	oom_killer_disabled = false;
}

350 351 352 353 354 355 356
extern gfp_t gfp_allowed_mask;

/* Overwrite the global gfp_allowed_mask with @mask. */
static inline void set_gfp_allowed_mask(gfp_t mask)
{
	gfp_allowed_mask = mask;
}

L
Linus Torvalds 已提交
357
#endif /* __LINUX_GFP_H */