gfp.h 14.8 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6
#ifndef __LINUX_GFP_H
#define __LINUX_GFP_H

#include <linux/mmzone.h>
#include <linux/stddef.h>
#include <linux/linkage.h>
7
#include <linux/topology.h>
8
#include <linux/mmdebug.h>
L
Linus Torvalds 已提交
9 10 11

struct vm_area_struct;

12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * Plain integer GFP bitmasks. Do not use this directly.
 *
 * Every flag occupies a distinct bit; the highest bit used here is bit 24
 * (___GFP_WRITE).
 */
#define ___GFP_DMA		0x01u
#define ___GFP_HIGHMEM		0x02u
#define ___GFP_DMA32		0x04u
#define ___GFP_MOVABLE		0x08u
#define ___GFP_WAIT		0x10u
#define ___GFP_HIGH		0x20u
#define ___GFP_IO		0x40u
#define ___GFP_FS		0x80u
#define ___GFP_COLD		0x100u
#define ___GFP_NOWARN		0x200u
#define ___GFP_REPEAT		0x400u
#define ___GFP_NOFAIL		0x800u
#define ___GFP_NORETRY		0x1000u
#define ___GFP_MEMALLOC		0x2000u
#define ___GFP_COMP		0x4000u
#define ___GFP_ZERO		0x8000u
#define ___GFP_NOMEMALLOC	0x10000u
#define ___GFP_HARDWALL		0x20000u
#define ___GFP_THISNODE		0x40000u
#define ___GFP_RECLAIMABLE	0x80000u
#define ___GFP_KMEMCG		0x100000u
#define ___GFP_NOTRACK		0x200000u
#define ___GFP_NO_KSWAPD	0x400000u
#define ___GFP_OTHER_NODE	0x800000u
#define ___GFP_WRITE		0x1000000u
/* If the above are modified, __GFP_BITS_SHIFT may need updating */
39

L
Linus Torvalds 已提交
40 41
/*
 * GFP bitmasks..
 *
 * Zone modifiers (see linux/mmzone.h - low four bits)
 *
 * Do not put any conditional on these. If necessary modify the definitions
 * without the underscores and use them consistently. The definitions here may
 * be used in bit comparisons.
 */
49 50 51 52
/* Typed (gfp_t) versions of the four zone modifier bits. */
#define __GFP_DMA	((__force gfp_t)___GFP_DMA)
#define __GFP_HIGHMEM	((__force gfp_t)___GFP_HIGHMEM)
#define __GFP_DMA32	((__force gfp_t)___GFP_DMA32)
#define __GFP_MOVABLE	((__force gfp_t)___GFP_MOVABLE)  /* Page is movable */
/* Union of all zone modifier bits. */
#define GFP_ZONEMASK	(__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE)
L
Linus Torvalds 已提交
54 55 56 57 58 59 60
/*
 * Action modifiers - these don't change the zoning
 *
 * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt
 * _might_ fail.  This depends upon the particular VM implementation.
 *
 * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller
 * cannot handle allocation failures.  This modifier is deprecated and no new
 * users should be added.
 *
 * __GFP_NORETRY: The VM implementation must not retry indefinitely.
 *
 * __GFP_MOVABLE: Flag that this page will be movable by the page migration
 * mechanism or reclaimed
 */
69 70 71 72 73 74 75 76 77
/* Typed (gfp_t) versions of the action modifier bits. */
#define __GFP_WAIT	((__force gfp_t)___GFP_WAIT)	/* Can wait and reschedule? */
#define __GFP_HIGH	((__force gfp_t)___GFP_HIGH)	/* Should access emergency pools? */
#define __GFP_IO	((__force gfp_t)___GFP_IO)	/* Can start physical IO? */
#define __GFP_FS	((__force gfp_t)___GFP_FS)	/* Can call down to low-level FS? */
#define __GFP_COLD	((__force gfp_t)___GFP_COLD)	/* Cache-cold page required */
#define __GFP_NOWARN	((__force gfp_t)___GFP_NOWARN)	/* Suppress page allocation failure warning */
#define __GFP_REPEAT	((__force gfp_t)___GFP_REPEAT)	/* See above */
#define __GFP_NOFAIL	((__force gfp_t)___GFP_NOFAIL)	/* See above */
#define __GFP_NORETRY	((__force gfp_t)___GFP_NORETRY) /* See above */
#define __GFP_MEMALLOC	((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */
#define __GFP_COMP	((__force gfp_t)___GFP_COMP)	/* Add compound page metadata */
#define __GFP_ZERO	((__force gfp_t)___GFP_ZERO)	/* Return zeroed page on success */
/*
 * Don't use emergency reserves. This takes precedence over the
 * __GFP_MEMALLOC flag if both are set.
 */
#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC)
#define __GFP_HARDWALL   ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */
#define __GFP_THISNODE	((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */
#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */
#define __GFP_NOTRACK	((__force gfp_t)___GFP_NOTRACK)  /* Don't track with kmemcheck */

#define __GFP_NO_KSWAPD	((__force gfp_t)___GFP_NO_KSWAPD)
#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */
#define __GFP_KMEMCG	((__force gfp_t)___GFP_KMEMCG) /* Allocation comes from a memcg-accounted resource */
#define __GFP_WRITE	((__force gfp_t)___GFP_WRITE)	/* Allocator intends to dirty page */

/*
 * This may seem redundant, but it's a way of annotating false positives vs.
 * allocations that simply cannot be supported (e.g. page tables).
 */
#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK)

102
#define __GFP_BITS_SHIFT 25	/* Room for N __GFP_FOO bits */
/* Mask with the low __GFP_BITS_SHIFT bits set. */
#define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1))
L
Linus Torvalds 已提交
104

J
Jeff Dike 已提交
105 106
/* This equals 0, but use constants in case they ever change */
#define GFP_NOWAIT	(GFP_ATOMIC & ~__GFP_HIGH)
/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */
#define GFP_ATOMIC	(__GFP_HIGH)
#define GFP_NOIO	(__GFP_WAIT)
#define GFP_NOFS	(__GFP_WAIT | __GFP_IO)
#define GFP_KERNEL	(__GFP_WAIT | __GFP_IO | __GFP_FS)
#define GFP_TEMPORARY	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
			 __GFP_RECLAIMABLE)
#define GFP_USER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
#define GFP_HIGHUSER	(__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
			 __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE	(__GFP_WAIT | __GFP_IO | __GFP_FS | \
				 __GFP_HARDWALL | __GFP_HIGHMEM | \
				 __GFP_MOVABLE)
#define GFP_IOFS	(__GFP_IO | __GFP_FS)
#define GFP_TRANSHUGE	(GFP_HIGHUSER_MOVABLE | __GFP_COMP | \
			 __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \
			 __GFP_NO_KSWAPD)
L
Linus Torvalds 已提交
124

125
/* GFP_THISNODE expands to an empty mask when CONFIG_NUMA is not set. */
#ifdef CONFIG_NUMA
#define GFP_THISNODE	(__GFP_THISNODE | __GFP_NOWARN | __GFP_NORETRY)
#else
#define GFP_THISNODE	((__force gfp_t)0)
#endif

C
Christoph Lameter 已提交
131
/* This mask makes up all the page movable related flags */
#define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE)

/* Control page allocator reclaim behavior */
#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\
			__GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\
			__GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC)

/* Control slab gfp mask during early boot */
#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS))

/* Control allocation constraints */
#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE)

/* Do not use these with a slab allocator */
#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK)
147

L
Linus Torvalds 已提交
148 149 150 151 152
/*
 * Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
 * platforms, used as appropriate on others
 */
#define GFP_DMA		__GFP_DMA

/* 4GB DMA on some platforms */
#define GFP_DMA32	__GFP_DMA32

156 157 158 159 160 161 162 163 164 165 166 167
/*
 * Translate the mobility bits of a GFP mask into a migrate type.
 *
 * WARN_ON() is triggered when both __GFP_RECLAIMABLE and __GFP_MOVABLE are
 * set. While page_group_by_mobility_disabled is non-zero the result is
 * always MIGRATE_UNMOVABLE; otherwise bit 1 of the result mirrors
 * __GFP_MOVABLE and bit 0 mirrors __GFP_RECLAIMABLE.
 */
static inline int allocflags_to_migratetype(gfp_t gfp_flags)
{
	int movable;
	int reclaimable;

	WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK);

	if (unlikely(page_group_by_mobility_disabled))
		return MIGRATE_UNMOVABLE;

	/* Group based on mobility */
	movable = (gfp_flags & __GFP_MOVABLE) != 0;
	reclaimable = (gfp_flags & __GFP_RECLAIMABLE) != 0;

	return (movable << 1) | reclaimable;
}
A
Andi Kleen 已提交
168

169 170 171 172 173 174
/*
 * OPT_ZONE_* name the optional zones: each one resolves to the real zone
 * when the matching CONFIG_ option is set and to ZONE_NORMAL otherwise.
 */
#ifdef CONFIG_HIGHMEM
#define OPT_ZONE_HIGHMEM ZONE_HIGHMEM
#else
#define OPT_ZONE_HIGHMEM ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA
#define OPT_ZONE_DMA ZONE_DMA
#else
#define OPT_ZONE_DMA ZONE_NORMAL
#endif

#ifdef CONFIG_ZONE_DMA32
#define OPT_ZONE_DMA32 ZONE_DMA32
#else
#define OPT_ZONE_DMA32 ZONE_NORMAL
#endif
186 187 188 189 190

/*
 * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the
 * zone to use given the lowest 4 bits of gfp_t. Entries are ZONES_SHIFT bits
 * long and there are 16 of them to cover all possible combinations of
 * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM.
 *
 * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA.
 * But __GFP_MOVABLE is not only a zone specifier but also an allocation
 * policy. Therefore __GFP_MOVABLE plus another zone selector is valid.
 * Only 1 bit of the lowest 3 bits (DMA,DMA32,HIGHMEM) can be set to "1".
 *
 *       bit       result
 *       =================
 *       0x0    => NORMAL
 *       0x1    => DMA or NORMAL
 *       0x2    => HIGHMEM or NORMAL
 *       0x3    => BAD (DMA+HIGHMEM)
 *       0x4    => DMA32 or NORMAL
 *       0x5    => BAD (DMA+DMA32)
 *       0x6    => BAD (HIGHMEM+DMA32)
 *       0x7    => BAD (HIGHMEM+DMA32+DMA)
 *       0x8    => NORMAL (MOVABLE+0)
 *       0x9    => DMA or NORMAL (MOVABLE+DMA)
 *       0xa    => MOVABLE (Movable is valid only if HIGHMEM is set too)
 *       0xb    => BAD (MOVABLE+HIGHMEM+DMA)
 *       0xc    => DMA32 or NORMAL (MOVABLE+DMA32)
 *       0xd    => BAD (MOVABLE+DMA32+DMA)
 *       0xe    => BAD (MOVABLE+DMA32+HIGHMEM)
 *       0xf    => BAD (MOVABLE+DMA32+HIGHMEM+DMA)
 *
 * ZONES_SHIFT must be <= 2 on 32 bit platforms.
 */

/* All 16 table entries must fit into a single long. */
#if 16 * ZONES_SHIFT > BITS_PER_LONG
#error ZONES_SHIFT too large to create GFP_ZONE_TABLE integer
#endif

/*
 * One ZONES_SHIFT-wide entry per valid zone-modifier combination; the entry
 * for combination N starts at bit N * ZONES_SHIFT.
 */
#define GFP_ZONE_TABLE ( \
	(ZONE_NORMAL << 0 * ZONES_SHIFT)				      \
	| (OPT_ZONE_DMA << ___GFP_DMA * ZONES_SHIFT)			      \
	| (OPT_ZONE_HIGHMEM << ___GFP_HIGHMEM * ZONES_SHIFT)		      \
	| (OPT_ZONE_DMA32 << ___GFP_DMA32 * ZONES_SHIFT)		      \
	| (ZONE_NORMAL << ___GFP_MOVABLE * ZONES_SHIFT)			      \
	| (OPT_ZONE_DMA << (___GFP_MOVABLE | ___GFP_DMA) * ZONES_SHIFT)	      \
	| (ZONE_MOVABLE << (___GFP_MOVABLE | ___GFP_HIGHMEM) * ZONES_SHIFT)   \
	| (OPT_ZONE_DMA32 << (___GFP_MOVABLE | ___GFP_DMA32) * ZONES_SHIFT)   \
)

/*
 * GFP_ZONE_BAD is a bitmap for all combinations of __GFP_DMA, __GFP_DMA32,
 * __GFP_HIGHMEM and __GFP_MOVABLE that are not permitted. One flag per
 * entry starting with bit 0. Bit is set if the combination is not
 * allowed.
 */
#define GFP_ZONE_BAD ( \
	1 << (___GFP_DMA | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32)				      \
	| 1 << (___GFP_DMA32 | ___GFP_HIGHMEM)				      \
	| 1 << (___GFP_DMA | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_HIGHMEM | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_HIGHMEM)		      \
	| 1 << (___GFP_MOVABLE | ___GFP_DMA32 | ___GFP_DMA | ___GFP_HIGHMEM)  \
)

/*
 * Look up the zone selected by the zone-modifier bits of @flags.
 *
 * The bits covered by GFP_ZONEMASK form an index into GFP_ZONE_TABLE, which
 * holds one ZONES_SHIFT-wide entry per combination. VM_BUG_ON() is handed
 * the GFP_ZONE_BAD bit for the same index, i.e. it is non-zero for a
 * combination that is not permitted.
 */
static inline enum zone_type gfp_zone(gfp_t flags)
{
	enum zone_type z;
	int bit = (__force int) (flags & GFP_ZONEMASK);

	/* Shift the entry for this combination down and mask off the rest. */
	z = (GFP_ZONE_TABLE >> (bit * ZONES_SHIFT)) &
					 ((1 << ZONES_SHIFT) - 1);
	VM_BUG_ON((GFP_ZONE_BAD >> bit) & 1);
	return z;
}

L
Linus Torvalds 已提交
263 264 265 266 267 268 269
/*
 * There is only one page-allocator function, and two main namespaces to
 * it. The alloc_page*() variants return 'struct page *' and as such
 * can allocate highmem pages, the *get*page*() variants return
 * virtual kernel addresses to the allocated page(s).
 */

270 271
/*
 * Pick the zonelist index for @flags: 1 when CONFIG_NUMA is enabled and
 * __GFP_THISNODE is set, 0 otherwise.
 */
static inline int gfp_zonelist(gfp_t flags)
{
	if (IS_ENABLED(CONFIG_NUMA) && unlikely(flags & __GFP_THISNODE))
		return 1;

	return 0;
}

L
Linus Torvalds 已提交
278 279 280
/*
 * We get the zone list from the current node and the gfp_mask.
 * This zone list contains a maximum of MAXNODES*MAX_NR_ZONES zones.
 * There are two zonelists per node, one for all zones with memory and
 * one containing just zones from the node the zonelist belongs to.
 *
 * For the normal case of non-DISCONTIGMEM systems the NODE_DATA() gets
 * optimized to &contig_page_data at compile-time.
 */
287 288
static inline struct zonelist *node_zonelist(int nid, gfp_t flags)
{
	/* gfp_zonelist() yields the index into the node's node_zonelists[]. */
	return NODE_DATA(nid)->node_zonelists + gfp_zonelist(flags);
}
L
Linus Torvalds 已提交
291 292 293 294

#ifndef HAVE_ARCH_FREE_PAGE
/*
 * Default no-op, compiled in only when HAVE_ARCH_FREE_PAGE is not defined
 * (presumably an architecture that defines it supplies its own version).
 */
static inline void arch_free_page(struct page *page, int order)
{
}
#endif
N
Nick Piggin 已提交
295 296 297
#ifndef HAVE_ARCH_ALLOC_PAGE
/*
 * Default no-op, compiled in only when HAVE_ARCH_ALLOC_PAGE is not defined
 * (presumably an architecture that defines it supplies its own version).
 */
static inline void arch_alloc_page(struct page *page, int order)
{
}
#endif
L
Linus Torvalds 已提交
298

299
struct page *
300
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
301 302 303 304 305 306
		       struct zonelist *zonelist, nodemask_t *nodemask);

static inline struct page *
__alloc_pages(gfp_t gfp_mask, unsigned int order,
		struct zonelist *zonelist)
{
307
	return __alloc_pages_nodemask(gfp_mask, order, zonelist, NULL);
308 309
}

A
Al Viro 已提交
310
/*
 * Allocate 2^@order... NOTE(review): the meaning of @order is not visible
 * here; it is passed through unchanged to __alloc_pages().
 *
 * A negative @nid is replaced by numa_node_id() before the zonelist for the
 * node is looked up with node_zonelist().
 */
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	/* Unknown node is current node */
	if (nid < 0)
		nid = numa_node_id();

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}

320 321 322
/*
 * Like alloc_pages_node(), but @nid is not remapped: VM_BUG_ON() is handed
 * a true condition when @nid is negative, not below MAX_NUMNODES, or not
 * online.
 */
static inline struct page *alloc_pages_exact_node(int nid, gfp_t gfp_mask,
						unsigned int order)
{
	VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES || !node_online(nid));

	return __alloc_pages(gfp_mask, order, node_zonelist(nid, gfp_mask));
}

L
Linus Torvalds 已提交
328
/*
 * alloc_pages() / alloc_pages_vma(): with CONFIG_NUMA these resolve to
 * alloc_pages_current() and an out-of-line alloc_pages_vma(); without it
 * both collapse to alloc_pages_node() on numa_node_id(), and the vma, addr
 * and node arguments of alloc_pages_vma() are ignored.
 */
#ifdef CONFIG_NUMA
extern struct page *alloc_pages_current(gfp_t gfp_mask, unsigned order);

static inline struct page *
alloc_pages(gfp_t gfp_mask, unsigned int order)
{
	return alloc_pages_current(gfp_mask, order);
}
extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order,
			struct vm_area_struct *vma, unsigned long addr,
			int node);
#else
#define alloc_pages(gfp_mask, order) \
		alloc_pages_node(numa_node_id(), gfp_mask, order)
#define alloc_pages_vma(gfp_mask, order, vma, addr, node)	\
	alloc_pages(gfp_mask, order)
#endif
/* Order-0 shorthands for the allocators above. */
#define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0)
#define alloc_page_vma(gfp_mask, vma, addr)			\
	alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id())
#define alloc_page_vma_node(gfp_mask, vma, addr, node)		\
	alloc_pages_vma(gfp_mask, 0, vma, addr, node)
L
Linus Torvalds 已提交
350

351 352
extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order);
extern unsigned long get_zeroed_page(gfp_t gfp_mask);

void *alloc_pages_exact(size_t size, gfp_t gfp_mask);
void free_pages_exact(void *virt, size_t size);
/* This is different from alloc_pages_exact_node !!! */
void *alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);

/* Order-0 shorthand for __get_free_pages(). */
#define __get_free_page(gfp_mask) \
		__get_free_pages((gfp_mask), 0)

/* __get_free_pages() with GFP_DMA added to the mask. */
#define __get_dma_pages(gfp_mask, order) \
		__get_free_pages((gfp_mask) | GFP_DMA, (order))
L
Linus Torvalds 已提交
364

365 366
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
extern void free_hot_cold_page(struct page *page, int cold);
extern void free_hot_cold_page_list(struct list_head *list, int cold);

extern void __free_memcg_kmem_pages(struct page *page, unsigned int order);
extern void free_memcg_kmem_pages(unsigned long addr, unsigned int order);

/* Order-0 shorthands for __free_pages() and free_pages(). */
#define __free_page(page) __free_pages((page), 0)
#define free_page(addr) free_pages((addr), 0)
L
Linus Torvalds 已提交
375 376

/* Page allocator setup and per-cpu page draining entry points. */
void page_alloc_init(void);
void drain_zone_pages(struct zone *zone, struct per_cpu_pages *pcp);
void drain_all_pages(void);
void drain_local_pages(void *dummy);
L
Linus Torvalds 已提交
380

381 382 383 384 385 386 387
/*
 * gfp_allowed_mask is set to GFP_BOOT_MASK during early boot to restrict what
 * GFP flags are used before interrupts are enabled. Once interrupts are
 * enabled, it is set to __GFP_BITS_MASK while the system is running. During
 * hibernation, it is used by PM to avoid I/O during memory allocation while
 * devices are suspended.
 */
388 389
extern gfp_t gfp_allowed_mask;

390 391 392
/* Returns true if the gfp_mask allows use of ALLOC_NO_WATERMARK */
bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);

393 394
extern void pm_restrict_gfp_mask(void);
extern void pm_restore_gfp_mask(void);
395

396 397 398 399 400 401 402 403 404
/*
 * pm_suspended_storage() is provided out of line when CONFIG_PM_SLEEP is
 * set; otherwise this stub always reports false.
 */
#ifndef CONFIG_PM_SLEEP
static inline bool pm_suspended_storage(void)
{
	return false;
}
#else
extern bool pm_suspended_storage(void);
#endif /* CONFIG_PM_SLEEP */

405 406 407
#ifdef CONFIG_CMA

/* The below functions must be run on a range from a single zone. */
extern int alloc_contig_range(unsigned long start, unsigned long end,
			      unsigned migratetype);
extern void free_contig_range(unsigned long pfn, unsigned nr_pages);

/* CMA stuff */
extern void init_cma_reserved_pageblock(struct page *page);

#endif

L
Linus Torvalds 已提交
417
#endif /* __LINUX_GFP_H */