swap.h 16.7 KB
Newer Older
L
Linus Torvalds 已提交
1 2 3 4 5 6 7
#ifndef _LINUX_SWAP_H
#define _LINUX_SWAP_H

#include <linux/spinlock.h>
#include <linux/linkage.h>
#include <linux/mmzone.h>
#include <linux/list.h>
8
#include <linux/memcontrol.h>
L
Linus Torvalds 已提交
9
#include <linux/sched.h>
10
#include <linux/node.h>
11
#include <linux/fs.h>
A
Arun Sharma 已提交
12
#include <linux/atomic.h>
13
#include <linux/page-flags.h>
L
Linus Torvalds 已提交
14 15
#include <asm/page.h>

16 17
struct notifier_block;

A
Andrew Morton 已提交
18 19
struct bio;

L
Linus Torvalds 已提交
20 21 22
#define SWAP_FLAG_PREFER	0x8000	/* set if swap priority specified */
#define SWAP_FLAG_PRIO_MASK	0x7fff
#define SWAP_FLAG_PRIO_SHIFT	0
23 24 25
#define SWAP_FLAG_DISCARD	0x10000 /* enable discard for swap */
#define SWAP_FLAG_DISCARD_ONCE	0x20000 /* discard swap area at swapon-time */
#define SWAP_FLAG_DISCARD_PAGES 0x40000 /* discard page-clusters after use */
L
Linus Torvalds 已提交
26

27
#define SWAP_FLAGS_VALID	(SWAP_FLAG_PRIO_MASK | SWAP_FLAG_PREFER | \
28 29
				 SWAP_FLAG_DISCARD | SWAP_FLAG_DISCARD_ONCE | \
				 SWAP_FLAG_DISCARD_PAGES)
30

L
Linus Torvalds 已提交
31 32 33 34 35 36 37 38 39 40 41 42 43 44
/*
 * Test whether the running task has PF_KSWAPD set in its flags.
 * The masked value is returned as-is, so callers get zero or non-zero
 * rather than a normalised 0/1.
 */
static inline int current_is_kswapd(void)
{
	return (current->flags & PF_KSWAPD);
}

/*
 * MAX_SWAPFILES defines the maximum number of swaptypes: things which can
 * be swapped to.  The swap type and the offset into that swap type are
 * encoded into pte's and into pgoff_t's in the swapcache.  Using five bits
 * for the type means that the maximum number of swapcache pages is 27 bits
 * on 32-bit-pgoff_t architectures.  And that assumes that the architecture packs
 * the type/offset into the pte as 5/27 as well.
 */
#define MAX_SWAPFILES_SHIFT	5
45 46 47 48 49 50 51 52 53 54 55 56 57 58

/*
 * Use some of the swap files numbers for other purposes. This
 * is a convenient way to hook into the VM to trigger special
 * actions on faults.
 */

/*
 * NUMA node memory migration support
 */
#ifdef CONFIG_MIGRATION
#define SWP_MIGRATION_NUM 2
#define SWP_MIGRATION_READ	(MAX_SWAPFILES + SWP_HWPOISON_NUM)
#define SWP_MIGRATION_WRITE	(MAX_SWAPFILES + SWP_HWPOISON_NUM + 1)
59
#else
60
#define SWP_MIGRATION_NUM 0
61
#endif
L
Linus Torvalds 已提交
62

63 64 65 66 67 68 69 70 71 72 73 74 75
/*
 * Handling of hardware poisoned pages with memory corruption.
 */
#ifdef CONFIG_MEMORY_FAILURE
#define SWP_HWPOISON_NUM 1
#define SWP_HWPOISON		MAX_SWAPFILES
#else
#define SWP_HWPOISON_NUM 0
#endif

#define MAX_SWAPFILES \
	((1 << MAX_SWAPFILES_SHIFT) - SWP_MIGRATION_NUM - SWP_HWPOISON_NUM)

L
Linus Torvalds 已提交
76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93
/*
 * Magic header for a swap area. The first part of the union is
 * what the swap magic looks like for the old (limited to 128MB)
 * swap area format, the second part of the union adds - in the
 * old reserved area - some extra information. Note that the first
 * kilobyte is reserved for boot loader or disk label stuff...
 *
 * Having the magic at the end of the PAGE_SIZE makes detecting swap
 * areas somewhat tricky on machines that support multiple page sizes.
 * For 2.5 we'll probably want to move the magic to just beyond the
 * bootbits...
 */
union swap_header {
	struct {
		char reserved[PAGE_SIZE - 10];
		char magic[10];			/* SWAP-SPACE or SWAPSPACE2 */
	} magic;
	struct {
94 95 96 97 98 99 100 101
		char		bootbits[1024];	/* Space for disklabel etc. */
		__u32		version;
		__u32		last_page;
		__u32		nr_badpages;
		unsigned char	sws_uuid[16];
		unsigned char	sws_volume[16];
		__u32		padding[117];
		__u32		badpages[1];
L
Linus Torvalds 已提交
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
	} info;
};

 /* A swap entry has to fit into an "unsigned long", as
  * the entry is hidden in the "index" field of the
  * swapper address space.
  */
typedef struct {
	unsigned long val;	/* the encoded swap entry */
} swp_entry_t;

/*
 * current->reclaim_state points to one of these when a task is running
 * memory reclaim.
 */
struct reclaim_state {
	/* NOTE(review): presumably a count of reclaimed slab pages; confirm against the users */
	unsigned long reclaimed_slab;
};

#ifdef __KERNEL__

struct address_space;
struct sysinfo;
struct writeback_control;
struct zone;

/*
 * A swap extent maps a range of a swapfile's PAGE_SIZE pages onto a range of
 * disk blocks.  A list of swap extents maps the entire swapfile.  (Where the
 * term `swapfile' refers to either a blockdevice or an IS_REG file.  Apart
 * from setup, they're handled identically.)
 *
 * We always assume that blocks are of size PAGE_SIZE.
 */
struct swap_extent {
	struct list_head list;		/* links the extents of one swapfile */
	pgoff_t start_page;		/* first swapfile page in this extent */
	pgoff_t nr_pages;		/* number of pages in this extent */
	sector_t start_block;		/* disk block that start_page maps to */
};

/*
 * Max bad pages in the new format: the space between info.badpages and
 * the trailing magic string, counted in sizeof(int) units.
 *
 * __swapoffset() yields the byte offset of a (possibly nested) member
 * of union swap_header.
 */
#define __swapoffset(member) \
	((unsigned long)&((union swap_header *)0)->member)
#define MAX_SWAP_BADPAGES						\
	((__swapoffset(magic.magic) - __swapoffset(info.badpages)) /	\
	 sizeof(int))

/* Bit values stored in swap_info_struct.flags. */
enum {
	SWP_USED	= (1 << 0),	/* is slot in swap_info[] used? */
	SWP_WRITEOK	= (1 << 1),	/* ok to write to this swap?	*/
	SWP_DISCARDABLE = (1 << 2),	/* blkdev support discard */
	SWP_DISCARDING	= (1 << 3),	/* now discarding a free cluster */
	SWP_SOLIDSTATE	= (1 << 4),	/* blkdev seeks are cheap */
	SWP_CONTINUED	= (1 << 5),	/* swap_map has count continuation */
	SWP_BLKDEV	= (1 << 6),	/* it's a block device */
	SWP_FILE	= (1 << 7),	/* set after swap_activate success */
	SWP_AREA_DISCARD = (1 << 8),	/* single-time swap area discards */
	SWP_PAGE_DISCARD = (1 << 9),	/* freed swap page-cluster discards */
					/* add others here before... */
	SWP_SCANNING	= (1 << 10),	/* refcount in scan_swap_map */
};

165
#define SWAP_CLUSTER_MAX 32UL
166
#define COMPACT_CLUSTER_MAX SWAP_CLUSTER_MAX
L
Linus Torvalds 已提交
167

168 169 170 171 172 173 174 175 176
/*
 * Ratio between the present memory in the zone and the "gap" that
 * we're allowing kswapd to shrink in addition to the per-zone high
 * wmark, even for zones that already have the high wmark satisfied,
 * in order to provide better per-zone lru behavior. We are ok to
 * spend not more than 1% of the memory for this zone balancing "gap".
 */
#define KSWAPD_ZONE_BALANCE_GAP_RATIO 100

H
Hugh Dickins 已提交
177 178 179 180 181
#define SWAP_MAP_MAX	0x3e	/* Max duplication count, in first swap_map */
#define SWAP_MAP_BAD	0x3f	/* Note pageblock is bad, in first swap_map */
#define SWAP_HAS_CACHE	0x40	/* Flag page is cached, in first swap_map */
#define SWAP_CONT_MAX	0x7f	/* Max count, in each swap_map continuation */
#define COUNT_CONTINUED	0x80	/* See swap_map continuation for full count */
H
Hugh Dickins 已提交
182
#define SWAP_MAP_SHMEM	0xbf	/* Owned by shmem/tmpfs, in first swap_map */
H
Hugh Dickins 已提交
183

184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200
/*
 * We use this to track usage of a cluster. A cluster is a block of swap disk
 * space that is SWAPFILE_CLUSTER pages long and naturally aligned on disk. All
 * free clusters are organized into a list. We fetch an entry from the list to
 * get a free cluster.
 *
 * The data field stores the next cluster if the cluster is free, or the
 * cluster usage counter otherwise. The flags field determines if a cluster is
 * free. This is protected by swap_info_struct.lock.
 */
struct swap_cluster_info {
	unsigned int data:24;	/* next cluster when free, else usage counter */
	unsigned int flags:8;	/* CLUSTER_FLAG_* values */
};
#define CLUSTER_FLAG_FREE 1 /* This cluster is free */
#define CLUSTER_FLAG_NEXT_NULL 2 /* This cluster has no next cluster */

L
Linus Torvalds 已提交
201 202 203 204
/*
 * The in-memory structure used to track swap areas.
 */
struct swap_info_struct {
205 206 207 208
	unsigned long	flags;		/* SWP_USED etc: see above */
	signed short	prio;		/* swap priority of this type */
	signed char	type;		/* strange name for an index */
	signed char	next;		/* next type on the swap list */
H
Hugh Dickins 已提交
209 210
	unsigned int	max;		/* extent of the swap_map */
	unsigned char *swap_map;	/* vmalloc'ed array of usage counts */
211 212 213
	struct swap_cluster_info *cluster_info; /* cluster info. Only for SSD */
	struct swap_cluster_info free_cluster_head; /* free cluster list head */
	struct swap_cluster_info free_cluster_tail; /* free cluster list tail */
H
Hugh Dickins 已提交
214 215 216 217 218 219
	unsigned int lowest_bit;	/* index of first free in swap_map */
	unsigned int highest_bit;	/* index of last free in swap_map */
	unsigned int pages;		/* total of usable pages of swap */
	unsigned int inuse_pages;	/* number of those currently in use */
	unsigned int cluster_next;	/* likely index for next allocation */
	unsigned int cluster_nr;	/* countdown to next cluster search */
220 221
	unsigned int lowest_alloc;	/* while preparing discard cluster */
	unsigned int highest_alloc;	/* while preparing discard cluster */
H
Hugh Dickins 已提交
222 223 224 225 226
	struct swap_extent *curr_swap_extent;
	struct swap_extent first_swap_extent;
	struct block_device *bdev;	/* swap device or bdev of swap file */
	struct file *swap_file;		/* seldom referenced */
	unsigned int old_block_size;	/* seldom referenced */
227 228 229 230
#ifdef CONFIG_FRONTSWAP
	unsigned long *frontswap_map;	/* frontswap in-use, one bit per page */
	atomic_t frontswap_pages;	/* frontswap pages in-use counter */
#endif
231 232 233 234 235 236 237 238 239 240 241 242
	spinlock_t lock;		/*
					 * protect map scan related fields like
					 * swap_map, lowest_bit, highest_bit,
					 * inuse_pages, cluster_next,
					 * cluster_nr, lowest_alloc and
					 * highest_alloc. other fields are only
					 * changed at swapon/swapoff, so are
					 * protected by swap_lock. changing
					 * flags need hold this lock and
					 * swap_lock. If both locks need hold,
					 * hold swap_lock first.
					 */
L
Linus Torvalds 已提交
243 244 245 246 247 248 249 250 251
};

/* Position markers into the priority-ordered list of swapfiles. */
struct swap_list_t {
	int head;	/* head of priority-ordered swapfile list */
	int next;	/* swapfile to be used next */
};

/* linux/mm/page_alloc.c */
extern unsigned long totalram_pages;
252
extern unsigned long totalreserve_pages;
253
extern unsigned long dirty_balance_reserve;
254 255
extern unsigned long nr_free_buffer_pages(void);
extern unsigned long nr_free_pagecache_pages(void);
L
Linus Torvalds 已提交
256

C
Christoph Lameter 已提交
257 258 259 260
/* Definition of global_page_state not available yet */
#define nr_free_pages() global_page_state(NR_FREE_PAGES)


L
Linus Torvalds 已提交
261
/* linux/mm/swap.c */
262 263
extern void __lru_cache_add(struct page *);
extern void lru_cache_add(struct page *);
264
extern void lru_add_page_tail(struct page *page, struct page *page_tail,
265
			 struct lruvec *lruvec, struct list_head *head);
266 267
extern void activate_page(struct page *);
extern void mark_page_accessed(struct page *);
L
Linus Torvalds 已提交
268
extern void lru_add_drain(void);
269
extern void lru_add_drain_cpu(int cpu);
270
extern int lru_add_drain_all(void);
271
extern void rotate_reclaimable_page(struct page *page);
M
Minchan Kim 已提交
272
extern void deactivate_page(struct page *page);
L
Linus Torvalds 已提交
273 274
extern void swap_setup(void);

L
Lee Schermerhorn 已提交
275 276
extern void add_page_to_unevictable_list(struct page *page);

277 278 279 280
/**
 * lru_cache_add_anon - add a page to the page lists
 * @page: the page to add
 *
 * Calls ClearPageActive() on @page and then hands it to __lru_cache_add().
 */
static inline void lru_cache_add_anon(struct page *page)
{
	ClearPageActive(page);
	__lru_cache_add(page);
}

287 288
/**
 * lru_cache_add_file - add a page to the page lists
 * @page: the page to add
 *
 * Calls ClearPageActive() on @page and then hands it to __lru_cache_add().
 */
static inline void lru_cache_add_file(struct page *page)
{
	ClearPageActive(page);
	__lru_cache_add(page);
}

L
Linus Torvalds 已提交
293
/* linux/mm/vmscan.c */
294
extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
295
					gfp_t gfp_mask, nodemask_t *mask);
296
extern int __isolate_lru_page(struct page *page, isolate_mode_t mode);
297 298 299 300 301 302
extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *mem,
						  gfp_t gfp_mask, bool noswap);
extern unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *mem,
						gfp_t gfp_mask, bool noswap,
						struct zone *zone,
						unsigned long *nr_scanned);
303
extern unsigned long shrink_all_memory(unsigned long nr_pages);
L
Linus Torvalds 已提交
304
extern int vm_swappiness;
C
Christoph Lameter 已提交
305
extern int remove_mapping(struct address_space *mapping, struct page *page);
306
extern unsigned long vm_total_pages;
C
Christoph Lameter 已提交
307

308 309
#ifdef CONFIG_NUMA
extern int zone_reclaim_mode;
310
extern int sysctl_min_unmapped_ratio;
311
extern int sysctl_min_slab_ratio;
312 313 314 315 316 317 318 319 320
extern int zone_reclaim(struct zone *, gfp_t, unsigned int);
#else
#define zone_reclaim_mode 0
/*
 * Stub used when CONFIG_NUMA is not set: ignores its arguments and
 * always returns 0.
 */
static inline int zone_reclaim(struct zone *z, gfp_t mask, unsigned int order)
{
	return 0;
}
#endif

321
extern int page_evictable(struct page *page);
322
extern void check_move_unevictable_pages(struct page **, int nr_pages);
323 324

extern unsigned long scan_unevictable_pages;
325
extern int scan_unevictable_handler(struct ctl_table *, int,
326
					void __user *, size_t *, loff_t *);
327
#ifdef CONFIG_NUMA
328 329
extern int scan_unevictable_register_node(struct node *node);
extern void scan_unevictable_unregister_node(struct node *node);
330 331 332 333 334 335 336 337 338
#else
/* Stub used when CONFIG_NUMA is not set: does nothing and returns 0. */
static inline int scan_unevictable_register_node(struct node *node)
{
	return 0;
}
/* Stub used when CONFIG_NUMA is not set: intentionally empty. */
static inline void scan_unevictable_unregister_node(struct node *node)
{
}
#endif
L
Lee Schermerhorn 已提交
339

340
extern int kswapd_run(int nid);
341
extern void kswapd_stop(int nid);
A
Andrew Morton 已提交
342
#ifdef CONFIG_MEMCG
343 344 345 346 347 348 349
extern int mem_cgroup_swappiness(struct mem_cgroup *mem);
#else
/*
 * Stub used when CONFIG_MEMCG is not set: @mem is ignored and the global
 * vm_swappiness value is returned.
 */
static inline int mem_cgroup_swappiness(struct mem_cgroup *mem)
{
	return vm_swappiness;
}
#endif
A
Andrew Morton 已提交
350
#ifdef CONFIG_MEMCG_SWAP
351 352 353 354 355 356
extern void mem_cgroup_uncharge_swap(swp_entry_t ent);
#else
/* Stub used when CONFIG_MEMCG_SWAP is not set: intentionally empty. */
static inline void mem_cgroup_uncharge_swap(swp_entry_t ent)
{
}
#endif
L
Linus Torvalds 已提交
357 358
#ifdef CONFIG_SWAP
/* linux/mm/page_io.c */
359
extern int swap_readpage(struct page *);
L
Linus Torvalds 已提交
360
extern int swap_writepage(struct page *page, struct writeback_control *wbc);
361 362 363
extern void end_swap_bio_write(struct bio *bio, int err);
extern int __swap_writepage(struct page *page, struct writeback_control *wbc,
	void (*end_write_func)(struct bio *, int));
364
extern int swap_set_page_dirty(struct page *page);
365
extern void end_swap_bio_read(struct bio *bio, int err);
L
Linus Torvalds 已提交
366

367 368 369 370 371
int add_swap_extent(struct swap_info_struct *sis, unsigned long start_page,
		unsigned long nr_pages, sector_t start_block);
int generic_swapfile_activate(struct swap_info_struct *, struct file *,
		sector_t *);

L
Linus Torvalds 已提交
372
/* linux/mm/swap_state.c */
373 374 375
extern struct address_space swapper_spaces[];
#define swap_address_space(entry) (&swapper_spaces[swp_type(entry)])
extern unsigned long total_swapcache_pages(void);
L
Linus Torvalds 已提交
376
extern void show_swap_cache_info(void);
377
extern int add_to_swap(struct page *, struct list_head *list);
378
extern int add_to_swap_cache(struct page *, swp_entry_t, gfp_t);
379
extern int __add_to_swap_cache(struct page *page, swp_entry_t entry);
L
Linus Torvalds 已提交
380 381 382 383
extern void __delete_from_swap_cache(struct page *);
extern void delete_from_swap_cache(struct page *);
extern void free_page_and_swap_cache(struct page *);
extern void free_pages_and_swap_cache(struct page **, int);
384
extern struct page *lookup_swap_cache(swp_entry_t);
385
extern struct page *read_swap_cache_async(swp_entry_t, gfp_t,
386
			struct vm_area_struct *vma, unsigned long addr);
387
extern struct page *swapin_readahead(swp_entry_t, gfp_t,
388 389
			struct vm_area_struct *vma, unsigned long addr);

L
Linus Torvalds 已提交
390
/* linux/mm/swapfile.c */
391
extern atomic_long_t nr_swap_pages;
L
Linus Torvalds 已提交
392
extern long total_swap_pages;
393 394 395 396 397 398 399 400 401 402 403 404

/*
 * Swap 50% full? Release swapcache more aggressively..
 *
 * True when twice the current nr_swap_pages value is still below
 * total_swap_pages.
 */
static inline bool vm_swap_full(void)
{
	long nr = atomic_long_read(&nr_swap_pages);

	return nr * 2 < total_swap_pages;
}

/* Read the current value of the nr_swap_pages atomic counter. */
static inline long get_nr_swap_pages(void)
{
	return (atomic_long_read(&nr_swap_pages));
}

L
Linus Torvalds 已提交
405 406
extern void si_swapinfo(struct sysinfo *);
extern swp_entry_t get_swap_page(void);
407
extern swp_entry_t get_swap_page_of_type(int);
H
Hugh Dickins 已提交
408
extern int add_swap_count_continuation(swp_entry_t, gfp_t);
H
Hugh Dickins 已提交
409
extern void swap_shmem_alloc(swp_entry_t);
H
Hugh Dickins 已提交
410 411
extern int swap_duplicate(swp_entry_t);
extern int swapcache_prepare(swp_entry_t);
L
Linus Torvalds 已提交
412
extern void swap_free(swp_entry_t);
413
extern void swapcache_free(swp_entry_t, struct page *page);
414
extern int free_swap_and_cache(swp_entry_t);
415
extern int swap_type_of(dev_t, sector_t, struct block_device **);
416
extern unsigned int count_swap_pages(int, int);
417
extern sector_t map_swap_page(struct page *, struct block_device **);
418
extern sector_t swapdev_block(int, pgoff_t);
419
extern int page_swapcount(struct page *);
420
extern struct swap_info_struct *page_swap_info(struct page *);
421
extern int reuse_swap_page(struct page *);
422
extern int try_to_free_swap(struct page *);
L
Linus Torvalds 已提交
423 424
struct backing_dev_info;

A
Andrew Morton 已提交
425
#ifdef CONFIG_MEMCG
K
KAMEZAWA Hiroyuki 已提交
426 427
extern void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout);
K
KAMEZAWA Hiroyuki 已提交
428
#else
429
static inline void
K
KAMEZAWA Hiroyuki 已提交
430
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout)
431 432 433
{
}
#endif
K
KAMEZAWA Hiroyuki 已提交
434

L
Linus Torvalds 已提交
435 436
#else /* CONFIG_SWAP */

437
#define get_nr_swap_pages()			0L
438
#define total_swap_pages			0L
439
#define total_swapcache_pages()			0UL
440
#define vm_swap_full()				0
L
Linus Torvalds 已提交
441 442 443

/*
 * !CONFIG_SWAP replacement for si_swapinfo(): report zero total and zero
 * free swap in the structure that @val points to.
 */
#define si_swapinfo(val)			\
	do {					\
		(val)->totalswap = 0;		\
		(val)->freeswap = 0;		\
	} while (0)
444 445
/* only sparc can not include linux/pagemap.h in this file
 * so leave page_cache_release and release_pages undeclared... */
L
Linus Torvalds 已提交
446 447 448 449 450
/*
 * !CONFIG_SWAP replacements: with no swap cache to consider, these map
 * straight onto page_cache_release() and release_pages().
 *
 * Neither expansion ends in a semicolon, so both can be used as a single
 * statement, including as the body of an if/else.
 */
#define free_page_and_swap_cache(page) \
	page_cache_release(page)
#define free_pages_and_swap_cache(pages, nr) \
	release_pages((pages), (nr), 0)

451 452 453 454
/* Stub used when CONFIG_SWAP is not set: intentionally empty. */
static inline void show_swap_cache_info(void)
{
}

455
/*
 * !CONFIG_SWAP replacements: both simply evaluate to
 * is_migration_entry(swp).
 */
#define free_swap_and_cache(swp)	is_migration_entry(swp)
#define swapcache_prepare(swp)		is_migration_entry(swp)
457

H
Hugh Dickins 已提交
458
/*
 * Stub used when CONFIG_SWAP is not set: ignores its arguments and
 * always returns 0.
 */
static inline int add_swap_count_continuation(swp_entry_t swp, gfp_t gfp_mask)
{
	return 0;
}

H
Hugh Dickins 已提交
463 464 465 466
/* Stub used when CONFIG_SWAP is not set: intentionally empty. */
static inline void swap_shmem_alloc(swp_entry_t swp)
{
}

H
Hugh Dickins 已提交
467 468 469
/* Stub used when CONFIG_SWAP is not set: always returns 0. */
static inline int swap_duplicate(swp_entry_t swp)
{
	return 0;
}

472 473 474 475
/* Stub used when CONFIG_SWAP is not set: intentionally empty. */
static inline void swap_free(swp_entry_t swp)
{
}

476 477 478 479
/* Stub used when CONFIG_SWAP is not set: intentionally empty. */
static inline void swapcache_free(swp_entry_t swp, struct page *page)
{
}

480
/*
 * Stub used when CONFIG_SWAP is not set: ignores its arguments and
 * always returns NULL.
 */
static inline struct page *swapin_readahead(swp_entry_t swp, gfp_t gfp_mask,
			struct vm_area_struct *vma, unsigned long addr)
{
	return NULL;
}

H
Hugh Dickins 已提交
486 487 488 489 490
/*
 * Stub used when CONFIG_SWAP is not set: performs no write and
 * always returns 0.
 */
static inline int swap_writepage(struct page *p, struct writeback_control *wbc)
{
	return 0;
}

491 492 493 494 495
/* Stub used when CONFIG_SWAP is not set: always returns NULL. */
static inline struct page *lookup_swap_cache(swp_entry_t swp)
{
	return NULL;
}

496
/*
 * Stub used when CONFIG_SWAP is not set: ignores its arguments and
 * always returns 0.
 */
static inline int add_to_swap(struct page *page, struct list_head *list)
{
	return 0;
}

501 502
/*
 * Stub used when CONFIG_SWAP is not set: ignores its arguments and
 * always returns -1.
 */
static inline int add_to_swap_cache(struct page *page, swp_entry_t entry,
							gfp_t gfp_mask)
{
	return -1;
}

/* Stub used when CONFIG_SWAP is not set: intentionally empty. */
static inline void __delete_from_swap_cache(struct page *page)
{
}

/* Stub used when CONFIG_SWAP is not set: intentionally empty. */
static inline void delete_from_swap_cache(struct page *page)
{
}

515 516 517 518 519
/* Stub used when CONFIG_SWAP is not set: always reports a count of 0. */
static inline int page_swapcount(struct page *page)
{
	return 0;
}

520
#define reuse_swap_page(page)	(page_mapcount(page) == 1)
L
Linus Torvalds 已提交
521

522
/* Stub used when CONFIG_SWAP is not set: always returns 0. */
static inline int try_to_free_swap(struct page *page)
{
	return 0;
}

L
Linus Torvalds 已提交
527 528 529 530 531 532 533
static inline swp_entry_t get_swap_page(void)
{
	swp_entry_t entry;
	entry.val = 0;
	return entry;
}

534 535 536 537 538
/*
 * Stub used when CONFIG_SWAP is not set: intentionally empty.
 *
 * NOTE(review): the CONFIG_SWAP variants of this function declared earlier
 * in this header take a third "bool swapout" parameter; confirm whether
 * this stub's two-parameter signature is intentional.
 */
static inline void
mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent)
{
}

L
Linus Torvalds 已提交
539 540 541
#endif /* CONFIG_SWAP */
#endif /* __KERNEL__*/
#endif /* _LINUX_SWAP_H */