/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/backing-dev.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include <linux/idr.h>
#include <linux/sysfs.h>
#include <linux/cpuhotplug.h>

#include "zram_drv.h"

static DEFINE_IDR(zram_index_idr);
/* idr index must be protected */
static DEFINE_MUTEX(zram_index_mutex);

static int zram_major;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static void zram_free_page(struct zram *zram, size_t index);

static inline bool init_done(struct zram *zram)
{
	return zram->disksize;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static unsigned long zram_get_handle(struct zram *zram, u32 index)
{
	return zram->table[index].handle;
}

static void zram_set_handle(struct zram *zram, u32 index, unsigned long handle)
{
	zram->table[index].handle = handle;
}

/* flag operations require table entry bit_spin_lock() being held */
static int zram_test_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	return zram->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram *zram, u32 index,
			enum zram_pageflags flag)
{
	zram->table[index].value &= ~BIT(flag);
}

static inline void zram_set_element(struct zram *zram, u32 index,
			unsigned long element)
{
	zram->table[index].element = element;
}

static unsigned long zram_get_element(struct zram *zram, u32 index)
{
	return zram->table[index].element;
}

static size_t zram_get_obj_size(struct zram *zram, u32 index)
{
	return zram->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram *zram,
					u32 index, size_t size)
{
	unsigned long flags = zram->table[index].value >> ZRAM_FLAG_SHIFT;

	zram->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

#if PAGE_SIZE != 4096
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}
#else
static inline bool is_partial_io(struct bio_vec *bvec)
{
	return false;
}
#endif

static void zram_revalidate_disk(struct zram *zram)
{
	revalidate_disk(zram->disk);
	/* revalidate_disk resets BDI_CAP_STABLE_WRITES, so set it again */
	zram->disk->queue->backing_dev_info->capabilities |=
		BDI_CAP_STABLE_WRITES;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline bool valid_io_request(struct zram *zram,
		sector_t start, unsigned int size)
{
	u64 end, bound;

	/* unaligned request */
	if (unlikely(start & (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return false;
	if (unlikely(size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return false;

	end = start + (size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return false;

	/* I/O request is valid */
	return true;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	*index  += (*offset + bvec->bv_len) / PAGE_SIZE;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	unsigned long old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

static inline void zram_fill_page(char *ptr, unsigned long len,
					unsigned long value)
{
	int i;
	unsigned long *page = (unsigned long *)ptr;

	WARN_ON_ONCE(!IS_ALIGNED(len, sizeof(unsigned long)));

	if (likely(value == 0)) {
		memset(ptr, 0, len);
	} else {
		for (i = 0; i < len / sizeof(*page); i++)
			page[i] = value;
	}
}

static bool page_same_filled(void *ptr, unsigned long *element)
{
	unsigned int pos;
	unsigned long *page;
	unsigned long val;

	page = (unsigned long *)ptr;
	val = page[0];

	for (pos = 1; pos < PAGE_SIZE / sizeof(*page); pos++) {
		if (val != page[pos])
			return false;
	}

	*element = val;

	return true;
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(zram->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

/*
 * We switched to per-cpu streams and this attr is not needed anymore.
 * However, we will keep it around for some time, because:
 * a) we may revert per-cpu streams in the future
 * b) it's visible to user space and we need to follow our 2 years
 *    retirement rule; but we already have a number of 'soon to be
 *    altered' attrs, so max_comp_streams need to wait for the next
 *    layoff cycle.
 */
static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return scnprintf(buf, PAGE_SIZE, "%d\n", num_online_cpus());
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	return len;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	char compressor[ARRAY_SIZE(zram->compressor)];
	size_t sz;

	strlcpy(compressor, buf, sizeof(compressor));
	/* ignore trailing newline */
	sz = strlen(compressor);
	if (sz > 0 && compressor[sz - 1] == '\n')
		compressor[sz - 1] = 0x00;

	if (!zcomp_available_algorithm(compressor))
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}

	strcpy(zram->compressor, compressor);
	up_write(&zram->init_lock);
	return len;
}

static ssize_t compact_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (!init_done(zram)) {
		up_read(&zram->init_lock);
		return -EINVAL;
	}

	zs_compact(zram->mem_pool);
	up_read(&zram->init_lock);

	return len;
}

static ssize_t io_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8llu\n",
			(u64)atomic64_read(&zram->stats.failed_reads),
			(u64)atomic64_read(&zram->stats.failed_writes),
			(u64)atomic64_read(&zram->stats.invalid_io),
			(u64)atomic64_read(&zram->stats.notify_free));
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t mm_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);
	struct zs_pool_stats pool_stats;
	u64 orig_size, mem_used = 0;
	long max_used;
	ssize_t ret;

	memset(&pool_stats, 0x00, sizeof(struct zs_pool_stats));

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		mem_used = zs_get_total_pages(zram->mem_pool);
		zs_pool_stats(zram->mem_pool, &pool_stats);
	}

	orig_size = atomic64_read(&zram->stats.pages_stored);
	max_used = atomic_long_read(&zram->stats.max_used_pages);

	ret = scnprintf(buf, PAGE_SIZE,
			"%8llu %8llu %8llu %8lu %8ld %8llu %8lu\n",
			orig_size << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.compr_data_size),
			mem_used << PAGE_SHIFT,
			zram->limit_pages << PAGE_SHIFT,
			max_used << PAGE_SHIFT,
			(u64)atomic64_read(&zram->stats.same_pages),
			pool_stats.pages_compacted);
	up_read(&zram->init_lock);

	return ret;
}

static ssize_t debug_stat_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int version = 1;
	struct zram *zram = dev_to_zram(dev);
	ssize_t ret;

	down_read(&zram->init_lock);
	ret = scnprintf(buf, PAGE_SIZE,
			"version: %d\n%8llu\n",
			version,
			(u64)atomic64_read(&zram->stats.writestall));
	up_read(&zram->init_lock);

	return ret;
}

static DEVICE_ATTR_RO(io_stat);
static DEVICE_ATTR_RO(mm_stat);
static DEVICE_ATTR_RO(debug_stat);

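/*
 * Per-entry lock helpers: serialize access to one table slot via the
 * ZRAM_ACCESS bit in the entry's flags/size word.
 */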
static void zram_slot_lock(struct zram *zram, u32 index)
{
	bit_spin_lock(ZRAM_ACCESS, &zram->table[index].value);
}

static void zram_slot_unlock(struct zram *zram, u32 index)
{
	bit_spin_unlock(ZRAM_ACCESS, &zram->table[index].value);
}

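/*
 * Fast path for slots that carry no compressed data: if the entry is
 * unallocated or flagged ZRAM_SAME, fill the destination from the stored
 * element value and skip decompression entirely.
 */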
static bool zram_same_page_read(struct zram *zram, u32 index,
				struct page *page,
				unsigned int offset, unsigned int len)
{
	zram_slot_lock(zram, index);
	if (unlikely(!zram_get_handle(zram, index) ||
			zram_test_flag(zram, index, ZRAM_SAME))) {
		void *mem;

		zram_slot_unlock(zram, index);
		mem = kmap_atomic(page);
		zram_fill_page(mem + offset, len,
					zram_get_element(zram, index));
		kunmap_atomic(mem);
		return true;
	}
	zram_slot_unlock(zram, index);

	return false;
}

static bool zram_same_page_write(struct zram *zram, u32 index,
					struct page *page)
{
	unsigned long element;
	void *mem = kmap_atomic(page);

	if (page_same_filled(mem, &element)) {
		kunmap_atomic(mem);
		/* Free memory associated with this sector now. */
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_set_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, element);
		zram_slot_unlock(zram, index);

		atomic64_inc(&zram->stats.same_pages);
		atomic64_inc(&zram->stats.pages_stored);
		return true;
	}
	kunmap_atomic(mem);

	return false;
}

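/*
 * Free every object still stored in the device, then tear down the
 * zsmalloc pool and the per-slot table.
 */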
static void zram_meta_free(struct zram *zram, u64 disksize)
{
	size_t num_pages = disksize >> PAGE_SHIFT;
	size_t index;

	/* Free all pages that are still in this zram device */
	for (index = 0; index < num_pages; index++)
		zram_free_page(zram, index);

	zs_destroy_pool(zram->mem_pool);
	vfree(zram->table);
}

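/*
 * Allocate the per-slot table and create the zsmalloc pool for a device
 * of @disksize bytes. Returns false (and frees the table) on failure.
 */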
static bool zram_meta_alloc(struct zram *zram, u64 disksize)
{
	size_t num_pages;

	num_pages = disksize >> PAGE_SHIFT;
	zram->table = vzalloc(num_pages * sizeof(*zram->table));
	if (!zram->table)
		return false;

	zram->mem_pool = zs_create_pool(zram->disk->disk_name);
	if (!zram->mem_pool) {
		vfree(zram->table);
		return false;
	}

	return true;
}

/*
 * To protect concurrent access to the same index entry, the caller
 * should hold this table entry's bit_spinlock while the entry is
 * being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	unsigned long handle = zram_get_handle(zram, index);

	/*
	 * No memory is allocated for same element filled pages.
	 * Simply clear same page flag.
	 */
	if (zram_test_flag(zram, index, ZRAM_SAME)) {
		zram_clear_flag(zram, index, ZRAM_SAME);
		zram_set_element(zram, index, 0);
		atomic64_dec(&zram->stats.same_pages);
		atomic64_dec(&zram->stats.pages_stored);
		return;
	}

	if (!handle)
		return;

	zs_free(zram->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(zram, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	zram_set_handle(zram, index, 0);
	zram_set_obj_size(zram, index, 0);
}

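/*
 * Decompress the object stored at @index into @page. Same-filled slots
 * are expanded directly without touching zsmalloc.
 */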
static int zram_decompress_page(struct zram *zram, struct page *page, u32 index)
{
	int ret;
	unsigned long handle;
	unsigned int size;
	void *src, *dst;

	if (zram_same_page_read(zram, index, page, 0, PAGE_SIZE))
		return 0;

	zram_slot_lock(zram, index);
	handle = zram_get_handle(zram, index);
	size = zram_get_obj_size(zram, index);

	src = zs_map_object(zram->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE) {
		dst = kmap_atomic(page);
		memcpy(dst, src, PAGE_SIZE);
		kunmap_atomic(dst);
		ret = 0;
	} else {
		struct zcomp_strm *zstrm = zcomp_stream_get(zram->comp);

		dst = kmap_atomic(page);
		ret = zcomp_decompress(zstrm, src, size, dst);
		kunmap_atomic(dst);
		zcomp_stream_put(zram->comp);
	}
	zs_unmap_object(zram->mem_pool, handle);
	zram_slot_unlock(zram, index);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);

	return ret;
}

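/*
 * Service a read for one bio_vec. Partial (sub-page) reads decompress
 * into a temporary page and copy out only the requested range.
 */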
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset)
{
	int ret;
	struct page *page;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/* Use a temporary buffer to decompress the page */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;
	}

	ret = zram_decompress_page(zram, page, index);
	if (unlikely(ret))
		goto out;

	if (is_partial_io(bvec)) {
		void *dst = kmap_atomic(bvec->bv_page);
		void *src = kmap_atomic(page);

		memcpy(dst + bvec->bv_offset, src + offset, bvec->bv_len);
		kunmap_atomic(src);
		kunmap_atomic(dst);
	}
out:
	if (is_partial_io(bvec))
		__free_page(page);

	return ret;
}

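/*
 * Compress @page and store the result in a newly allocated zsmalloc
 * object. If allocation needs direct reclaim, the per-CPU stream is
 * dropped and the compression is redone (the slow path). On success the
 * handle and compressed length are returned via @out_handle/@out_comp_len.
 */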
static int zram_compress(struct zram *zram, struct zcomp_strm **zstrm,
			struct page *page,
			unsigned long *out_handle, unsigned int *out_comp_len)
{
	int ret;
	unsigned int comp_len;
	void *src;
	unsigned long alloced_pages;
	unsigned long handle = 0;

compress_again:
	src = kmap_atomic(page);
	ret = zcomp_compress(*zstrm, src, &comp_len);
	kunmap_atomic(src);

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		if (handle)
			zs_free(zram->mem_pool, handle);
		return ret;
	}

	if (unlikely(comp_len > max_zpage_size))
		comp_len = PAGE_SIZE;

	/*
	 * handle allocation has 2 paths:
	 * a) fast path is executed with preemption disabled (for
	 *  per-cpu streams) and has __GFP_DIRECT_RECLAIM bit clear,
	 *  since we can't sleep;
	 * b) slow path enables preemption and attempts to allocate
	 *  the page with __GFP_DIRECT_RECLAIM bit set. we have to
	 *  put per-cpu compression stream and, thus, to re-do
	 *  the compression once handle is allocated.
	 *
	 * if we have a 'non-null' handle here then we are coming
	 * from the slow path and handle has already been allocated.
	 */
	if (!handle)
		handle = zs_malloc(zram->mem_pool, comp_len,
				__GFP_KSWAPD_RECLAIM |
				__GFP_NOWARN |
				__GFP_HIGHMEM |
				__GFP_MOVABLE);
	if (!handle) {
		zcomp_stream_put(zram->comp);
		atomic64_inc(&zram->stats.writestall);
		handle = zs_malloc(zram->mem_pool, comp_len,
				GFP_NOIO | __GFP_HIGHMEM |
				__GFP_MOVABLE);
		*zstrm = zcomp_stream_get(zram->comp);
		if (handle)
			goto compress_again;
		return -ENOMEM;
	}

	alloced_pages = zs_get_total_pages(zram->mem_pool);
	update_used_max(zram, alloced_pages);

	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(zram->mem_pool, handle);
		return -ENOMEM;
	}

	*out_handle = handle;
	*out_comp_len = comp_len;
	return 0;
}

static int __zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index)
{
	int ret;
	unsigned long handle;
	unsigned int comp_len;
	void *src, *dst;
	struct zcomp_strm *zstrm;
	struct page *page = bvec->bv_page;

	if (zram_same_page_write(zram, index, page))
		return 0;

	zstrm = zcomp_stream_get(zram->comp);
	ret = zram_compress(zram, &zstrm, page, &handle, &comp_len);
	if (ret) {
		zcomp_stream_put(zram->comp);
		return ret;
	}

	dst = zs_map_object(zram->mem_pool, handle, ZS_MM_WO);

	src = zstrm->buffer;
	if (comp_len == PAGE_SIZE)
		src = kmap_atomic(page);
	memcpy(dst, src, comp_len);
	if (comp_len == PAGE_SIZE)
		kunmap_atomic(src);

	zcomp_stream_put(zram->comp);
	zs_unmap_object(zram->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_set_handle(zram, index, handle);
	zram_set_obj_size(zram, index, comp_len);
	zram_slot_unlock(zram, index);

	/* Update stats */
	atomic64_add(comp_len, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
	return 0;
}

static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec,
				u32 index, int offset)
{
	int ret;
	struct page *page = NULL;
	void *src;
	struct bio_vec vec;

	vec = *bvec;
	if (is_partial_io(bvec)) {
		void *dst;
		/*
		 * This is a partial IO. We need to read the full page
		 * before to write the changes.
		 */
		page = alloc_page(GFP_NOIO|__GFP_HIGHMEM);
		if (!page)
			return -ENOMEM;

		ret = zram_decompress_page(zram, page, index);
		if (ret)
			goto out;

		src = kmap_atomic(bvec->bv_page);
		dst = kmap_atomic(page);
		memcpy(dst + offset, src + bvec->bv_offset, bvec->bv_len);
		kunmap_atomic(dst);
		kunmap_atomic(src);

		vec.bv_page = page;
		vec.bv_len = PAGE_SIZE;
		vec.bv_offset = 0;
	}

	ret = __zram_bvec_write(zram, &vec, index);
out:
	if (is_partial_io(bvec))
		__free_page(page);
	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block.  Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory.  So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		zram_slot_lock(zram, index);
		zram_free_page(zram, index);
		zram_slot_unlock(zram, index);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

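/*
 * Dispatch one bio_vec as a read or a write, updating the generic block
 * layer accounting and the zram failure counters.
 */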
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, bool is_write)
{
	unsigned long start_time = jiffies;
	int rw_acct = is_write ? REQ_OP_WRITE : REQ_OP_READ;
	int ret;

	generic_start_io_acct(rw_acct, bvec->bv_len >> SECTOR_SHIFT,
			&zram->disk->part0);

	if (!is_write) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
		flush_dcache_page(bvec->bv_page);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	generic_end_io_acct(rw_acct, &zram->disk->part0, start_time);

	if (unlikely(ret)) {
		if (!is_write)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

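/*
 * Walk the bio segment by segment, splitting each bio_vec into chunks
 * that never cross a PAGE_SIZE boundary before passing them to
 * zram_bvec_rw(). Discard and write-zeroes requests take a separate path.
 */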
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	switch (bio_op(bio)) {
	case REQ_OP_DISCARD:
	case REQ_OP_WRITE_ZEROES:
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio);
		return;
	default:
		break;
	}

	bio_for_each_segment(bvec, bio, iter) {
		struct bio_vec bv = bvec;
		unsigned int unwritten = bvec.bv_len;

		do {
			bv.bv_len = min_t(unsigned int, PAGE_SIZE - offset,
							unwritten);
			if (zram_bvec_rw(zram, &bv, index, offset,
					op_is_write(bio_op(bio))) < 0)
				goto out;

			bv.bv_offset += bv.bv_len;
			unwritten -= bv.bv_len;

			update_position(&index, &offset, &bv);
		} while (unwritten);
	}

	bio_endio(bio);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static blk_qc_t zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	if (!valid_io_request(zram, bio->bi_iter.bi_sector,
					bio->bi_iter.bi_size)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	return BLK_QC_T_NONE;

error:
	bio_io_error(bio);
	return BLK_QC_T_NONE;
}

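/*
 * Called by the swap layer when a swap slot backed by this device is
 * freed, so the corresponding compressed object can be released early.
 */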
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;

	zram = bdev->bd_disk->private_data;

	zram_slot_lock(zram, index);
	zram_free_page(zram, index);
	zram_slot_unlock(zram, index);
	atomic64_inc(&zram->stats.notify_free);
}

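/*
 * rw_page entry point: read or write a single page without building a
 * bio (used, for example, by the swap code).
 */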
static int zram_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, bool is_write)
{
	int offset, err = -EIO;
	u32 index;
	struct zram *zram;
	struct bio_vec bv;

	zram = bdev->bd_disk->private_data;

	if (!valid_io_request(zram, sector, PAGE_SIZE)) {
		atomic64_inc(&zram->stats.invalid_io);
		err = -EINVAL;
		goto out;
	}

	index = sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (sector & (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bv.bv_page = page;
	bv.bv_len = PAGE_SIZE;
	bv.bv_offset = 0;

	err = zram_bvec_rw(zram, &bv, index, offset, is_write);
out:
	/*
	 * If the I/O fails, just return the error (i.e. non-zero) without
	 * calling page_endio. The callers of rw_page (e.g. swap_readpage,
	 * __swap_writepage) will then resubmit the I/O as a bio request,
	 * and bio->bi_end_io handles the error (e.g. SetPageError,
	 * set_page_dirty and other cleanup).
	 */
	if (err == 0)
		page_endio(page, is_write, 0);
	return err;
}

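/*
 * Tear an initialized device back down: drop the capacity to zero, free
 * the metadata and all compressed data, clear the stats and destroy the
 * compression backend.
 */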
static void zram_reset_device(struct zram *zram)
{
	struct zcomp *comp;
	u64 disksize;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	comp = zram->comp;
	disksize = zram->disksize;
	zram->disksize = 0;

	set_capacity(zram->disk, 0);
	part_stat_set_all(&zram->disk->part0, 0);

	up_write(&zram->init_lock);
	/* I/O on all CPUs is done, so it is safe to free the metadata */
	zram_meta_free(zram, disksize);
	memset(&zram->stats, 0, sizeof(zram->stats));
	zcomp_destroy(comp);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_unlock;
	}

	disksize = PAGE_ALIGN(disksize);
	if (!zram_meta_alloc(zram, disksize)) {
		err = -ENOMEM;
		goto out_unlock;
	}

	comp = zcomp_create(zram->compressor);
	if (IS_ERR(comp)) {
		pr_err("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	zram_revalidate_disk(zram);
	up_write(&zram->init_lock);

	return len;

out_free_meta:
	zram_meta_free(zram, disksize);
out_unlock:
	up_write(&zram->init_lock);
	return err;
}

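/*
 * Writing a non-zero value to the "reset" attribute returns the device
 * to its uninitialized state, provided nobody holds it open.
 */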
static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		return ret;

	if (!do_reset)
		return -EINVAL;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	/* Do not reset an active device or claimed device */
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	/* From now on, no one can open /dev/zram[0-9] */
	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	zram_revalidate_disk(zram);
	bdput(bdev);

	mutex_lock(&bdev->bd_mutex);
	zram->claim = false;
	mutex_unlock(&bdev->bd_mutex);

	return len;
}

static int zram_open(struct block_device *bdev, fmode_t mode)
{
	int ret = 0;
	struct zram *zram;

	WARN_ON(!mutex_is_locked(&bdev->bd_mutex));

	zram = bdev->bd_disk->private_data;
	/* zram was claimed to reset so open request fails */
	if (zram->claim)
		ret = -EBUSY;

	return ret;
}

static const struct block_device_operations zram_devops = {
	.open = zram_open,
	.swap_slot_free_notify = zram_slot_free_notify,
	.rw_page = zram_rw_page,
	.owner = THIS_MODULE
};

static DEVICE_ATTR_WO(compact);
static DEVICE_ATTR_RW(disksize);
static DEVICE_ATTR_RO(initstate);
static DEVICE_ATTR_WO(reset);
static DEVICE_ATTR_WO(mem_limit);
static DEVICE_ATTR_WO(mem_used_max);
static DEVICE_ATTR_RW(max_comp_streams);
static DEVICE_ATTR_RW(comp_algorithm);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_compact.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
	&dev_attr_io_stat.attr,
	&dev_attr_mm_stat.attr,
	&dev_attr_debug_stat.attr,
	NULL,
};

static const struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

/*
 * Allocate and initialize a new zram device. The function returns
 * a '>= 0' device_id upon success, and a negative value otherwise.
 */
static int zram_add(void)
{
	struct zram *zram;
	struct request_queue *queue;
	int ret, device_id;

	zram = kzalloc(sizeof(struct zram), GFP_KERNEL);
	if (!zram)
		return -ENOMEM;

	ret = idr_alloc(&zram_index_idr, zram, 0, 0, GFP_KERNEL);
	if (ret < 0)
		goto out_free_dev;
	device_id = ret;

	init_rwsem(&zram->init_lock);

	queue = blk_alloc_queue(GFP_KERNEL);
	if (!queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_idr;
	}

	blk_queue_make_request(queue, zram_make_request);

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_err("Error allocating disk structure for device %d\n",
			device_id);
		ret = -ENOMEM;
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = queue;
	zram->disk->queue->queuedata = zram;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE-sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	blk_queue_max_discard_sectors(zram->disk->queue, UINT_MAX);
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size(PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size.  So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		blk_queue_max_write_zeroes_sectors(zram->disk->queue, UINT_MAX);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_err("Error creating sysfs group for device %d\n",
				device_id);
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));

	pr_info("Added device: %s\n", zram->disk->disk_name);
	return device_id;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(queue);
out_free_idr:
	idr_remove(&zram_index_idr, device_id);
out_free_dev:
	kfree(zram);
	return ret;
}

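/*
 * Remove a device created by zram_add(): claim it so it cannot be opened,
 * flush and reset it, then release the gendisk, queue and zram structure.
 * Fails with -EBUSY if the device is still open.
 */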
static int zram_remove(struct zram *zram)
{
	struct block_device *bdev;

	bdev = bdget_disk(zram->disk, 0);
	if (!bdev)
		return -ENOMEM;

	mutex_lock(&bdev->bd_mutex);
	if (bdev->bd_openers || zram->claim) {
		mutex_unlock(&bdev->bd_mutex);
		bdput(bdev);
		return -EBUSY;
	}

	zram->claim = true;
	mutex_unlock(&bdev->bd_mutex);

	/*
	 * Remove sysfs first, so no one will perform a disksize
	 * store while we destroy the device. This also helps during
	 * hot_remove -- zram_reset_device() is the last holder of
	 * ->init_lock, no later/concurrent disksize_store() or any
	 * other sysfs handlers are possible.
	 */
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	zram_reset_device(zram);
	bdput(bdev);

	pr_info("Removed device: %s\n", zram->disk->disk_name);

	blk_cleanup_queue(zram->disk->queue);
	del_gendisk(zram->disk);
	put_disk(zram->disk);
	kfree(zram);
	return 0;
}

/* zram-control sysfs attributes */

/*
 * NOTE: hot_add attribute is not the usual read-only sysfs attribute. In a
 * sense that reading from this file does alter the state of your system -- it
 * creates a new un-initialized zram device and returns back this device's
 * device_id (or an error code if it fails to create a new device).
 */
static ssize_t hot_add_show(struct class *class,
			struct class_attribute *attr,
			char *buf)
{
	int ret;

	mutex_lock(&zram_index_mutex);
	ret = zram_add();
	mutex_unlock(&zram_index_mutex);

	if (ret < 0)
		return ret;
	return scnprintf(buf, PAGE_SIZE, "%d\n", ret);
}
static CLASS_ATTR_RO(hot_add);

static ssize_t hot_remove_store(struct class *class,
			struct class_attribute *attr,
			const char *buf,
			size_t count)
{
	struct zram *zram;
	int ret, dev_id;

	/* dev_id is gendisk->first_minor, which is `int' */
	ret = kstrtoint(buf, 10, &dev_id);
	if (ret)
		return ret;
	if (dev_id < 0)
		return -EINVAL;

	mutex_lock(&zram_index_mutex);

	zram = idr_find(&zram_index_idr, dev_id);
	if (zram) {
		ret = zram_remove(zram);
		if (!ret)
			idr_remove(&zram_index_idr, dev_id);
	} else {
		ret = -ENODEV;
	}

	mutex_unlock(&zram_index_mutex);
	return ret ? ret : count;
}
static CLASS_ATTR_WO(hot_remove);

static struct attribute *zram_control_class_attrs[] = {
	&class_attr_hot_add.attr,
	&class_attr_hot_remove.attr,
	NULL,
};
ATTRIBUTE_GROUPS(zram_control_class);

static struct class zram_control_class = {
	.name		= "zram-control",
	.owner		= THIS_MODULE,
	.class_groups	= zram_control_class_groups,
};

static int zram_remove_cb(int id, void *ptr, void *data)
{
	zram_remove(ptr);
	return 0;
}

static void destroy_devices(void)
{
	class_unregister(&zram_control_class);
	idr_for_each(&zram_index_idr, &zram_remove_cb, NULL);
	idr_destroy(&zram_index_idr);
	unregister_blkdev(zram_major, "zram");
	cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
}

static int __init zram_init(void)
{
	int ret;

	ret = cpuhp_setup_state_multi(CPUHP_ZCOMP_PREPARE, "block/zram:prepare",
				      zcomp_cpu_up_prepare, zcomp_cpu_dead);
	if (ret < 0)
		return ret;

	ret = class_register(&zram_control_class);
	if (ret) {
		pr_err("Unable to register zram-control class\n");
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return ret;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_err("Unable to get major number\n");
		class_unregister(&zram_control_class);
		cpuhp_remove_multi_state(CPUHP_ZCOMP_PREPARE);
		return -EBUSY;
	}

	while (num_devices != 0) {
		mutex_lock(&zram_index_mutex);
		ret = zram_add();
		mutex_unlock(&zram_index_mutex);
		if (ret < 0)
			goto out_error;
		num_devices--;
	}

	return 0;

out_error:
	destroy_devices();
	return ret;
}

static void __exit zram_exit(void)
{
	destroy_devices();
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of pre-created zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");