/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>

#include "zram_drv.h"

/* Globals */
static int zram_major;
static struct zram *zram_devices;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

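/*
 * ZRAM_ATTR_RO(name) expands to a read-only sysfs attribute whose show
 * handler prints the 64-bit counter zram->stats.<name> for the device
 * that owns the sysfs node.
 */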
#define ZRAM_ATTR_RO(name)						\
static ssize_t zram_attr_##name##_show(struct device *d,		\
				struct device_attribute *attr, char *b)	\
{									\
	struct zram *zram = dev_to_zram(d);				\
	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
		(u64)atomic64_read(&zram->stats.name));			\
}									\
static struct device_attribute dev_attr_##name =			\
	__ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL);

static inline int init_done(struct zram *zram)
{
	return zram->meta != NULL;
}
static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n",
		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		val = zs_get_total_pages(meta->mem_pool);
	}
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = zram->max_comp_streams;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

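/*
 * mem_limit caps the size of the zsmalloc pool, in pages.  A value of
 * zero (the default) means no limit.  The store handler accepts a
 * human-readable size (e.g. "256M") and rounds it up to whole pages.
 */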
static ssize_t mem_limit_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = zram->limit_pages;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_limit_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 limit;
	char *tmp;
	struct zram *zram = dev_to_zram(dev);

	limit = memparse(buf, &tmp);
	if (buf == tmp) /* no chars parsed, invalid input */
		return -EINVAL;

	down_write(&zram->init_lock);
	zram->limit_pages = PAGE_ALIGN(limit) >> PAGE_SHIFT;
	up_write(&zram->init_lock);

	return len;
}

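/*
 * mem_used_max reports the peak number of pages the zsmalloc pool has
 * used since the watermark was last reset.  Writing "0" resets the
 * watermark to the pool's current size; any other value is rejected.
 */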
static ssize_t mem_used_max_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	if (init_done(zram))
		val = atomic_long_read(&zram->stats.max_used_pages);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val << PAGE_SHIFT);
}

static ssize_t mem_used_max_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int err;
	unsigned long val;
	struct zram *zram = dev_to_zram(dev);

	err = kstrtoul(buf, 10, &val);
	if (err || val != 0)
		return -EINVAL;

	down_read(&zram->init_lock);
	if (init_done(zram)) {
		struct zram_meta *meta = zram->meta;
		atomic_long_set(&zram->stats.max_used_pages,
				zs_get_total_pages(meta->mem_pool));
	}
	up_read(&zram->init_lock);

	return len;
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int num;
	struct zram *zram = dev_to_zram(dev);
	int ret;

	ret = kstrtoint(buf, 0, &num);
	if (ret < 0)
		return ret;
	if (num < 1)
		return -EINVAL;

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		if (!zcomp_set_max_streams(zram->comp, num)) {
			pr_info("Cannot change max compression streams\n");
			ret = -EINVAL;
			goto out;
		}
	}

	zram->max_comp_streams = num;
	ret = len;
out:
	up_write(&zram->init_lock);
	return ret;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}
	strlcpy(zram->compressor, buf, sizeof(zram->compressor));
	up_write(&zram->init_lock);
	return len;
}

/* flag operations need meta->tb_lock */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].value & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].value &= ~BIT(flag);
}

static size_t zram_get_obj_size(struct zram_meta *meta, u32 index)
{
	return meta->table[index].value & (BIT(ZRAM_FLAG_SHIFT) - 1);
}

static void zram_set_obj_size(struct zram_meta *meta,
					u32 index, size_t size)
{
	unsigned long flags = meta->table[index].value >> ZRAM_FLAG_SHIFT;

	meta->table[index].value = (flags << ZRAM_FLAG_SHIFT) | size;
}

static inline int is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline int valid_io_request(struct zram *zram, struct bio *bio)
{
	u64 start, end, bound;

	/* unaligned request */
	if (unlikely(bio->bi_iter.bi_sector &
		     (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return 0;
	if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return 0;

	start = bio->bi_iter.bi_sector;
	end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return 0;

	/* I/O request is valid */
	return 1;
}

static void zram_meta_free(struct zram_meta *meta)
{
	zs_destroy_pool(meta->mem_pool);
	vfree(meta->table);
	kfree(meta);
}

static struct zram_meta *zram_meta_alloc(u64 disksize)
{
	size_t num_pages;
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
	if (!meta)
		goto out;

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto free_meta;
	}

	meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto free_table;
	}

	return meta;

free_table:
	vfree(meta->table);
free_meta:
	kfree(meta);
	meta = NULL;
out:
	return meta;
}

static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

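/* Return 1 if the page at @ptr contains only zero bytes, 0 otherwise. */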
static int page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return 0;
	}

	return 1;
}

static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

/*
 * To protect concurrent access to the same index entry,
 * the caller should hold this table index entry's bit_spinlock to
 * indicate that this index entry is being accessed.
 */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;

	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic64_dec(&zram->stats.zero_pages);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);

	atomic64_sub(zram_get_obj_size(meta, index),
			&zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);

	meta->table[index].handle = 0;
	zram_set_obj_size(meta, index, 0);
}

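/*
 * Decompress the object stored at @index into @mem, which must be a
 * full page.  Unallocated and ZRAM_ZERO entries are satisfied by
 * clearing the destination page.
 */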
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = 0;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	size_t size;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	handle = meta->table[index].handle;
	size = zram_get_obj_size(meta, index);

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE)
		copy_page(mem, cmem);
	else
		ret = zcomp_decompress(zram->comp, cmem, size, mem);
	zs_unmap_object(meta->mem_pool, handle);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		return ret;
	}

	return 0;
}

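/*
 * Read the data backing @bvec at page @index.  Whole-page reads are
 * decompressed straight into the target page; partial reads go through
 * a temporary buffer and only the requested byte range is copied out.
 */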
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		handle_zero_page(bvec);
		return 0;
	}
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_info("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}

	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		goto out_cleanup;

	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

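/*
 * Lock-free update of the max_used_pages watermark: retry with
 * atomic_long_cmpxchg() until the stored maximum is at least @pages.
 */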
static inline void update_used_max(struct zram *zram,
					const unsigned long pages)
{
	int old_max, cur_max;

	old_max = atomic_long_read(&zram->stats.max_used_pages);

	do {
		cur_max = old_max;
		if (pages > cur_max)
			old_max = atomic_long_cmpxchg(
				&zram->stats.max_used_pages, cur_max, pages);
	} while (old_max != cur_max);
}

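/*
 * Compress and store the data in @bvec at page @index.  Partial writes
 * first read back the existing page (read-modify-write).  Zero-filled
 * pages are recorded with ZRAM_ZERO instead of being stored, and pages
 * that compress poorly are stored uncompressed (PAGE_SIZE).
 */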
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	size_t clen;
	unsigned long handle;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	struct zcomp_strm *zstrm;
	bool locked = false;
	unsigned long alloced_pages;

	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

	zstrm = zcomp_strm_find(zram->comp);
	locked = true;
	user_mem = kmap_atomic(page);

	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		if (user_mem)
			kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

		atomic64_inc(&zram->stats.zero_pages);
		ret = 0;
		goto out;
	}

	ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}

	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}
	src = zstrm->buffer;
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		if (is_partial_io(bvec))
			src = uncmem;
	}

	handle = zs_malloc(meta->mem_pool, clen);
	if (!handle) {
		pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}

	alloced_pages = zs_get_total_pages(meta->mem_pool);
	if (zram->limit_pages && alloced_pages > zram->limit_pages) {
		zs_free(meta->mem_pool, handle);
		ret = -ENOMEM;
		goto out;
	}

	update_used_max(zram, alloced_pages);

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}

	zcomp_strm_release(zram->comp, zstrm);
	locked = false;
	zs_unmap_object(meta->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	zram_set_obj_size(meta, index, clen);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);

	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
out:
	if (locked)
		zcomp_strm_release(zram->comp, zstrm);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, int rw)
{
	int ret;

	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	if (unlikely(ret)) {
		if (rw == READ)
			atomic64_inc(&zram->stats.failed_reads);
		else
			atomic64_inc(&zram->stats.failed_writes);
	}

	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;
	struct zram_meta *meta = zram->meta;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some architectures,
	 * we could get a discard request pointing to a specific offset within a
	 * certain physical block.  Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory.  So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n <= (PAGE_SIZE - offset))
			return;

		n -= (PAGE_SIZE - offset);
		index++;
	}

	while (n >= PAGE_SIZE) {
		bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
		zram_free_page(zram, index);
		bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
		atomic64_inc(&zram->stats.notify_free);
		index++;
		n -= PAGE_SIZE;
	}
}

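/*
 * Tear down the device: free every object still held in the zsmalloc
 * pool, destroy the compression backend and metadata, and clear the
 * stats.  When @reset_capacity is true the disk capacity is also set
 * back to zero.
 */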
static void zram_reset_device(struct zram *zram, bool reset_capacity)
{
	size_t index;
	struct zram_meta *meta;

	down_write(&zram->init_lock);

	zram->limit_pages = 0;

	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	meta = zram->meta;
	/* Free all pages that are still in this zram device */
	for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
		unsigned long handle = meta->table[index].handle;
		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zcomp_destroy(zram->comp);
	zram->max_comp_streams = 1;

	zram_meta_free(zram->meta);
	zram->meta = NULL;
	/* Reset stats */
	memset(&zram->stats, 0, sizeof(zram->stats));

	zram->disksize = 0;
	if (reset_capacity)
		set_capacity(zram->disk, 0);

	up_write(&zram->init_lock);

	/*
	 * Revalidate disk out of the init_lock to avoid lockdep splat.
	 * It's okay because disk's capacity is protected by init_lock
	 * so that revalidate_disk always sees up-to-date capacity.
	 */
	if (reset_capacity)
		revalidate_disk(zram->disk);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram_meta *meta;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	disksize = PAGE_ALIGN(disksize);
	meta = zram_meta_alloc(disksize);
	if (!meta)
		return -ENOMEM;

	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
	if (IS_ERR(comp)) {
		pr_info("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_destroy_comp;
	}

	zram->meta = meta;
	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);

	/*
	 * Revalidate disk out of the init_lock to avoid lockdep splat.
	 * It's okay because disk's capacity is protected by init_lock
	 * so that revalidate_disk always sees up-to-date capacity.
	 */
	revalidate_disk(zram->disk);

	return len;

out_destroy_comp:
	up_write(&zram->init_lock);
	zcomp_destroy(comp);
out_free_meta:
	zram_meta_free(meta);
	return err;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);

	if (!bdev)
		return -ENOMEM;

	/* Do not reset an active device! */
	if (bdev->bd_holders) {
		ret = -EBUSY;
		goto out;
	}

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		goto out;

	if (!do_reset) {
		ret = -EINVAL;
		goto out;
	}

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	bdput(bdev);

	zram_reset_device(zram, true);
	return len;

out:
	bdput(bdev);
	return ret;
}

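/*
 * Walk the bio segment by segment.  Discard requests are handled
 * separately; for reads and writes, a bvec that crosses a zram page
 * boundary is split so that zram_bvec_rw() only ever operates on a
 * single page.
 */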
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset, rw;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio, 0);
		return;
	}

	rw = bio_data_dir(bio);
	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only operate on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;

			if (zram_bvec_rw(zram, &bv, index, offset, rw) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, rw) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, rw) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	down_read(&zram->init_lock);
	if (unlikely(!init_done(zram)))
		goto error;

	if (!valid_io_request(zram, bio)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	up_read(&zram->init_lock);

	return;

error:
	up_read(&zram->init_lock);
	bio_io_error(bio);
}

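/*
 * Called by the swap layer when a swap slot backed by this device is
 * freed, so the corresponding compressed page can be released
 * immediately instead of waiting for a later overwrite.
 */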
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;
	struct zram_meta *meta;

	zram = bdev->bd_disk->private_data;
	meta = zram->meta;

	bit_spin_lock(ZRAM_ACCESS, &meta->table[index].value);
	zram_free_page(zram, index);
	bit_spin_unlock(ZRAM_ACCESS, &meta->table[index].value);
	atomic64_inc(&zram->stats.notify_free);
}

static const struct block_device_operations zram_devops = {
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
		disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
static DEVICE_ATTR(mem_limit, S_IRUGO | S_IWUSR, mem_limit_show,
		mem_limit_store);
static DEVICE_ATTR(mem_used_max, S_IRUGO | S_IWUSR, mem_used_max_show,
		mem_used_max_store);
static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR,
		max_comp_streams_show, max_comp_streams_store);
static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR,
		comp_algorithm_show, comp_algorithm_store);

ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
ZRAM_ATTR_RO(failed_writes);
ZRAM_ATTR_RO(invalid_io);
ZRAM_ATTR_RO(notify_free);
ZRAM_ATTR_RO(zero_pages);
ZRAM_ATTR_RO(compr_data_size);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_num_reads.attr,
	&dev_attr_num_writes.attr,
	&dev_attr_failed_reads.attr,
	&dev_attr_failed_writes.attr,
	&dev_attr_invalid_io.attr,
	&dev_attr_notify_free.attr,
	&dev_attr_zero_pages.attr,
	&dev_attr_orig_data_size.attr,
	&dev_attr_compr_data_size.attr,
	&dev_attr_mem_used_total.attr,
	&dev_attr_mem_limit.attr,
	&dev_attr_mem_used_max.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
	NULL,
};

static struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

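/*
 * Allocate the request queue and gendisk for one zram device, configure
 * the queue limits (page-sized logical/physical blocks, discard support)
 * and register the device's sysfs attribute group.
 */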
static int create_device(struct zram *zram, int device_id)
{
	int ret = -ENOMEM;

	init_rwsem(&zram->init_lock);

	zram->queue = blk_alloc_queue(GFP_KERNEL);
	if (!zram->queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		goto out;
	}

	blk_queue_make_request(zram->queue, zram_make_request);
	zram->queue->queuedata = zram;

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_warn("Error allocating disk structure for device %d\n",
			device_id);
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = zram->queue;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	queue_flag_clear_unlocked(QUEUE_FLAG_ADD_RANDOM, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size (PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size.  So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		zram->disk->queue->limits.discard_zeroes_data = 1;
	else
		zram->disk->queue->limits.discard_zeroes_data = 0;
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_warn("Error creating sysfs group");
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
	zram->meta = NULL;
	zram->max_comp_streams = 1;
	return 0;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(zram->queue);
out:
	return ret;
}

static void destroy_device(struct zram *zram)
{
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	del_gendisk(zram->disk);
	put_disk(zram->disk);

	blk_cleanup_queue(zram->queue);
}

static int __init zram_init(void)
{
	int ret, dev_id;

	if (num_devices > max_num_devices) {
		pr_warn("Invalid value for num_devices: %u\n",
				num_devices);
		ret = -EINVAL;
		goto out;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_warn("Unable to get major number\n");
		ret = -EBUSY;
		goto out;
	}

	/* Allocate the device array and initialize each one */
	zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
	if (!zram_devices) {
		ret = -ENOMEM;
		goto unregister;
	}

	for (dev_id = 0; dev_id < num_devices; dev_id++) {
		ret = create_device(&zram_devices[dev_id], dev_id);
		if (ret)
			goto free_devices;
	}

	pr_info("Created %u device(s) ...\n", num_devices);

	return 0;

free_devices:
	while (dev_id)
		destroy_device(&zram_devices[--dev_id]);
	kfree(zram_devices);
unregister:
	unregister_blkdev(zram_major, "zram");
out:
	return ret;
}

static void __exit zram_exit(void)
{
	int i;
	struct zram *zram;

	for (i = 0; i < num_devices; i++) {
		zram = &zram_devices[i];

		destroy_device(zram);
		/*
		 * Shouldn't access zram->disk after destroy_device
		 * because destroy_device already released zram->disk.
		 */
		zram_reset_device(zram, false);
	}

	unregister_blkdev(zram_major, "zram");

	kfree(zram_devices);
	pr_debug("Cleanup done!\n");
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");