/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/string.h>
#include <linux/vmalloc.h>
#include <linux/err.h>
#include "zram_drv.h"

/* Globals */
static int zram_major;
static struct zram *zram_devices;
static const char *default_compressor = "lzo";

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

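/*
 * ZRAM_ATTR_RO(name) generates a read-only sysfs attribute that
 * prints the 64-bit counter zram->stats.name for the device,
 * e.g. /sys/block/zram<id>/num_reads.
 */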
#define ZRAM_ATTR_RO(name)						\
static ssize_t zram_attr_##name##_show(struct device *d,		\
				struct device_attribute *attr, char *b)	\
{									\
	struct zram *zram = dev_to_zram(d);				\
	return scnprintf(b, PAGE_SIZE, "%llu\n",			\
		(u64)atomic64_read(&zram->stats.name));			\
}									\
static struct device_attribute dev_attr_##name =			\
	__ATTR(name, S_IRUGO, zram_attr_##name##_show, NULL);

static inline int init_done(struct zram *zram)
{
	return zram->meta != NULL;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", zram->disksize);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u32 val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = init_done(zram);
	up_read(&zram->init_lock);
	return scnprintf(buf, PAGE_SIZE, "%u\n", val);
}

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return scnprintf(buf, PAGE_SIZE, "%llu\n",
		(u64)(atomic64_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);
	struct zram_meta *meta = zram->meta;

	down_read(&zram->init_lock);
	if (init_done(zram))
		val = zs_get_total_size_bytes(meta->mem_pool);
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%llu\n", val);
}

static ssize_t max_comp_streams_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	int val;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	val = zram->max_comp_streams;
	up_read(&zram->init_lock);

	return scnprintf(buf, PAGE_SIZE, "%d\n", val);
}

static ssize_t max_comp_streams_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int num;
	struct zram *zram = dev_to_zram(dev);
	int ret;
	ret = kstrtoint(buf, 0, &num);
	if (ret < 0)
		return ret;
	if (num < 1)
		return -EINVAL;
	down_write(&zram->init_lock);
	if (init_done(zram)) {
		if (!zcomp_set_max_streams(zram->comp, num)) {
			pr_info("Cannot change max compression streams\n");
			ret = -EINVAL;
			goto out;
		}
	}
	zram->max_comp_streams = num;
	ret = len;
out:
	up_write(&zram->init_lock);
	return ret;
}

static ssize_t comp_algorithm_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	size_t sz;
	struct zram *zram = dev_to_zram(dev);

	down_read(&zram->init_lock);
	sz = zcomp_available_show(zram->compressor, buf);
	up_read(&zram->init_lock);

	return sz;
}

static ssize_t comp_algorithm_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct zram *zram = dev_to_zram(dev);
	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		pr_info("Can't change algorithm for initialized device\n");
		return -EBUSY;
	}
	strlcpy(zram->compressor, buf, sizeof(zram->compressor));
	up_write(&zram->init_lock);
	return len;
}

/* flag operations need meta->tb_lock */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].flags &= ~BIT(flag);
}

static inline int is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline int valid_io_request(struct zram *zram, struct bio *bio)
{
	u64 start, end, bound;
	/* unaligned request */
	if (unlikely(bio->bi_iter.bi_sector &
		     (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return 0;
	if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return 0;

	start = bio->bi_iter.bi_sector;
	end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return 0;

	/* I/O request is valid */
	return 1;
}

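/*
 * Per-device metadata: a zsmalloc pool holding the compressed data,
 * plus a table with one entry per PAGE_SIZE-sized disk page mapping
 * it to its zsmalloc handle and compressed size.
 */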
static void zram_meta_free(struct zram_meta *meta)
{
	zs_destroy_pool(meta->mem_pool);
	vfree(meta->table);
	kfree(meta);
}

static struct zram_meta *zram_meta_alloc(u64 disksize)
{
	size_t num_pages;
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
	if (!meta)
		goto out;

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto free_meta;
	}

	meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto free_table;
	}

	rwlock_init(&meta->tb_lock);
	return meta;

free_table:
	vfree(meta->table);
free_meta:
	kfree(meta);
	meta = NULL;
out:
	return meta;
}

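/* Advance (index, offset) past @bvec; @index counts PAGE_SIZE units. */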
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

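/* Return 1 if the page contains only zero bytes, checked one word at a time. */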
static int page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return 0;
	}

	return 1;
}

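/*
 * Satisfy a read of a zero-filled page without touching the pool:
 * simply clear the requested region of the bio page.
 */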
static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

/* NOTE: caller should hold meta->tb_lock for writing */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;
	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic64_dec(&zram->stats.zero_pages);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);
	atomic64_sub(meta->table[index].size, &zram->stats.compr_data_size);
	atomic64_dec(&zram->stats.pages_stored);
	meta->table[index].handle = 0;
	meta->table[index].size = 0;
}

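/*
 * Decompress the page at @index into @mem. Unallocated or zero-filled
 * slots are satisfied with clear_page(); entries of exactly PAGE_SIZE
 * were stored uncompressed and are copied back as-is.
 */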
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = 0;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	u16 size;

	read_lock(&meta->tb_lock);
	handle = meta->table[index].handle;
	size = meta->table[index].size;

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		read_unlock(&meta->tb_lock);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE)
		copy_page(mem, cmem);
	else
		ret = zcomp_decompress(zram->comp, cmem, size, mem);
	zs_unmap_object(meta->mem_pool, handle);
	read_unlock(&meta->tb_lock);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		atomic64_inc(&zram->stats.failed_reads);
		return ret;
	}
	return 0;
}

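/*
 * Read one bio vector. A partial-page read decompresses into a bounce
 * buffer first, since decompression always produces a full page.
 */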
static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	read_lock(&meta->tb_lock);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		read_unlock(&meta->tb_lock);
		handle_zero_page(bvec);
		return 0;
	}
	read_unlock(&meta->tb_lock);
	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_info("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}
	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret))
		goto out_cleanup;
	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

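/*
 * Compress and store one bio vector. A partial-page write performs a
 * read-modify-write of the whole page, a zero-filled page is recorded
 * with just the ZRAM_ZERO flag, and a page that compresses poorly
 * (clen > max_zpage_size) is stored uncompressed.
 */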
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	size_t clen;
	unsigned long handle;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	struct zcomp_strm *zstrm;
	bool locked = false;
	page = bvec->bv_page;
	if (is_partial_io(bvec)) {
		/*
		 * This is a partial I/O. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

	zstrm = zcomp_strm_find(zram->comp);
	locked = true;
	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		write_lock(&zram->meta->tb_lock);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		write_unlock(&zram->meta->tb_lock);
		atomic64_inc(&zram->stats.zero_pages);
		ret = 0;
		goto out;
	}
	ret = zcomp_compress(zram->comp, zstrm, uncmem, &clen);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}
	if (unlikely(ret)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}
	src = zstrm->buffer;
	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		if (is_partial_io(bvec))
			src = uncmem;
	}
	handle = zs_malloc(meta->mem_pool, clen);
	if (!handle) {
		pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}
	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);
	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}
	zcomp_strm_release(zram->comp, zstrm);
	locked = false;
	zs_unmap_object(meta->mem_pool, handle);
	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	write_lock(&zram->meta->tb_lock);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	meta->table[index].size = clen;
	write_unlock(&zram->meta->tb_lock);
	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_data_size);
	atomic64_inc(&zram->stats.pages_stored);
out:
	if (locked)
		zcomp_strm_release(zram->comp, zstrm);
	if (is_partial_io(bvec))
		kfree(uncmem);
	if (ret)
		atomic64_inc(&zram->stats.failed_writes);
	return ret;
}

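/* Dispatch one bio vector to the read or write path and update the stats. */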
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, struct bio *bio)
{
	int ret;
	int rw = bio_data_dir(bio);
	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	return ret;
}

/*
 * zram_bio_discard - handler on discard request
 * @index: physical block index in PAGE_SIZE units
 * @offset: byte offset within physical block
 */
static void zram_bio_discard(struct zram *zram, u32 index,
			     int offset, struct bio *bio)
{
	size_t n = bio->bi_iter.bi_size;

	/*
	 * zram manages data in physical block size units. Because logical block
	 * size isn't identical with physical block size on some arch, we
	 * could get a discard request pointing to a specific offset within a
	 * certain physical block.  Although we can handle this request by
	 * reading that physical block and decompressing and partially zeroing
	 * and re-compressing and then re-storing it, this isn't reasonable
	 * because our intent with a discard request is to save memory.  So
	 * skipping this logical block is appropriate here.
	 */
	if (offset) {
		if (n < offset)
			return;

		n -= offset;
		index++;
	}

	while (n >= PAGE_SIZE) {
		/*
		 * Discard request can be large so the lock hold times could be
		 * lengthy.  So take the lock once per page.
		 */
		write_lock(&zram->meta->tb_lock);
		zram_free_page(zram, index);
		write_unlock(&zram->meta->tb_lock);
		index++;
		n -= PAGE_SIZE;
	}
}

static void zram_reset_device(struct zram *zram, bool reset_capacity)
{
	size_t index;
	struct zram_meta *meta;

	down_write(&zram->init_lock);
	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	meta = zram->meta;
	/* Free all pages that are still in this zram device */
	for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
		unsigned long handle = meta->table[index].handle;
		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zcomp_destroy(zram->comp);
	zram->max_comp_streams = 1;

	zram_meta_free(zram->meta);
	zram->meta = NULL;
	/* Reset stats */
	memset(&zram->stats, 0, sizeof(zram->stats));

	zram->disksize = 0;
	if (reset_capacity)
		set_capacity(zram->disk, 0);
	up_write(&zram->init_lock);
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zcomp *comp;
	struct zram_meta *meta;
	struct zram *zram = dev_to_zram(dev);
	int err;

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	disksize = PAGE_ALIGN(disksize);
	meta = zram_meta_alloc(disksize);
	if (!meta)
		return -ENOMEM;
	comp = zcomp_create(zram->compressor, zram->max_comp_streams);
	if (IS_ERR(comp)) {
		pr_info("Cannot initialise %s compressing backend\n",
				zram->compressor);
		err = PTR_ERR(comp);
		goto out_free_meta;
	}

	down_write(&zram->init_lock);
	if (init_done(zram)) {
		pr_info("Cannot change disksize for initialized device\n");
		err = -EBUSY;
		goto out_destroy_comp;
	}

	zram->meta = meta;
	zram->comp = comp;
	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	up_write(&zram->init_lock);
	return len;
out_destroy_comp:
	up_write(&zram->init_lock);
	zcomp_destroy(comp);
out_free_meta:
	zram_meta_free(meta);
	return err;
}
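
/*
 * Typical device setup from userspace (an illustrative sketch; the
 * device name and size below are assumptions, not taken from this
 * file):
 *
 *   echo lzo > /sys/block/zram0/comp_algorithm
 *   echo 256M > /sys/block/zram0/disksize
 *   mkswap /dev/zram0 && swapon /dev/zram0
 *
 * disksize must be written last: disksize_store() and
 * comp_algorithm_store() both refuse changes once the device is
 * initialized.
 */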

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);

	if (!bdev)
		return -ENOMEM;

	/* Do not reset an active device! */
	if (bdev->bd_holders) {
		ret = -EBUSY;
		goto out;
	}

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		goto out;
	if (!do_reset) {
		ret = -EINVAL;
		goto out;
	}

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	bdput(bdev);
	zram_reset_device(zram, true);
	return len;

out:
	bdput(bdev);
	return ret;
}

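/*
 * Walk the bio one page at a time. A bio vector that crosses a zram
 * page boundary is split so that zram_bvec_rw() only ever operates
 * within a single page.
 */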
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;
	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;
	if (unlikely(bio->bi_rw & REQ_DISCARD)) {
		zram_bio_discard(zram, index, offset, bio);
		bio_endio(bio, 0);
		return;
	}

	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only make operation on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;
			if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, bio) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;
	down_read(&zram->init_lock);
	if (unlikely(!init_done(zram)))
		goto error;
	if (!valid_io_request(zram, bio)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	up_read(&zram->init_lock);
	return;

error:
	up_read(&zram->init_lock);
	bio_io_error(bio);
}

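/*
 * Called by the swap layer when a swap slot on this device is freed,
 * so the compressed page can be released immediately rather than
 * lingering until the slot is overwritten.
 */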
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;
	struct zram_meta *meta;
	zram = bdev->bd_disk->private_data;
	meta = zram->meta;
	write_lock(&meta->tb_lock);
	zram_free_page(zram, index);
	write_unlock(&meta->tb_lock);
	atomic64_inc(&zram->stats.notify_free);
}

static const struct block_device_operations zram_devops = {
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
		disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);
static DEVICE_ATTR(max_comp_streams, S_IRUGO | S_IWUSR,
		max_comp_streams_show, max_comp_streams_store);
static DEVICE_ATTR(comp_algorithm, S_IRUGO | S_IWUSR,
		comp_algorithm_show, comp_algorithm_store);
ZRAM_ATTR_RO(num_reads);
ZRAM_ATTR_RO(num_writes);
ZRAM_ATTR_RO(failed_reads);
ZRAM_ATTR_RO(failed_writes);
ZRAM_ATTR_RO(invalid_io);
ZRAM_ATTR_RO(notify_free);
ZRAM_ATTR_RO(zero_pages);
ZRAM_ATTR_RO(compr_data_size);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_num_reads.attr,
	&dev_attr_num_writes.attr,
	&dev_attr_failed_reads.attr,
	&dev_attr_failed_writes.attr,
	&dev_attr_invalid_io.attr,
	&dev_attr_notify_free.attr,
	&dev_attr_zero_pages.attr,
	&dev_attr_orig_data_size.attr,
	&dev_attr_compr_data_size.attr,
	&dev_attr_mem_used_total.attr,
	&dev_attr_max_comp_streams.attr,
	&dev_attr_comp_algorithm.attr,
	NULL,
};

static struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

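/*
 * Allocate the request queue and gendisk for one device, advertise
 * PAGE_SIZE-aligned I/O limits plus discard support, and register the
 * sysfs attribute group.
 */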
static int create_device(struct zram *zram, int device_id)
{
	int ret = -ENOMEM;
	init_rwsem(&zram->init_lock);
	zram->queue = blk_alloc_queue(GFP_KERNEL);
	if (!zram->queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		goto out;
	}

	blk_queue_make_request(zram->queue, zram_make_request);
	zram->queue->queuedata = zram;

	 /* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_warn("Error allocating disk structure for device %d\n",
			device_id);
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = zram->queue;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);
	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);
	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);
	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZE sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);
	zram->disk->queue->limits.discard_granularity = PAGE_SIZE;
	zram->disk->queue->limits.max_discard_sectors = UINT_MAX;
	/*
	 * zram_bio_discard() will clear all logical blocks if logical block
	 * size is identical with physical block size (PAGE_SIZE). But if it is
	 * different, we will skip discarding some parts of logical blocks in
	 * the part of the request range which isn't aligned to physical block
	 * size.  So we can't ensure that all discarded logical blocks are
	 * zeroed.
	 */
	if (ZRAM_LOGICAL_BLOCK_SIZE == PAGE_SIZE)
		zram->disk->queue->limits.discard_zeroes_data = 1;
	else
		zram->disk->queue->limits.discard_zeroes_data = 0;
	queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, zram->disk->queue);
	add_disk(zram->disk);
	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_warn("Error creating sysfs group\n");
		goto out_free_disk;
	}
	strlcpy(zram->compressor, default_compressor, sizeof(zram->compressor));
	zram->meta = NULL;
	zram->max_comp_streams = 1;
	return 0;
out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(zram->queue);
out:
	return ret;
}

static void destroy_device(struct zram *zram)
{
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	del_gendisk(zram->disk);
	put_disk(zram->disk);
	blk_cleanup_queue(zram->queue);
}

static int __init zram_init(void)
{
	int ret, dev_id;
	if (num_devices > max_num_devices) {
		pr_warn("Invalid value for num_devices: %u\n",
				num_devices);
		ret = -EINVAL;
		goto out;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_warn("Unable to get major number\n");
		ret = -EBUSY;
		goto out;
	}

	/* Allocate the device array and initialize each one */
	zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
	if (!zram_devices) {
		ret = -ENOMEM;
		goto unregister;
	}
	for (dev_id = 0; dev_id < num_devices; dev_id++) {
		ret = create_device(&zram_devices[dev_id], dev_id);
		if (ret)
			goto free_devices;
	}

	pr_info("Created %u device(s) ...\n", num_devices);

	return 0;

free_devices:
	while (dev_id)
		destroy_device(&zram_devices[--dev_id]);
	kfree(zram_devices);
unregister:
	unregister_blkdev(zram_major, "zram");
out:
	return ret;
}

static void __exit zram_exit(void)
{
	int i;
	struct zram *zram;
	for (i = 0; i < num_devices; i++) {
		zram = &zram_devices[i];
		destroy_device(zram);
		/*
		 * Shouldn't access zram->disk after destroy_device
		 * because destroy_device already released zram->disk.
		 */
		zram_reset_device(zram, false);
	}

	unregister_blkdev(zram_major, "zram");
	kfree(zram_devices);
	pr_debug("Cleanup done!\n");
}

module_init(zram_init);
module_exit(zram_exit);
module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");