/*
 * Compressed RAM block device
 *
 * Copyright (C) 2008, 2009, 2010  Nitin Gupta
 *               2012, 2013 Minchan Kim
 *
 * This code is released using a dual license strategy: BSD/GPL
 * You can choose the licence that better fits your requirements.
 *
 * Released under the terms of 3-clause BSD License
 * Released under the terms of GNU General Public License Version 2.0
 *
 */

#define KMSG_COMPONENT "zram"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#ifdef CONFIG_ZRAM_DEBUG
#define DEBUG
#endif

#include <linux/module.h>
#include <linux/kernel.h>
#include <linux/bio.h>
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>
#include <linux/device.h>
#include <linux/genhd.h>
#include <linux/highmem.h>
#include <linux/slab.h>
#include <linux/lzo.h>
#include <linux/string.h>
#include <linux/vmalloc.h>

#include "zram_drv.h"

/* Globals */
static int zram_major;
static struct zram *zram_devices;

/* Module params (documentation at end) */
static unsigned int num_devices = 1;

static inline int init_done(struct zram *zram)
{
	return zram->meta != NULL;
}

static inline struct zram *dev_to_zram(struct device *dev)
{
	return (struct zram *)dev_to_disk(dev)->private_data;
}

static ssize_t disksize_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n", zram->disksize);
}

static ssize_t initstate_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%u\n", init_done(zram));
}

static ssize_t num_reads_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n",
			(u64)atomic64_read(&zram->stats.num_reads));
}

static ssize_t num_writes_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n",
			(u64)atomic64_read(&zram->stats.num_writes));
}

static ssize_t invalid_io_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n",
			(u64)atomic64_read(&zram->stats.invalid_io));
}

static ssize_t notify_free_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n",
			(u64)atomic64_read(&zram->stats.notify_free));
}

static ssize_t zero_pages_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%u\n", atomic_read(&zram->stats.pages_zero));
}

static ssize_t orig_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n",
		(u64)(atomic_read(&zram->stats.pages_stored)) << PAGE_SHIFT);
}

static ssize_t compr_data_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct zram *zram = dev_to_zram(dev);

	return sprintf(buf, "%llu\n",
			(u64)atomic64_read(&zram->stats.compr_size));
}

static ssize_t mem_used_total_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	u64 val = 0;
	struct zram *zram = dev_to_zram(dev);
	struct zram_meta *meta = zram->meta;

	down_read(&zram->init_lock);
	if (init_done(zram))
		val = zs_get_total_size_bytes(meta->mem_pool);
	up_read(&zram->init_lock);

	return sprintf(buf, "%llu\n", val);
}

/* flag operations need meta->tb_lock */
static int zram_test_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	return meta->table[index].flags & BIT(flag);
}

static void zram_set_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].flags |= BIT(flag);
}

static void zram_clear_flag(struct zram_meta *meta, u32 index,
			enum zram_pageflags flag)
{
	meta->table[index].flags &= ~BIT(flag);
}

static inline int is_partial_io(struct bio_vec *bvec)
{
	return bvec->bv_len != PAGE_SIZE;
}

/*
 * Check if request is within bounds and aligned on zram logical blocks.
 */
static inline int valid_io_request(struct zram *zram, struct bio *bio)
{
	u64 start, end, bound;

	/* unaligned request */
	if (unlikely(bio->bi_iter.bi_sector &
		     (ZRAM_SECTOR_PER_LOGICAL_BLOCK - 1)))
		return 0;
	if (unlikely(bio->bi_iter.bi_size & (ZRAM_LOGICAL_BLOCK_SIZE - 1)))
		return 0;

	start = bio->bi_iter.bi_sector;
	end = start + (bio->bi_iter.bi_size >> SECTOR_SHIFT);
	bound = zram->disksize >> SECTOR_SHIFT;
	/* out of range */
	if (unlikely(start >= bound || end > bound || start > end))
		return 0;

	/* I/O request is valid */
	return 1;
}

static void zram_meta_free(struct zram_meta *meta)
{
	zs_destroy_pool(meta->mem_pool);
	kfree(meta->compress_workmem);
	free_pages((unsigned long)meta->compress_buffer, 1);
	vfree(meta->table);
	kfree(meta);
}

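/*
 * Allocate per-device metadata: LZO workmem, a two-page compression
 * buffer, the page table sized for @disksize and the zsmalloc pool.
 */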
static struct zram_meta *zram_meta_alloc(u64 disksize)
{
	size_t num_pages;
	struct zram_meta *meta = kmalloc(sizeof(*meta), GFP_KERNEL);
	if (!meta)
		goto out;

	meta->compress_workmem = kzalloc(LZO1X_MEM_COMPRESS, GFP_KERNEL);
	if (!meta->compress_workmem)
		goto free_meta;

	meta->compress_buffer =
		(void *)__get_free_pages(GFP_KERNEL | __GFP_ZERO, 1);
	if (!meta->compress_buffer) {
		pr_err("Error allocating compressor buffer space\n");
		goto free_workmem;
	}

	num_pages = disksize >> PAGE_SHIFT;
	meta->table = vzalloc(num_pages * sizeof(*meta->table));
	if (!meta->table) {
		pr_err("Error allocating zram address table\n");
		goto free_buffer;
	}

	meta->mem_pool = zs_create_pool(GFP_NOIO | __GFP_HIGHMEM);
	if (!meta->mem_pool) {
		pr_err("Error creating memory pool\n");
		goto free_table;
	}

	rwlock_init(&meta->tb_lock);
	mutex_init(&meta->buffer_lock);
	return meta;

free_table:
	vfree(meta->table);
free_buffer:
	free_pages((unsigned long)meta->compress_buffer, 1);
free_workmem:
	kfree(meta->compress_workmem);
free_meta:
	kfree(meta);
	meta = NULL;
out:
	return meta;
}

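/* Advance index/offset past the bio_vec that was just handled. */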
static void update_position(u32 *index, int *offset, struct bio_vec *bvec)
{
	if (*offset + bvec->bv_len >= PAGE_SIZE)
		(*index)++;
	*offset = (*offset + bvec->bv_len) % PAGE_SIZE;
}

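/* Return 1 if the page at @ptr contains only zero bytes. */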
static int page_zero_filled(void *ptr)
{
	unsigned int pos;
	unsigned long *page;

	page = (unsigned long *)ptr;

	for (pos = 0; pos != PAGE_SIZE / sizeof(*page); pos++) {
		if (page[pos])
			return 0;
	}

	return 1;
}

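/* Satisfy a read of a zero-filled page by clearing the destination. */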
static void handle_zero_page(struct bio_vec *bvec)
{
	struct page *page = bvec->bv_page;
	void *user_mem;

	user_mem = kmap_atomic(page);
	if (is_partial_io(bvec))
		memset(user_mem + bvec->bv_offset, 0, bvec->bv_len);
	else
		clear_page(user_mem);
	kunmap_atomic(user_mem);

	flush_dcache_page(page);
}

/* NOTE: caller should hold meta->tb_lock with write-side */
static void zram_free_page(struct zram *zram, size_t index)
{
	struct zram_meta *meta = zram->meta;
	unsigned long handle = meta->table[index].handle;

	if (unlikely(!handle)) {
		/*
		 * No memory is allocated for zero filled pages.
		 * Simply clear zero page flag.
		 */
		if (zram_test_flag(meta, index, ZRAM_ZERO)) {
			zram_clear_flag(meta, index, ZRAM_ZERO);
			atomic_dec(&zram->stats.pages_zero);
		}
		return;
	}

	zs_free(meta->mem_pool, handle);

	atomic64_sub(meta->table[index].size, &zram->stats.compr_size);
	atomic_dec(&zram->stats.pages_stored);

	meta->table[index].handle = 0;
	meta->table[index].size = 0;
}

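/*
 * Decompress the slot at @index into @mem (one full page). Unallocated
 * and ZRAM_ZERO slots simply yield a cleared page.
 */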
static int zram_decompress_page(struct zram *zram, char *mem, u32 index)
{
	int ret = LZO_E_OK;
	size_t clen = PAGE_SIZE;
	unsigned char *cmem;
	struct zram_meta *meta = zram->meta;
	unsigned long handle;
	u16 size;

	read_lock(&meta->tb_lock);
	handle = meta->table[index].handle;
	size = meta->table[index].size;

	if (!handle || zram_test_flag(meta, index, ZRAM_ZERO)) {
		read_unlock(&meta->tb_lock);
		clear_page(mem);
		return 0;
	}

	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_RO);
	if (size == PAGE_SIZE)
		copy_page(mem, cmem);
	else
		ret = lzo1x_decompress_safe(cmem, size, mem, &clen);
	zs_unmap_object(meta->mem_pool, handle);
	read_unlock(&meta->tb_lock);

	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret != LZO_E_OK)) {
		pr_err("Decompression failed! err=%d, page=%u\n", ret, index);
		atomic64_inc(&zram->stats.failed_reads);
		return ret;
	}

	return 0;
}

static int zram_bvec_read(struct zram *zram, struct bio_vec *bvec,
			  u32 index, int offset, struct bio *bio)
{
	int ret;
	struct page *page;
	unsigned char *user_mem, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	page = bvec->bv_page;

	read_lock(&meta->tb_lock);
	if (unlikely(!meta->table[index].handle) ||
			zram_test_flag(meta, index, ZRAM_ZERO)) {
		read_unlock(&meta->tb_lock);
		handle_zero_page(bvec);
		return 0;
	}
	read_unlock(&meta->tb_lock);

	if (is_partial_io(bvec))
		/* Use a temporary buffer to decompress the page */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);

	user_mem = kmap_atomic(page);
	if (!is_partial_io(bvec))
		uncmem = user_mem;

	if (!uncmem) {
		pr_info("Unable to allocate temp memory\n");
		ret = -ENOMEM;
		goto out_cleanup;
	}

	ret = zram_decompress_page(zram, uncmem, index);
	/* Should NEVER happen. Return bio error if it does. */
	if (unlikely(ret != LZO_E_OK))
		goto out_cleanup;

	if (is_partial_io(bvec))
		memcpy(user_mem + bvec->bv_offset, uncmem + offset,
				bvec->bv_len);

	flush_dcache_page(page);
	ret = 0;
out_cleanup:
	kunmap_atomic(user_mem);
	if (is_partial_io(bvec))
		kfree(uncmem);
	return ret;
}

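/*
 * Compress the data in @bvec and store it at @index. A partial write
 * first decompresses the old page and merges the new data into it.
 */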
static int zram_bvec_write(struct zram *zram, struct bio_vec *bvec, u32 index,
			   int offset)
{
	int ret = 0;
	size_t clen;
	unsigned long handle;
	struct page *page;
	unsigned char *user_mem, *cmem, *src, *uncmem = NULL;
	struct zram_meta *meta = zram->meta;
	bool locked = false;

	page = bvec->bv_page;
	src = meta->compress_buffer;

	if (is_partial_io(bvec)) {
		/*
		 * This is a partial IO. We need to read the full page
		 * before writing the changes.
		 */
		uncmem = kmalloc(PAGE_SIZE, GFP_NOIO);
		if (!uncmem) {
			ret = -ENOMEM;
			goto out;
		}
		ret = zram_decompress_page(zram, uncmem, index);
		if (ret)
			goto out;
	}

	mutex_lock(&meta->buffer_lock);
	locked = true;
	user_mem = kmap_atomic(page);

	if (is_partial_io(bvec)) {
		memcpy(uncmem + offset, user_mem + bvec->bv_offset,
		       bvec->bv_len);
		kunmap_atomic(user_mem);
		user_mem = NULL;
	} else {
		uncmem = user_mem;
	}

	if (page_zero_filled(uncmem)) {
		kunmap_atomic(user_mem);
		/* Free memory associated with this sector now. */
		write_lock(&zram->meta->tb_lock);
		zram_free_page(zram, index);
		zram_set_flag(meta, index, ZRAM_ZERO);
		write_unlock(&zram->meta->tb_lock);

		atomic_inc(&zram->stats.pages_zero);
		ret = 0;
		goto out;
	}

	ret = lzo1x_1_compress(uncmem, PAGE_SIZE, src, &clen,
			       meta->compress_workmem);
	if (!is_partial_io(bvec)) {
		kunmap_atomic(user_mem);
		user_mem = NULL;
		uncmem = NULL;
	}

	if (unlikely(ret != LZO_E_OK)) {
		pr_err("Compression failed! err=%d\n", ret);
		goto out;
	}

	if (unlikely(clen > max_zpage_size)) {
		clen = PAGE_SIZE;
		src = NULL;
		if (is_partial_io(bvec))
			src = uncmem;
	}

	handle = zs_malloc(meta->mem_pool, clen);
	if (!handle) {
		pr_info("Error allocating memory for compressed page: %u, size=%zu\n",
			index, clen);
		ret = -ENOMEM;
		goto out;
	}
	cmem = zs_map_object(meta->mem_pool, handle, ZS_MM_WO);

	if ((clen == PAGE_SIZE) && !is_partial_io(bvec)) {
		src = kmap_atomic(page);
		copy_page(cmem, src);
		kunmap_atomic(src);
	} else {
		memcpy(cmem, src, clen);
	}

	zs_unmap_object(meta->mem_pool, handle);

	/*
	 * Free memory associated with this sector
	 * before overwriting unused sectors.
	 */
	write_lock(&zram->meta->tb_lock);
	zram_free_page(zram, index);

	meta->table[index].handle = handle;
	meta->table[index].size = clen;
	write_unlock(&zram->meta->tb_lock);

	/* Update stats */
	atomic64_add(clen, &zram->stats.compr_size);
	atomic_inc(&zram->stats.pages_stored);
out:
	if (locked)
		mutex_unlock(&meta->buffer_lock);
	if (is_partial_io(bvec))
		kfree(uncmem);

	if (ret)
		atomic64_inc(&zram->stats.failed_writes);
	return ret;
}

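/* Dispatch one bio_vec to the read or write path and bump the stats. */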
static int zram_bvec_rw(struct zram *zram, struct bio_vec *bvec, u32 index,
			int offset, struct bio *bio)
{
	int ret;
	int rw = bio_data_dir(bio);

	if (rw == READ) {
		atomic64_inc(&zram->stats.num_reads);
		ret = zram_bvec_read(zram, bvec, index, offset, bio);
	} else {
		atomic64_inc(&zram->stats.num_writes);
		ret = zram_bvec_write(zram, bvec, index, offset);
	}

	return ret;
}

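/*
 * Free every compressed page plus the metadata, returning the device to
 * its uninitialized state. @reset_capacity also zeroes the disk size.
 */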
static void zram_reset_device(struct zram *zram, bool reset_capacity)
{
	size_t index;
	struct zram_meta *meta;

	down_write(&zram->init_lock);
	if (!init_done(zram)) {
		up_write(&zram->init_lock);
		return;
	}

	meta = zram->meta;
	/* Free all pages that are still in this zram device */
	for (index = 0; index < zram->disksize >> PAGE_SHIFT; index++) {
		unsigned long handle = meta->table[index].handle;
		if (!handle)
			continue;

		zs_free(meta->mem_pool, handle);
	}

	zram_meta_free(zram->meta);
	zram->meta = NULL;
	/* Reset stats */
	memset(&zram->stats, 0, sizeof(zram->stats));

	zram->disksize = 0;
	if (reset_capacity)
		set_capacity(zram->disk, 0);
	up_write(&zram->init_lock);
}

static void zram_init_device(struct zram *zram, struct zram_meta *meta)
{
	if (zram->disksize > 2 * (totalram_pages << PAGE_SHIFT)) {
		pr_info(
		"There is little point creating a zram of greater than "
		"twice the size of memory since we expect a 2:1 compression "
		"ratio. Note that zram uses about 0.1%% of the size of "
		"the disk when not in use so a huge zram is "
		"wasteful.\n"
		"\tMemory Size: %lu kB\n"
		"\tSize you selected: %llu kB\n"
		"Continuing anyway ...\n",
		(totalram_pages << PAGE_SHIFT) >> 10, zram->disksize >> 10
		);
	}

	/* zram devices sort of resemble non-rotational disks */
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, zram->disk->queue);

	zram->meta = meta;
	pr_debug("Initialization done!\n");
}

static ssize_t disksize_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	u64 disksize;
	struct zram_meta *meta;
	struct zram *zram = dev_to_zram(dev);

	disksize = memparse(buf, NULL);
	if (!disksize)
		return -EINVAL;

	disksize = PAGE_ALIGN(disksize);
	meta = zram_meta_alloc(disksize);
	if (!meta)
		return -ENOMEM;
	down_write(&zram->init_lock);
	if (init_done(zram)) {
		up_write(&zram->init_lock);
		zram_meta_free(meta);
		pr_info("Cannot change disksize for initialized device\n");
		return -EBUSY;
	}

	zram->disksize = disksize;
	set_capacity(zram->disk, zram->disksize >> SECTOR_SHIFT);
	zram_init_device(zram, meta);
	up_write(&zram->init_lock);

	return len;
}

static ssize_t reset_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	int ret;
	unsigned short do_reset;
	struct zram *zram;
	struct block_device *bdev;

	zram = dev_to_zram(dev);
	bdev = bdget_disk(zram->disk, 0);

	if (!bdev)
		return -ENOMEM;

	/* Do not reset an active device! */
	if (bdev->bd_holders) {
		ret = -EBUSY;
		goto out;
	}

	ret = kstrtou16(buf, 10, &do_reset);
	if (ret)
		goto out;

	if (!do_reset) {
		ret = -EINVAL;
		goto out;
	}

	/* Make sure all pending I/O is finished */
	fsync_bdev(bdev);
	bdput(bdev);

	zram_reset_device(zram, true);
	return len;

out:
	bdput(bdev);
	return ret;
}

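/*
 * Walk the bio segment by segment, splitting any bio_vec that crosses a
 * zram page boundary so that zram_bvec_rw() only sees one page at a time.
 */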
static void __zram_make_request(struct zram *zram, struct bio *bio)
{
	int offset;
	u32 index;
	struct bio_vec bvec;
	struct bvec_iter iter;

	index = bio->bi_iter.bi_sector >> SECTORS_PER_PAGE_SHIFT;
	offset = (bio->bi_iter.bi_sector &
		  (SECTORS_PER_PAGE - 1)) << SECTOR_SHIFT;

	bio_for_each_segment(bvec, bio, iter) {
		int max_transfer_size = PAGE_SIZE - offset;

		if (bvec.bv_len > max_transfer_size) {
			/*
			 * zram_bvec_rw() can only operate on a single
			 * zram page. Split the bio vector.
			 */
			struct bio_vec bv;

			bv.bv_page = bvec.bv_page;
			bv.bv_len = max_transfer_size;
			bv.bv_offset = bvec.bv_offset;

			if (zram_bvec_rw(zram, &bv, index, offset, bio) < 0)
				goto out;

			bv.bv_len = bvec.bv_len - max_transfer_size;
			bv.bv_offset += max_transfer_size;
			if (zram_bvec_rw(zram, &bv, index + 1, 0, bio) < 0)
				goto out;
		} else
			if (zram_bvec_rw(zram, &bvec, index, offset, bio) < 0)
				goto out;

		update_position(&index, &offset, &bvec);
	}

	set_bit(BIO_UPTODATE, &bio->bi_flags);
	bio_endio(bio, 0);
	return;

out:
	bio_io_error(bio);
}

/*
 * Handler function for all zram I/O requests.
 */
static void zram_make_request(struct request_queue *queue, struct bio *bio)
{
	struct zram *zram = queue->queuedata;

	down_read(&zram->init_lock);
	if (unlikely(!init_done(zram)))
		goto error;

	if (!valid_io_request(zram, bio)) {
		atomic64_inc(&zram->stats.invalid_io);
		goto error;
	}

	__zram_make_request(zram, bio);
	up_read(&zram->init_lock);

	return;

error:
	up_read(&zram->init_lock);
	bio_io_error(bio);
}

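/*
 * Swap layer hook: free the compressed page as soon as the corresponding
 * swap slot is released.
 */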
static void zram_slot_free_notify(struct block_device *bdev,
				unsigned long index)
{
	struct zram *zram;
	struct zram_meta *meta;

	zram = bdev->bd_disk->private_data;
	meta = zram->meta;

	write_lock(&meta->tb_lock);
	zram_free_page(zram, index);
	write_unlock(&meta->tb_lock);
	atomic64_inc(&zram->stats.notify_free);
}

static const struct block_device_operations zram_devops = {
	.swap_slot_free_notify = zram_slot_free_notify,
	.owner = THIS_MODULE
};

static DEVICE_ATTR(disksize, S_IRUGO | S_IWUSR,
		disksize_show, disksize_store);
static DEVICE_ATTR(initstate, S_IRUGO, initstate_show, NULL);
static DEVICE_ATTR(reset, S_IWUSR, NULL, reset_store);
static DEVICE_ATTR(num_reads, S_IRUGO, num_reads_show, NULL);
static DEVICE_ATTR(num_writes, S_IRUGO, num_writes_show, NULL);
static DEVICE_ATTR(invalid_io, S_IRUGO, invalid_io_show, NULL);
static DEVICE_ATTR(notify_free, S_IRUGO, notify_free_show, NULL);
static DEVICE_ATTR(zero_pages, S_IRUGO, zero_pages_show, NULL);
static DEVICE_ATTR(orig_data_size, S_IRUGO, orig_data_size_show, NULL);
static DEVICE_ATTR(compr_data_size, S_IRUGO, compr_data_size_show, NULL);
static DEVICE_ATTR(mem_used_total, S_IRUGO, mem_used_total_show, NULL);

static struct attribute *zram_disk_attrs[] = {
	&dev_attr_disksize.attr,
	&dev_attr_initstate.attr,
	&dev_attr_reset.attr,
	&dev_attr_num_reads.attr,
	&dev_attr_num_writes.attr,
	&dev_attr_invalid_io.attr,
	&dev_attr_notify_free.attr,
	&dev_attr_zero_pages.attr,
	&dev_attr_orig_data_size.attr,
	&dev_attr_compr_data_size.attr,
	&dev_attr_mem_used_total.attr,
	NULL,
};

static struct attribute_group zram_disk_attr_group = {
	.attrs = zram_disk_attrs,
};

static int create_device(struct zram *zram, int device_id)
{
	int ret = -ENOMEM;

	init_rwsem(&zram->init_lock);

	zram->queue = blk_alloc_queue(GFP_KERNEL);
	if (!zram->queue) {
		pr_err("Error allocating disk queue for device %d\n",
			device_id);
		goto out;
	}

	blk_queue_make_request(zram->queue, zram_make_request);
	zram->queue->queuedata = zram;

	/* gendisk structure */
	zram->disk = alloc_disk(1);
	if (!zram->disk) {
		pr_warn("Error allocating disk structure for device %d\n",
			device_id);
		goto out_free_queue;
	}

	zram->disk->major = zram_major;
	zram->disk->first_minor = device_id;
	zram->disk->fops = &zram_devops;
	zram->disk->queue = zram->queue;
	zram->disk->private_data = zram;
	snprintf(zram->disk->disk_name, 16, "zram%d", device_id);

	/* Actual capacity set using sysfs (/sys/block/zram<id>/disksize) */
	set_capacity(zram->disk, 0);

	/*
	 * To ensure that we always get PAGE_SIZE aligned
	 * and n*PAGE_SIZED sized I/O requests.
	 */
	blk_queue_physical_block_size(zram->disk->queue, PAGE_SIZE);
	blk_queue_logical_block_size(zram->disk->queue,
					ZRAM_LOGICAL_BLOCK_SIZE);
	blk_queue_io_min(zram->disk->queue, PAGE_SIZE);
	blk_queue_io_opt(zram->disk->queue, PAGE_SIZE);

	add_disk(zram->disk);

	ret = sysfs_create_group(&disk_to_dev(zram->disk)->kobj,
				&zram_disk_attr_group);
	if (ret < 0) {
		pr_warn("Error creating sysfs group");
		goto out_free_disk;
	}

	zram->meta = NULL;
	return 0;

out_free_disk:
	del_gendisk(zram->disk);
	put_disk(zram->disk);
out_free_queue:
	blk_cleanup_queue(zram->queue);
out:
	return ret;
}

static void destroy_device(struct zram *zram)
{
	sysfs_remove_group(&disk_to_dev(zram->disk)->kobj,
			&zram_disk_attr_group);

	del_gendisk(zram->disk);
	put_disk(zram->disk);

	blk_cleanup_queue(zram->queue);
}

static int __init zram_init(void)
{
	int ret, dev_id;

	if (num_devices > max_num_devices) {
		pr_warn("Invalid value for num_devices: %u\n",
				num_devices);
		ret = -EINVAL;
		goto out;
	}

	zram_major = register_blkdev(0, "zram");
	if (zram_major <= 0) {
		pr_warn("Unable to get major number\n");
		ret = -EBUSY;
		goto out;
	}

	/* Allocate the device array and initialize each one */
	zram_devices = kzalloc(num_devices * sizeof(struct zram), GFP_KERNEL);
	if (!zram_devices) {
		ret = -ENOMEM;
		goto unregister;
	}

	for (dev_id = 0; dev_id < num_devices; dev_id++) {
		ret = create_device(&zram_devices[dev_id], dev_id);
		if (ret)
			goto free_devices;
	}

	pr_info("Created %u device(s) ...\n", num_devices);

	return 0;

free_devices:
	while (dev_id)
		destroy_device(&zram_devices[--dev_id]);
	kfree(zram_devices);
unregister:
	unregister_blkdev(zram_major, "zram");
out:
	return ret;
}

static void __exit zram_exit(void)
{
	int i;
	struct zram *zram;

	for (i = 0; i < num_devices; i++) {
		zram = &zram_devices[i];

		destroy_device(zram);
		/*
		 * Shouldn't access zram->disk after destroy_device
		 * because destroy_device already released zram->disk.
		 */
		zram_reset_device(zram, false);
	}

	unregister_blkdev(zram_major, "zram");

	kfree(zram_devices);
	pr_debug("Cleanup done!\n");
}

module_init(zram_init);
module_exit(zram_exit);

module_param(num_devices, uint, 0);
MODULE_PARM_DESC(num_devices, "Number of zram devices");

MODULE_LICENSE("Dual BSD/GPL");
MODULE_AUTHOR("Nitin Gupta <ngupta@vflare.org>");
MODULE_DESCRIPTION("Compressed RAM Block Device");