/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "nd.h"

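/*
 * Per-device state: a single contiguous physical memory region,
 * exposed as a bio-based block device.
 */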
struct pmem_device {
	struct request_queue	*pmem_queue;
	struct gendisk		*pmem_disk;

	/* One contiguous memory region per device */
	phys_addr_t		phys_addr;
	void			*virt_addr;
	size_t			size;
};

static int pmem_major;

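/*
 * Copy a single bio_vec between @page and the pmem region.  The sector
 * number is converted to a byte offset with "<< 9" (512-byte sectors);
 * flush_dcache_page() keeps the page coherent on architectures with
 * aliasing data caches.
 */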
static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
			unsigned int len, unsigned int off, int rw,
			sector_t sector)
{
	void *mem = kmap_atomic(page);
	size_t pmem_off = sector << 9;

	if (rw == READ) {
		memcpy(mem + off, pmem->virt_addr + pmem_off, len);
		flush_dcache_page(page);
	} else {
		flush_dcache_page(page);
		memcpy(pmem->virt_addr + pmem_off, mem + off, len);
	}

	kunmap_atomic(mem);
}

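/*
 * bio-based I/O path: memory copies complete synchronously, so the bio
 * can be ended as soon as all of its segments have been processed.
 */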
static void pmem_make_request(struct request_queue *q, struct bio *bio)
{
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct block_device *bdev = bio->bi_bdev;
	struct pmem_device *pmem = bdev->bd_disk->private_data;

	bio_for_each_segment(bvec, bio, iter)
		pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
				bio_data_dir(bio), iter.bi_sector);
	bio_endio(bio, 0);
}

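/*
 * ->rw_page() fast path: read or write a single page without the
 * overhead of allocating and submitting a bio.
 */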
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, int rw)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;

	pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
	page_endio(page, rw & WRITE, 0);

	return 0;
}

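/*
 * ->direct_access() enables DAX: report the kernel virtual address and
 * page frame number backing @sector, and return the number of bytes
 * available at that offset.
 */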
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
			      void **kaddr, unsigned long *pfn, long size)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	size_t offset = sector << 9;

	if (!pmem)
		return -ENODEV;

	*kaddr = pmem->virt_addr + offset;
	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;

	return pmem->size - offset;
}

static const struct block_device_operations pmem_fops = {
	.owner =		THIS_MODULE,
	.rw_page =		pmem_rw_page,
	.direct_access =	pmem_direct_access,
};

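/* Reserve the physical region and map it into the kernel. */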
static struct pmem_device *pmem_alloc(struct device *dev,
		struct resource *res, int id)
{
	struct pmem_device *pmem;

	pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
		return ERR_PTR(-ENOMEM);

	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);

	if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
				&pmem->phys_addr, pmem->size);
		kfree(pmem);
		return ERR_PTR(-EBUSY);
	}

	/*
	 * Map the memory as non-cacheable, as we can't write back the
	 * contents of the CPU caches in case of a crash.
	 */
	pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
	if (!pmem->virt_addr) {
		release_mem_region(pmem->phys_addr, pmem->size);
		kfree(pmem);
		return ERR_PTR(-ENXIO);
	}

	return pmem;
}

static void pmem_detach_disk(struct pmem_device *pmem)
{
	del_gendisk(pmem->pmem_disk);
	put_disk(pmem->pmem_disk);
	blk_cleanup_queue(pmem->pmem_queue);
}

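/* Allocate the request queue and gendisk that front the region. */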
static int pmem_attach_disk(struct nd_namespace_common *ndns,
		struct pmem_device *pmem)
{
	struct gendisk *disk;

	pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
	if (!pmem->pmem_queue)
		return -ENOMEM;

	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);

	disk = alloc_disk(0);
	if (!disk) {
		blk_cleanup_queue(pmem->pmem_queue);
		return -ENOMEM;
	}

	disk->major		= pmem_major;
	disk->first_minor	= 0;
	disk->fops		= &pmem_fops;
	disk->private_data	= pmem;
	disk->queue		= pmem->pmem_queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	disk->driverfs_dev	= &ndns->dev;
	set_capacity(disk, pmem->size >> 9);
	pmem->pmem_disk = disk;

	add_disk(disk);

	return 0;
}

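/*
 * Byte-granularity access for a claimed namespace (used by the BTT),
 * bypassing the block layer.
 */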
static int pmem_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *buf, size_t size, int rw)
{
	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);

	if (unlikely(offset + size > pmem->size)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}

	if (rw == READ)
		memcpy(buf, pmem->virt_addr + offset, size);
	else
		memcpy(pmem->virt_addr + offset, buf, size);

	return 0;
}

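/* Undo pmem_alloc(): unmap, release the region, free the state. */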
static void pmem_free(struct pmem_device *pmem)
{
	iounmap(pmem->virt_addr);
	release_mem_region(pmem->phys_addr, pmem->size);
	kfree(pmem);
}

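/*
 * Attach an existing BTT, probe for a new one (the device then
 * re-registers as btt-pmem), or fall back to a raw pmem disk.
 */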
static int nd_pmem_probe(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev->parent);
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
	struct pmem_device *pmem;
	int rc;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	nsio = to_nd_namespace_io(&ndns->dev);
	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
	if (IS_ERR(pmem))
		return PTR_ERR(pmem);

	dev_set_drvdata(dev, pmem);
	ndns->rw_bytes = pmem_rw_bytes;
	if (is_nd_btt(dev))
		rc = nvdimm_namespace_attach_btt(ndns);
	else if (nd_btt_probe(ndns, pmem) == 0) {
		/* we'll come back as btt-pmem */
		rc = -ENXIO;
	} else
		rc = pmem_attach_disk(ndns, pmem);
	if (rc)
		pmem_free(pmem);
	return rc;
}

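/* Tear down whichever personality (BTT or raw disk) probe set up. */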
static int nd_pmem_remove(struct device *dev)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
	else
		pmem_detach_disk(pmem);
	pmem_free(pmem);

	return 0;
}

MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.drv = {
		.name = "nd_pmem",
	},
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};

static int __init pmem_init(void)
{
	int error;

	pmem_major = register_blkdev(0, "pmem");
	if (pmem_major < 0)
		return pmem_major;

	error = nd_driver_register(&nd_pmem_driver);
	if (error) {
		unregister_blkdev(pmem_major, "pmem");
		return error;
	}

	return 0;
}
module_init(pmem_init);

static void pmem_exit(void)
{
	driver_unregister(&nd_pmem_driver.drv);
	unregister_blkdev(pmem_major, "pmem");
}
module_exit(pmem_exit);

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");