/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "nd.h"

/*
 * Per-device state for one persistent-memory block device: the block-layer
 * objects created by pmem_attach_disk() and the memory region set up by
 * pmem_alloc().
 */
struct pmem_device {
	struct request_queue	*pmem_queue;	/* queue from blk_alloc_queue() */
	struct gendisk		*pmem_disk;	/* disk registered via add_disk() */

	/* One contiguous memory region per device */
	phys_addr_t		phys_addr;	/* region start (res->start) */
	void			*virt_addr;	/* ioremap_nocache() mapping of the region */
	size_t			size;		/* region length in bytes (resource_size()) */
};

/* Block major number returned by register_blkdev() in pmem_init(). */
static int pmem_major;

/*
 * Copy @len bytes between @page (starting at byte @off) and the pmem
 * mapping (starting at @sector, converted to a byte offset by << 9).
 *
 * @rw == READ copies pmem -> page; any other value copies page -> pmem.
 * The page is temporarily mapped with kmap_atomic() for the duration of
 * the copy, and flush_dcache_page() is called after filling the page on a
 * read and before consuming it on a write.
 */
static void pmem_do_bvec(struct pmem_device *pmem, struct page *page,
			unsigned int len, unsigned int off, int rw,
			sector_t sector)
{
	size_t pmem_off = sector << 9;
	void *mem = kmap_atomic(page);
	void *page_buf = mem + off;
	void *pmem_buf = pmem->virt_addr + pmem_off;

	if (rw != READ) {
		flush_dcache_page(page);
		memcpy(pmem_buf, page_buf, len);
	} else {
		memcpy(page_buf, pmem_buf, len);
		flush_dcache_page(page);
	}

	/* Unmap with the address kmap_atomic() returned, not the offset one */
	kunmap_atomic(mem);
}

static void pmem_make_request(struct request_queue *q, struct bio *bio)
{
	struct block_device *bdev = bio->bi_bdev;
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	int rw;
	struct bio_vec bvec;
	sector_t sector;
	struct bvec_iter iter;
	int err = 0;

	if (bio_end_sector(bio) > get_capacity(bdev->bd_disk)) {
		err = -EIO;
		goto out;
	}

	BUG_ON(bio->bi_rw & REQ_DISCARD);

	rw = bio_data_dir(bio);
	sector = bio->bi_iter.bi_sector;
	bio_for_each_segment(bvec, bio, iter) {
		pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len, bvec.bv_offset,
			     rw, sector);
		sector += bvec.bv_len >> 9;
	}

out:
	bio_endio(bio, err);
}

/*
 * rw_page handler: transfer one whole page (PAGE_CACHE_SIZE bytes from
 * offset 0 of @page) to or from the device at @sector, then signal page
 * completion with page_endio().  Always returns 0.
 */
static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, int rw)
{
	struct gendisk *disk = bdev->bd_disk;
	struct pmem_device *pmem = disk->private_data;

	pmem_do_bvec(pmem, page, PAGE_CACHE_SIZE, 0, rw, sector);
	page_endio(page, rw & WRITE, 0);
	return 0;
}

/*
 * direct_access handler: hand out the kernel virtual address and page
 * frame number that correspond to @sector in the pmem region.
 *
 * On success *@kaddr and *@pfn are filled in and the number of bytes from
 * that position to the end of the region is returned.  Returns -ENODEV
 * when the disk has no pmem_device attached.  @size is not consulted and
 * no range check is made against the region size here.
 */
static long pmem_direct_access(struct block_device *bdev, sector_t sector,
			      void **kaddr, unsigned long *pfn, long size)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	size_t offset;

	if (!pmem)
		return -ENODEV;

	/* sector index -> byte offset into the region */
	offset = sector << 9;

	*pfn = (pmem->phys_addr + offset) >> PAGE_SHIFT;
	*kaddr = pmem->virt_addr + offset;

	return pmem->size - offset;
}

/* Block-device operations installed on every pmem gendisk. */
static const struct block_device_operations pmem_fops = {
	.direct_access	= pmem_direct_access,
	.rw_page	= pmem_rw_page,
	.owner		= THIS_MODULE,
};

static struct pmem_device *pmem_alloc(struct device *dev,
		struct resource *res, int id)
122 123 124 125 126
{
	struct pmem_device *pmem;

	pmem = kzalloc(sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
127
		return ERR_PTR(-ENOMEM);
128 129 130 131

	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);

132
	if (!request_mem_region(pmem->phys_addr, pmem->size, dev_name(dev))) {
133 134
		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
				&pmem->phys_addr, pmem->size);
135 136
		kfree(pmem);
		return ERR_PTR(-EBUSY);
137 138 139 140 141 142 143
	}

	/*
	 * Map the memory as non-cachable, as we can't write back the contents
	 * of the CPU caches in case of a crash.
	 */
	pmem->virt_addr = ioremap_nocache(pmem->phys_addr, pmem->size);
144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
	if (!pmem->virt_addr) {
		release_mem_region(pmem->phys_addr, pmem->size);
		kfree(pmem);
		return ERR_PTR(-ENXIO);
	}

	return pmem;
}

/*
 * Tear down the block-layer objects created by pmem_attach_disk():
 * remove the gendisk, drop the reference to it, then clean up the request
 * queue.  The pmem_device itself and its mapping are left untouched.
 */
static void pmem_detach_disk(struct pmem_device *pmem)
{
	struct gendisk *disk = pmem->pmem_disk;
	struct request_queue *queue = pmem->pmem_queue;

	del_gendisk(disk);
	put_disk(disk);
	blk_cleanup_queue(queue);
}

static int pmem_attach_disk(struct nd_namespace_common *ndns,
		struct pmem_device *pmem)
{
	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);
	struct gendisk *disk;
165 166 167

	pmem->pmem_queue = blk_alloc_queue(GFP_KERNEL);
	if (!pmem->pmem_queue)
168
		return -ENOMEM;
169 170 171 172 173

	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
	blk_queue_max_hw_sectors(pmem->pmem_queue, 1024);
	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);

174
	disk = alloc_disk(0);
175 176 177 178
	if (!disk) {
		blk_cleanup_queue(pmem->pmem_queue);
		return -ENOMEM;
	}
179 180

	disk->major		= pmem_major;
181
	disk->first_minor	= 0;
182 183 184 185
	disk->fops		= &pmem_fops;
	disk->private_data	= pmem;
	disk->queue		= pmem->pmem_queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
186 187
	sprintf(disk->disk_name, "pmem%d", nd_region->id);
	disk->driverfs_dev = &ndns->dev;
188 189 190 191 192
	set_capacity(disk, pmem->size >> 9);
	pmem->pmem_disk = disk;

	add_disk(disk);

193 194
	return 0;
}
/*
 * pmem_rw_bytes() - byte-granular access to the pmem region
 * @ndns:   namespace whose claiming device carries the pmem_device as
 *          driver data
 * @offset: byte offset into the region
 * @buf:    caller buffer to fill (READ) or to copy from (otherwise)
 * @size:   number of bytes to transfer
 * @rw:     READ copies pmem -> @buf; any other value copies @buf -> pmem
 *
 * Return: 0 on success, -EFAULT (with a one-time warning) when the request
 * does not lie entirely inside the region.
 */
static int pmem_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *buf, size_t size, int rw)
{
	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);

	/*
	 * Compare without forming offset + size: that sum can wrap around
	 * and let an out-of-range request slip past the check.
	 */
	if (unlikely(offset > pmem->size || size > pmem->size - offset)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}

	if (rw == READ)
		memcpy(buf, pmem->virt_addr + offset, size);
	else
		memcpy(pmem->virt_addr + offset, buf, size);

	return 0;
}

/*
 * Placeholder for attaching a BTT to @ndns: not implemented yet, so every
 * call reports -ENXIO.
 */
static int nvdimm_namespace_attach_btt(struct nd_namespace_common *ndns)
{
	/* TODO: implement BTT attach */
	return -ENXIO;
}

/*
 * Placeholder for detaching a BTT from @ndns: not implemented yet, so this
 * currently does nothing.
 */
static void nvdimm_namespace_detach_btt(struct nd_namespace_common *ndns)
{
	/* TODO */
}

static void pmem_free(struct pmem_device *pmem)
{
	iounmap(pmem->virt_addr);
	release_mem_region(pmem->phys_addr, pmem->size);
	kfree(pmem);
}

static int nd_pmem_probe(struct device *dev)
233
{
234
	struct nd_region *nd_region = to_nd_region(dev->parent);
235 236
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
237
	struct pmem_device *pmem;
238
	int rc;
239

240 241 242
	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);
243

244
	nsio = to_nd_namespace_io(&ndns->dev);
245
	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
246 247 248
	if (IS_ERR(pmem))
		return PTR_ERR(pmem);

249
	dev_set_drvdata(dev, pmem);
250 251 252 253 254 255 256 257 258 259 260
	ndns->rw_bytes = pmem_rw_bytes;
	if (is_nd_btt(dev))
		rc = nvdimm_namespace_attach_btt(ndns);
	else if (nd_btt_probe(ndns, pmem) == 0) {
		/* we'll come back as btt-pmem */
		rc = -ENXIO;
	} else
		rc = pmem_attach_disk(ndns, pmem);
	if (rc)
		pmem_free(pmem);
	return rc;
261 262
}

static int nd_pmem_remove(struct device *dev)
264
{
265
	struct pmem_device *pmem = dev_get_drvdata(dev);
266

267 268 269 270
	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(to_nd_btt(dev)->ndns);
	else
		pmem_detach_disk(pmem);
271
	pmem_free(pmem);
272

273 274 275
	return 0;
}

/* Aliases under which this module can be requested. */
MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.drv = {
		.name = "nd_pmem",
284
	},
285
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
286 287 288 289 290 291 292 293 294 295
};

/*
 * Module entry point: obtain a dynamically assigned block major (stored in
 * pmem_major) and register the nd_pmem driver.
 *
 * Return: 0 on success, or the negative errno from register_blkdev() or
 * nd_driver_register().  The block major is given back if driver
 * registration fails.
 */
static int __init pmem_init(void)
{
	int error;

	pmem_major = register_blkdev(0, "pmem");
	if (pmem_major < 0)
		return pmem_major;

	error = nd_driver_register(&nd_pmem_driver);
	if (error) {
		unregister_blkdev(pmem_major, "pmem");
		return error;
	}

	return 0;
}
module_init(pmem_init);

static void pmem_exit(void)
{
308
	driver_unregister(&nd_pmem_driver.drv);
309 310 311 312 313 314
	unregister_blkdev(pmem_major, "pmem");
}
module_exit(pmem_exit);

/* Module metadata: author and license tag. */
MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");