/*
 * Persistent Memory Driver
 *
 * Copyright (c) 2014-2015, Intel Corporation.
 * Copyright (c) 2015, Christoph Hellwig <hch@lst.de>.
 * Copyright (c) 2015, Boaz Harrosh <boaz@plexistor.com>.
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms and conditions of the GNU General Public License,
 * version 2, as published by the Free Software Foundation.
 *
 * This program is distributed in the hope it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
 * more details.
 */

#include <asm/cacheflush.h>
#include <linux/blkdev.h>
#include <linux/hdreg.h>
#include <linux/init.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/badblocks.h>
#include <linux/memremap.h>
#include <linux/vmalloc.h>
#include <linux/pfn_t.h>
#include <linux/slab.h>
#include <linux/pmem.h>
#include <linux/nd.h>
#include "pfn.h"
#include "nd.h"

struct pmem_device {
	struct request_queue	*pmem_queue;
	struct gendisk		*pmem_disk;
	struct nd_namespace_common *ndns;

	/* One contiguous memory region per device */
	phys_addr_t		phys_addr;
	/* when non-zero this device is hosting a 'pfn' instance */
	phys_addr_t		data_offset;
	u64			pfn_flags;
	void __pmem		*virt_addr;
	/* immutable base size of the namespace */
	size_t			size;
	/* trim size when namespace capacity has been section aligned */
	u32			pfn_pad;
	struct badblocks	bb;
};

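/* return true if any part of the requested range intersects a known bad block */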
static bool is_bad_pmem(struct badblocks *bb, sector_t sector, unsigned int len)
{
	if (bb->count) {
		sector_t first_bad;
		int num_bad;

		return !!badblocks_check(bb, sector, len / 512, &first_bad,
				&num_bad);
	}

	return false;
}

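/*
 * Ask the bus to clear poison in the given range; on success shrink the
 * badblocks list by the number of cleared sectors and invalidate any
 * cached copies of the affected media.
 */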
static void pmem_clear_poison(struct pmem_device *pmem, phys_addr_t offset,
		unsigned int len)
{
	struct device *dev = disk_to_dev(pmem->pmem_disk);
	sector_t sector;
	long cleared;

	sector = (offset - pmem->data_offset) / 512;
	cleared = nvdimm_clear_poison(dev, pmem->phys_addr + offset, len);

	if (cleared > 0 && cleared / 512) {
		dev_dbg(dev, "%s: %llx clear %ld sector%s\n",
				__func__, (unsigned long long) sector,
				cleared / 512, cleared / 512 > 1 ? "s" : "");
		badblocks_clear(&pmem->bb, sector, cleared / 512);
	}
	invalidate_pmem(pmem->virt_addr + offset, len);
}

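/*
 * Copy one segment between a page and persistent memory.  Reads that
 * hit known poison fail with -EIO; writes to known-poisoned ranges
 * clear the poison and rewrite the data.
 */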
static int pmem_do_bvec(struct pmem_device *pmem, struct page *page,
			unsigned int len, unsigned int off, int rw,
			sector_t sector)
{
	int rc = 0;
	bool bad_pmem = false;
	void *mem = kmap_atomic(page);
	phys_addr_t pmem_off = sector * 512 + pmem->data_offset;
	void __pmem *pmem_addr = pmem->virt_addr + pmem_off;

	if (unlikely(is_bad_pmem(&pmem->bb, sector, len)))
		bad_pmem = true;

	if (rw == READ) {
		if (unlikely(bad_pmem))
			rc = -EIO;
		else {
			rc = memcpy_from_pmem(mem + off, pmem_addr, len);
			flush_dcache_page(page);
		}
	} else {
		flush_dcache_page(page);
		memcpy_to_pmem(pmem_addr, mem + off, len);
		if (unlikely(bad_pmem)) {
			pmem_clear_poison(pmem, pmem_off, len);
			memcpy_to_pmem(pmem_addr, mem + off, len);
		}
	}

	kunmap_atomic(mem);
	return rc;
}

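/*
 * bio submission path: account the I/O, copy each segment with
 * pmem_do_bvec(), and drain write data with wmb_pmem() before
 * completing the bio.
 */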
static blk_qc_t pmem_make_request(struct request_queue *q, struct bio *bio)
{
	int rc = 0;
	bool do_acct;
	unsigned long start;
	struct bio_vec bvec;
	struct bvec_iter iter;
	struct block_device *bdev = bio->bi_bdev;
	struct pmem_device *pmem = bdev->bd_disk->private_data;

	do_acct = nd_iostat_start(bio, &start);
	bio_for_each_segment(bvec, bio, iter) {
		rc = pmem_do_bvec(pmem, bvec.bv_page, bvec.bv_len,
				bvec.bv_offset, bio_data_dir(bio),
				iter.bi_sector);
		if (rc) {
			bio->bi_error = rc;
			break;
		}
	}
	if (do_acct)
		nd_iostat_end(bio, start);

	if (bio_data_dir(bio))
		wmb_pmem();

	bio_endio(bio);
	return BLK_QC_T_NONE;
}

static int pmem_rw_page(struct block_device *bdev, sector_t sector,
		       struct page *page, int rw)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	int rc;

	rc = pmem_do_bvec(pmem, page, PAGE_SIZE, 0, rw, sector);
	if (rw & WRITE)
		wmb_pmem();

	/*
	 * The ->rw_page interface is subtle and tricky.  The core
	 * retries on any error, so we can only invoke page_endio() in
	 * the successful completion case.  Otherwise, we'll see crashes
	 * caused by double completion.
	 */
	if (rc == 0)
		page_endio(page, rw & WRITE, 0);

	return rc;
}

static long pmem_direct_access(struct block_device *bdev, sector_t sector,
		      void __pmem **kaddr, pfn_t *pfn)
{
	struct pmem_device *pmem = bdev->bd_disk->private_data;
	resource_size_t offset = sector * 512 + pmem->data_offset;

	*kaddr = pmem->virt_addr + offset;
	*pfn = phys_to_pfn_t(pmem->phys_addr + offset, pmem->pfn_flags);

	return pmem->size - pmem->pfn_pad - offset;
}

static const struct block_device_operations pmem_fops = {
	.owner =		THIS_MODULE,
	.rw_page =		pmem_rw_page,
	.direct_access =	pmem_direct_access,
	.revalidate_disk =	nvdimm_revalidate_disk,
};

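/*
 * Allocate and initialize a pmem_device: reserve the physical range,
 * allocate a request queue, and map the namespace either with
 * devm_memremap_pages() (when struct page is needed) or devm_memremap().
 */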
static struct pmem_device *pmem_alloc(struct device *dev,
		struct resource *res, int id)
{
	struct pmem_device *pmem;
	struct request_queue *q;

	pmem = devm_kzalloc(dev, sizeof(*pmem), GFP_KERNEL);
	if (!pmem)
		return ERR_PTR(-ENOMEM);

	pmem->phys_addr = res->start;
	pmem->size = resource_size(res);
	if (!arch_has_wmb_pmem())
		dev_warn(dev, "unable to guarantee persistence of writes\n");

	if (!devm_request_mem_region(dev, pmem->phys_addr, pmem->size,
			dev_name(dev))) {
		dev_warn(dev, "could not reserve region [0x%pa:0x%zx]\n",
				&pmem->phys_addr, pmem->size);
		return ERR_PTR(-EBUSY);
	}

	q = blk_alloc_queue_node(GFP_KERNEL, dev_to_node(dev));
	if (!q)
		return ERR_PTR(-ENOMEM);

	pmem->pfn_flags = PFN_DEV;
	if (pmem_should_map_pages(dev)) {
		pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, res,
				&q->q_usage_counter, NULL);
		pmem->pfn_flags |= PFN_MAP;
	} else
		pmem->virt_addr = (void __pmem *) devm_memremap(dev,
				pmem->phys_addr, pmem->size,
				ARCH_MEMREMAP_PMEM);

	if (IS_ERR(pmem->virt_addr)) {
		blk_cleanup_queue(q);
		return (void __force *) pmem->virt_addr;
	}

	pmem->pmem_queue = q;
	return pmem;
}

static void pmem_detach_disk(struct pmem_device *pmem)
{
	if (!pmem->pmem_disk)
		return;

	del_gendisk(pmem->pmem_disk);
	put_disk(pmem->pmem_disk);
	blk_cleanup_queue(pmem->pmem_queue);
}

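/*
 * Configure the request queue, allocate the gendisk, and populate the
 * badblocks list for the data area (skipping any pfn metadata) before
 * adding the disk.
 */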
static int pmem_attach_disk(struct device *dev,
		struct nd_namespace_common *ndns, struct pmem_device *pmem)
{
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	int nid = dev_to_node(dev);
	struct resource bb_res;
	struct gendisk *disk;

	blk_queue_make_request(pmem->pmem_queue, pmem_make_request);
	blk_queue_physical_block_size(pmem->pmem_queue, PAGE_SIZE);
	blk_queue_max_hw_sectors(pmem->pmem_queue, UINT_MAX);
	blk_queue_bounce_limit(pmem->pmem_queue, BLK_BOUNCE_ANY);
	queue_flag_set_unlocked(QUEUE_FLAG_NONROT, pmem->pmem_queue);

	disk = alloc_disk_node(0, nid);
	if (!disk) {
		blk_cleanup_queue(pmem->pmem_queue);
		return -ENOMEM;
	}

	disk->fops		= &pmem_fops;
	disk->private_data	= pmem;
	disk->queue		= pmem->pmem_queue;
	disk->flags		= GENHD_FL_EXT_DEVT;
	nvdimm_namespace_disk_name(ndns, disk->disk_name);
	disk->driverfs_dev = dev;
	set_capacity(disk, (pmem->size - pmem->pfn_pad - pmem->data_offset)
			/ 512);
	pmem->pmem_disk = disk;
	devm_exit_badblocks(dev, &pmem->bb);
	if (devm_init_badblocks(dev, &pmem->bb))
		return -ENOMEM;
	bb_res.start = nsio->res.start + pmem->data_offset;
	bb_res.end = nsio->res.end;
	if (is_nd_pfn(dev)) {
		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;

		bb_res.start += __le32_to_cpu(pfn_sb->start_pad);
		bb_res.end -= __le32_to_cpu(pfn_sb->end_trunc);
	}
	nvdimm_badblocks_populate(to_nd_region(dev->parent), &pmem->bb,
			&bb_res);
	disk->bb = &pmem->bb;
	add_disk(disk);
	revalidate_disk(disk);

	return 0;
}

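/*
 * Byte-granular namespace access used by claiming drivers (btt, pfn);
 * offsets are relative to the raw namespace, not the pmem block device.
 */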
static int pmem_rw_bytes(struct nd_namespace_common *ndns,
		resource_size_t offset, void *buf, size_t size, int rw)
{
	struct pmem_device *pmem = dev_get_drvdata(ndns->claim);

	if (unlikely(offset + size > pmem->size)) {
		dev_WARN_ONCE(&ndns->dev, 1, "request out of range\n");
		return -EFAULT;
	}

	if (rw == READ) {
		unsigned int sz_align = ALIGN(size + (offset & (512 - 1)), 512);

		if (unlikely(is_bad_pmem(&pmem->bb, offset / 512, sz_align)))
			return -EIO;
		return memcpy_from_pmem(buf, pmem->virt_addr + offset, size);
	} else {
		memcpy_to_pmem(pmem->virt_addr + offset, buf, size);
		wmb_pmem();
	}

	return 0;
}

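/*
 * Validate an existing pfn info block, or create one: trim the
 * namespace to section alignment, reserve room for the memmap in
 * PFN_MODE_PMEM, and write the superblock at a 4K offset.
 */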
static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
	struct nd_pfn_sb *pfn_sb = kzalloc(sizeof(*pfn_sb), GFP_KERNEL);
	struct pmem_device *pmem = dev_get_drvdata(&nd_pfn->dev);
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	u32 start_pad = 0, end_trunc = 0;
	resource_size_t start, size;
	struct nd_namespace_io *nsio;
	struct nd_region *nd_region;
	unsigned long npfns;
	phys_addr_t offset;
	u64 checksum;
	int rc;

	if (!pfn_sb)
		return -ENOMEM;

	nd_pfn->pfn_sb = pfn_sb;
	rc = nd_pfn_validate(nd_pfn);
	if (rc == -ENODEV)
		/* no info block, do init */;
	else
		return rc;

	nd_region = to_nd_region(nd_pfn->dev.parent);
	if (nd_region->ro) {
		dev_info(&nd_pfn->dev,
				"%s is read-only, unable to init metadata\n",
				dev_name(&nd_region->dev));
		goto err;
	}

	memset(pfn_sb, 0, sizeof(*pfn_sb));

	/*
	 * Check if pmem collides with 'System RAM' when section aligned and
	 * trim it accordingly
	 */
	nsio = to_nd_namespace_io(&ndns->dev);
	start = PHYS_SECTION_ALIGN_DOWN(nsio->res.start);
	size = resource_size(&nsio->res);
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED) {

		start = nsio->res.start;
		start_pad = PHYS_SECTION_ALIGN_UP(start) - start;
	}

	start = nsio->res.start;
	size = PHYS_SECTION_ALIGN_UP(start + size) - start;
	if (region_intersects(start, size, IORESOURCE_SYSTEM_RAM,
				IORES_DESC_NONE) == REGION_MIXED) {
		size = resource_size(&nsio->res);
		end_trunc = start + size - PHYS_SECTION_ALIGN_DOWN(start + size);
	}

	if (start_pad + end_trunc)
		dev_info(&nd_pfn->dev, "%s section collision, truncate %d bytes\n",
				dev_name(&ndns->dev), start_pad + end_trunc);

	/*
	 * Note, we use 64 here for the standard size of struct page,
	 * debugging options may cause it to be larger in which case the
	 * implementation will limit the pfns advertised through
	 * ->direct_access() to those that are included in the memmap.
	 */
	start += start_pad;
	npfns = (pmem->size - start_pad - end_trunc - SZ_8K) / SZ_4K;
	if (nd_pfn->mode == PFN_MODE_PMEM)
		offset = ALIGN(start + SZ_8K + 64 * npfns, nd_pfn->align)
			- start;
	else if (nd_pfn->mode == PFN_MODE_RAM)
		offset = ALIGN(start + SZ_8K, nd_pfn->align) - start;
	else
		goto err;

	if (offset + start_pad + end_trunc >= pmem->size) {
		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
				dev_name(&ndns->dev));
		goto err;
	}

	npfns = (pmem->size - offset - start_pad - end_trunc) / SZ_4K;
	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
	pfn_sb->dataoff = cpu_to_le64(offset);
	pfn_sb->npfns = cpu_to_le64(npfns);
	memcpy(pfn_sb->signature, PFN_SIG, PFN_SIG_LEN);
	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
	pfn_sb->version_major = cpu_to_le16(1);
	pfn_sb->version_minor = cpu_to_le16(1);
	pfn_sb->start_pad = cpu_to_le32(start_pad);
	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
	pfn_sb->checksum = cpu_to_le64(checksum);

	rc = nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb));
	if (rc)
		goto err;

	return 0;
 err:
	nd_pfn->pfn_sb = NULL;
	kfree(pfn_sb);
	return -ENXIO;
}

static int nvdimm_namespace_detach_pfn(struct nd_namespace_common *ndns)
{
	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
	struct pmem_device *pmem;

	/* free pmem disk */
	pmem = dev_get_drvdata(&nd_pfn->dev);
	pmem_detach_disk(pmem);

	/* release nd_pfn resources */
	kfree(nd_pfn->pfn_sb);
	nd_pfn->pfn_sb = NULL;

	return 0;
}

/*
 * We hotplug memory at section granularity, pad the reserved area from
 * the previous section base to the namespace base address.
 */
static unsigned long init_altmap_base(resource_size_t base)
{
	unsigned long base_pfn = PHYS_PFN(base);

	return PFN_SECTION_ALIGN_DOWN(base_pfn);
}

static unsigned long init_altmap_reserve(resource_size_t base)
{
	unsigned long reserve = PHYS_PFN(SZ_8K);
	unsigned long base_pfn = PHYS_PFN(base);

	reserve += base_pfn - PFN_SECTION_ALIGN_DOWN(base_pfn);
	return reserve;
}

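/*
 * Re-map the namespace with devm_memremap_pages() so that pfn_to_page()
 * works, placing the memmap in pmem itself (via vmem_altmap) for
 * PFN_MODE_PMEM, then attach the pmem disk in "pfn-mode".
 */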
static int __nvdimm_namespace_attach_pfn(struct nd_pfn *nd_pfn)
{
	int rc;
	struct resource res;
	struct request_queue *q;
	struct pmem_device *pmem;
	struct vmem_altmap *altmap;
	struct device *dev = &nd_pfn->dev;
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
	u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	resource_size_t base = nsio->res.start + start_pad;
	struct vmem_altmap __altmap = {
		.base_pfn = init_altmap_base(base),
		.reserve = init_altmap_reserve(base),
	};

	pmem = dev_get_drvdata(dev);
	pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
	pmem->pfn_pad = start_pad + end_trunc;
	nd_pfn->mode = le32_to_cpu(nd_pfn->pfn_sb->mode);
	if (nd_pfn->mode == PFN_MODE_RAM) {
		if (pmem->data_offset < SZ_8K)
			return -EINVAL;
		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
		altmap = NULL;
	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
		nd_pfn->npfns = (pmem->size - pmem->pfn_pad - pmem->data_offset)
			/ PAGE_SIZE;
		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
			dev_info(&nd_pfn->dev,
					"number of pfns truncated from %lld to %ld\n",
					le64_to_cpu(nd_pfn->pfn_sb->npfns),
					nd_pfn->npfns);
	altmap = &__altmap;
		altmap->free = PHYS_PFN(pmem->data_offset - SZ_8K);
		altmap->alloc = 0;
	} else {
		rc = -ENXIO;
		goto err;
	}

	/* establish pfn range for lookup, and switch to direct map */
	q = pmem->pmem_queue;
	memcpy(&res, &nsio->res, sizeof(res));
	res.start += start_pad;
	res.end -= end_trunc;
	devm_memunmap(dev, (void __force *) pmem->virt_addr);
	pmem->virt_addr = (void __pmem *) devm_memremap_pages(dev, &res,
			&q->q_usage_counter, altmap);
	pmem->pfn_flags |= PFN_MAP;
	if (IS_ERR(pmem->virt_addr)) {
		rc = PTR_ERR(pmem->virt_addr);
		goto err;
	}

	/* attach pmem disk in "pfn-mode" */
	rc = pmem_attach_disk(dev, ndns, pmem);
	if (rc)
		goto err;

	return rc;
 err:
	nvdimm_namespace_detach_pfn(ndns);
	return rc;

}

static int nvdimm_namespace_attach_pfn(struct nd_namespace_common *ndns)
{
	struct nd_pfn *nd_pfn = to_nd_pfn(ndns->claim);
	int rc;

	if (!nd_pfn->uuid || !nd_pfn->ndns)
		return -ENODEV;

	rc = nd_pfn_init(nd_pfn);
	if (rc)
		return rc;
	/* we need a valid pfn_sb before we can init a vmem_altmap */
	return __nvdimm_namespace_attach_pfn(nd_pfn);
}

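/* probe the namespace and hand off to btt, pfn, or the raw pmem disk */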
static int nd_pmem_probe(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev->parent);
	struct nd_namespace_common *ndns;
	struct nd_namespace_io *nsio;
	struct pmem_device *pmem;

	ndns = nvdimm_namespace_common_probe(dev);
	if (IS_ERR(ndns))
		return PTR_ERR(ndns);

	nsio = to_nd_namespace_io(&ndns->dev);
	pmem = pmem_alloc(dev, &nsio->res, nd_region->id);
	if (IS_ERR(pmem))
		return PTR_ERR(pmem);

	pmem->ndns = ndns;
	dev_set_drvdata(dev, pmem);
	ndns->rw_bytes = pmem_rw_bytes;
	if (devm_init_badblocks(dev, &pmem->bb))
		return -ENOMEM;
	nvdimm_badblocks_populate(nd_region, &pmem->bb, &nsio->res);

	if (is_nd_btt(dev)) {
		/* btt allocates its own request_queue */
		blk_cleanup_queue(pmem->pmem_queue);
		pmem->pmem_queue = NULL;
		return nvdimm_namespace_attach_btt(ndns);
	}

	if (is_nd_pfn(dev))
		return nvdimm_namespace_attach_pfn(ndns);

	if (nd_btt_probe(ndns, pmem) == 0 || nd_pfn_probe(ndns, pmem) == 0) {
		/*
		 * We'll come back as either btt-pmem, or pfn-pmem, so
		 * drop the queue allocation for now.
		 */
		blk_cleanup_queue(pmem->pmem_queue);
		return -ENXIO;
	}

	return pmem_attach_disk(dev, ndns, pmem);
}

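/* tear down whichever personality (btt, pfn, or raw disk) was attached */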
static int nd_pmem_remove(struct device *dev)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);

	if (is_nd_btt(dev))
		nvdimm_namespace_detach_btt(pmem->ndns);
	else if (is_nd_pfn(dev))
		nvdimm_namespace_detach_pfn(pmem->ndns);
	else
		pmem_detach_disk(pmem);

	return 0;
}

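/*
 * Bus notification that the poison list may have changed: re-populate
 * badblocks for the data area, accounting for any pfn-mode padding.
 */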
static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
{
	struct pmem_device *pmem = dev_get_drvdata(dev);
	struct nd_namespace_common *ndns = pmem->ndns;
	struct nd_region *nd_region = to_nd_region(dev->parent);
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	struct resource res = {
		.start = nsio->res.start + pmem->data_offset,
		.end = nsio->res.end,
	};

	if (event != NVDIMM_REVALIDATE_POISON)
		return;

	if (is_nd_pfn(dev)) {
		struct nd_pfn *nd_pfn = to_nd_pfn(dev);
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;

		res.start += __le32_to_cpu(pfn_sb->start_pad);
		res.end -= __le32_to_cpu(pfn_sb->end_trunc);
	}

	nvdimm_badblocks_populate(nd_region, &pmem->bb, &res);
}

MODULE_ALIAS("pmem");
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_IO);
MODULE_ALIAS_ND_DEVICE(ND_DEVICE_NAMESPACE_PMEM);
static struct nd_device_driver nd_pmem_driver = {
	.probe = nd_pmem_probe,
	.remove = nd_pmem_remove,
	.notify = nd_pmem_notify,
	.drv = {
		.name = "nd_pmem",
	},
	.type = ND_DRIVER_NAMESPACE_IO | ND_DRIVER_NAMESPACE_PMEM,
};

static int __init pmem_init(void)
{
	return nd_driver_register(&nd_pmem_driver);
}
module_init(pmem_init);

static void pmem_exit(void)
{
	driver_unregister(&nd_pmem_driver.drv);
}
module_exit(pmem_exit);

MODULE_AUTHOR("Ross Zwisler <ross.zwisler@linux.intel.com>");
MODULE_LICENSE("GPL v2");