/*
 * Copyright (C) 2015 IT University of Copenhagen (rrpc.c)
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Implementation of a physical block-device target for Open-channel SSDs.
 *
 * pblk-init.c - pblk's initialization.
 */

#include "pblk.h"

static struct kmem_cache *pblk_ws_cache, *pblk_rec_cache, *pblk_g_rq_cache,
				*pblk_w_rq_cache;
static DECLARE_RWSEM(pblk_lock);
struct bio_set *pblk_bio_set;

static int pblk_rw_io(struct request_queue *q, struct pblk *pblk,
			  struct bio *bio)
{
	int ret;

	/* Read requests must be <= 256kb due to NVMe's 64 bit completion bitmap
	 * constraint. Writes can be of arbitrary size.
	 */
	if (bio_data_dir(bio) == READ) {
		blk_queue_split(q, &bio);
		ret = pblk_submit_read(pblk, bio);
		if (ret == NVM_IO_DONE && bio_flagged(bio, BIO_CLONED))
			bio_put(bio);

		return ret;
	}

	/* Prevent deadlock in the case of a modest LUN configuration and large
	 * user I/Os. Unless stalled, the rate limiter leaves at least 256KB
	 * available for user I/O.
	 */
	if (pblk_get_secs(bio) > pblk_rl_max_io(&pblk->rl))
		blk_queue_split(q, &bio);

	return pblk_write_to_cache(pblk, bio, PBLK_IOTYPE_USER);
}

static blk_qc_t pblk_make_rq(struct request_queue *q, struct bio *bio)
{
	struct pblk *pblk = q->queuedata;

	if (bio_op(bio) == REQ_OP_DISCARD) {
		pblk_discard(pblk, bio);
		if (!(bio->bi_opf & REQ_PREFLUSH)) {
			bio_endio(bio);
			return BLK_QC_T_NONE;
		}
	}

	switch (pblk_rw_io(q, pblk, bio)) {
	case NVM_IO_ERR:
		bio_io_error(bio);
		break;
	case NVM_IO_DONE:
		bio_endio(bio);
		break;
	}

	return BLK_QC_T_NONE;
}

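/*
 * Size of the logical-to-physical (L2P) table: 4 bytes per sector when the
 * packed physical address fits in 32 bits, 8 bytes otherwise.
 */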
static size_t pblk_trans_map_size(struct pblk *pblk)
{
	int entry_size = 8;

	if (pblk->ppaf_bitsize < 32)
		entry_size = 4;

	return entry_size * pblk->rl.nr_secs;
}

#ifdef CONFIG_NVM_DEBUG
static u32 pblk_l2p_crc(struct pblk *pblk)
{
	size_t map_size;
	u32 crc = ~(u32)0;

	map_size = pblk_trans_map_size(pblk);
	crc = crc32_le(crc, pblk->trans_map, map_size);
	return crc;
}
#endif

static void pblk_l2p_free(struct pblk *pblk)
{
	vfree(pblk->trans_map);
}

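/*
 * Rebuild the L2P table. On a factory initialization only a new instance
 * UUID is generated; otherwise the mapping is recovered from the line
 * metadata stored on the device.
 */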
static int pblk_l2p_recover(struct pblk *pblk, bool factory_init)
{
	struct pblk_line *line = NULL;

	if (factory_init) {
		pblk_setup_uuid(pblk);
	} else {
		line = pblk_recov_l2p(pblk);
		if (IS_ERR(line)) {
			pr_err("pblk: could not recover l2p table\n");
			return -EFAULT;
		}
	}

#ifdef CONFIG_NVM_DEBUG
	pr_info("pblk init: L2P CRC: %x\n", pblk_l2p_crc(pblk));
#endif

	/* Free full lines directly as GC has not been started yet */
	pblk_gc_free_full_lines(pblk);

	if (!line) {
		/* Configure next line for user data */
		line = pblk_line_get_first_data(pblk);
		if (!line) {
			pr_err("pblk: line list corrupted\n");
			return -EFAULT;
		}
	}

	return 0;
}

static int pblk_l2p_init(struct pblk *pblk, bool factory_init)
{
	sector_t i;
	struct ppa_addr ppa;
	size_t map_size;

	map_size = pblk_trans_map_size(pblk);
	pblk->trans_map = vmalloc(map_size);
	if (!pblk->trans_map)
		return -ENOMEM;

	pblk_ppa_set_empty(&ppa);

	for (i = 0; i < pblk->rl.nr_secs; i++)
		pblk_trans_map_set(pblk, i, ppa);

	return pblk_l2p_recover(pblk, factory_init);
}

static void pblk_rwb_free(struct pblk *pblk)
{
	if (pblk_rb_tear_down_check(&pblk->rwb))
		pr_err("pblk: write buffer error on tear down\n");

	pblk_rb_data_free(&pblk->rwb);
	vfree(pblk_rb_entries_ref(&pblk->rwb));
}

static int pblk_rwb_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_rb_entry *entries;
	unsigned long nr_entries;
	unsigned int power_size, power_seg_sz;

	nr_entries = pblk_rb_calculate_size(pblk->pgs_in_buffer);

	entries = vzalloc(nr_entries * sizeof(struct pblk_rb_entry));
	if (!entries)
		return -ENOMEM;

	power_size = get_count_order(nr_entries);
	power_seg_sz = get_count_order(geo->sec_size);

	return pblk_rb_init(&pblk->rwb, entries, power_size, power_seg_sz);
}

/* Minimum pages needed within a lun */
#define ADDR_POOL_SIZE 64

static int pblk_set_ppaf(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct nvm_addr_format ppaf = geo->ppaf;
	int mod, power_len;

	div_u64_rem(geo->sec_per_chk, pblk->min_write_pgs, &mod);
	if (mod) {
		pr_err("pblk: bad configuration of sectors/pages\n");
		return -EINVAL;
	}

	/* Re-calculate channel and lun format to adapt to configuration */
	power_len = get_count_order(geo->nr_chnls);
	if (1 << power_len != geo->nr_chnls) {
		pr_err("pblk: supports only power-of-two channel config.\n");
		return -EINVAL;
	}
	ppaf.ch_len = power_len;

	power_len = get_count_order(geo->nr_luns);
	if (1 << power_len != geo->nr_luns) {
		pr_err("pblk: supports only power-of-two LUN config.\n");
		return -EINVAL;
	}
	ppaf.lun_len = power_len;

	pblk->ppaf.sec_offset = 0;
	pblk->ppaf.pln_offset = ppaf.sect_len;
	pblk->ppaf.ch_offset = pblk->ppaf.pln_offset + ppaf.pln_len;
	pblk->ppaf.lun_offset = pblk->ppaf.ch_offset + ppaf.ch_len;
	pblk->ppaf.pg_offset = pblk->ppaf.lun_offset + ppaf.lun_len;
	pblk->ppaf.blk_offset = pblk->ppaf.pg_offset + ppaf.pg_len;
	pblk->ppaf.sec_mask = (1ULL << ppaf.sect_len) - 1;
	pblk->ppaf.pln_mask = ((1ULL << ppaf.pln_len) - 1) <<
							pblk->ppaf.pln_offset;
	pblk->ppaf.ch_mask = ((1ULL << ppaf.ch_len) - 1) <<
							pblk->ppaf.ch_offset;
	pblk->ppaf.lun_mask = ((1ULL << ppaf.lun_len) - 1) <<
							pblk->ppaf.lun_offset;
	pblk->ppaf.pg_mask = ((1ULL << ppaf.pg_len) - 1) <<
							pblk->ppaf.pg_offset;
	pblk->ppaf.blk_mask = ((1ULL << ppaf.blk_len) - 1) <<
							pblk->ppaf.blk_offset;

	pblk->ppaf_bitsize = pblk->ppaf.blk_offset + ppaf.blk_len;

	return 0;
}

static int pblk_init_global_caches(struct pblk *pblk)
{
	down_write(&pblk_lock);
	pblk_ws_cache = kmem_cache_create("pblk_blk_ws",
				sizeof(struct pblk_line_ws), 0, 0, NULL);
	if (!pblk_ws_cache) {
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_rec_cache = kmem_cache_create("pblk_rec",
				sizeof(struct pblk_rec_ctx), 0, 0, NULL);
	if (!pblk_rec_cache) {
		kmem_cache_destroy(pblk_ws_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_g_rq_cache = kmem_cache_create("pblk_g_rq", pblk_g_rq_size,
				0, 0, NULL);
	if (!pblk_g_rq_cache) {
		kmem_cache_destroy(pblk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}

	pblk_w_rq_cache = kmem_cache_create("pblk_w_rq", pblk_w_rq_size,
				0, 0, NULL);
	if (!pblk_w_rq_cache) {
		kmem_cache_destroy(pblk_ws_cache);
		kmem_cache_destroy(pblk_rec_cache);
		kmem_cache_destroy(pblk_g_rq_cache);
		up_write(&pblk_lock);
		return -ENOMEM;
	}
	up_write(&pblk_lock);

	return 0;
}

static void pblk_free_global_caches(struct pblk *pblk)
{
	kmem_cache_destroy(pblk_ws_cache);
	kmem_cache_destroy(pblk_rec_cache);
	kmem_cache_destroy(pblk_g_rq_cache);
	kmem_cache_destroy(pblk_w_rq_cache);
}

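/*
 * Initialize pblk's core state: write amplification and flush counters,
 * the global slab caches, the mempools for internal bios and per-I/O
 * contexts, the workqueues used for line close, bad-block and read
 * completion work, and the device address format.
 */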
static int pblk_core_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int max_write_ppas;

	atomic64_set(&pblk->user_wa, 0);
	atomic64_set(&pblk->pad_wa, 0);
	atomic64_set(&pblk->gc_wa, 0);
	pblk->user_rst_wa = 0;
	pblk->pad_rst_wa = 0;
	pblk->gc_rst_wa = 0;

	atomic64_set(&pblk->nr_flush, 0);
	pblk->nr_flush_rst = 0;

	pblk->pgs_in_buffer = NVM_MEM_PAGE_WRITE * geo->sec_per_pg *
						geo->nr_planes * geo->all_luns;

	pblk->min_write_pgs = geo->sec_per_pl * (geo->sec_size / PAGE_SIZE);
	max_write_ppas = pblk->min_write_pgs * geo->all_luns;
	pblk->max_write_pgs = min_t(int, max_write_ppas, NVM_MAX_VLBA);
	pblk_set_sec_per_write(pblk, pblk->min_write_pgs);

	if (pblk->max_write_pgs > PBLK_MAX_REQ_ADDRS) {
		pr_err("pblk: vector list too big(%u > %u)\n",
				pblk->max_write_pgs, PBLK_MAX_REQ_ADDRS);
		return -EINVAL;
	}

	pblk->pad_dist = kzalloc((pblk->min_write_pgs - 1) * sizeof(atomic64_t),
								GFP_KERNEL);
	if (!pblk->pad_dist)
		return -ENOMEM;

	if (pblk_init_global_caches(pblk))
		goto fail_free_pad_dist;

	/* Internal bios can be at most the sectors signaled by the device. */
	pblk->page_bio_pool = mempool_create_page_pool(NVM_MAX_VLBA, 0);
	if (!pblk->page_bio_pool)
		goto free_global_caches;

	pblk->gen_ws_pool = mempool_create_slab_pool(PBLK_GEN_WS_POOL_SIZE,
							pblk_ws_cache);
	if (!pblk->gen_ws_pool)
		goto free_page_bio_pool;

	pblk->rec_pool = mempool_create_slab_pool(geo->all_luns,
							pblk_rec_cache);
	if (!pblk->rec_pool)
		goto free_gen_ws_pool;

	pblk->r_rq_pool = mempool_create_slab_pool(geo->all_luns,
							pblk_g_rq_cache);
	if (!pblk->r_rq_pool)
		goto free_rec_pool;

	pblk->e_rq_pool = mempool_create_slab_pool(geo->all_luns,
							pblk_g_rq_cache);
	if (!pblk->e_rq_pool)
		goto free_r_rq_pool;

	pblk->w_rq_pool = mempool_create_slab_pool(geo->all_luns,
							pblk_w_rq_cache);
	if (!pblk->w_rq_pool)
		goto free_e_rq_pool;

	pblk->close_wq = alloc_workqueue("pblk-close-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_NR_CLOSE_JOBS);
	if (!pblk->close_wq)
		goto free_w_rq_pool;

	pblk->bb_wq = alloc_workqueue("pblk-bb-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!pblk->bb_wq)
		goto free_close_wq;

	pblk->r_end_wq = alloc_workqueue("pblk-read-end-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, 0);
	if (!pblk->r_end_wq)
		goto free_bb_wq;

	if (pblk_set_ppaf(pblk))
		goto free_r_end_wq;

	INIT_LIST_HEAD(&pblk->compl_list);

	return 0;

free_r_end_wq:
	destroy_workqueue(pblk->r_end_wq);
free_bb_wq:
	destroy_workqueue(pblk->bb_wq);
free_close_wq:
	destroy_workqueue(pblk->close_wq);
free_w_rq_pool:
	mempool_destroy(pblk->w_rq_pool);
free_e_rq_pool:
	mempool_destroy(pblk->e_rq_pool);
free_r_rq_pool:
	mempool_destroy(pblk->r_rq_pool);
free_rec_pool:
	mempool_destroy(pblk->rec_pool);
free_gen_ws_pool:
	mempool_destroy(pblk->gen_ws_pool);
free_page_bio_pool:
	mempool_destroy(pblk->page_bio_pool);
free_global_caches:
	pblk_free_global_caches(pblk);
fail_free_pad_dist:
	kfree(pblk->pad_dist);
	return -ENOMEM;
}

static void pblk_core_free(struct pblk *pblk)
{
	if (pblk->close_wq)
		destroy_workqueue(pblk->close_wq);

	if (pblk->r_end_wq)
		destroy_workqueue(pblk->r_end_wq);

	if (pblk->bb_wq)
		destroy_workqueue(pblk->bb_wq);

	mempool_destroy(pblk->page_bio_pool);
	mempool_destroy(pblk->gen_ws_pool);
	mempool_destroy(pblk->rec_pool);
	mempool_destroy(pblk->r_rq_pool);
	mempool_destroy(pblk->e_rq_pool);
	mempool_destroy(pblk->w_rq_pool);

	pblk_free_global_caches(pblk);
	kfree(pblk->pad_dist);
}

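/*
 * Free the line management metadata: the bad block template and auxiliary
 * bitmaps, the vsc list and the per-data-line smeta/emeta buffers.
 */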
static void pblk_line_mg_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	int i;

	kfree(l_mg->bb_template);
	kfree(l_mg->bb_aux);
	kfree(l_mg->vsc_list);

	for (i = 0; i < PBLK_DATA_LINES; i++) {
		kfree(l_mg->sline_meta[i]);
		pblk_mfree(l_mg->eline_meta[i]->buf, l_mg->emeta_alloc_type);
		kfree(l_mg->eline_meta[i]);
	}
}

static void pblk_line_meta_free(struct pblk_line *line)
{
	kfree(line->blk_bitmap);
	kfree(line->erase_bitmap);
}

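/*
 * Free all lines together with their per-line bitmaps, the line management
 * metadata, the LUN array and the line array itself.
 */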
static void pblk_lines_free(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line *line;
	int i;

	spin_lock(&l_mg->free_lock);
	for (i = 0; i < l_mg->nr_lines; i++) {
		line = &pblk->lines[i];

		pblk_line_free(pblk, line);
		pblk_line_meta_free(line);
	}
	spin_unlock(&l_mg->free_lock);

	pblk_line_mg_free(pblk);

	kfree(pblk->luns);
	kfree(pblk->lines);
}

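/*
 * Retrieve the bad block table for a single LUN and fold it across planes
 * so that each chunk is reported once.
 */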
static int pblk_bb_get_tbl(struct nvm_tgt_dev *dev, struct pblk_lun *rlun,
			   u8 *blks, int nr_blks)
{
	struct ppa_addr ppa;
	int ret;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;

	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	if (ret)
		return ret;

	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	if (nr_blks < 0)
		return -EIO;

	return 0;
}

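/*
 * Build a bad block log covering all LUNs, one folded bad block table per
 * LUN, which the line setup code uses to mark bad chunks in each line.
 */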
static void *pblk_bb_get_log(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	u8 *log;
	int i, nr_blks, blk_per_lun;
	int ret;

	blk_per_lun = geo->nr_chks * geo->plane_mode;
	nr_blks = blk_per_lun * geo->all_luns;

	log = kmalloc(nr_blks, GFP_KERNEL);
	if (!log)
		return ERR_PTR(-ENOMEM);

	for (i = 0; i < geo->all_luns; i++) {
		struct pblk_lun *rlun = &pblk->luns[i];
		u8 *log_pos = log + i * blk_per_lun;

		ret = pblk_bb_get_tbl(dev, rlun, log_pos, blk_per_lun);
		if (ret) {
			kfree(log);
			return ERR_PTR(-EIO);
		}
	}

	return log;
}

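/*
 * Mark the bad chunks of a line in its block bitmap based on the per-LUN
 * bad block log. Returns the number of bad blocks found in the line.
 */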
static int pblk_bb_line(struct pblk *pblk, struct pblk_line *line,
			u8 *bb_log, int blk_per_line)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	int i, bb_cnt = 0;
	int blk_per_lun = geo->nr_chks * geo->plane_mode;

	for (i = 0; i < blk_per_line; i++) {
		struct pblk_lun *rlun = &pblk->luns[i];
		u8 *lun_bb_log = bb_log + i * blk_per_lun;

		if (lun_bb_log[line->id] == NVM_BLK_T_FREE)
			continue;

		set_bit(pblk_ppa_to_pos(geo, rlun->bppa), line->blk_bitmap);
		bb_cnt++;
	}

	return bb_cnt;
}

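/*
 * Allocate the LUN array and map it onto the LUNs of the underlying device
 * so that consecutive blocks in a line stripe across channels.
 */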
static int pblk_luns_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_lun *rlun;
	int i;

	/* TODO: Implement unbalanced LUN support */
	if (geo->nr_luns < 0) {
		pr_err("pblk: unbalanced LUN config.\n");
		return -EINVAL;
	}

	pblk->luns = kcalloc(geo->all_luns, sizeof(struct pblk_lun),
								GFP_KERNEL);
	if (!pblk->luns)
		return -ENOMEM;

	for (i = 0; i < geo->all_luns; i++) {
		/* Stripe across channels */
		int ch = i % geo->nr_chnls;
		int lun_raw = i / geo->nr_chnls;
		int lunid = lun_raw + ch * geo->nr_luns;

		rlun = &pblk->luns[i];
		rlun->bppa = dev->luns[lunid];

		sema_init(&rlun->wr_sem, 1);
	}

	return 0;
}

/* See comment over struct line_emeta definition */
static unsigned int calc_emeta_len(struct pblk *pblk)
{
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;

	/* Round to sector size so that lba_list starts on its own sector */
	lm->emeta_sec[1] = DIV_ROUND_UP(
			sizeof(struct line_emeta) + lm->blk_bitmap_len +
			sizeof(struct wa_counters), geo->sec_size);
	lm->emeta_len[1] = lm->emeta_sec[1] * geo->sec_size;

	/* Round to sector size so that vsc_list starts on its own sector */
	lm->dsec_per_line = lm->sec_per_line - lm->emeta_sec[0];
	lm->emeta_sec[2] = DIV_ROUND_UP(lm->dsec_per_line * sizeof(u64),
			geo->sec_size);
	lm->emeta_len[2] = lm->emeta_sec[2] * geo->sec_size;

	lm->emeta_sec[3] = DIV_ROUND_UP(l_mg->nr_lines * sizeof(u32),
			geo->sec_size);
	lm->emeta_len[3] = lm->emeta_sec[3] * geo->sec_size;

	lm->vsc_list_len = l_mg->nr_lines * sizeof(u32);

	return (lm->emeta_len[1] + lm->emeta_len[2] + lm->emeta_len[3]);
}

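/*
 * Derive the exposed capacity from the number of free blocks: reserve the
 * over-provisioning percentage and the blocks needed for line metadata,
 * and seed the rate limiter with the resulting counts.
 */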
static void pblk_set_provision(struct pblk *pblk, long nr_free_blks)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct nvm_geo *geo = &dev->geo;
	sector_t provisioned;
	int sec_meta, blk_meta;

	if (geo->op == NVM_TARGET_DEFAULT_OP)
		pblk->op = PBLK_DEFAULT_OP;
	else
		pblk->op = geo->op;

	provisioned = nr_free_blks;
	provisioned *= (100 - pblk->op);
	sector_div(provisioned, 100);

	pblk->op_blks = nr_free_blks - provisioned;

	/* Internally pblk manages all free blocks, but all calculations based
	 * on user capacity consider only provisioned blocks
	 */
	pblk->rl.total_blocks = nr_free_blks;
	pblk->rl.nr_secs = nr_free_blks * geo->sec_per_chk;

	/* Consider sectors used for metadata */
	sec_meta = (lm->smeta_sec + lm->emeta_sec[0]) * l_mg->nr_free_lines;
	blk_meta = DIV_ROUND_UP(sec_meta, geo->sec_per_chk);

	pblk->capacity = (provisioned - blk_meta) * geo->sec_per_chk;

	atomic_set(&pblk->rl.free_blocks, nr_free_blks);
	atomic_set(&pblk->rl.free_user_blocks, nr_free_blks);
}

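/*
 * Allocate the per-line block and erase bitmaps and mark the line's bad
 * chunks from the device bad block log.
 */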
static int pblk_setup_line_meta(struct pblk *pblk, struct pblk_line *line,
				void *chunk_log, long *nr_bad_blks)
{
	struct pblk_line_meta *lm = &pblk->lm;

	line->blk_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->blk_bitmap)
		return -ENOMEM;

	line->erase_bitmap = kzalloc(lm->blk_bitmap_len, GFP_KERNEL);
	if (!line->erase_bitmap) {
		kfree(line->blk_bitmap);
		return -ENOMEM;
	}

	*nr_bad_blks = pblk_bb_line(pblk, line, chunk_log, lm->blk_per_line);

	return 0;
}

static int pblk_line_mg_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	int i, bb_distance;

	l_mg->nr_lines = geo->nr_chks;
	l_mg->log_line = l_mg->data_line = NULL;
	l_mg->l_seq_nr = l_mg->d_seq_nr = 0;
	l_mg->nr_free_lines = 0;
	bitmap_zero(&l_mg->meta_bitmap, PBLK_DATA_LINES);

	INIT_LIST_HEAD(&l_mg->free_list);
	INIT_LIST_HEAD(&l_mg->corrupt_list);
	INIT_LIST_HEAD(&l_mg->bad_list);
	INIT_LIST_HEAD(&l_mg->gc_full_list);
	INIT_LIST_HEAD(&l_mg->gc_high_list);
	INIT_LIST_HEAD(&l_mg->gc_mid_list);
	INIT_LIST_HEAD(&l_mg->gc_low_list);
	INIT_LIST_HEAD(&l_mg->gc_empty_list);

	INIT_LIST_HEAD(&l_mg->emeta_list);

	l_mg->gc_lists[0] = &l_mg->gc_high_list;
	l_mg->gc_lists[1] = &l_mg->gc_mid_list;
	l_mg->gc_lists[2] = &l_mg->gc_low_list;

	spin_lock_init(&l_mg->free_lock);
	spin_lock_init(&l_mg->close_lock);
	spin_lock_init(&l_mg->gc_lock);

	l_mg->vsc_list = kcalloc(l_mg->nr_lines, sizeof(__le32), GFP_KERNEL);
	if (!l_mg->vsc_list)
		goto fail;

	l_mg->bb_template = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_template)
		goto fail_free_vsc_list;

	l_mg->bb_aux = kzalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!l_mg->bb_aux)
		goto fail_free_bb_template;

	/* smeta is always small enough to fit on a kmalloc memory allocation,
	 * emeta depends on the number of LUNs allocated to the pblk instance
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		l_mg->sline_meta[i] = kmalloc(lm->smeta_len, GFP_KERNEL);
		if (!l_mg->sline_meta[i])
			goto fail_free_smeta;
	}

	/* emeta allocates three different buffers for managing metadata with
	 * in-memory and in-media layouts
	 */
	for (i = 0; i < PBLK_DATA_LINES; i++) {
		struct pblk_emeta *emeta;

		emeta = kmalloc(sizeof(struct pblk_emeta), GFP_KERNEL);
		if (!emeta)
			goto fail_free_emeta;

		if (lm->emeta_len[0] > KMALLOC_MAX_CACHE_SIZE) {
			l_mg->emeta_alloc_type = PBLK_VMALLOC_META;

			emeta->buf = vmalloc(lm->emeta_len[0]);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		} else {
			l_mg->emeta_alloc_type = PBLK_KMALLOC_META;

			emeta->buf = kmalloc(lm->emeta_len[0], GFP_KERNEL);
			if (!emeta->buf) {
				kfree(emeta);
				goto fail_free_emeta;
			}

			emeta->nr_entries = lm->emeta_sec[0];
			l_mg->eline_meta[i] = emeta;
		}
	}

	for (i = 0; i < l_mg->nr_lines; i++)
		l_mg->vsc_list[i] = cpu_to_le32(EMPTY_ENTRY);

	bb_distance = (geo->all_luns) * geo->ws_opt;
	for (i = 0; i < lm->sec_per_line; i += bb_distance)
		bitmap_set(l_mg->bb_template, i, geo->ws_opt);

	return 0;

fail_free_emeta:
	while (--i >= 0) {
		if (l_mg->emeta_alloc_type == PBLK_VMALLOC_META)
			vfree(l_mg->eline_meta[i]->buf);
		else
			kfree(l_mg->eline_meta[i]->buf);
		kfree(l_mg->eline_meta[i]);
	}
fail_free_smeta:
	for (i = 0; i < PBLK_DATA_LINES; i++)
		kfree(l_mg->sline_meta[i]);
	kfree(l_mg->bb_aux);
fail_free_bb_template:
	kfree(l_mg->bb_template);
fail_free_vsc_list:
	kfree(l_mg->vsc_list);
fail:
	return -ENOMEM;
}

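/*
 * Derive the per-line geometry: sectors and blocks per line, bitmap
 * lengths, metadata distances, and the number of sectors reserved for
 * smeta and emeta in each line.
 */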
static int pblk_line_meta_init(struct pblk *pblk)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_line_meta *lm = &pblk->lm;
	unsigned int smeta_len, emeta_len;
	int i;

	lm->sec_per_line = geo->sec_per_chk * geo->all_luns;
	lm->blk_per_line = geo->all_luns;
	lm->blk_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
	lm->sec_bitmap_len = BITS_TO_LONGS(lm->sec_per_line) * sizeof(long);
	lm->lun_bitmap_len = BITS_TO_LONGS(geo->all_luns) * sizeof(long);
	lm->mid_thrs = lm->sec_per_line / 2;
	lm->high_thrs = lm->sec_per_line / 4;
	lm->meta_distance = (geo->all_luns / 2) * pblk->min_write_pgs;

	/* Calculate necessary pages for smeta. See comment over struct
	 * line_smeta definition
	 */
	i = 1;
add_smeta_page:
	lm->smeta_sec = i * geo->sec_per_pl;
	lm->smeta_len = lm->smeta_sec * geo->sec_size;

	smeta_len = sizeof(struct line_smeta) + lm->lun_bitmap_len;
	if (smeta_len > lm->smeta_len) {
		i++;
		goto add_smeta_page;
	}

	/* Calculate necessary pages for emeta. See comment over struct
	 * line_emeta definition
	 */
	i = 1;
add_emeta_page:
	lm->emeta_sec[0] = i * geo->sec_per_pl;
	lm->emeta_len[0] = lm->emeta_sec[0] * geo->sec_size;

	emeta_len = calc_emeta_len(pblk);
	if (emeta_len > lm->emeta_len[0]) {
		i++;
		goto add_emeta_page;
	}

	lm->emeta_bb = geo->all_luns > i ? geo->all_luns - i : 0;

	lm->min_blk_line = 1;
	if (geo->all_luns > 1)
		lm->min_blk_line += DIV_ROUND_UP(lm->smeta_sec +
					lm->emeta_sec[0], geo->sec_per_chk);

	if (lm->min_blk_line > lm->blk_per_line) {
		pr_err("pblk: config. not supported. Min. LUN in line:%d\n",
							lm->blk_per_line);
		return -EINVAL;
	}

	return 0;
}

static int pblk_lines_init(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_line *line;
	void *chunk_log;
	long nr_bad_blks = 0, nr_free_blks = 0;
	int i, ret;

	ret = pblk_line_meta_init(pblk);
	if (ret)
		return ret;

	ret = pblk_line_mg_init(pblk);
	if (ret)
		return ret;

	ret = pblk_luns_init(pblk);
	if (ret)
		goto fail_free_meta;

	chunk_log = pblk_bb_get_log(pblk);
	if (IS_ERR(chunk_log)) {
		pr_err("pblk: could not get bad block log (%lu)\n",
							PTR_ERR(chunk_log));
		ret = PTR_ERR(chunk_log);
		goto fail_free_luns;
	}

	pblk->lines = kcalloc(l_mg->nr_lines, sizeof(struct pblk_line),
								GFP_KERNEL);
	if (!pblk->lines) {
		ret = -ENOMEM;
		goto fail_free_chunk_log;
	}

	for (i = 0; i < l_mg->nr_lines; i++) {
		int chk_in_line;

		line = &pblk->lines[i];

		line->pblk = pblk;
		line->id = i;
		line->type = PBLK_LINETYPE_FREE;
		line->state = PBLK_LINESTATE_FREE;
		line->gc_group = PBLK_LINEGC_NONE;
		line->vsc = &l_mg->vsc_list[i];
		spin_lock_init(&line->lock);

		ret = pblk_setup_line_meta(pblk, line, chunk_log, &nr_bad_blks);
		if (ret)
			goto fail_free_lines;

		chk_in_line = lm->blk_per_line - nr_bad_blks;
		if (nr_bad_blks < 0 || nr_bad_blks > lm->blk_per_line ||
					chk_in_line < lm->min_blk_line) {
			line->state = PBLK_LINESTATE_BAD;
			list_add_tail(&line->list, &l_mg->bad_list);
			continue;
		}

		nr_free_blks += chk_in_line;
		atomic_set(&line->blk_in_line, chk_in_line);

		l_mg->nr_free_lines++;
		list_add_tail(&line->list, &l_mg->free_list);
	}

	pblk_set_provision(pblk, nr_free_blks);

	kfree(chunk_log);
	return 0;

fail_free_lines:
	while (--i >= 0)
		pblk_line_meta_free(&pblk->lines[i]);
	kfree(pblk->lines);
fail_free_chunk_log:
	kfree(chunk_log);
fail_free_luns:
	kfree(pblk->luns);
fail_free_meta:
	pblk_line_mg_free(pblk);

	return ret;
}

static int pblk_writer_init(struct pblk *pblk)
{
	pblk->writer_ts = kthread_create(pblk_write_ts, pblk, "pblk-writer-t");
	if (IS_ERR(pblk->writer_ts)) {
		int err = PTR_ERR(pblk->writer_ts);

		if (err != -EINTR)
			pr_err("pblk: could not allocate writer kthread (%d)\n",
					err);
		return err;
	}

	timer_setup(&pblk->wtimer, pblk_write_timer_fn, 0);
	mod_timer(&pblk->wtimer, jiffies + msecs_to_jiffies(100));

	return 0;
}

static void pblk_writer_stop(struct pblk *pblk)
{
	/* The pipeline must be stopped and the write buffer emptied before the
	 * write thread is stopped
	 */
	WARN(pblk_rb_read_count(&pblk->rwb),
			"Stopping not fully persisted write buffer\n");

	WARN(pblk_rb_sync_count(&pblk->rwb),
			"Stopping not fully synced write buffer\n");

	del_timer_sync(&pblk->wtimer);
	if (pblk->writer_ts)
		kthread_stop(pblk->writer_ts);
}

static void pblk_free(struct pblk *pblk)
{
	pblk_lines_free(pblk);
	pblk_l2p_free(pblk);
	pblk_rwb_free(pblk);
	pblk_core_free(pblk);

	kfree(pblk);
}

static void pblk_tear_down(struct pblk *pblk)
{
	pblk_pipeline_stop(pblk);
	pblk_writer_stop(pblk);
	pblk_rb_sync_l2p(&pblk->rwb);
	pblk_rl_free(&pblk->rl);

	pr_debug("pblk: consistent tear down\n");
}

static void pblk_exit(void *private)
{
	struct pblk *pblk = private;

	down_write(&pblk_lock);
	pblk_gc_exit(pblk);
	pblk_tear_down(pblk);

#ifdef CONFIG_NVM_DEBUG
	pr_info("pblk exit: L2P CRC: %x\n", pblk_l2p_crc(pblk));
#endif

	pblk_free(pblk);
	up_write(&pblk_lock);
}

static sector_t pblk_capacity(void *private)
{
	struct pblk *pblk = private;

	return pblk->capacity * NR_PHY_IN_LOG;
}

static void *pblk_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		       int flags)
{
	struct nvm_geo *geo = &dev->geo;
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	struct pblk *pblk;
	int ret;

	if (dev->identity.dom & NVM_RSP_L2P) {
		pr_err("pblk: host-side L2P table not supported. (%x)\n",
							dev->identity.dom);
		return ERR_PTR(-EINVAL);
	}

	pblk = kzalloc(sizeof(struct pblk), GFP_KERNEL);
	if (!pblk)
		return ERR_PTR(-ENOMEM);

	pblk->dev = dev;
	pblk->disk = tdisk;
	pblk->state = PBLK_STATE_RUNNING;
	pblk->gc.gc_enabled = 0;

	spin_lock_init(&pblk->trans_lock);
	spin_lock_init(&pblk->lock);

#ifdef CONFIG_NVM_DEBUG
	atomic_long_set(&pblk->inflight_writes, 0);
	atomic_long_set(&pblk->padded_writes, 0);
	atomic_long_set(&pblk->padded_wb, 0);
	atomic_long_set(&pblk->req_writes, 0);
	atomic_long_set(&pblk->sub_writes, 0);
	atomic_long_set(&pblk->sync_writes, 0);
	atomic_long_set(&pblk->inflight_reads, 0);
	atomic_long_set(&pblk->cache_reads, 0);
	atomic_long_set(&pblk->sync_reads, 0);
	atomic_long_set(&pblk->recov_writes, 0);
	atomic_long_set(&pblk->recov_gc_writes, 0);
	atomic_long_set(&pblk->recov_gc_reads, 0);
#endif

	atomic_long_set(&pblk->read_failed, 0);
	atomic_long_set(&pblk->read_empty, 0);
	atomic_long_set(&pblk->read_high_ecc, 0);
	atomic_long_set(&pblk->read_failed_gc, 0);
	atomic_long_set(&pblk->write_failed, 0);
	atomic_long_set(&pblk->erase_failed, 0);

	ret = pblk_core_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize core\n");
		goto fail;
	}

	ret = pblk_lines_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize lines\n");
		goto fail_free_core;
	}

	ret = pblk_rwb_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize write buffer\n");
		goto fail_free_lines;
	}

	ret = pblk_l2p_init(pblk, flags & NVM_TARGET_FACTORY);
	if (ret) {
		pr_err("pblk: could not initialize maps\n");
		goto fail_free_rwb;
	}

	ret = pblk_writer_init(pblk);
	if (ret) {
		if (ret != -EINTR)
			pr_err("pblk: could not initialize write thread\n");
		goto fail_free_l2p;
	}

	ret = pblk_gc_init(pblk);
	if (ret) {
		pr_err("pblk: could not initialize gc\n");
		goto fail_stop_writer;
	}

	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

	blk_queue_write_cache(tqueue, true, false);

	tqueue->limits.discard_granularity = geo->sec_per_chk * geo->sec_size;
	tqueue->limits.discard_alignment = 0;
	blk_queue_max_discard_sectors(tqueue, UINT_MAX >> 9);
	blk_queue_flag_set(QUEUE_FLAG_DISCARD, tqueue);

	pr_info("pblk(%s): luns:%u, lines:%d, secs:%llu, buf entries:%u\n",
			tdisk->disk_name,
			geo->all_luns, pblk->l_mg.nr_lines,
			(unsigned long long)pblk->rl.nr_secs,
			pblk->rwb.nr_entries);

	wake_up_process(pblk->writer_ts);

	/* Check if we need to start GC */
	pblk_gc_should_kick(pblk);

	return pblk;

fail_stop_writer:
	pblk_writer_stop(pblk);
fail_free_l2p:
	pblk_l2p_free(pblk);
fail_free_rwb:
	pblk_rwb_free(pblk);
fail_free_lines:
	pblk_lines_free(pblk);
fail_free_core:
	pblk_core_free(pblk);
fail:
	kfree(pblk);
	return ERR_PTR(ret);
}

/* physical block device target */
static struct nvm_tgt_type tt_pblk = {
	.name		= "pblk",
	.version	= {1, 0, 0},

	.make_rq	= pblk_make_rq,
	.capacity	= pblk_capacity,

	.init		= pblk_init,
	.exit		= pblk_exit,

	.sysfs_init	= pblk_sysfs_init,
	.sysfs_exit	= pblk_sysfs_exit,
	.owner		= THIS_MODULE,
};

static int __init pblk_module_init(void)
{
	int ret;

	pblk_bio_set = bioset_create(BIO_POOL_SIZE, 0, 0);
	if (!pblk_bio_set)
		return -ENOMEM;
	ret = nvm_register_tgt_type(&tt_pblk);
	if (ret)
		bioset_free(pblk_bio_set);
	return ret;
}

static void pblk_module_exit(void)
{
	bioset_free(pblk_bio_set);
	nvm_unregister_tgt_type(&tt_pblk);
}

module_init(pblk_module_init);
module_exit(pblk_module_exit);
MODULE_AUTHOR("Javier Gonzalez <javier@cnexlabs.com>");
MODULE_AUTHOR("Matias Bjorling <matias@cnexlabs.com>");
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Physical Block-Device for Open-Channel SSDs");