/*
 * Copyright (C) 2015 IT University of Copenhagen
 * Initial release: Matias Bjorling <m@bjorling.me>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * Implementation of a Round-robin page-based Hybrid FTL for Open-channel SSDs.
 */

#include "rrpc.h"

static struct kmem_cache *rrpc_gcb_cache, *rrpc_rq_cache;
static DECLARE_RWSEM(rrpc_lock);

static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
				struct nvm_rq *rqd, unsigned long flags);

#define rrpc_for_each_lun(rrpc, rlun, i) \
		for ((i) = 0, rlun = &(rrpc)->luns[0]; \
			(i) < (rrpc)->nr_luns; (i)++, rlun = &(rrpc)->luns[(i)])

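/* Invalidate the physical page backing @a: mark its slot in the block's
 * invalid_pages bitmap and clear the reverse-map entry. Requires
 * rrpc->rev_lock to be held (asserted below).
 */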
static void rrpc_page_invalidate(struct rrpc *rrpc, struct rrpc_addr *a)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_block *rblk = a->rblk;
	unsigned int pg_offset;

	lockdep_assert_held(&rrpc->rev_lock);

	if (a->addr == ADDR_EMPTY || !rblk)
		return;

	spin_lock(&rblk->lock);

	div_u64_rem(a->addr, dev->geo.sec_per_blk, &pg_offset);
	WARN_ON(test_and_set_bit(pg_offset, rblk->invalid_pages));
	rblk->nr_invalid_pages++;

	spin_unlock(&rblk->lock);

	rrpc->rev_trans_map[a->addr].addr = ADDR_EMPTY;
}

static void rrpc_invalidate_range(struct rrpc *rrpc, sector_t slba,
							unsigned int len)
{
	sector_t i;

	spin_lock(&rrpc->rev_lock);
	for (i = slba; i < slba + len; i++) {
		struct rrpc_addr *gp = &rrpc->trans_map[i];

		rrpc_page_invalidate(rrpc, gp);
		gp->rblk = NULL;
	}
	spin_unlock(&rrpc->rev_lock);
}

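/* Allocate a request and take the inflight lock for the logical address
 * range [laddr, laddr + pages). Returns ERR_PTR(-ENOMEM) if the request
 * allocation fails and NULL if the range is already inflight.
 */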
static struct nvm_rq *rrpc_inflight_laddr_acquire(struct rrpc *rrpc,
					sector_t laddr, unsigned int pages)
{
	struct nvm_rq *rqd;
	struct rrpc_inflight_rq *inf;

	rqd = mempool_alloc(rrpc->rq_pool, GFP_ATOMIC);
	if (!rqd)
		return ERR_PTR(-ENOMEM);

	inf = rrpc_get_inflight_rq(rqd);
	if (rrpc_lock_laddr(rrpc, laddr, pages, inf)) {
		mempool_free(rqd, rrpc->rq_pool);
		return NULL;
	}

	return rqd;
}

static void rrpc_inflight_laddr_release(struct rrpc *rrpc, struct nvm_rq *rqd)
{
	struct rrpc_inflight_rq *inf = rrpc_get_inflight_rq(rqd);

	rrpc_unlock_laddr(rrpc, inf);

	mempool_free(rqd, rrpc->rq_pool);
}

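/* Handle a discard bio: wait until the logical range can be locked, then
 * invalidate the mapped pages it covers.
 */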
static void rrpc_discard(struct rrpc *rrpc, struct bio *bio)
{
	sector_t slba = bio->bi_iter.bi_sector / NR_PHY_IN_LOG;
	sector_t len = bio->bi_iter.bi_size / RRPC_EXPOSED_PAGE_SIZE;
	struct nvm_rq *rqd;

	while (1) {
		rqd = rrpc_inflight_laddr_acquire(rrpc, slba, len);
		if (rqd)
			break;

		schedule();
	}

	if (IS_ERR(rqd)) {
		pr_err("rrpc: unable to acquire inflight IO\n");
		bio_io_error(bio);
		return;
	}

	rrpc_invalidate_range(rrpc, slba, len);
	rrpc_inflight_laddr_release(rrpc, rqd);
}

static int block_is_full(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_tgt_dev *dev = rrpc->dev;

	return (rblk->next_page == dev->geo.sec_per_blk);
}

/* Calculate relative addr for the given block, considering instantiated LUNs */
static u64 block_to_rel_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_lun *rlun = rblk->rlun;

	return rlun->id * dev->geo.sec_per_blk;
}

static struct ppa_addr rrpc_ppa_to_gaddr(struct nvm_tgt_dev *dev,
					 struct rrpc_addr *gp)
{
	struct rrpc_block *rblk = gp->rblk;
	struct rrpc_lun *rlun = rblk->rlun;
	u64 addr = gp->addr;
	struct ppa_addr paddr;

	paddr.ppa = addr;
	paddr = rrpc_linear_to_generic_addr(&dev->geo, paddr);
	paddr.g.ch = rlun->bppa.g.ch;
	paddr.g.lun = rlun->bppa.g.lun;
	paddr.g.blk = rblk->id;

	return paddr;
}

/* requires lun->lock taken */
static void rrpc_set_lun_cur(struct rrpc_lun *rlun, struct rrpc_block *new_rblk,
						struct rrpc_block **cur_rblk)
{
	struct rrpc *rrpc = rlun->rrpc;

	if (*cur_rblk) {
		spin_lock(&(*cur_rblk)->lock);
		WARN_ON(!block_is_full(rrpc, *cur_rblk));
		spin_unlock(&(*cur_rblk)->lock);
	}
	*cur_rblk = new_rblk;
}

static struct rrpc_block *__rrpc_get_blk(struct rrpc *rrpc,
							struct rrpc_lun *rlun)
{
	struct rrpc_block *rblk = NULL;

	if (list_empty(&rlun->free_list))
		goto out;

	rblk = list_first_entry(&rlun->free_list, struct rrpc_block, list);

	list_move_tail(&rblk->list, &rlun->used_list);
	rblk->state = NVM_BLK_ST_TGT;
	rlun->nr_free_blocks--;

out:
	return rblk;
}

static struct rrpc_block *rrpc_get_blk(struct rrpc *rrpc, struct rrpc_lun *rlun,
							unsigned long flags)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_block *rblk;
	int is_gc = flags & NVM_IOTYPE_GC;

	spin_lock(&rlun->lock);
	if (!is_gc && rlun->nr_free_blocks < rlun->reserved_blocks) {
		pr_err("nvm: rrpc: cannot give block to non GC request\n");
		spin_unlock(&rlun->lock);
		return NULL;
	}

	rblk = __rrpc_get_blk(rrpc, rlun);
	if (!rblk) {
		pr_err("nvm: rrpc: cannot get new block\n");
		spin_unlock(&rlun->lock);
		return NULL;
	}
	spin_unlock(&rlun->lock);

	bitmap_zero(rblk->invalid_pages, dev->geo.sec_per_blk);
	rblk->next_page = 0;
	rblk->nr_invalid_pages = 0;
	atomic_set(&rblk->data_cmnt_size, 0);

	return rblk;
}

static void rrpc_put_blk(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct rrpc_lun *rlun = rblk->rlun;

	spin_lock(&rlun->lock);
	if (rblk->state & NVM_BLK_ST_TGT) {
		list_move_tail(&rblk->list, &rlun->free_list);
		rlun->nr_free_blocks++;
		rblk->state = NVM_BLK_ST_FREE;
	} else if (rblk->state & NVM_BLK_ST_BAD) {
		list_move_tail(&rblk->list, &rlun->bb_list);
		rblk->state = NVM_BLK_ST_BAD;
	} else {
		WARN_ON_ONCE(1);
		pr_err("rrpc: erroneous type (ch:%d,lun:%d,blk%d-> %u)\n",
					rlun->bppa.g.ch, rlun->bppa.g.lun,
					rblk->id, rblk->state);
		list_move_tail(&rblk->list, &rlun->bb_list);
	}
	spin_unlock(&rlun->lock);
}

static void rrpc_put_blks(struct rrpc *rrpc)
{
	struct rrpc_lun *rlun;
	int i;

	for (i = 0; i < rrpc->nr_luns; i++) {
		rlun = &rrpc->luns[i];
		if (rlun->cur)
			rrpc_put_blk(rrpc, rlun->cur);
		if (rlun->gc_cur)
			rrpc_put_blk(rrpc, rlun->gc_cur);
	}
}

static struct rrpc_lun *get_next_lun(struct rrpc *rrpc)
{
	int next = atomic_inc_return(&rrpc->next_lun);

	return &rrpc->luns[next % rrpc->nr_luns];
}

static void rrpc_gc_kick(struct rrpc *rrpc)
{
	struct rrpc_lun *rlun;
	unsigned int i;

	for (i = 0; i < rrpc->nr_luns; i++) {
		rlun = &rrpc->luns[i];
		queue_work(rrpc->krqd_wq, &rlun->ws_gc);
	}
}

/*
 * timed GC every interval.
 */
static void rrpc_gc_timer(struct timer_list *t)
{
	struct rrpc *rrpc = from_timer(rrpc, t, gc_timer);

	rrpc_gc_kick(rrpc);
	mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));
}

static void rrpc_end_sync_bio(struct bio *bio)
{
	struct completion *waiting = bio->bi_private;

	if (bio->bi_status)
		pr_err("nvm: gc request failed (%u).\n", bio->bi_status);

	complete(waiting);
}

/*
 * rrpc_move_valid_pages -- migrate live data off the block
 * @rrpc: the 'rrpc' structure
 * @block: the block from which to migrate live pages
 *
 * Description:
 *   GC algorithms may call this function to migrate remaining live
 *   pages off the block prior to erasing it. This function blocks
 *   further execution until the operation is complete.
 */
static int rrpc_move_valid_pages(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct request_queue *q = dev->q;
	struct rrpc_rev_addr *rev;
	struct nvm_rq *rqd;
	struct bio *bio;
	struct page *page;
	int slot;
	int nr_sec_per_blk = dev->geo.sec_per_blk;
	u64 phys_addr;
	DECLARE_COMPLETION_ONSTACK(wait);

	if (bitmap_full(rblk->invalid_pages, nr_sec_per_blk))
		return 0;

	bio = bio_alloc(GFP_NOIO, 1);
	if (!bio) {
		pr_err("nvm: could not alloc bio to gc\n");
		return -ENOMEM;
	}

	page = mempool_alloc(rrpc->page_pool, GFP_NOIO);

	while ((slot = find_first_zero_bit(rblk->invalid_pages,
					    nr_sec_per_blk)) < nr_sec_per_blk) {

		/* Lock laddr */
		phys_addr = rrpc_blk_to_ppa(rrpc, rblk) + slot;

try:
		spin_lock(&rrpc->rev_lock);
		/* Get logical address from physical to logical table */
		rev = &rrpc->rev_trans_map[phys_addr];
		/* already updated by previous regular write */
		if (rev->addr == ADDR_EMPTY) {
			spin_unlock(&rrpc->rev_lock);
			continue;
		}

		rqd = rrpc_inflight_laddr_acquire(rrpc, rev->addr, 1);
		if (IS_ERR_OR_NULL(rqd)) {
			spin_unlock(&rrpc->rev_lock);
			schedule();
			goto try;
		}

		spin_unlock(&rrpc->rev_lock);

		/* Perform read to do GC */
		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
		bio_set_op_attrs(bio, REQ_OP_READ, 0);
		bio->bi_private = &wait;
		bio->bi_end_io = rrpc_end_sync_bio;

		/* TODO: may fail when EXP_PG_SIZE > PAGE_SIZE */
		bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);

		if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
			pr_err("rrpc: gc read failed.\n");
			rrpc_inflight_laddr_release(rrpc, rqd);
			goto finished;
		}
		wait_for_completion_io(&wait);
362
		if (bio->bi_status) {
			rrpc_inflight_laddr_release(rrpc, rqd);
			goto finished;
		}

		bio_reset(bio);
		reinit_completion(&wait);

		bio->bi_iter.bi_sector = rrpc_get_sector(rev->addr);
		bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
		bio->bi_private = &wait;
		bio->bi_end_io = rrpc_end_sync_bio;

		bio_add_pc_page(q, bio, page, RRPC_EXPOSED_PAGE_SIZE, 0);

		/* turn the command around and write the data back to a new
		 * address
		 */
		if (rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_GC)) {
			pr_err("rrpc: gc write failed.\n");
			rrpc_inflight_laddr_release(rrpc, rqd);
			goto finished;
		}
		wait_for_completion_io(&wait);

		rrpc_inflight_laddr_release(rrpc, rqd);
		if (bio->bi_status)
			goto finished;

		bio_reset(bio);
	}

finished:
	mempool_free(page, rrpc->page_pool);
	bio_put(bio);

	if (!bitmap_full(rblk->invalid_pages, nr_sec_per_blk)) {
		pr_err("nvm: failed to garbage collect block\n");
		return -EIO;
	}

	return 0;
}

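/* Per-block GC worker: migrate any remaining valid pages, erase the block
 * and return it to the lun's free list. If migration or erase fails, the
 * block is put back on the lun's prio list for a later attempt.
 */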
static void rrpc_block_gc(struct work_struct *work)
{
	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
									ws_gc);
	struct rrpc *rrpc = gcb->rrpc;
	struct rrpc_block *rblk = gcb->rblk;
	struct rrpc_lun *rlun = rblk->rlun;
	struct ppa_addr ppa;

	mempool_free(gcb, rrpc->gcb_pool);
	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' being reclaimed\n",
			rlun->bppa.g.ch, rlun->bppa.g.lun,
			rblk->id);

	if (rrpc_move_valid_pages(rrpc, rblk))
		goto put_back;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;
	ppa.g.blk = rblk->id;

	if (nvm_erase_sync(rrpc->dev, &ppa, 1))
		goto put_back;

	rrpc_put_blk(rrpc, rblk);

	return;

put_back:
	spin_lock(&rlun->lock);
	list_add_tail(&rblk->prio, &rlun->prio_list);
	spin_unlock(&rlun->lock);
}

/* The block with the highest number of invalid pages will be at the
 * beginning of the list.
 */
static struct rrpc_block *rblk_max_invalid(struct rrpc_block *ra,
							struct rrpc_block *rb)
{
	if (ra->nr_invalid_pages == rb->nr_invalid_pages)
		return ra;

	return (ra->nr_invalid_pages < rb->nr_invalid_pages) ? rb : ra;
}

/* Linearly find the block with the highest number of invalid pages.
 * Requires lun->lock to be held.
 */
static struct rrpc_block *block_prio_find_max(struct rrpc_lun *rlun)
{
	struct list_head *prio_list = &rlun->prio_list;
	struct rrpc_block *rblk, *max;

	BUG_ON(list_empty(prio_list));

	max = list_first_entry(prio_list, struct rrpc_block, prio);
	list_for_each_entry(rblk, prio_list, prio)
		max = rblk_max_invalid(max, rblk);

	return max;
}

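/* Per-lun GC worker: while the lun runs low on free blocks, pick the block
 * with the most invalid pages off the prio list and queue it for per-block
 * GC.
 */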
static void rrpc_lun_gc(struct work_struct *work)
{
	struct rrpc_lun *rlun = container_of(work, struct rrpc_lun, ws_gc);
	struct rrpc *rrpc = rlun->rrpc;
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_block_gc *gcb;
	unsigned int nr_blocks_need;

	nr_blocks_need = dev->geo.blks_per_lun / GC_LIMIT_INVERSE;

	if (nr_blocks_need < rrpc->nr_luns)
		nr_blocks_need = rrpc->nr_luns;

	spin_lock(&rlun->lock);
	while (nr_blocks_need > rlun->nr_free_blocks &&
					!list_empty(&rlun->prio_list)) {
		struct rrpc_block *rblk = block_prio_find_max(rlun);

		if (!rblk->nr_invalid_pages)
			break;

		gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
		if (!gcb)
			break;

		list_del_init(&rblk->prio);

		WARN_ON(!block_is_full(rrpc, rblk));

		pr_debug("rrpc: selected block 'ch:%d,lun:%d,blk:%d' for GC\n",
					rlun->bppa.g.ch, rlun->bppa.g.lun,
					rblk->id);

		gcb->rrpc = rrpc;
		gcb->rblk = rblk;
		INIT_WORK(&gcb->ws_gc, rrpc_block_gc);

		queue_work(rrpc->kgc_wq, &gcb->ws_gc);

		nr_blocks_need--;
	}
	spin_unlock(&rlun->lock);

	/* TODO: Hint that request queue can be started again */
}

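/* Deferred work that puts a fully written block on the lun's prio list,
 * making it a candidate for GC.
 */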
static void rrpc_gc_queue(struct work_struct *work)
{
	struct rrpc_block_gc *gcb = container_of(work, struct rrpc_block_gc,
									ws_gc);
	struct rrpc *rrpc = gcb->rrpc;
	struct rrpc_block *rblk = gcb->rblk;
	struct rrpc_lun *rlun = rblk->rlun;

	spin_lock(&rlun->lock);
	list_add_tail(&rblk->prio, &rlun->prio_list);
	spin_unlock(&rlun->lock);

	mempool_free(gcb, rrpc->gcb_pool);
	pr_debug("nvm: block 'ch:%d,lun:%d,blk:%d' full, allow GC (sched)\n",
					rlun->bppa.g.ch, rlun->bppa.g.lun,
					rblk->id);
}

static const struct block_device_operations rrpc_fops = {
	.owner		= THIS_MODULE,
};

static struct rrpc_lun *rrpc_get_lun_rr(struct rrpc *rrpc, int is_gc)
{
	unsigned int i;
	struct rrpc_lun *rlun, *max_free;

	if (!is_gc)
		return get_next_lun(rrpc);

	/* during GC, we don't care about RR; instead we want to make
	 * sure that we maintain evenness between the block luns.
	 */
	max_free = &rrpc->luns[0];
	/* prevent GC-ing lun from devouring pages of a lun with
	 * few free blocks. We don't take the lock as we only need an
	 * estimate.
	 */
	rrpc_for_each_lun(rrpc, rlun, i) {
		if (rlun->nr_free_blocks > max_free->nr_free_blocks)
			max_free = rlun;
	}

	return max_free;
}

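/* Install the logical-to-physical mapping for @laddr and update the reverse
 * map. Any previous mapping for @laddr is invalidated first.
 */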
static struct rrpc_addr *rrpc_update_map(struct rrpc *rrpc, sector_t laddr,
					struct rrpc_block *rblk, u64 paddr)
{
	struct rrpc_addr *gp;
	struct rrpc_rev_addr *rev;

	BUG_ON(laddr >= rrpc->nr_sects);

	gp = &rrpc->trans_map[laddr];
	spin_lock(&rrpc->rev_lock);
	if (gp->rblk)
		rrpc_page_invalidate(rrpc, gp);

	gp->addr = paddr;
	gp->rblk = rblk;

	rev = &rrpc->rev_trans_map[gp->addr];
	rev->addr = laddr;
	spin_unlock(&rrpc->rev_lock);

	return gp;
}

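/* Reserve the next page in @rblk under rblk->lock. Returns ADDR_EMPTY if
 * the block is already full.
 */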
static u64 rrpc_alloc_addr(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	u64 addr = ADDR_EMPTY;

	spin_lock(&rblk->lock);
	if (block_is_full(rrpc, rblk))
		goto out;

	addr = rblk->next_page;

	rblk->next_page++;
out:
	spin_unlock(&rblk->lock);
	return addr;
}

/* Map a logical address to a physical page. The mapping implements a round
 * robin approach and allocates a page from the next available lun.
 *
 * Returns a ppa_addr holding the physical address of the mapped page. If no
 * blocks in the next rlun are available, the returned address is ADDR_EMPTY.
 */
static struct ppa_addr rrpc_map_page(struct rrpc *rrpc, sector_t laddr,
								int is_gc)
{
	struct nvm_tgt_dev *tgt_dev = rrpc->dev;
	struct rrpc_lun *rlun;
	struct rrpc_block *rblk, **cur_rblk;
	struct rrpc_addr *p;
	struct ppa_addr ppa;
	u64 paddr;
	int gc_force = 0;

	ppa.ppa = ADDR_EMPTY;
	rlun = rrpc_get_lun_rr(rrpc, is_gc);

	if (!is_gc && rlun->nr_free_blocks < rrpc->nr_luns * 4)
		return ppa;

	/*
	 * page allocation steps:
	 * 1. Try to allocate a new page from the current rblk
	 * 2a. If that succeeds, proceed to map it in and return
	 * 2b. If it fails, first try to allocate a new block from the media
	 *     manager, then retry step 1. Retry until the normal block pool
	 *     is exhausted.
	 * 3. If exhausted, and the garbage collector is requesting the block,
	 *    go to the reserved block and retry step 1.
	 *    If this fails as well, or the request is not on behalf of GC,
	 *    report that no block could be retrieved and let the caller
	 *    handle further processing.
	 */

	spin_lock(&rlun->lock);
	cur_rblk = &rlun->cur;
	rblk = rlun->cur;
retry:
	paddr = rrpc_alloc_addr(rrpc, rblk);

	if (paddr != ADDR_EMPTY)
		goto done;

	if (!list_empty(&rlun->wblk_list)) {
new_blk:
		rblk = list_first_entry(&rlun->wblk_list, struct rrpc_block,
									prio);
		rrpc_set_lun_cur(rlun, rblk, cur_rblk);
		list_del(&rblk->prio);
		goto retry;
	}
	spin_unlock(&rlun->lock);

	rblk = rrpc_get_blk(rrpc, rlun, gc_force);
	if (rblk) {
		spin_lock(&rlun->lock);
		list_add_tail(&rblk->prio, &rlun->wblk_list);
		/*
		 * Another thread might already have added a new block.
		 * Therefore, make sure that one is used instead of the
		 * one just added.
		 */
		goto new_blk;
	}

	if (unlikely(is_gc) && !gc_force) {
		/* retry from emergency gc block */
		cur_rblk = &rlun->gc_cur;
		rblk = rlun->gc_cur;
		gc_force = 1;
		spin_lock(&rlun->lock);
		goto retry;
	}

	pr_err("rrpc: failed to allocate new block\n");
	return ppa;
done:
	spin_unlock(&rlun->lock);
	p = rrpc_update_map(rrpc, laddr, rblk, paddr);
	if (!p)
		return ppa;

	/* return global address */
	return rrpc_ppa_to_gaddr(tgt_dev, p);
}

static void rrpc_run_gc(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct rrpc_block_gc *gcb;

	gcb = mempool_alloc(rrpc->gcb_pool, GFP_ATOMIC);
	if (!gcb) {
		pr_err("rrpc: unable to queue block for gc.");
		return;
	}

	gcb->rrpc = rrpc;
	gcb->rblk = rblk;

	INIT_WORK(&gcb->ws_gc, rrpc_gc_queue);
	queue_work(rrpc->kgc_wq, &gcb->ws_gc);
}

static struct rrpc_lun *rrpc_ppa_to_lun(struct rrpc *rrpc, struct ppa_addr p)
{
	struct rrpc_lun *rlun = NULL;
	int i;

	for (i = 0; i < rrpc->nr_luns; i++) {
		if (rrpc->luns[i].bppa.g.ch == p.g.ch &&
				rrpc->luns[i].bppa.g.lun == p.g.lun) {
			rlun = &rrpc->luns[i];
			break;
		}
	}

	return rlun;
}

static void __rrpc_mark_bad_block(struct rrpc *rrpc, struct ppa_addr ppa)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_lun *rlun;
	struct rrpc_block *rblk;

	rlun = rrpc_ppa_to_lun(rrpc, ppa);
	rblk = &rlun->blocks[ppa.g.blk];
	rblk->state = NVM_BLK_ST_BAD;

	nvm_set_tgt_bb_tbl(dev, &ppa, 1, NVM_BLK_T_GRWN_BAD);
}

static void rrpc_mark_bad_block(struct rrpc *rrpc, struct nvm_rq *rqd)
{
	void *comp_bits = &rqd->ppa_status;
	struct ppa_addr ppa, prev_ppa;
	int nr_ppas = rqd->nr_ppas;
	int bit;

	if (rqd->nr_ppas == 1)
		__rrpc_mark_bad_block(rrpc, rqd->ppa_addr);

	ppa_set_empty(&prev_ppa);
	bit = -1;
	while ((bit = find_next_bit(comp_bits, nr_ppas, bit + 1)) < nr_ppas) {
		ppa = rqd->ppa_list[bit];
		if (ppa_cmp_blk(ppa, prev_ppa))
			continue;

		__rrpc_mark_bad_block(rrpc, ppa);
	}
}

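/* Write completion: count the sectors written to each block and hand a
 * block to GC bookkeeping once it has been fully written.
 */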
static void rrpc_end_io_write(struct rrpc *rrpc, struct rrpc_rq *rrqd,
						sector_t laddr, uint8_t npages)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_addr *p;
	struct rrpc_block *rblk;
	int cmnt_size, i;

	for (i = 0; i < npages; i++) {
		p = &rrpc->trans_map[laddr + i];
		rblk = p->rblk;

		cmnt_size = atomic_inc_return(&rblk->data_cmnt_size);
		if (unlikely(cmnt_size == dev->geo.sec_per_blk))
			rrpc_run_gc(rrpc, rblk);
	}
}

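/* Common completion handler. Write completions update per-block accounting
 * and mark grown bad blocks on write failure. Requests issued on behalf of
 * GC are unlocked and freed by the GC path itself; everything else is
 * released here.
 */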
static void rrpc_end_io(struct nvm_rq *rqd)
{
	struct rrpc *rrpc = rqd->private;
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_rq *rrqd = nvm_rq_to_pdu(rqd);
	uint8_t npages = rqd->nr_ppas;
	sector_t laddr = rrpc_get_laddr(rqd->bio) - npages;

	if (bio_data_dir(rqd->bio) == WRITE) {
		if (rqd->error == NVM_RSP_ERR_FAILWRITE)
			rrpc_mark_bad_block(rrpc, rqd);

		rrpc_end_io_write(rrpc, rrqd, laddr, npages);
	}

	bio_put(rqd->bio);

	if (rrqd->flags & NVM_IOTYPE_GC)
		return;

	rrpc_unlock_rq(rrpc, rqd);

	if (npages > 1)
		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);

	mempool_free(rqd, rrpc->rq_pool);
}

static int rrpc_read_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
			struct nvm_rq *rqd, unsigned long flags, int npages)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
	struct rrpc_addr *gp;
	sector_t laddr = rrpc_get_laddr(bio);
	int is_gc = flags & NVM_IOTYPE_GC;
	int i;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
		return NVM_IO_REQUEUE;
	}

	for (i = 0; i < npages; i++) {
		/* We assume that mapping occurs at 4KB granularity */
		BUG_ON(!(laddr + i < rrpc->nr_sects));
		gp = &rrpc->trans_map[laddr + i];

		if (gp->rblk) {
			rqd->ppa_list[i] = rrpc_ppa_to_gaddr(dev, gp);
		} else {
			BUG_ON(is_gc);
			rrpc_unlock_laddr(rrpc, r);
			nvm_dev_dma_free(dev->parent, rqd->ppa_list,
							rqd->dma_ppa_list);
			return NVM_IO_DONE;
		}
	}

	rqd->opcode = NVM_OP_HBREAD;

	return NVM_IO_OK;
}

static int rrpc_read_rq(struct rrpc *rrpc, struct bio *bio, struct nvm_rq *rqd,
							unsigned long flags)
{
	int is_gc = flags & NVM_IOTYPE_GC;
	sector_t laddr = rrpc_get_laddr(bio);
	struct rrpc_addr *gp;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
		return NVM_IO_REQUEUE;

	BUG_ON(!(laddr < rrpc->nr_sects));
	gp = &rrpc->trans_map[laddr];

	if (gp->rblk) {
		rqd->ppa_addr = rrpc_ppa_to_gaddr(rrpc->dev, gp);
	} else {
		BUG_ON(is_gc);
		rrpc_unlock_rq(rrpc, rqd);
		return NVM_IO_DONE;
	}

	rqd->opcode = NVM_OP_HBREAD;

	return NVM_IO_OK;
}

static int rrpc_write_ppalist_rq(struct rrpc *rrpc, struct bio *bio,
			struct nvm_rq *rqd, unsigned long flags, int npages)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_inflight_rq *r = rrpc_get_inflight_rq(rqd);
	struct ppa_addr p;
	sector_t laddr = rrpc_get_laddr(bio);
	int is_gc = flags & NVM_IOTYPE_GC;
	int i;

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd)) {
		nvm_dev_dma_free(dev->parent, rqd->ppa_list, rqd->dma_ppa_list);
		return NVM_IO_REQUEUE;
	}

	for (i = 0; i < npages; i++) {
		/* We assume that mapping occurs at 4KB granularity */
		p = rrpc_map_page(rrpc, laddr + i, is_gc);
		if (p.ppa == ADDR_EMPTY) {
			BUG_ON(is_gc);
			rrpc_unlock_laddr(rrpc, r);
			nvm_dev_dma_free(dev->parent, rqd->ppa_list,
							rqd->dma_ppa_list);
			rrpc_gc_kick(rrpc);
			return NVM_IO_REQUEUE;
		}

		rqd->ppa_list[i] = p;
	}

	rqd->opcode = NVM_OP_HBWRITE;

	return NVM_IO_OK;
}

static int rrpc_write_rq(struct rrpc *rrpc, struct bio *bio,
				struct nvm_rq *rqd, unsigned long flags)
{
	struct ppa_addr p;
	int is_gc = flags & NVM_IOTYPE_GC;
	sector_t laddr = rrpc_get_laddr(bio);

	if (!is_gc && rrpc_lock_rq(rrpc, bio, rqd))
		return NVM_IO_REQUEUE;

	p = rrpc_map_page(rrpc, laddr, is_gc);
	if (p.ppa == ADDR_EMPTY) {
		BUG_ON(is_gc);
		rrpc_unlock_rq(rrpc, rqd);
		rrpc_gc_kick(rrpc);
		return NVM_IO_REQUEUE;
	}

	rqd->ppa_addr = p;
	rqd->opcode = NVM_OP_HBWRITE;

	return NVM_IO_OK;
}

static int rrpc_setup_rq(struct rrpc *rrpc, struct bio *bio,
			struct nvm_rq *rqd, unsigned long flags, uint8_t npages)
{
	struct nvm_tgt_dev *dev = rrpc->dev;

	if (npages > 1) {
		rqd->ppa_list = nvm_dev_dma_alloc(dev->parent, GFP_KERNEL,
							&rqd->dma_ppa_list);
		if (!rqd->ppa_list) {
			pr_err("rrpc: not able to allocate ppa list\n");
			return NVM_IO_ERR;
		}

		if (bio_op(bio) == REQ_OP_WRITE)
			return rrpc_write_ppalist_rq(rrpc, bio, rqd, flags,
									npages);

		return rrpc_read_ppalist_rq(rrpc, bio, rqd, flags, npages);
	}

	if (bio_op(bio) == REQ_OP_WRITE)
		return rrpc_write_rq(rrpc, bio, rqd, flags);

	return rrpc_read_rq(rrpc, bio, rqd, flags);
}

static int rrpc_submit_io(struct rrpc *rrpc, struct bio *bio,
				struct nvm_rq *rqd, unsigned long flags)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_rq *rrq = nvm_rq_to_pdu(rqd);
	uint8_t nr_pages = rrpc_get_pages(bio);
	int bio_size = bio_sectors(bio) << 9;
	int err;

	if (bio_size < dev->geo.sec_size)
		return NVM_IO_ERR;
	else if (bio_size > dev->geo.max_rq_size)
		return NVM_IO_ERR;

	err = rrpc_setup_rq(rrpc, bio, rqd, flags, nr_pages);
	if (err)
		return err;

	bio_get(bio);
	rqd->bio = bio;
	rqd->private = rrpc;
	rqd->nr_ppas = nr_pages;
	rqd->end_io = rrpc_end_io;
	rrq->flags = flags;

	err = nvm_submit_io(dev, rqd);
	if (err) {
		pr_err("rrpc: I/O submission failed: %d\n", err);
		bio_put(bio);
		if (!(flags & NVM_IOTYPE_GC)) {
			rrpc_unlock_rq(rrpc, rqd);
			if (rqd->nr_ppas > 1)
				nvm_dev_dma_free(dev->parent, rqd->ppa_list,
							rqd->dma_ppa_list);
		}
		return NVM_IO_ERR;
	}

	return NVM_IO_OK;
}

static blk_qc_t rrpc_make_rq(struct request_queue *q, struct bio *bio)
{
	struct rrpc *rrpc = q->queuedata;
	struct nvm_rq *rqd;
	int err;

	blk_queue_split(q, &bio);

	if (bio_op(bio) == REQ_OP_DISCARD) {
		rrpc_discard(rrpc, bio);
		return BLK_QC_T_NONE;
	}

	rqd = mempool_alloc(rrpc->rq_pool, GFP_KERNEL);
	memset(rqd, 0, sizeof(struct nvm_rq));

	err = rrpc_submit_io(rrpc, bio, rqd, NVM_IOTYPE_NONE);
	switch (err) {
	case NVM_IO_OK:
		return BLK_QC_T_NONE;
	case NVM_IO_ERR:
		bio_io_error(bio);
		break;
	case NVM_IO_DONE:
		bio_endio(bio);
		break;
	case NVM_IO_REQUEUE:
		spin_lock(&rrpc->bio_lock);
		bio_list_add(&rrpc->requeue_bios, bio);
		spin_unlock(&rrpc->bio_lock);
		queue_work(rrpc->kgc_wq, &rrpc->ws_requeue);
		break;
	}

	mempool_free(rqd, rrpc->rq_pool);
	return BLK_QC_T_NONE;
}

static void rrpc_requeue(struct work_struct *work)
{
	struct rrpc *rrpc = container_of(work, struct rrpc, ws_requeue);
	struct bio_list bios;
	struct bio *bio;

	bio_list_init(&bios);

	spin_lock(&rrpc->bio_lock);
	bio_list_merge(&bios, &rrpc->requeue_bios);
	bio_list_init(&rrpc->requeue_bios);
	spin_unlock(&rrpc->bio_lock);

	while ((bio = bio_list_pop(&bios)))
		rrpc_make_rq(rrpc->disk->queue, bio);
}

static void rrpc_gc_free(struct rrpc *rrpc)
{
	if (rrpc->krqd_wq)
		destroy_workqueue(rrpc->krqd_wq);

	if (rrpc->kgc_wq)
		destroy_workqueue(rrpc->kgc_wq);
}

static int rrpc_gc_init(struct rrpc *rrpc)
{
	rrpc->krqd_wq = alloc_workqueue("rrpc-lun", WQ_MEM_RECLAIM|WQ_UNBOUND,
								rrpc->nr_luns);
	if (!rrpc->krqd_wq)
		return -ENOMEM;

	rrpc->kgc_wq = alloc_workqueue("rrpc-bg", WQ_MEM_RECLAIM, 1);
	if (!rrpc->kgc_wq)
		return -ENOMEM;

	timer_setup(&rrpc->gc_timer, rrpc_gc_timer, 0);

	return 0;
}

static void rrpc_map_free(struct rrpc *rrpc)
{
	vfree(rrpc->rev_trans_map);
	vfree(rrpc->trans_map);
}

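/* Callback for nvm_get_l2p_tbl(): rebuild the translation and reverse maps
 * from the device's L2P table and mark the referenced blocks as in use.
 */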
static int rrpc_l2p_update(u64 slba, u32 nlb, __le64 *entries, void *private)
{
	struct rrpc *rrpc = (struct rrpc *)private;
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_addr *addr = rrpc->trans_map + slba;
	struct rrpc_rev_addr *raddr = rrpc->rev_trans_map;
	struct rrpc_lun *rlun;
	struct rrpc_block *rblk;
	u64 i;

	for (i = 0; i < nlb; i++) {
		struct ppa_addr gaddr;
		u64 pba = le64_to_cpu(entries[i]);
		unsigned int mod;

		/* LNVM treats address spaces as silos: LBA and PBA are
		 * equally large and zero-indexed.
		 */
		if (unlikely(pba >= dev->total_secs && pba != U64_MAX)) {
			pr_err("nvm: L2P data entry is out of bounds!\n");
			pr_err("nvm: Maybe loaded an old target L2P\n");
			return -EINVAL;
		}

		/* Address zero is a special one. The first page on a disk is
		 * protected, as it often holds internal device boot
		 * information.
		 */
		if (!pba)
			continue;

		div_u64_rem(pba, rrpc->nr_sects, &mod);

		gaddr = rrpc_recov_addr(dev, pba);
		rlun = rrpc_ppa_to_lun(rrpc, gaddr);
		if (!rlun) {
			pr_err("rrpc: l2p corruption on lba %llu\n",
							slba + i);
			return -EINVAL;
		}

		rblk = &rlun->blocks[gaddr.g.blk];
		if (!rblk->state) {
			/* at this point, we don't know anything about the
			 * block. It's up to the FTL on top to re-establish the
			 * block state. The block is assumed to be open.
			 */
			list_move_tail(&rblk->list, &rlun->used_list);
			rblk->state = NVM_BLK_ST_TGT;
			rlun->nr_free_blocks--;
		}

		addr[i].addr = pba;
		addr[i].rblk = rblk;
		raddr[mod].addr = slba + i;
	}

	return 0;
}

static int rrpc_map_init(struct rrpc *rrpc)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	sector_t i;
	int ret;

	rrpc->trans_map = vzalloc(sizeof(struct rrpc_addr) * rrpc->nr_sects);
	if (!rrpc->trans_map)
		return -ENOMEM;

	rrpc->rev_trans_map = vmalloc(sizeof(struct rrpc_rev_addr)
							* rrpc->nr_sects);
	if (!rrpc->rev_trans_map)
		return -ENOMEM;

	for (i = 0; i < rrpc->nr_sects; i++) {
		struct rrpc_addr *p = &rrpc->trans_map[i];
		struct rrpc_rev_addr *r = &rrpc->rev_trans_map[i];

		p->addr = ADDR_EMPTY;
		r->addr = ADDR_EMPTY;
	}

	/* Bring up the mapping table from device */
	ret = nvm_get_l2p_tbl(dev, rrpc->soffset, rrpc->nr_sects,
							rrpc_l2p_update, rrpc);
	if (ret) {
		pr_err("nvm: rrpc: could not read L2P table.\n");
		return -EINVAL;
	}

	return 0;
}

/* Minimum pages needed within a lun */
#define PAGE_POOL_SIZE 16
#define ADDR_POOL_SIZE 64

static int rrpc_core_init(struct rrpc *rrpc)
{
	down_write(&rrpc_lock);
	if (!rrpc_gcb_cache) {
		rrpc_gcb_cache = kmem_cache_create("rrpc_gcb",
				sizeof(struct rrpc_block_gc), 0, 0, NULL);
		if (!rrpc_gcb_cache) {
			up_write(&rrpc_lock);
			return -ENOMEM;
		}

		rrpc_rq_cache = kmem_cache_create("rrpc_rq",
				sizeof(struct nvm_rq) + sizeof(struct rrpc_rq),
				0, 0, NULL);
		if (!rrpc_rq_cache) {
			kmem_cache_destroy(rrpc_gcb_cache);
			up_write(&rrpc_lock);
			return -ENOMEM;
		}
	}
	up_write(&rrpc_lock);

	rrpc->page_pool = mempool_create_page_pool(PAGE_POOL_SIZE, 0);
	if (!rrpc->page_pool)
		return -ENOMEM;

	rrpc->gcb_pool = mempool_create_slab_pool(rrpc->dev->geo.nr_luns,
								rrpc_gcb_cache);
	if (!rrpc->gcb_pool)
		return -ENOMEM;

	rrpc->rq_pool = mempool_create_slab_pool(64, rrpc_rq_cache);
	if (!rrpc->rq_pool)
		return -ENOMEM;

	spin_lock_init(&rrpc->inflights.lock);
	INIT_LIST_HEAD(&rrpc->inflights.reqs);

	return 0;
}

static void rrpc_core_free(struct rrpc *rrpc)
{
	mempool_destroy(rrpc->page_pool);
	mempool_destroy(rrpc->gcb_pool);
	mempool_destroy(rrpc->rq_pool);
}

static void rrpc_luns_free(struct rrpc *rrpc)
{
	struct rrpc_lun *rlun;
	int i;

	if (!rrpc->luns)
		return;

	for (i = 0; i < rrpc->nr_luns; i++) {
		rlun = &rrpc->luns[i];
		vfree(rlun->blocks);
	}

	kfree(rrpc->luns);
}

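/* Read the bad block table for @rlun and move any blocks marked bad off its
 * free list.
 */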
static int rrpc_bb_discovery(struct nvm_tgt_dev *dev, struct rrpc_lun *rlun)
{
	struct nvm_geo *geo = &dev->geo;
	struct rrpc_block *rblk;
	struct ppa_addr ppa;
	u8 *blks;
	int nr_blks;
	int i;
	int ret;

	if (!dev->parent->ops->get_bb_tbl)
		return 0;

	nr_blks = geo->blks_per_lun * geo->plane_mode;
	blks = kmalloc(nr_blks, GFP_KERNEL);
	if (!blks)
		return -ENOMEM;

	ppa.ppa = 0;
	ppa.g.ch = rlun->bppa.g.ch;
	ppa.g.lun = rlun->bppa.g.lun;

	ret = nvm_get_tgt_bb_tbl(dev, ppa, blks);
	if (ret) {
		pr_err("rrpc: could not get BB table\n");
		goto out;
	}

	nr_blks = nvm_bb_tbl_fold(dev->parent, blks, nr_blks);
	if (nr_blks < 0) {
		ret = nr_blks;
		goto out;
	}

	for (i = 0; i < nr_blks; i++) {
		if (blks[i] == NVM_BLK_T_FREE)
			continue;

		rblk = &rlun->blocks[i];
		list_move_tail(&rblk->list, &rlun->bb_list);
		rblk->state = NVM_BLK_ST_BAD;
		rlun->nr_free_blocks--;
	}

out:
	kfree(blks);
	return ret;
}

static void rrpc_set_lun_ppa(struct rrpc_lun *rlun, struct ppa_addr ppa)
{
	rlun->bppa.ppa = 0;
	rlun->bppa.g.ch = ppa.g.ch;
	rlun->bppa.g.lun = ppa.g.lun;
}

static int rrpc_luns_init(struct rrpc *rrpc, struct ppa_addr *luns)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct nvm_geo *geo = &dev->geo;
	struct rrpc_lun *rlun;
	int i, j, ret = -EINVAL;

	if (geo->sec_per_blk > MAX_INVALID_PAGES_STORAGE * BITS_PER_LONG) {
		pr_err("rrpc: number of pages per block too high.");
		return -EINVAL;
	}

	spin_lock_init(&rrpc->rev_lock);

	rrpc->luns = kcalloc(rrpc->nr_luns, sizeof(struct rrpc_lun),
								GFP_KERNEL);
	if (!rrpc->luns)
		return -ENOMEM;

	/* 1:1 mapping */
	for (i = 0; i < rrpc->nr_luns; i++) {
		rlun = &rrpc->luns[i];
		rlun->id = i;
		rrpc_set_lun_ppa(rlun, luns[i]);
		rlun->blocks = vzalloc(sizeof(struct rrpc_block) *
							geo->blks_per_lun);
		if (!rlun->blocks) {
			ret = -ENOMEM;
			goto err;
		}

		INIT_LIST_HEAD(&rlun->free_list);
		INIT_LIST_HEAD(&rlun->used_list);
		INIT_LIST_HEAD(&rlun->bb_list);

		for (j = 0; j < geo->blks_per_lun; j++) {
			struct rrpc_block *rblk = &rlun->blocks[j];

			rblk->id = j;
			rblk->rlun = rlun;
			rblk->state = NVM_BLK_T_FREE;
			INIT_LIST_HEAD(&rblk->prio);
			INIT_LIST_HEAD(&rblk->list);
			spin_lock_init(&rblk->lock);

			list_add_tail(&rblk->list, &rlun->free_list);
		}

		rlun->rrpc = rrpc;
		rlun->nr_free_blocks = geo->blks_per_lun;
		rlun->reserved_blocks = 2; /* for GC only */

		INIT_LIST_HEAD(&rlun->prio_list);
		INIT_LIST_HEAD(&rlun->wblk_list);

		INIT_WORK(&rlun->ws_gc, rrpc_lun_gc);
		spin_lock_init(&rlun->lock);

		if (rrpc_bb_discovery(dev, rlun))
			goto err;

	}

	return 0;
err:
	return ret;
}

/* returns 0 on success and stores the beginning address in *begin */
static int rrpc_area_init(struct rrpc *rrpc, sector_t *begin)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	sector_t size = rrpc->nr_sects * dev->geo.sec_size;
	int ret;

	size >>= 9;

	ret = nvm_get_area(dev, begin, size);
	if (!ret)
		*begin >>= (ilog2(dev->geo.sec_size) - 9);

	return ret;
}

static void rrpc_area_free(struct rrpc *rrpc)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	sector_t begin = rrpc->soffset << (ilog2(dev->geo.sec_size) - 9);

	nvm_put_area(dev, begin);
}

static void rrpc_free(struct rrpc *rrpc)
{
	rrpc_gc_free(rrpc);
	rrpc_map_free(rrpc);
	rrpc_core_free(rrpc);
	rrpc_luns_free(rrpc);
	rrpc_area_free(rrpc);

	kfree(rrpc);
}

static void rrpc_exit(void *private)
{
	struct rrpc *rrpc = private;

	del_timer(&rrpc->gc_timer);

	flush_workqueue(rrpc->krqd_wq);
	flush_workqueue(rrpc->kgc_wq);

	rrpc_free(rrpc);
}

static sector_t rrpc_capacity(void *private)
{
	struct rrpc *rrpc = private;
	struct nvm_tgt_dev *dev = rrpc->dev;
	sector_t reserved, provisioned;

	/* cur, gc, and two emergency blocks for each lun */
	reserved = rrpc->nr_luns * dev->geo.sec_per_blk * 4;
	provisioned = rrpc->nr_sects - reserved;

	if (reserved > rrpc->nr_sects) {
		pr_err("rrpc: not enough space available to expose storage.\n");
		return 0;
	}

	sector_div(provisioned, 10);
	return provisioned * 9 * NR_PHY_IN_LOG;
}

/*
 * Looks up each physical page's logical address in the reverse trans map and
 * checks whether it is still valid by comparing the logical-to-physical
 * mapping against the physical address. Valid pages get their block pointer
 * restored; stale pages are marked invalid.
 */
static void rrpc_block_map_update(struct rrpc *rrpc, struct rrpc_block *rblk)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	int offset;
	struct rrpc_addr *laddr;
	u64 bpaddr, paddr, pladdr;

	bpaddr = block_to_rel_addr(rrpc, rblk);
	for (offset = 0; offset < dev->geo.sec_per_blk; offset++) {
		paddr = bpaddr + offset;

		pladdr = rrpc->rev_trans_map[paddr].addr;
		if (pladdr == ADDR_EMPTY)
			continue;

		laddr = &rrpc->trans_map[pladdr];

		if (paddr == laddr->addr) {
			laddr->rblk = rblk;
		} else {
			set_bit(offset, rblk->invalid_pages);
			rblk->nr_invalid_pages++;
		}
	}
}

static int rrpc_blocks_init(struct rrpc *rrpc)
{
	struct nvm_tgt_dev *dev = rrpc->dev;
	struct rrpc_lun *rlun;
	struct rrpc_block *rblk;
	int lun_iter, blk_iter;

	for (lun_iter = 0; lun_iter < rrpc->nr_luns; lun_iter++) {
		rlun = &rrpc->luns[lun_iter];

		for (blk_iter = 0; blk_iter < dev->geo.blks_per_lun;
								blk_iter++) {
			rblk = &rlun->blocks[blk_iter];
			rrpc_block_map_update(rrpc, rblk);
		}
	}

	return 0;
}

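/* Pick an initial current block and an emergency GC block for each lun. */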
static int rrpc_luns_configure(struct rrpc *rrpc)
{
	struct rrpc_lun *rlun;
	struct rrpc_block *rblk;
	int i;

	for (i = 0; i < rrpc->nr_luns; i++) {
		rlun = &rrpc->luns[i];

		rblk = rrpc_get_blk(rrpc, rlun, 0);
		if (!rblk)
			goto err;
		rrpc_set_lun_cur(rlun, rblk, &rlun->cur);

		/* Emergency gc block */
		rblk = rrpc_get_blk(rrpc, rlun, 1);
		if (!rblk)
			goto err;
		rrpc_set_lun_cur(rlun, rblk, &rlun->gc_cur);
	}

	return 0;
err:
	rrpc_put_blks(rrpc);
	return -EINVAL;
}

static struct nvm_tgt_type tt_rrpc;

static void *rrpc_init(struct nvm_tgt_dev *dev, struct gendisk *tdisk,
		       int flags)
{
	struct request_queue *bqueue = dev->q;
	struct request_queue *tqueue = tdisk->queue;
	struct nvm_geo *geo = &dev->geo;
	struct rrpc *rrpc;
	sector_t soffset;
	int ret;

	if (!(dev->identity.dom & NVM_RSP_L2P)) {
		pr_err("nvm: rrpc: device does not support l2p (%x)\n",
							dev->identity.dom);
		return ERR_PTR(-EINVAL);
	}

	rrpc = kzalloc(sizeof(struct rrpc), GFP_KERNEL);
	if (!rrpc)
		return ERR_PTR(-ENOMEM);

	rrpc->dev = dev;
	rrpc->disk = tdisk;

	bio_list_init(&rrpc->requeue_bios);
	spin_lock_init(&rrpc->bio_lock);
	INIT_WORK(&rrpc->ws_requeue, rrpc_requeue);

	rrpc->nr_luns = geo->nr_luns;
	rrpc->nr_sects = (unsigned long long)geo->sec_per_lun * rrpc->nr_luns;

	/* simple round-robin strategy */
	atomic_set(&rrpc->next_lun, -1);

	ret = rrpc_area_init(rrpc, &soffset);
	if (ret < 0) {
		pr_err("nvm: rrpc: could not initialize area\n");
		return ERR_PTR(ret);
	}
	rrpc->soffset = soffset;

	ret = rrpc_luns_init(rrpc, dev->luns);
	if (ret) {
		pr_err("nvm: rrpc: could not initialize luns\n");
		goto err;
	}

	ret = rrpc_core_init(rrpc);
	if (ret) {
		pr_err("nvm: rrpc: could not initialize core\n");
		goto err;
	}

	ret = rrpc_map_init(rrpc);
	if (ret) {
		pr_err("nvm: rrpc: could not initialize maps\n");
		goto err;
	}

	ret = rrpc_blocks_init(rrpc);
	if (ret) {
		pr_err("nvm: rrpc: could not initialize state for blocks\n");
		goto err;
	}

	ret = rrpc_luns_configure(rrpc);
	if (ret) {
		pr_err("nvm: rrpc: not enough blocks available in LUNs.\n");
		goto err;
	}

	ret = rrpc_gc_init(rrpc);
	if (ret) {
		pr_err("nvm: rrpc: could not initialize gc\n");
		goto err;
	}

	/* inherit the size from the underlying device */
	blk_queue_logical_block_size(tqueue, queue_physical_block_size(bqueue));
	blk_queue_max_hw_sectors(tqueue, queue_max_hw_sectors(bqueue));

	pr_info("nvm: rrpc initialized with %u luns and %llu pages.\n",
			rrpc->nr_luns, (unsigned long long)rrpc->nr_sects);

	mod_timer(&rrpc->gc_timer, jiffies + msecs_to_jiffies(10));

	return rrpc;
err:
	rrpc_free(rrpc);
	return ERR_PTR(ret);
}

/* round robin, page-based FTL, and cost-based GC */
static struct nvm_tgt_type tt_rrpc = {
	.name		= "rrpc",
	.version	= {1, 0, 0},

	.make_rq	= rrpc_make_rq,
	.capacity	= rrpc_capacity,

	.init		= rrpc_init,
	.exit		= rrpc_exit,
};

static int __init rrpc_module_init(void)
{
	return nvm_register_tgt_type(&tt_rrpc);
}

static void rrpc_module_exit(void)
{
	nvm_unregister_tgt_type(&tt_rrpc);
}

module_init(rrpc_module_init);
module_exit(rrpc_module_exit);
MODULE_LICENSE("GPL v2");
MODULE_DESCRIPTION("Block-Device Target for Open-Channel SSDs");