/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>

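/*
 * Free a GC request. The data buffer is vmalloc'ed (see
 * pblk_gc_move_valid_secs()) while the request itself is kmalloc'ed, so the
 * two must be released with matching allocators.
 */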
static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
	vfree(gc_rq->data);
	kfree(gc_rq);
}

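/*
 * Drain the GC write list and submit the buffered sectors to the write cache.
 * Each request drops its line reference once its data has been handed over.
 * Returns 1 if the list was empty, signalling the writer thread to sleep.
 */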
static int pblk_gc_write(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_gc_rq *gc_rq, *tgc_rq;
	LIST_HEAD(w_list);

	spin_lock(&gc->w_lock);
	if (list_empty(&gc->w_list)) {
		spin_unlock(&gc->w_lock);
		return 1;
	}

	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
	gc->w_entries = 0;
	spin_unlock(&gc->w_lock);

	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
		pblk_write_gc_to_cache(pblk, gc_rq->data, gc_rq->lba_list,
				gc_rq->nr_secs, gc_rq->secs_to_gc,
				gc_rq->line, PBLK_IOTYPE_GC);

		list_del(&gc_rq->list);
		kref_put(&gc_rq->line->ref, pblk_line_put);
		pblk_gc_free_gc_rq(gc_rq);
	}

	return 0;
}

static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_writer_ts);
}

/*
 * Responsible for managing all memory related to a gc request, including
 * freeing it when moving the valid sectors fails along the way.
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line = gc_rq->line;
	void *data;
	unsigned int secs_to_gc;
	int ret = 0;

	data = vmalloc(gc_rq->nr_secs * geo->sec_size);
	if (!data) {
		ret = -ENOMEM;
		goto out;
	}

	/* Read from GC victim block */
	if (pblk_submit_read_gc(pblk, gc_rq->lba_list, data, gc_rq->nr_secs,
							&secs_to_gc, line)) {
		ret = -EFAULT;
		goto free_data;
	}

	if (!secs_to_gc)
		goto free_rq;

	gc_rq->data = data;
	gc_rq->secs_to_gc = secs_to_gc;

retry:
	spin_lock(&gc->w_lock);
	if (gc->w_entries >= PBLK_GC_W_QD) {
		spin_unlock(&gc->w_lock);
		pblk_gc_writer_kick(&pblk->gc);
		usleep_range(128, 256);
		goto retry;
	}
	gc->w_entries++;
	list_add_tail(&gc_rq->list, &gc->w_list);
	spin_unlock(&gc->w_lock);

	pblk_gc_writer_kick(&pblk->gc);

	return 0;

free_rq:
	kfree(gc_rq);
free_data:
	vfree(data);
out:
	kref_put(&line->ref, pblk_line_put);
	return ret;
}

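/*
 * Return a line whose GC attempt failed to the CLOSED state and put it back
 * on the appropriate GC group list, so it can be picked as a victim again.
 */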
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list;

	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_CLOSED;
	move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);

	if (move_list) {
		spin_lock(&l_mg->gc_lock);
		list_add_tail(&line->list, move_list);
		spin_unlock(&l_mg->gc_lock);
	}
}

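/*
 * Work item that moves the valid sectors of a single GC request. The gc_sem
 * slot taken when the request was queued is released up front, letting the
 * next request be scheduled while this one is processed.
 */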
static void pblk_gc_line_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_rq_ws = container_of(work,
						struct pblk_line_ws, ws);
	struct pblk *pblk = line_rq_ws->pblk;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line = line_rq_ws->line;
	struct pblk_gc_rq *gc_rq = line_rq_ws->priv;

	up(&gc->gc_sem);

	if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
		pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
						line->id, *line->vsc,
						gc_rq->nr_secs);
	}

	mempool_free(line_rq_ws, pblk->line_ws_pool);
}

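/*
 * Prepare a victim line for collection: read its emeta to recover the LBA
 * mapping, then carve the valid sectors into GC requests of at most
 * max_write_pgs sectors each and queue them on the line reader workqueue.
 */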
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_gc *gc = &pblk->gc;
	struct line_emeta *emeta_buf;
	struct pblk_line_ws *line_rq_ws;
	struct pblk_gc_rq *gc_rq;
	__le64 *lba_list;
	int sec_left, nr_secs, bit;
	int ret;

	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
								GFP_KERNEL);
	if (!emeta_buf) {
		pr_err("pblk: cannot use GC emeta\n");
		return;
	}

	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
		goto fail_free_emeta;
	}

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */
	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
	if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

	sec_left = pblk_line_vsc(line);
	if (sec_left < 0) {
		pr_err("pblk: corrupted GC line (%d)\n", line->id);
		goto fail_free_emeta;
	}

	bit = -1;
next_rq:
	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
	if (!gc_rq)
		goto fail_free_emeta;

	nr_secs = 0;
	do {
		bit = find_next_zero_bit(line->invalid_bitmap, lm->sec_per_line,
								bit + 1);
		if (bit > line->emeta_ssec)
			break;

		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
	} while (nr_secs < pblk->max_write_pgs);

	if (unlikely(!nr_secs)) {
		kfree(gc_rq);
		goto out;
	}

	gc_rq->nr_secs = nr_secs;
	gc_rq->line = line;

	line_rq_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
	if (!line_rq_ws)
		goto fail_free_gc_rq;

	line_rq_ws->pblk = pblk;
	line_rq_ws->line = line;
	line_rq_ws->priv = gc_rq;

	down(&gc->gc_sem);
	kref_get(&line->ref);

	INIT_WORK(&line_rq_ws->ws, pblk_gc_line_ws);
	queue_work(gc->gc_line_reader_wq, &line_rq_ws->ws);

	sec_left -= nr_secs;
	if (sec_left > 0)
		goto next_rq;

out:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	mempool_free(line_ws, pblk->line_ws_pool);

	kref_put(&line->ref, pblk_line_put);
	atomic_dec(&gc->inflight_gc);

	return;

fail_free_gc_rq:
	kfree(gc_rq);
fail_free_emeta:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	pblk_put_line_back(pblk, line);
	kref_put(&line->ref, pblk_line_put);
	mempool_free(line_ws, pblk->line_ws_pool);
	atomic_dec(&gc->inflight_gc);

	pr_err("pblk: Failed to GC line %d\n", line->id);
}

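/* Queue a victim line for preparation on the GC reader workqueue */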
static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line_ws *line_ws;

	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

	line_ws = mempool_alloc(pblk->line_ws_pool, GFP_KERNEL);
	if (!line_ws)
		return -ENOMEM;

	line_ws->pblk = pblk;
	line_ws->line = line;

	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
	queue_work(gc->gc_reader_wq, &line_ws->ws);

	return 0;
}

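/*
 * Take the next victim line off the reader list and start collecting it.
 * Returns 1 if there was nothing to do.
 */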
static int pblk_gc_read(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;

	spin_lock(&gc->r_lock);
	if (list_empty(&gc->r_list)) {
		spin_unlock(&gc->r_lock);
		return 1;
	}

	line = list_first_entry(&gc->r_list, struct pblk_line, list);
	list_del(&line->list);
	spin_unlock(&gc->r_lock);

	pblk_gc_kick(pblk);

	if (pblk_gc_line(pblk, line))
		pr_err("pblk: failed to GC line %d\n", line->id);

	return 0;
}

static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_reader_ts);
}

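/*
 * Greedy victim selection: pick the line in the group with the fewest valid
 * sectors, so that reclaiming it requires the least data movement.
 */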
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
						 struct list_head *group_list)
{
	struct pblk_line *line, *victim;
	int line_vsc, victim_vsc;

	victim = list_first_entry(group_list, struct pblk_line, list);
	list_for_each_entry(line, group_list, list) {
		line_vsc = le32_to_cpu(*line->vsc);
		victim_vsc = le32_to_cpu(*victim->vsc);
		if (line_vsc < victim_vsc)
			victim = line;
	}

	return victim;
}

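/*
 * GC should run as long as it is active and the number of free blocks has
 * dropped below the rate limiter's high threshold.
 */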
static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
	unsigned int nr_blocks_free, nr_blocks_need;

	nr_blocks_need = pblk_rl_high_thrs(rl);
	nr_blocks_free = pblk_rl_nr_free_blks(rl);

	/* This is not critical, no need to take lock here */
	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}

/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;
	struct list_head *group_list;
	bool run_gc;
	int inflight_gc, gc_group = 0, prev_group = 0;

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(&l_mg->gc_full_list)) {
			spin_unlock(&l_mg->gc_lock);
			break;
		}

		line = list_first_entry(&l_mg->gc_full_list,
							struct pblk_line, list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		kref_put(&line->ref, pblk_line_put);
	} while (1);

	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
	if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
		return;

next_gc_group:
	group_list = l_mg->gc_lists[gc_group++];

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(group_list)) {
			spin_unlock(&l_mg->gc_lock);
			break;
		}

		line = pblk_gc_get_victim_line(pblk, group_list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		spin_lock(&gc->r_lock);
		list_add_tail(&line->list, &gc->r_list);
		spin_unlock(&gc->r_lock);

		inflight_gc = atomic_inc_return(&gc->inflight_gc);
		pblk_gc_reader_kick(gc);

		prev_group = 1;

		/* No need to queue up more GC lines than we can handle */
		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
		if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
			break;
	} while (1);

	if (!prev_group && pblk->rl.rb_state > gc_group &&
						gc_group < PBLK_GC_NR_LISTS)
		goto next_gc_group;
}

void pblk_gc_kick(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	wake_up_process(gc->gc_ts);
	pblk_gc_writer_kick(gc);
	pblk_gc_reader_kick(gc);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}

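/* Timer callback; the timer itself is re-armed from pblk_gc_kick() */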
static void pblk_gc_timer(unsigned long data)
{
	struct pblk *pblk = (struct pblk *)data;

	pblk_gc_kick(pblk);
}

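/*
 * Main GC kthread: selects victim lines and feeds them to the reader path,
 * then sleeps until kicked.
 */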
static int pblk_gc_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		pblk_gc_run(pblk);
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

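/* Writer kthread: drains the GC write list, sleeping whenever it runs empty */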
static int pblk_gc_writer_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_write(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

static int pblk_gc_reader_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_read(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

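/*
 * Starting and stopping GC only toggles gc_active; the kthreads keep running
 * and simply stop selecting victims while it is clear (see
 * pblk_gc_should_run()).
 */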
static void pblk_gc_start(struct pblk *pblk)
{
	pblk->gc.gc_active = 1;
	pr_debug("pblk: gc start\n");
}

void pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_enabled && !gc->gc_active)
		pblk_gc_start(pblk);

	pblk_gc_kick(pblk);
}

/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
	pblk->gc.gc_active = 0;
	pr_debug("pblk: gc stop\n");
}

void pblk_gc_should_stop(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_active && !gc->gc_forced)
		pblk_gc_stop(pblk, 0);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
			      int *gc_active)
{
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&gc->lock);
	*gc_enabled = gc->gc_enabled;
	*gc_active = gc->gc_active;
	spin_unlock(&gc->lock);
}

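/*
 * Called from sysfs. Forcing GC also enables it, and gc_forced additionally
 * keeps pblk_gc_should_stop() from switching it back off.
 */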
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
	struct pblk_gc *gc = &pblk->gc;

	if (force < 0 || force > 1)
		return -EINVAL;

	spin_lock(&gc->lock);
	gc->gc_forced = force;

	if (force)
		gc->gc_enabled = 1;
	else
		gc->gc_enabled = 0;
	spin_unlock(&gc->lock);

	pblk_gc_should_start(pblk);

	return 0;
}

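/*
 * Bring up the GC machinery: three kthreads (victim selection, reader and
 * writer), the kick timer and two workqueues. gc_sem bounds the number of
 * in-flight GC requests.
 */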
int pblk_gc_init(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	int ret;

	gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
	if (IS_ERR(gc->gc_ts)) {
		pr_err("pblk: could not allocate GC main kthread\n");
		return PTR_ERR(gc->gc_ts);
	}

	gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
							"pblk-gc-writer-ts");
	if (IS_ERR(gc->gc_writer_ts)) {
		pr_err("pblk: could not allocate GC writer kthread\n");
		ret = PTR_ERR(gc->gc_writer_ts);
		goto fail_free_main_kthread;
	}

	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
							"pblk-gc-reader-ts");
	if (IS_ERR(gc->gc_reader_ts)) {
		pr_err("pblk: could not allocate GC reader kthread\n");
		ret = PTR_ERR(gc->gc_reader_ts);
		goto fail_free_writer_kthread;
	}

	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

	gc->gc_active = 0;
	gc->gc_forced = 0;
	gc->gc_enabled = 1;
	gc->w_entries = 0;
	atomic_set(&gc->inflight_gc, 0);

	/* Workqueue that reads valid sectors from a line and submits them to
	 * the GC writer to be recycled.
	 */
	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
	if (!gc->gc_line_reader_wq) {
		pr_err("pblk: could not allocate GC line reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_kthread;
	}

	/* Workqueue that prepares lines for GC */
	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!gc->gc_reader_wq) {
		pr_err("pblk: could not allocate GC reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_line_wq;
	}

	spin_lock_init(&gc->lock);
	spin_lock_init(&gc->w_lock);
	spin_lock_init(&gc->r_lock);

	sema_init(&gc->gc_sem, 128);

	INIT_LIST_HEAD(&gc->w_list);
	INIT_LIST_HEAD(&gc->r_list);

	return 0;

fail_free_reader_line_wq:
	destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
	kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
	kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
	kthread_stop(gc->gc_ts);

	return ret;
}

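/*
 * Tear-down mirrors pblk_gc_init(): flush the workqueues so no work items are
 * in flight, stop GC, then stop the kthreads and destroy the queues.
 */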
void pblk_gc_exit(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	flush_workqueue(gc->gc_reader_wq);
	flush_workqueue(gc->gc_line_reader_wq);

	del_timer(&gc->gc_timer);
	pblk_gc_stop(pblk, 1);

	if (gc->gc_ts)
		kthread_stop(gc->gc_ts);

	if (gc->gc_reader_wq)
		destroy_workqueue(gc->gc_reader_wq);

	if (gc->gc_line_reader_wq)
		destroy_workqueue(gc->gc_line_reader_wq);

	if (gc->gc_writer_ts)
		kthread_stop(gc->gc_writer_ts);

	if (gc->gc_reader_ts)
		kthread_stop(gc->gc_reader_ts);
}