/*
 * Copyright (C) 2016 CNEX Labs
 * Initial release: Javier Gonzalez <javier@cnexlabs.com>
 *                  Matias Bjorling <matias@cnexlabs.com>
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License version
 * 2 as published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * General Public License for more details.
 *
 * pblk-gc.c - pblk's garbage collector
 */

#include "pblk.h"
#include <linux/delay.h>

static void pblk_gc_free_gc_rq(struct pblk_gc_rq *gc_rq)
{
	vfree(gc_rq->data);
	kfree(gc_rq);
}

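/*
 * Drain the GC write list and submit the buffered sectors to the write cache,
 * dropping the per-request reference held on each victim line.
 */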
static int pblk_gc_write(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_gc_rq *gc_rq, *tgc_rq;
	LIST_HEAD(w_list);

	spin_lock(&gc->w_lock);
	if (list_empty(&gc->w_list)) {
		spin_unlock(&gc->w_lock);
		return 1;
	}

	list_cut_position(&w_list, &gc->w_list, gc->w_list.prev);
	gc->w_entries = 0;
	spin_unlock(&gc->w_lock);

	list_for_each_entry_safe(gc_rq, tgc_rq, &w_list, list) {
		pblk_write_gc_to_cache(pblk, gc_rq);
		list_del(&gc_rq->list);
		kref_put(&gc_rq->line->ref, pblk_line_put);
		pblk_gc_free_gc_rq(gc_rq);
	}

	return 0;
}

static void pblk_gc_writer_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_writer_ts);
}

/*
 * Responsible for managing all memory related to a gc request; on failure it
 * also frees the request and drops the reference on the victim line.
 */
static int pblk_gc_move_valid_secs(struct pblk *pblk, struct pblk_gc_rq *gc_rq)
{
	struct nvm_tgt_dev *dev = pblk->dev;
	struct nvm_geo *geo = &dev->geo;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line = gc_rq->line;
	void *data;
	int ret = 0;

	data = vmalloc(gc_rq->nr_secs * geo->sec_size);
	if (!data) {
		ret = -ENOMEM;
		goto fail;
	}

	gc_rq->data = data;

	/* Read from GC victim block */
	ret = pblk_submit_read_gc(pblk, gc_rq);
	if (ret)
		goto fail;

	if (!gc_rq->secs_to_gc)
		goto fail;

retry:
	spin_lock(&gc->w_lock);
	if (gc->w_entries >= PBLK_GC_RQ_QD) {
		spin_unlock(&gc->w_lock);
		pblk_gc_writer_kick(&pblk->gc);
		usleep_range(128, 256);
		goto retry;
	}
	gc->w_entries++;
	list_add_tail(&gc_rq->list, &gc->w_list);
	spin_unlock(&gc->w_lock);

	pblk_gc_writer_kick(&pblk->gc);

	return 0;

fail:
	pblk_gc_free_gc_rq(gc_rq);
	kref_put(&line->ref, pblk_line_put);
	return ret;
}

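/* Move a line whose GC attempt was aborted back to its closed-line GC list */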
static void pblk_put_line_back(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct list_head *move_list;

	spin_lock(&line->lock);
	WARN_ON(line->state != PBLK_LINESTATE_GC);
	line->state = PBLK_LINESTATE_CLOSED;
	move_list = pblk_line_gc_list(pblk, line);
	spin_unlock(&line->lock);

	if (move_list) {
		spin_lock(&l_mg->gc_lock);
		list_add_tail(&line->list, move_list);
		spin_unlock(&l_mg->gc_lock);
	}
}

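/*
 * Worker that reads the valid sectors described by one GC request and queues
 * them for the GC writer.
 */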
static void pblk_gc_line_ws(struct work_struct *work)
{
	struct pblk_line_ws *gc_rq_ws = container_of(work,
						struct pblk_line_ws, ws);
	struct pblk *pblk = gc_rq_ws->pblk;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line = gc_rq_ws->line;
	struct pblk_gc_rq *gc_rq = gc_rq_ws->priv;

	up(&gc->gc_sem);

	if (pblk_gc_move_valid_secs(pblk, gc_rq)) {
		pr_err("pblk: could not GC all sectors: line:%d (%d/%d)\n",
						line->id, *line->vsc,
						gc_rq->nr_secs);
	}

	kfree(gc_rq_ws);
}

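/*
 * Worker that prepares a victim line for GC: it reads the line's emeta to
 * recover the LBA list, snapshots the invalid bitmap and splits the valid
 * sectors into GC requests queued on the line reader workqueue.
 */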
static void pblk_gc_line_prepare_ws(struct work_struct *work)
{
	struct pblk_line_ws *line_ws = container_of(work, struct pblk_line_ws,
									ws);
	struct pblk *pblk = line_ws->pblk;
	struct pblk_line *line = line_ws->line;
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_line_meta *lm = &pblk->lm;
	struct pblk_gc *gc = &pblk->gc;
	struct line_emeta *emeta_buf;
	struct pblk_line_ws *gc_rq_ws;
	struct pblk_gc_rq *gc_rq;
	__le64 *lba_list;
	unsigned long *invalid_bitmap;
	int sec_left, nr_secs, bit;
	int ret;

	invalid_bitmap = kmalloc(lm->sec_bitmap_len, GFP_KERNEL);
	if (!invalid_bitmap) {
		pr_err("pblk: could not allocate GC invalid bitmap\n");
		goto fail_free_ws;
	}

	emeta_buf = pblk_malloc(lm->emeta_len[0], l_mg->emeta_alloc_type,
								GFP_KERNEL);
	if (!emeta_buf) {
		pr_err("pblk: cannot use GC emeta\n");
		goto fail_free_bitmap;
	}

	ret = pblk_line_read_emeta(pblk, line, emeta_buf);
	if (ret) {
		pr_err("pblk: line %d read emeta failed (%d)\n", line->id, ret);
		goto fail_free_emeta;
	}

	/* If this read fails, it means that emeta is corrupted. For now, leave
	 * the line untouched. TODO: Implement a recovery routine that scans and
	 * moves all sectors on the line.
	 */
	lba_list = pblk_recov_get_lba_list(pblk, emeta_buf);
	if (!lba_list) {
		pr_err("pblk: could not interpret emeta (line %d)\n", line->id);
		goto fail_free_emeta;
	}

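	/* Snapshot the invalid bitmap and valid sector count under the line lock */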
	spin_lock(&line->lock);
	bitmap_copy(invalid_bitmap, line->invalid_bitmap, lm->sec_per_line);
	sec_left = pblk_line_vsc(line);
	spin_unlock(&line->lock);

	if (sec_left < 0) {
		pr_err("pblk: corrupted GC line (%d)\n", line->id);
		goto fail_free_emeta;
	}

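	/* Pack up to max_write_pgs valid sectors into each GC request */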
	bit = -1;
next_rq:
	gc_rq = kmalloc(sizeof(struct pblk_gc_rq), GFP_KERNEL);
	if (!gc_rq)
		goto fail_free_emeta;

	nr_secs = 0;
	do {
		bit = find_next_zero_bit(invalid_bitmap, lm->sec_per_line,
								bit + 1);
		if (bit > line->emeta_ssec)
			break;

		gc_rq->paddr_list[nr_secs] = bit;
		gc_rq->lba_list[nr_secs++] = le64_to_cpu(lba_list[bit]);
	} while (nr_secs < pblk->max_write_pgs);

	if (unlikely(!nr_secs)) {
		kfree(gc_rq);
		goto out;
	}

	gc_rq->nr_secs = nr_secs;
	gc_rq->line = line;

	gc_rq_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
	if (!gc_rq_ws)
		goto fail_free_gc_rq;

	gc_rq_ws->pblk = pblk;
	gc_rq_ws->line = line;
	gc_rq_ws->priv = gc_rq;

	down(&gc->gc_sem);
	kref_get(&line->ref);

	INIT_WORK(&gc_rq_ws->ws, pblk_gc_line_ws);
	queue_work(gc->gc_line_reader_wq, &gc_rq_ws->ws);

	sec_left -= nr_secs;
	if (sec_left > 0)
		goto next_rq;

out:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
	kfree(line_ws);
	kfree(invalid_bitmap);

	kref_put(&line->ref, pblk_line_put);
	atomic_dec(&gc->inflight_gc);

	return;

fail_free_gc_rq:
	kfree(gc_rq);
fail_free_emeta:
	pblk_mfree(emeta_buf, l_mg->emeta_alloc_type);
fail_free_bitmap:
	kfree(invalid_bitmap);
fail_free_ws:
	kfree(line_ws);

	pblk_put_line_back(pblk, line);
	kref_put(&line->ref, pblk_line_put);
	atomic_dec(&gc->inflight_gc);

	pr_err("pblk: Failed to GC line %d\n", line->id);
}

static int pblk_gc_line(struct pblk *pblk, struct pblk_line *line)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line_ws *line_ws;

	pr_debug("pblk: line '%d' being reclaimed for GC\n", line->id);

	line_ws = kmalloc(sizeof(struct pblk_line_ws), GFP_KERNEL);
	if (!line_ws)
		return -ENOMEM;

	line_ws->pblk = pblk;
	line_ws->line = line;

	INIT_WORK(&line_ws->ws, pblk_gc_line_prepare_ws);
	queue_work(gc->gc_reader_wq, &line_ws->ws);

	return 0;
}

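/*
 * Take the next victim line off the GC read list and schedule the work that
 * prepares it for collection.
 */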
static int pblk_gc_read(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;

	spin_lock(&gc->r_lock);
	if (list_empty(&gc->r_list)) {
		spin_unlock(&gc->r_lock);
		return 1;
	}

	line = list_first_entry(&gc->r_list, struct pblk_line, list);
	list_del(&line->list);
	spin_unlock(&gc->r_lock);

	pblk_gc_kick(pblk);

	if (pblk_gc_line(pblk, line))
		pr_err("pblk: failed to GC line %d\n", line->id);

	return 0;
}

static void pblk_gc_reader_kick(struct pblk_gc *gc)
{
	wake_up_process(gc->gc_reader_ts);
}

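/* Pick the line with the fewest valid sectors as the next GC victim */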
static struct pblk_line *pblk_gc_get_victim_line(struct pblk *pblk,
						 struct list_head *group_list)
{
	struct pblk_line *line, *victim;
	int line_vsc, victim_vsc;

	victim = list_first_entry(group_list, struct pblk_line, list);
	list_for_each_entry(line, group_list, list) {
		line_vsc = le32_to_cpu(*line->vsc);
		victim_vsc = le32_to_cpu(*victim->vsc);
		if (line_vsc < victim_vsc)
			victim = line;
	}

	return victim;
}

static bool pblk_gc_should_run(struct pblk_gc *gc, struct pblk_rl *rl)
{
	unsigned int nr_blocks_free, nr_blocks_need;

	nr_blocks_need = pblk_rl_high_thrs(rl);
	nr_blocks_free = pblk_rl_nr_free_blks(rl);

	/* This is not critical, no need to take lock here */
	return ((gc->gc_active) && (nr_blocks_need > nr_blocks_free));
}

/*
 * Lines with no valid sectors will be returned to the free list immediately. If
 * GC is activated - either because the free block count is under the determined
 * threshold, or because it is being forced from user space - only lines with a
 * high count of invalid sectors will be recycled.
 */
static void pblk_gc_run(struct pblk *pblk)
{
	struct pblk_line_mgmt *l_mg = &pblk->l_mg;
	struct pblk_gc *gc = &pblk->gc;
	struct pblk_line *line;
	struct list_head *group_list;
	bool run_gc;
	int inflight_gc, gc_group = 0, prev_group = 0;

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(&l_mg->gc_full_list)) {
			spin_unlock(&l_mg->gc_lock);
			break;
		}

		line = list_first_entry(&l_mg->gc_full_list,
							struct pblk_line, list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		kref_put(&line->ref, pblk_line_put);
	} while (1);

	run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
	if (!run_gc || (atomic_read(&gc->inflight_gc) >= PBLK_GC_L_QD))
		return;

next_gc_group:
	group_list = l_mg->gc_lists[gc_group++];

	do {
		spin_lock(&l_mg->gc_lock);
		if (list_empty(group_list)) {
			spin_unlock(&l_mg->gc_lock);
			break;
		}

		line = pblk_gc_get_victim_line(pblk, group_list);

		spin_lock(&line->lock);
		WARN_ON(line->state != PBLK_LINESTATE_CLOSED);
		line->state = PBLK_LINESTATE_GC;
		spin_unlock(&line->lock);

		list_del(&line->list);
		spin_unlock(&l_mg->gc_lock);

		spin_lock(&gc->r_lock);
		list_add_tail(&line->list, &gc->r_list);
		spin_unlock(&gc->r_lock);

		inflight_gc = atomic_inc_return(&gc->inflight_gc);
		pblk_gc_reader_kick(gc);

		prev_group = 1;

		/* No need to queue up more GC lines than we can handle */
		run_gc = pblk_gc_should_run(&pblk->gc, &pblk->rl);
		if (!run_gc || inflight_gc >= PBLK_GC_L_QD)
			break;
	} while (1);

	if (!prev_group && pblk->rl.rb_state > gc_group &&
						gc_group < PBLK_GC_NR_LISTS)
		goto next_gc_group;
}

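/* Wake up all GC kthreads and rearm the GC timer */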
void pblk_gc_kick(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	wake_up_process(gc->gc_ts);
	pblk_gc_writer_kick(gc);
	pblk_gc_reader_kick(gc);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));
}

static void pblk_gc_timer(unsigned long data)
{
	struct pblk *pblk = (struct pblk *)data;

	pblk_gc_kick(pblk);
}

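/* Main GC kthread; runs a GC pass every time it is kicked */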
static int pblk_gc_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		pblk_gc_run(pblk);
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

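/* GC writer kthread; drains the GC write list whenever it is kicked */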
static int pblk_gc_writer_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_write(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

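/* GC reader kthread; dispatches queued victim lines for preparation */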
static int pblk_gc_reader_ts(void *data)
{
	struct pblk *pblk = data;

	while (!kthread_should_stop()) {
		if (!pblk_gc_read(pblk))
			continue;
		set_current_state(TASK_INTERRUPTIBLE);
		io_schedule();
	}

	return 0;
}

static void pblk_gc_start(struct pblk *pblk)
{
	pblk->gc.gc_active = 1;
	pr_debug("pblk: gc start\n");
}

void pblk_gc_should_start(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_enabled && !gc->gc_active)
		pblk_gc_start(pblk);

	pblk_gc_kick(pblk);
}

/*
 * If flush_wq == 1 then no lock should be held by the caller since
 * flush_workqueue can sleep
 */
static void pblk_gc_stop(struct pblk *pblk, int flush_wq)
{
	pblk->gc.gc_active = 0;
	pr_debug("pblk: gc stop\n");
}

void pblk_gc_should_stop(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	if (gc->gc_active && !gc->gc_forced)
		pblk_gc_stop(pblk, 0);
}

void pblk_gc_sysfs_state_show(struct pblk *pblk, int *gc_enabled,
			      int *gc_active)
{
	struct pblk_gc *gc = &pblk->gc;

	spin_lock(&gc->lock);
	*gc_enabled = gc->gc_enabled;
	*gc_active = gc->gc_active;
	spin_unlock(&gc->lock);
}

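/* Set the force-GC flag from sysfs; gc_enabled follows the forced state */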
int pblk_gc_sysfs_force(struct pblk *pblk, int force)
{
	struct pblk_gc *gc = &pblk->gc;

	if (force < 0 || force > 1)
		return -EINVAL;

	spin_lock(&gc->lock);
	gc->gc_forced = force;

	if (force)
		gc->gc_enabled = 1;
	else
		gc->gc_enabled = 0;
	spin_unlock(&gc->lock);

	pblk_gc_should_start(pblk);

	return 0;
}

int pblk_gc_init(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;
	int ret;

	gc->gc_ts = kthread_create(pblk_gc_ts, pblk, "pblk-gc-ts");
	if (IS_ERR(gc->gc_ts)) {
		pr_err("pblk: could not allocate GC main kthread\n");
		return PTR_ERR(gc->gc_ts);
	}

	gc->gc_writer_ts = kthread_create(pblk_gc_writer_ts, pblk,
							"pblk-gc-writer-ts");
	if (IS_ERR(gc->gc_writer_ts)) {
		pr_err("pblk: could not allocate GC writer kthread\n");
		ret = PTR_ERR(gc->gc_writer_ts);
		goto fail_free_main_kthread;
	}

	gc->gc_reader_ts = kthread_create(pblk_gc_reader_ts, pblk,
							"pblk-gc-reader-ts");
	if (IS_ERR(gc->gc_reader_ts)) {
		pr_err("pblk: could not allocate GC reader kthread\n");
		ret = PTR_ERR(gc->gc_reader_ts);
		goto fail_free_writer_kthread;
	}

	setup_timer(&gc->gc_timer, pblk_gc_timer, (unsigned long)pblk);
	mod_timer(&gc->gc_timer, jiffies + msecs_to_jiffies(GC_TIME_MSECS));

	gc->gc_active = 0;
	gc->gc_forced = 0;
	gc->gc_enabled = 1;
	gc->w_entries = 0;
	atomic_set(&gc->inflight_gc, 0);

	/* Workqueue that reads valid sectors from a line and submits them to the
	 * GC writer to be recycled.
	 */
	gc->gc_line_reader_wq = alloc_workqueue("pblk-gc-line-reader-wq",
			WQ_MEM_RECLAIM | WQ_UNBOUND, PBLK_GC_MAX_READERS);
	if (!gc->gc_line_reader_wq) {
		pr_err("pblk: could not allocate GC line reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_kthread;
	}

	/* Workqueue that prepares lines for GC */
	gc->gc_reader_wq = alloc_workqueue("pblk-gc-line_wq",
					WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
	if (!gc->gc_reader_wq) {
		pr_err("pblk: could not allocate GC reader workqueue\n");
		ret = -ENOMEM;
		goto fail_free_reader_line_wq;
	}

	spin_lock_init(&gc->lock);
	spin_lock_init(&gc->w_lock);
	spin_lock_init(&gc->r_lock);

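	/* Bound the number of in-flight GC requests */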
	sema_init(&gc->gc_sem, PBLK_GC_RQ_QD);

	INIT_LIST_HEAD(&gc->w_list);
	INIT_LIST_HEAD(&gc->r_list);

	return 0;

fail_free_reader_line_wq:
	destroy_workqueue(gc->gc_line_reader_wq);
fail_free_reader_kthread:
	kthread_stop(gc->gc_reader_ts);
fail_free_writer_kthread:
	kthread_stop(gc->gc_writer_ts);
fail_free_main_kthread:
	kthread_stop(gc->gc_ts);

	return ret;
}

void pblk_gc_exit(struct pblk *pblk)
{
	struct pblk_gc *gc = &pblk->gc;

	flush_workqueue(gc->gc_reader_wq);
	flush_workqueue(gc->gc_line_reader_wq);

	del_timer(&gc->gc_timer);
	pblk_gc_stop(pblk, 1);

	if (gc->gc_ts)
		kthread_stop(gc->gc_ts);

	if (gc->gc_reader_wq)
		destroy_workqueue(gc->gc_reader_wq);

	if (gc->gc_line_reader_wq)
		destroy_workqueue(gc->gc_line_reader_wq);

	if (gc->gc_writer_ts)
		kthread_stop(gc->gc_writer_ts);
653 654 655

	if (gc->gc_reader_ts)
		kthread_stop(gc->gc_reader_ts);
}