gc.c 21.4 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
 * fs/f2fs/gc.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/module.h>
#include <linux/backing-dev.h>
#include <linux/init.h>
#include <linux/f2fs_fs.h>
#include <linux/kthread.h>
#include <linux/delay.h>
#include <linux/freezer.h>
#include <linux/blkdev.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "gc.h"
25
#include <trace/events/f2fs.h>
26 27 28 29

static int gc_thread_func(void *data)
{
	struct f2fs_sb_info *sbi = data;
30
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;
31 32 33
	wait_queue_head_t *wq = &sbi->gc_thread->gc_wait_queue_head;
	long wait_ms;

34
	wait_ms = gc_th->min_sleep_time;
35 36 37 38 39 40 41 42 43 44 45

	do {
		if (try_to_freeze())
			continue;
		else
			wait_event_interruptible_timeout(*wq,
						kthread_should_stop(),
						msecs_to_jiffies(wait_ms));
		if (kthread_should_stop())
			break;

46
		if (sbi->sb->s_writers.frozen >= SB_FREEZE_WRITE) {
47
			increase_sleep_time(gc_th, &wait_ms);
48 49 50
			continue;
		}

51 52 53 54 55 56 57 58
		/*
		 * [GC triggering condition]
		 * 0. GC is not conducted currently.
		 * 1. There are enough dirty segments.
		 * 2. IO subsystem is idle by checking the # of writeback pages.
		 * 3. IO subsystem is idle by checking the # of requests in
		 *    bdev's request list.
		 *
A
arter97 已提交
59
		 * Note) We have to avoid triggering GCs frequently.
60 61 62 63 64 65 66 67
		 * Because it is possible that some segments can be
		 * invalidated soon after by user update or deletion.
		 * So, I'd like to wait some time to collect dirty segments.
		 */
		if (!mutex_trylock(&sbi->gc_mutex))
			continue;

		if (!is_idle(sbi)) {
68
			increase_sleep_time(gc_th, &wait_ms);
69 70 71 72 73
			mutex_unlock(&sbi->gc_mutex);
			continue;
		}

		if (has_enough_invalid_blocks(sbi))
74
			decrease_sleep_time(gc_th, &wait_ms);
75
		else
76
			increase_sleep_time(gc_th, &wait_ms);
77

78
		stat_inc_bggc_count(sbi);
79

80 81 82
		trace_f2fs_background_gc(sbi->sb, wait_ms,
				prefree_segments(sbi), free_segments(sbi));

83
		/* if return value is not zero, no victim was selected */
84
		if (f2fs_gc(sbi, test_opt(sbi, FORCE_FG_GC)))
85
			wait_ms = gc_th->no_gc_sleep_time;
86

87 88
		/* balancing f2fs's metadata periodically */
		f2fs_balance_fs_bg(sbi);
89

90 91 92 93 94 95
	} while (!kthread_should_stop());
	return 0;
}

int start_gc_thread(struct f2fs_sb_info *sbi)
{
N
Namjae Jeon 已提交
96
	struct f2fs_gc_kthread *gc_th;
97
	dev_t dev = sbi->sb->s_bdev->bd_dev;
98
	int err = 0;
99 100

	gc_th = kmalloc(sizeof(struct f2fs_gc_kthread), GFP_KERNEL);
101 102 103 104
	if (!gc_th) {
		err = -ENOMEM;
		goto out;
	}
105

106 107 108 109
	gc_th->min_sleep_time = DEF_GC_THREAD_MIN_SLEEP_TIME;
	gc_th->max_sleep_time = DEF_GC_THREAD_MAX_SLEEP_TIME;
	gc_th->no_gc_sleep_time = DEF_GC_THREAD_NOGC_SLEEP_TIME;

110 111
	gc_th->gc_idle = 0;

112 113 114
	sbi->gc_thread = gc_th;
	init_waitqueue_head(&sbi->gc_thread->gc_wait_queue_head);
	sbi->gc_thread->f2fs_gc_task = kthread_run(gc_thread_func, sbi,
115
			"f2fs_gc-%u:%u", MAJOR(dev), MINOR(dev));
116
	if (IS_ERR(gc_th->f2fs_gc_task)) {
117
		err = PTR_ERR(gc_th->f2fs_gc_task);
118
		kfree(gc_th);
119
		sbi->gc_thread = NULL;
120
	}
121 122
out:
	return err;
123 124 125 126 127 128 129 130 131 132 133 134
}

/*
 * Stop the background GC thread, if one exists, and free its descriptor.
 * Does nothing when sbi->gc_thread is NULL.
 */
void stop_gc_thread(struct f2fs_sb_info *sbi)
{
	struct f2fs_gc_kthread *gc_th = sbi->gc_thread;

	if (gc_th) {
		kthread_stop(gc_th->f2fs_gc_task);
		kfree(gc_th);
		sbi->gc_thread = NULL;
	}
}

135
static int select_gc_type(struct f2fs_gc_kthread *gc_th, int gc_type)
136
{
137 138 139 140 141 142 143 144 145
	int gc_mode = (gc_type == BG_GC) ? GC_CB : GC_GREEDY;

	if (gc_th && gc_th->gc_idle) {
		if (gc_th->gc_idle == 1)
			gc_mode = GC_CB;
		else if (gc_th->gc_idle == 2)
			gc_mode = GC_GREEDY;
	}
	return gc_mode;
146 147 148 149 150 151 152
}

static void select_policy(struct f2fs_sb_info *sbi, int gc_type,
			int type, struct victim_sel_policy *p)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

153
	if (p->alloc_mode == SSR) {
154 155
		p->gc_mode = GC_GREEDY;
		p->dirty_segmap = dirty_i->dirty_segmap[type];
156
		p->max_search = dirty_i->nr_dirty[type];
157 158
		p->ofs_unit = 1;
	} else {
159
		p->gc_mode = select_gc_type(sbi->gc_thread, gc_type);
160
		p->dirty_segmap = dirty_i->dirty_segmap[DIRTY];
161
		p->max_search = dirty_i->nr_dirty[DIRTY];
162 163
		p->ofs_unit = sbi->segs_per_sec;
	}
164

165 166
	if (p->max_search > sbi->max_victim_search)
		p->max_search = sbi->max_victim_search;
167

168 169 170 171 172 173
	p->offset = sbi->last_victim[p->gc_mode];
}

static unsigned int get_max_cost(struct f2fs_sb_info *sbi,
				struct victim_sel_policy *p)
{
174 175 176
	/* SSR allocates in a segment unit */
	if (p->alloc_mode == SSR)
		return 1 << sbi->log_blocks_per_seg;
177 178 179 180 181 182 183 184 185 186 187
	if (p->gc_mode == GC_GREEDY)
		return (1 << sbi->log_blocks_per_seg) * p->ofs_unit;
	else if (p->gc_mode == GC_CB)
		return UINT_MAX;
	else /* No other gc_mode */
		return 0;
}

static unsigned int check_bg_victims(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
188
	unsigned int secno;
189 190 191 192 193 194

	/*
	 * If the gc_type is FG_GC, we can select victim segments
	 * selected by background GC before.
	 * Those segments guarantee they have small valid blocks.
	 */
195
	for_each_set_bit(secno, dirty_i->victim_secmap, MAIN_SECS(sbi)) {
196
		if (sec_usage_check(sbi, secno))
197
			continue;
198 199
		clear_bit(secno, dirty_i->victim_secmap);
		return secno * sbi->segs_per_sec;
200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
	}
	return NULL_SEGNO;
}

/*
 * Compute the GC_CB-mode cost of the section that contains @segno.
 *
 * The cost combines the per-segment average of valid blocks, expressed as a
 * percentage (u), with the section's age relative to the [min_mtime,
 * max_mtime] window kept in sit_info. A smaller return value is a better
 * victim.
 */
static unsigned int get_cb_cost(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int first_segno = GET_SECNO(sbi, segno) * sbi->segs_per_sec;
	unsigned long long mtime = 0;
	unsigned int vblocks;
	unsigned char age = 0;
	unsigned char u;
	unsigned int n = 0;

	/* average modification time over the segments of the section */
	while (n < sbi->segs_per_sec) {
		mtime += get_seg_entry(sbi, first_segno + n)->mtime;
		n++;
	}
	mtime = div_u64(mtime, sbi->segs_per_sec);

	/* average number of valid blocks per segment */
	vblocks = get_valid_blocks(sbi, segno, sbi->segs_per_sec);
	vblocks = div_u64(vblocks, sbi->segs_per_sec);

	u = (vblocks * 100) >> sbi->log_blocks_per_seg;

	/* Handle if the system time has changed by the user */
	if (mtime < sit_i->min_mtime)
		sit_i->min_mtime = mtime;
	if (mtime > sit_i->max_mtime)
		sit_i->max_mtime = mtime;

	/* age stays 0 while the mtime window is empty */
	if (sit_i->max_mtime != sit_i->min_mtime)
		age = 100 - div64_u64(100 * (mtime - sit_i->min_mtime),
				sit_i->max_mtime - sit_i->min_mtime);

	return UINT_MAX - ((100 * (100 - u) * age) / (100 + u));
}

236 237
static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
			unsigned int segno, struct victim_sel_policy *p)
238 239 240 241 242 243 244 245 246 247 248
{
	if (p->alloc_mode == SSR)
		return get_seg_entry(sbi, segno)->ckpt_valid_blocks;

	/* alloc_mode == LFS */
	if (p->gc_mode == GC_GREEDY)
		return get_valid_blocks(sbi, segno, sbi->segs_per_sec);
	else
		return get_cb_cost(sbi, segno);
}

J
Jaegeuk Kim 已提交
249
/*
M
Masanari Iida 已提交
250
 * This function is called from two paths.
251 252 253 254 255 256 257 258 259 260 261
 * One is garbage collection and the other is SSR segment selection.
 * When it is called during GC, it just gets a victim segment
 * and it does not remove it from dirty seglist.
 * When it is called from SSR segment selection, it finds a segment
 * which has minimum valid blocks and removes it from dirty seglist.
 */
static int get_victim_by_default(struct f2fs_sb_info *sbi,
		unsigned int *result, int gc_type, int type, char alloc_mode)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct victim_sel_policy p;
262
	unsigned int secno, max_cost;
263
	unsigned int last_segment = MAIN_SEGS(sbi);
264 265
	int nsearched = 0;

266 267
	mutex_lock(&dirty_i->seglist_lock);

268 269 270 271
	p.alloc_mode = alloc_mode;
	select_policy(sbi, gc_type, type, &p);

	p.min_segno = NULL_SEGNO;
272
	p.min_cost = max_cost = get_max_cost(sbi, &p);
273

274 275 276
	if (p.max_search == 0)
		goto out;

277 278 279 280 281 282 283 284
	if (p.alloc_mode == LFS && gc_type == FG_GC) {
		p.min_segno = check_bg_victims(sbi);
		if (p.min_segno != NULL_SEGNO)
			goto got_it;
	}

	while (1) {
		unsigned long cost;
285
		unsigned int segno;
286

287 288
		segno = find_next_bit(p.dirty_segmap, last_segment, p.offset);
		if (segno >= last_segment) {
289
			if (sbi->last_victim[p.gc_mode]) {
290
				last_segment = sbi->last_victim[p.gc_mode];
291 292 293 294 295 296
				sbi->last_victim[p.gc_mode] = 0;
				p.offset = 0;
				continue;
			}
			break;
		}
297 298 299 300 301

		p.offset = segno + p.ofs_unit;
		if (p.ofs_unit > 1)
			p.offset -= segno % p.ofs_unit;

302
		secno = GET_SECNO(sbi, segno);
303

304
		if (sec_usage_check(sbi, secno))
305
			continue;
306
		if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
307 308 309 310 311 312 313
			continue;

		cost = get_gc_cost(sbi, segno, &p);

		if (p.min_cost > cost) {
			p.min_segno = segno;
			p.min_cost = cost;
314
		} else if (unlikely(cost == max_cost)) {
315
			continue;
316
		}
317

318
		if (nsearched++ >= p.max_search) {
319 320 321 322 323
			sbi->last_victim[p.gc_mode] = segno;
			break;
		}
	}
	if (p.min_segno != NULL_SEGNO) {
324
got_it:
325
		if (p.alloc_mode == LFS) {
326 327 328 329 330
			secno = GET_SECNO(sbi, p.min_segno);
			if (gc_type == FG_GC)
				sbi->cur_victim_sec = secno;
			else
				set_bit(secno, dirty_i->victim_secmap);
331
		}
332
		*result = (p.min_segno / p.ofs_unit) * p.ofs_unit;
333 334 335 336

		trace_f2fs_get_victim(sbi->sb, type, gc_type, &p,
				sbi->cur_victim_sec,
				prefree_segments(sbi), free_segments(sbi));
337
	}
338
out:
339 340 341 342 343 344 345 346 347
	mutex_unlock(&dirty_i->seglist_lock);

	return (p.min_segno == NULL_SEGNO) ? 0 : 1;
}

/* Victim selection operations installed by build_gc_manager(). */
static const struct victim_selection default_v_ops = {
	.get_victim = get_victim_by_default,
};

348
/*
 * Look up inode number @ino in the radix tree of @gc_list.
 * Returns the stored inode, or NULL when there is no entry for @ino.
 */
static struct inode *find_gc_inode(struct gc_inode_list *gc_list, nid_t ino)
{
	struct inode_entry *ie = radix_tree_lookup(&gc_list->iroot, ino);

	return ie ? ie->inode : NULL;
}

358
static void add_gc_inode(struct gc_inode_list *gc_list, struct inode *inode)
359
{
360 361
	struct inode_entry *new_ie;

362
	if (inode == find_gc_inode(gc_list, inode->i_ino)) {
363 364
		iput(inode);
		return;
365
	}
366
	new_ie = f2fs_kmem_cache_alloc(inode_entry_slab, GFP_NOFS);
367
	new_ie->inode = inode;
368 369

	f2fs_radix_tree_insert(&gc_list->iroot, inode->i_ino, new_ie);
370
	list_add_tail(&new_ie->list, &gc_list->ilist);
371 372
}

373
static void put_gc_inode(struct gc_inode_list *gc_list)
374 375
{
	struct inode_entry *ie, *next_ie;
376 377
	list_for_each_entry_safe(ie, next_ie, &gc_list->ilist, list) {
		radix_tree_delete(&gc_list->iroot, ie->inode->i_ino);
378 379
		iput(ie->inode);
		list_del(&ie->list);
380
		kmem_cache_free(inode_entry_slab, ie);
381 382 383 384 385 386 387 388 389 390 391 392 393 394
	}
}

static int check_valid_map(struct f2fs_sb_info *sbi,
				unsigned int segno, int offset)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct seg_entry *sentry;
	int ret;

	mutex_lock(&sit_i->sentry_lock);
	sentry = get_seg_entry(sbi, segno);
	ret = f2fs_test_bit(offset, sentry->cur_valid_map);
	mutex_unlock(&sit_i->sentry_lock);
395
	return ret;
396 397
}

J
Jaegeuk Kim 已提交
398
/*
399 400 401 402
 * This function compares node address got in summary with that in NAT.
 * On validity, copy that node with cold status, otherwise (invalid node)
 * ignore that.
 */
403
static int gc_node_segment(struct f2fs_sb_info *sbi,
404 405 406 407
		struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
	bool initial = true;
	struct f2fs_summary *entry;
408
	block_t start_addr;
409 410
	int off;

411 412
	start_addr = START_BLOCK(sbi, segno);

413 414
next_step:
	entry = sum;
415

416 417 418
	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		nid_t nid = le32_to_cpu(entry->nid);
		struct page *node_page;
419
		struct node_info ni;
420

421 422
		/* stop BG_GC if there is not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
423
			return 0;
424

425
		if (check_valid_map(sbi, segno, off) == 0)
426 427 428 429 430 431 432 433 434 435
			continue;

		if (initial) {
			ra_node_page(sbi, nid);
			continue;
		}
		node_page = get_node_page(sbi, nid);
		if (IS_ERR(node_page))
			continue;

436 437 438 439
		/* block may become invalid during get_node_page */
		if (check_valid_map(sbi, segno, off) == 0) {
			f2fs_put_page(node_page, 1);
			continue;
440 441 442 443 444 445
		}

		get_node_info(sbi, nid, &ni);
		if (ni.blk_addr != start_addr + off) {
			f2fs_put_page(node_page, 1);
			continue;
446 447
		}

448
		/* set page dirty and write it */
449
		if (gc_type == FG_GC) {
450
			f2fs_wait_on_page_writeback(node_page, NODE);
451
			set_page_dirty(node_page);
452 453 454 455
		} else {
			if (!PageWriteback(node_page))
				set_page_dirty(node_page);
		}
456
		f2fs_put_page(node_page, 1);
457
		stat_inc_node_blk_count(sbi, 1, gc_type);
458
	}
459

460 461 462 463 464 465 466 467 468 469 470 471
	if (initial) {
		initial = false;
		goto next_step;
	}

	if (gc_type == FG_GC) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = LONG_MAX,
			.for_reclaim = 0,
		};
		sync_node_pages(sbi, 0, &wbc);
472

473 474 475
		/* return 1 only if FG_GC succefully reclaimed one */
		if (get_valid_blocks(sbi, segno, 1) == 0)
			return 1;
476
	}
477
	return 0;
478 479
}

J
Jaegeuk Kim 已提交
480
/*
481 482 483 484 485
 * Calculate start block index indicating the given node offset.
 * Be careful, caller should give this node offset only indicating direct node
 * blocks. If any node offsets, which point the other types of node blocks such
 * as indirect or double indirect node blocks, are given, it must be a caller's
 * bug.
486
 */
487
block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
488
{
489 490
	unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
	unsigned int bidx;
491

492 493
	if (node_ofs == 0)
		return 0;
494

495
	if (node_ofs <= 2) {
496 497
		bidx = node_ofs - 1;
	} else if (node_ofs <= indirect_blks) {
498
		int dec = (node_ofs - 4) / (NIDS_PER_BLOCK + 1);
499 500
		bidx = node_ofs - 2 - dec;
	} else {
501
		int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
502 503
		bidx = node_ofs - 5 - dec;
	}
504
	return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
505 506
}

507
/*
 * Check whether the data block described by summary entry @sum still lives
 * at @blkaddr.
 *
 * Fills @dni with the node info of the summary's nid. Once the version
 * check has passed, the node's offset is stored in *nofs. Returns false if
 * the node page cannot be read, if the summary version differs from the
 * node info version, or if the node no longer points at @blkaddr.
 */
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
		struct node_info *dni, block_t blkaddr, unsigned int *nofs)
{
	nid_t nid = le32_to_cpu(sum->nid);
	unsigned int ofs_in_node = le16_to_cpu(sum->ofs_in_node);
	struct page *node_page;
	block_t cur_blkaddr;

	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return false;

	get_node_info(sbi, nid, dni);

	if (sum->version != dni->version) {
		f2fs_put_page(node_page, 1);
		return false;
	}

	*nofs = ofs_of_node(node_page);
	cur_blkaddr = datablock_addr(node_page, ofs_in_node);
	f2fs_put_page(node_page, 1);

	return cur_blkaddr == blkaddr;
}

538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570 571
/*
 * Relocate block @bidx of @inode without going through the data page's
 * contents: the on-disk block is read into a page of the meta mapping,
 * a new block address is allocated in CURSEG_COLD_DATA, the same page is
 * written there, and the dnode is updated to the new address.
 *
 * Any failure along the way silently abandons the move.
 */
static void move_encrypted_block(struct inode *inode, block_t bidx)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = DATA,
		.rw = READ_SYNC,
		.encrypted_page = NULL,
	};
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	struct page *page;
	int err;

	/* do not read out */
	page = grab_cache_page(inode->i_mapping, bidx);
	if (!page)
		return;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
	err = get_dnode_of_data(&dn, bidx, LOOKUP_NODE);
	if (err)
		goto out;

	if (unlikely(dn.data_blkaddr == NULL_ADDR))
		goto put_out;

	/* summary entry describing the block at its new location */
	get_node_info(sbi, dn.nid, &ni);
	set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

	/* read page */
	fio.page = page;
	fio.blk_addr = dn.data_blkaddr;

	fio.encrypted_page = pagecache_get_page(META_MAPPING(sbi),
					fio.blk_addr,
					FGP_LOCK|FGP_CREAT,
					GFP_NOFS);
	if (!fio.encrypted_page)
		goto put_out;

	err = f2fs_submit_page_bio(&fio);
	if (err)
		goto put_page_out;

	/* write page */
	lock_page(fio.encrypted_page);

	/* give up if the read failed or the page left the meta mapping */
	if (unlikely(!PageUptodate(fio.encrypted_page)))
		goto put_page_out;
	if (unlikely(fio.encrypted_page->mapping != META_MAPPING(sbi)))
		goto put_page_out;

	set_page_dirty(fio.encrypted_page);
	f2fs_wait_on_page_writeback(fio.encrypted_page, META);
	if (clear_page_dirty_for_io(fio.encrypted_page))
		dec_page_count(sbi, F2FS_DIRTY_META);

	set_page_writeback(fio.encrypted_page);

	/* allocate block address */
	f2fs_wait_on_page_writeback(dn.node_page, NODE);
	allocate_data_block(sbi, NULL, fio.blk_addr,
					&fio.blk_addr, &sum, CURSEG_COLD_DATA);
	fio.rw = WRITE_SYNC;
	f2fs_submit_page_mbio(&fio);

	/* point the dnode at the new block */
	dn.data_blkaddr = fio.blk_addr;
	set_data_blkaddr(&dn);
	f2fs_update_extent_cache(&dn);
	set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
	if (page->index == 0)
		set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
put_page_out:
	f2fs_put_page(fio.encrypted_page, 1);
put_out:
	f2fs_put_dnode(&dn);
out:
	f2fs_put_page(page, 1);
}

J
Jaegeuk Kim 已提交
619
/*
 * Move data block @bidx of @inode.
 *
 * For BG_GC the page is only marked dirty and cold, and is left untouched
 * if it is under writeback. For any other gc_type the page is written out
 * here through do_write_data_page().
 */
static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
{
	struct page *page = get_lock_data_page(inode, bidx);

	if (IS_ERR(page))
		return;

	if (gc_type == BG_GC) {
		if (!PageWriteback(page)) {
			set_page_dirty(page);
			set_cold_data(page);
		}
	} else {
		struct f2fs_io_info fio = {
			.sbi = F2FS_I_SB(inode),
			.type = DATA,
			.rw = WRITE_SYNC,
			.page = page,
			.encrypted_page = NULL,
		};

		set_page_dirty(page);
		f2fs_wait_on_page_writeback(page, DATA);
		if (clear_page_dirty_for_io(page))
			inode_dec_dirty_pages(inode);
		/* the cold mark is only needed for the duration of the write */
		set_cold_data(page);
		do_write_data_page(&fio);
		clear_cold_data(page);
	}

	f2fs_put_page(page, 1);
}

J
Jaegeuk Kim 已提交
652
/*
653 654 655 656 657 658
 * This function tries to get parent node of victim data block, and identifies
 * data block validity. If the block is valid, copy that with cold status and
 * modify parent node.
 * If the parent node is not valid or the data block address is different,
 * the victim data block is ignored.
 */
659
static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
660
		struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
661 662 663 664
{
	struct super_block *sb = sbi->sb;
	struct f2fs_summary *entry;
	block_t start_addr;
665
	int off;
666 667 668 669 670 671
	int phase = 0;

	start_addr = START_BLOCK(sbi, segno);

next_step:
	entry = sum;
672

673 674 675 676 677 678 679
	for (off = 0; off < sbi->blocks_per_seg; off++, entry++) {
		struct page *data_page;
		struct inode *inode;
		struct node_info dni; /* dnode info for the data */
		unsigned int ofs_in_node, nofs;
		block_t start_bidx;

680 681
		/* stop BG_GC if there is not enough free sections. */
		if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
682
			return 0;
683

684
		if (check_valid_map(sbi, segno, off) == 0)
685 686 687 688 689 690 691 692
			continue;

		if (phase == 0) {
			ra_node_page(sbi, le32_to_cpu(entry->nid));
			continue;
		}

		/* Get an inode by ino with checking validity */
693
		if (!is_alive(sbi, entry, &dni, start_addr + off, &nofs))
694 695 696 697 698 699 700 701 702 703
			continue;

		if (phase == 1) {
			ra_node_page(sbi, dni.ino);
			continue;
		}

		ofs_in_node = le16_to_cpu(entry->ofs_in_node);

		if (phase == 2) {
704
			inode = f2fs_iget(sb, dni.ino);
705
			if (IS_ERR(inode) || is_bad_inode(inode))
706 707
				continue;

708 709 710 711 712 713 714
			/* if encrypted inode, let's go phase 3 */
			if (f2fs_encrypted_inode(inode) &&
						S_ISREG(inode->i_mode)) {
				add_gc_inode(gc_list, inode);
				continue;
			}

715
			start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
716 717
			data_page = get_read_data_page(inode,
					start_bidx + ofs_in_node, READA);
718 719 720 721
			if (IS_ERR(data_page)) {
				iput(inode);
				continue;
			}
722 723

			f2fs_put_page(data_page, 0);
724
			add_gc_inode(gc_list, inode);
725 726 727 728
			continue;
		}

		/* phase 3 */
729
		inode = find_gc_inode(gc_list, dni.ino);
730
		if (inode) {
J
Jaegeuk Kim 已提交
731 732
			start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
								+ ofs_in_node;
733 734 735 736
			if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
				move_encrypted_block(inode, start_bidx);
			else
				move_data_page(inode, start_bidx, gc_type);
737
			stat_inc_data_blk_count(sbi, 1, gc_type);
738 739
		}
	}
740

741 742
	if (++phase < 4)
		goto next_step;
743

744
	if (gc_type == FG_GC) {
J
Jaegeuk Kim 已提交
745
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
746

747 748 749
		/* return 1 only if FG_GC succefully reclaimed one */
		if (get_valid_blocks(sbi, segno, 1) == 0)
			return 1;
750
	}
751
	return 0;
752 753 754
}

static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
755
			int gc_type)
756 757 758
{
	struct sit_info *sit_i = SIT_I(sbi);
	int ret;
759

760
	mutex_lock(&sit_i->sentry_lock);
761 762
	ret = DIRTY_I(sbi)->v_ops->get_victim(sbi, victim, gc_type,
					      NO_CHECK_TYPE, LFS);
763 764 765 766
	mutex_unlock(&sit_i->sentry_lock);
	return ret;
}

767
static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
768
				struct gc_inode_list *gc_list, int gc_type)
769 770 771
{
	struct page *sum_page;
	struct f2fs_summary_block *sum;
772
	struct blk_plug plug;
773
	int nfree = 0;
774 775 776 777

	/* read segment summary of victim */
	sum_page = get_sum_page(sbi, segno);

778 779
	blk_start_plug(&plug);

780 781
	sum = page_address(sum_page);

782 783 784 785 786 787 788 789 790
	/*
	 * this is to avoid deadlock:
	 * - lock_page(sum_page)         - f2fs_replace_block
	 *  - check_valid_map()            - mutex_lock(sentry_lock)
	 *   - mutex_lock(sentry_lock)     - change_curseg()
	 *                                  - lock_page(sum_page)
	 */
	unlock_page(sum_page);

791 792
	switch (GET_SUM_TYPE((&sum->footer))) {
	case SUM_TYPE_NODE:
793
		nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
794 795
		break;
	case SUM_TYPE_DATA:
796 797
		nfree = gc_data_segment(sbi, sum->entries, gc_list,
							segno, gc_type);
798 799
		break;
	}
800 801
	blk_finish_plug(&plug);

802
	stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
803 804
	stat_inc_call_count(sbi->stat_info);

805
	f2fs_put_page(sum_page, 0);
806
	return nfree;
807 808
}

C
Chao Yu 已提交
809
/*
 * Run garbage collection.
 * @sync: true selects FG_GC and a single pass; false starts as BG_GC and
 *        repeats while free sections are insufficient.
 *
 * This function does not take sbi->gc_mutex itself but unlocks it on every
 * return path.
 *
 * Returns -EINVAL if GC stopped before any victim was selected and 0 once
 * one was. With @sync set, the result is instead 0 if at least one section
 * was freed and -EAGAIN otherwise.
 */
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
	struct gc_inode_list gc_list = {
		.ilist = LIST_HEAD_INIT(gc_list.ilist),
		.iroot = RADIX_TREE_INIT(GFP_NOFS),
	};
	struct cp_control cpc;
	unsigned int segno, i;
	int gc_type = sync ? FG_GC : BG_GC;
	int sec_freed = 0;
	int ret = -EINVAL;

	cpc.reason = __get_cp_reason(sbi);
gc_more:
	segno = NULL_SEGNO;

	if (unlikely(!(sbi->sb->s_flags & MS_ACTIVE)) ||
			unlikely(f2fs_cp_error(sbi)))
		goto stop;

	/* running out of free sections: escalate background GC to FG_GC */
	if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
		gc_type = FG_GC;
		if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
			write_checkpoint(sbi, &cpc);
	}

	/* reuse the victim found during escalation, if any */
	if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
		goto stop;
	ret = 0;

	/* readahead multi ssa blocks those have contiguous address */
	if (sbi->segs_per_sec > 1)
		ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
								META_SSA);

	for (i = 0; i < sbi->segs_per_sec; i++) {
		int freed = do_garbage_collect(sbi, segno + i, &gc_list,
								gc_type);

		/*
		 * for FG_GC case, halt gcing left segments once failed one
		 * of segments in selected section to avoid long latency.
		 */
		if (!freed && gc_type == FG_GC)
			break;
	}

	/* the section counts as freed only if no segment stopped the loop */
	if (i == sbi->segs_per_sec && gc_type == FG_GC)
		sec_freed++;

	if (gc_type == FG_GC)
		sbi->cur_victim_sec = NULL_SEGNO;

	if (!sync) {
		if (has_not_enough_free_secs(sbi, sec_freed))
			goto gc_more;

		if (gc_type == FG_GC)
			write_checkpoint(sbi, &cpc);
	}
stop:
	mutex_unlock(&sbi->gc_mutex);

	put_gc_inode(&gc_list);

	if (sync)
		ret = sec_freed ? 0 : -EAGAIN;
	return ret;
}

void build_gc_manager(struct f2fs_sb_info *sbi)
{
	DIRTY_I(sbi)->v_ops = &default_v_ops;
}