checkpoint.c 36.3 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
J
Jaegeuk Kim 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

J
Jaegeuk Kim 已提交
26
static struct kmem_cache *ino_entry_slab;
27
struct kmem_cache *inode_entry_slab;
J
Jaegeuk Kim 已提交
28

29 30
/*
 * Put the filesystem into checkpoint-error state: set CP_ERROR_FLAG so no
 * further checkpoint is written.  Unless called from an end_io path
 * (@end_io == true), also flush any pending merged writes.
 */
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	if (!end_io)
		f2fs_flush_merged_writes(sbi);
}

J
Jaegeuk Kim 已提交
36
/*
 * We guarantee no failure on the returned page: retry until the meta page
 * at @index is obtained from the page cache, wait for any in-flight
 * writeback on it, and hand it back uptodate and locked.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;

	/* never fail: keep retrying until the page cache gives us the page */
	for (;;) {
		page = f2fs_grab_cache_page(mapping, index, false);
		if (page)
			break;
		cond_resched();
	}

	f2fs_wait_on_page_writeback(page, META, true);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	return page;
}

J
Jaegeuk Kim 已提交
55
/*
 * We guarantee no failure on the returned page.
 *
 * Read the meta page at @index, retrying allocation and IO submission as
 * needed.  @is_meta selects whether REQ_META is kept on the read request.
 * On an unrecoverable read error the checkpoint is stopped so a
 * non-uptodate meta page is never written back.
 */
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.op = REQ_OP_READ,
		.op_flags = REQ_META | REQ_PRIO,
		.old_blkaddr = index,	/* meta pages map 1:1 to block addrs */
		.new_blkaddr = index,
		.encrypted_page = NULL,
		.is_meta = is_meta,
	};

	if (unlikely(!is_meta))
		fio.op_flags &= ~REQ_META;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	fio.page = page;

	/* submission failure: drop the page and start over */
	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	lock_page(page);
	/* page was truncated/replaced while the bio was in flight */
	if (unlikely(page->mapping != mapping)) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
	if (unlikely(!PageUptodate(page)))
		f2fs_stop_checkpoint(sbi, false);
out:
	return page;
}

109 110 111 112 113 114 115 116 117 118 119
/* Read a regular meta page (REQ_META kept on the request). */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

/* for POR only */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

120
/*
 * Validate that @blkaddr lies within the on-disk area appropriate for
 * @type.  META_NAT is always accepted here (NAT addresses wrap — see
 * ra_meta_pages); any unknown type is a programming error.
 */
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			return false;
		break;
	case META_SSA:
		/* SSA area: [ssa_blkaddr, MAIN_BLKADDR) */
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			return false;
		break;
	case META_CP:
		/* CP area: [cp start, sit_base_addr) */
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			return false;
		break;
	case META_POR:
		/* main area: [MAIN_BLKADDR, MAX_BLKADDR) */
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
		BUG();
	}

	return true;
}

/*
 * Readahead CP/NAT/SIT/SSA pages
 *
 * Issue reads for up to @nrpages meta pages starting at @start.  @sync
 * selects high-priority meta IO vs. plain readahead.  Returns the number
 * of block addresses advanced over (pages already cached are skipped but
 * still counted).
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
{
	struct page *page;
	block_t blkno = start;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.op = REQ_OP_READ,
		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
		.encrypted_page = NULL,
		.in_list = false,
		.is_meta = (type != META_POR),
	};
	struct blk_plug plug;

	if (unlikely(type == META_POR))
		fio.op_flags &= ~REQ_META;

	/* plug so the per-page bios get merged before dispatch */
	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

		switch (type) {
		case META_NAT:
			/* NAT block numbers wrap around at max_nid */
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
				blkno = 0;
			/* get nat block addr */
			fio.new_blkaddr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			fio.new_blkaddr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
		case META_SSA:
		case META_CP:
		case META_POR:
			/* these types map directly to the block address */
			fio.new_blkaddr = blkno;
			break;
		default:
			BUG();
		}

		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

		fio.page = page;
		f2fs_submit_page_bio(&fio);
		/* drop only the refcount; the bio completion unlocks */
		f2fs_put_page(page, 0);
	}
out:
	blk_finish_plug(&plug);
	return blkno - start;
}

220 221 222 223 224 225
/*
 * Kick off POR meta readahead at @index, but only when the page is not
 * already cached and uptodate.
 */
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page = find_get_page(META_MAPPING(sbi), index);
	bool need_ra = !page || !PageUptodate(page);

	/* drop the reference taken by find_get_page (NULL page is fine) */
	f2fs_put_page(page, 0);

	if (need_ra)
		ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}

C
Chao Yu 已提交
234 235 236
/*
 * Write back one dirty meta page.  Returns 0 on success (or when the
 * page is deliberately dropped due to checkpoint error), and
 * AOP_WRITEPAGE_ACTIVATE when the page must be redirtied instead.
 */
static int __f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc,
				enum iostat_type io_type)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	trace_f2fs_writepage(page, META);

	/* checkpoint already failed: drop the page instead of writing it */
	if (unlikely(f2fs_cp_error(sbi))) {
		dec_page_count(sbi, F2FS_DIRTY_META);
		unlock_page(page);
		return 0;
	}
	/* no meta writeback while power-off recovery is running */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
		goto redirty_out;

	write_meta_page(sbi, page, io_type);
	dec_page_count(sbi, F2FS_DIRTY_META);

	if (wbc->for_reclaim)
		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
						0, page->index, META);

	unlock_page(page);

	/* cp error raised during the write: push out merged meta IO now */
	if (unlikely(f2fs_cp_error(sbi)))
		f2fs_submit_merged_write(sbi, META);

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

C
Chao Yu 已提交
271 272 273 274 275 276
/* address_space_operations ->writepage hook: account the IO as FS_META_IO */
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	return __f2fs_write_meta_page(page, wbc, FS_META_IO);
}

J
Jaegeuk Kim 已提交
277 278 279
/*
 * ->writepages for the meta mapping: batch dirty meta pages and write
 * them together under cp_mutex.  Skips writeback during recovery, for
 * kupdate-style flushes, and when too few pages are dirty to be worth it.
 */
static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	long diff, written;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

	/* collect a number of dirty meta pages and write together */
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
		goto skip_write;

	/* if locked failed, cp will flush dirty pages instead */
	if (!mutex_trylock(&sbi->cp_mutex))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
	mutex_unlock(&sbi->cp_mutex);
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	trace_f2fs_writepages(mapping->host, wbc, META);
	return 0;
}

/*
 * Write out up to @nr_to_write dirty meta pages of @type.  When a write
 * budget is given (nr_to_write != LONG_MAX), only a contiguous run of
 * page indices is written — the scan stops at the first gap.  Returns the
 * number of pages written.
 */
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
				long nr_to_write, enum iostat_type io_type)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, prev = ULONG_MAX;	/* prev: last index written */
	struct pagevec pvec;
	long nwritten = 0;
	int nr_pages;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
	struct blk_plug plug;

	pagevec_init(&pvec);

	blk_start_plug(&plug);

	while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (prev == ULONG_MAX)
				prev = page->index - 1;
			/* budgeted write: stop at the first index gap */
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (__f2fs_write_meta_page(page, &wbc, io_type)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			prev = page->index;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
stop:
	if (nwritten)
		f2fs_submit_merged_write(sbi, type);

	blk_finish_plug(&plug);

	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
380 381
	trace_f2fs_set_page_dirty(page, META);

382 383
	if (!PageUptodate(page))
		SetPageUptodate(page);
J
Jaegeuk Kim 已提交
384
	if (!PageDirty(page)) {
385
		__set_page_dirty_nobuffers(page);
386
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
387
		SetPagePrivate(page);
J
Jaegeuk Kim 已提交
388
		f2fs_trace_pid(page);
J
Jaegeuk Kim 已提交
389 390 391 392 393 394 395 396 397
		return 1;
	}
	return 0;
}

/* address_space operations for the META mapping */
const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
	.invalidatepage = f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

C
Chao Yu 已提交
405 406
/*
 * Insert @ino into the per-type inode-management radix tree/list, if not
 * already present.  For FLUSH_INO, additionally record @devidx in the
 * entry's dirty-device bitmap.  The slab object is allocated up front so
 * nothing can fail under ino_lock; the spare is freed if unused.
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
						unsigned int devidx, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *tmp;

	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);

	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = tmp;
		/* preload guarantees insertion cannot fail */
		if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
			f2fs_bug_on(sbi, 1);

		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		/* orphan count is managed by acquire/release_orphan_inode */
		if (type != ORPHAN_INO)
			im->ino_num++;
	}

	if (type == FLUSH_INO)
		f2fs_set_bit(devidx, (char *)&e->dirty_device);

	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	/* entry already existed: release the preallocated spare */
	if (e != tmp)
		kmem_cache_free(ino_entry_slab, tmp);
}

J
Jaegeuk Kim 已提交
440
/*
 * Remove @ino from the per-type inode-management tree/list, if present,
 * and free its entry.  A miss is silently ignored.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		spin_unlock(&im->ino_lock);
		return;
	}

	list_del(&e->list);
	radix_tree_delete(&im->ino_root, ino);
	im->ino_num--;
	spin_unlock(&im->ino_lock);

	/* free outside the lock */
	kmem_cache_free(ino_entry_slab, e);
}

458
/* Track @ino in the per-type dirty-inode management structures. */
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, 0, type);
}

464
/* Stop tracking @ino in the per-type dirty-inode management structures. */
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	bool found;

	spin_lock(&im->ino_lock);
	found = radix_tree_lookup(&im->ino_root, ino) != NULL;
	spin_unlock(&im->ino_lock);

	return found;
}

482
/*
 * Drop all tracked ino entries.  With @all, every type including
 * ORPHAN_INO is purged; otherwise only APPEND_INO and later types.
 */
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

C
Chao Yu 已提交
501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
/* Mark device @devidx dirty for @ino in the @type tracking structures. */
void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
					unsigned int devidx, int type)
{
	__add_ino_entry(sbi, ino, devidx, type);
}

/* Test whether device @devidx is marked dirty for @ino under @type. */
bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
					unsigned int devidx, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;
	bool is_dirty = false;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
		is_dirty = true;
	spin_unlock(&im->ino_lock);
	return is_dirty;
}

J
Jaegeuk Kim 已提交
522
/*
 * Reserve one orphan-inode slot.  Returns 0 on success, -ENOSPC when the
 * orphan limit is reached (or under fault injection).  Must be paired
 * with release_orphan_inode()/remove on failure paths.
 */
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	int err = 0;

	spin_lock(&im->ino_lock);

#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
		spin_unlock(&im->ino_lock);
		f2fs_show_injection_info(FAULT_ORPHAN);
		return -ENOSPC;
	}
#endif
	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
		im->ino_num++;
	spin_unlock(&im->ino_lock);

	return err;
}

J
Jaegeuk Kim 已提交
545 546
/* Give back one orphan-inode slot reserved by acquire_orphan_inode(). */
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

555
/* Record @inode as an orphan and push its inode page to disk state. */
void add_orphan_inode(struct inode *inode)
{
	/* add new orphan ino entry into list */
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
	update_inode_page(inode);
}

/* Drop @ino from the in-memory orphan list. */
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

568
/*
 * Recover one orphan inode recorded in the checkpoint: load it, drop its
 * link count, and let iput() truncate its data.  Returns 0 on success or
 * a negative errno; on failure SBI_NEED_FSCK is set and a warning logged.
 *
 * Fixes vs. the previous version: the redundant second, unchecked
 * dquot_initialize() call is removed, and the error path after the
 * checked call now puts the inode reference instead of leaking it.
 */
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err = acquire_orphan_inode(sbi);

	if (err)
		goto err_out;

	__add_ino_entry(sbi, ino, 0, ORPHAN_INO);

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * there should be a bug that we can't find the entry
		 * to orphan inode.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	err = dquot_initialize(inode);
	if (err) {
		iput(inode);	/* don't leak the inode reference */
		goto err_out;
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	get_node_info(sbi, ino, &ni);

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		err = -EIO;
		goto err_out;
	}
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
	return 0;

err_out:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
			__func__, ino);
	return err;
}

617
/*
 * Replay every orphan inode recorded in the current checkpoint pack.
 * Temporarily lifts SB_RDONLY (and, with CONFIG_QUOTA, enables quotas)
 * so iput-driven truncation works; the original s_flags are restored on
 * exit.  Returns 0 on success or the first recovery error.
 */
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blocks, i, j;
	unsigned int s_flags = sbi->sb->s_flags;
	int err = 0;
#ifdef CONFIG_QUOTA
	int quota_enabled;
#endif

	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
		return 0;

	if (s_flags & SB_RDONLY) {
		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
		sbi->sb->s_flags &= ~SB_RDONLY;
	}

#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sbi->sb->s_flags |= SB_ACTIVE;

	/* Turn on quotas so that they are updated correctly */
	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif

	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);

	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);

	for (i = 0; i < orphan_blocks; i++) {
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				goto out;
			}
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
out:
#ifdef CONFIG_QUOTA
	/* Turn quotas off */
	if (quota_enabled)
		f2fs_quota_off_umount(sbi->sb);
#endif
	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */

	return err;
}

/*
 * Serialize the in-memory orphan list into on-disk orphan blocks starting
 * at @start_blk, packing up to F2FS_ORPHANS_PER_BLOCK entries per block.
 */
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index = 1;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
	head = &im->ino_list;

	/* loop for each orphan inode entry and write them in journal block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			/* start a fresh, zeroed orphan block */
			page = grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * an orphan block is full of 1020 entries,
			 * then we need to flush current orphan blocks
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	/* flush the final, partially-filled block, if any */
	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

732 733 734
/*
 * Read the checkpoint block at @cp_addr and validate its CRC.  On
 * success, returns 0 with *cp_page/*cp_block/*version filled in; on a
 * bad crc_offset or CRC mismatch returns -EINVAL (callers release the
 * page via their error labels).
 */
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	unsigned long blk_size = sbi->blocksize;
	size_t crc_offset = 0;
	__u32 crc = 0;

	*cp_page = get_meta_page(sbi, cp_addr);
	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset > (blk_size - sizeof(__le32))) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	crc = cur_cp_crc(*cp_block);
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}
J
Jaegeuk Kim 已提交
759

760 761 762 763 764 765 766
/*
 * Validate one checkpoint pack: both its header and footer blocks must
 * pass CRC checks and carry the same version.  Returns the (locked)
 * header page with *version set, or NULL when the pack is invalid.
 */
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	int err;

	/* first block of the pack */
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		goto invalid_cp1;
	pre_version = *version;

	/* last block of the pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp2;
	cur_version = *version;

	/* matching versions mean the pack was written completely */
	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

/*
 * Load the newest valid checkpoint from the two on-disk cp packs into
 * sbi->ckpt (including any extra payload blocks), record which pack was
 * used in sbi->cur_cp_pack, and sanity-check the result.  Returns 0 on
 * success, -ENOMEM or -EINVAL on failure.
 */
int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + __cp_payload(sbi);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out valid cp block involves read both
	 * sets( cp pack1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	/* prefer the pack with the newer version */
	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
		goto free_fail_no_cp;

	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;

	if (cp_blks <= 1)
		goto done;

	/* copy the extra payload blocks that follow the cp header */
	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

free_fail_no_cp:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

874
/*
 * Mark @inode dirty of @type and enqueue it on the corresponding
 * per-sb dirty list (volatile files are flagged but not listed).
 * Caller holds sbi->inode_lock[type].
 */
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	/* already tracked */
	if (is_inode_flag_set(inode, flag))
		return;

	set_inode_flag(inode, flag);
	if (!f2fs_is_volatile_file(inode))
		list_add_tail(&F2FS_I(inode)->dirty_list,
						&sbi->inode_list[type]);
	stat_inc_dirty_inode(sbi, type);
}

889
/*
 * Clear the dirty flag of @type and dequeue @inode, but only once it has
 * no remaining dirty pages.  Caller holds sbi->inode_lock[type].
 */
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	/* still has dirty pages, or was never tracked */
	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
		return;

	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}

901
/*
 * Account a newly dirtied @page of @inode: bump the dirty-page count and,
 * for directories (or files when DATA_FLUSH is enabled), enqueue the
 * inode on the dirty list.  Only dir/regular/symlink inodes are tracked.
 */
void update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
		__add_dirty_inode(inode, type);
	inode_inc_dirty_pages(inode);
	spin_unlock(&sbi->inode_lock[type]);

	SetPagePrivate(page);
	f2fs_trace_pid(page);
}

920
/*
 * Undo dirty-inode tracking for @inode once it is clean; mirrors the
 * filtering done in update_dirty_page() (only dir/regular/symlink, and
 * files only when DATA_FLUSH is enabled).
 */
void remove_dirty_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
}

C
Chao Yu 已提交
937
/*
 * Write back every inode on the dirty list of @type, one at a time, until
 * the list is empty.  Returns 0 when done, -EIO on checkpoint error.
 * When the same inode keeps reappearing at the head, back off briefly so
 * concurrent writers can make progress.
 */
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
	struct list_head *head;
	struct inode *inode;
	struct f2fs_inode_info *fi;
	bool is_dir = (type == DIR_INODE);
	unsigned long ino = 0;	/* ino written in the previous iteration */

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	spin_lock(&sbi->inode_lock[type]);

	head = &sbi->inode_list[type];
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[type]);
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return 0;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[type]);
	if (inode) {
		unsigned long cur_ino = inode->i_ino;

		/* mark that this writeback is driven by checkpoint */
		if (is_dir)
			F2FS_I(inode)->cp_task = current;

		filemap_fdatawrite(inode->i_mapping);

		if (is_dir)
			F2FS_I(inode)->cp_task = NULL;

		iput(inode);
		/* We need to give cpu to another writers. */
		if (ino == cur_ino) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			cond_resched();
		} else {
			ino = cur_ino;
		}
	} else {
		/*
		 * We should submit bio, since it exists several
		 * writebacking dentry pages in the freeing inode.
		 */
		f2fs_submit_merged_write(sbi, DATA);
		cond_resched();
	}
	goto retry;
}

995 996 997 998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010
/*
 * Sync the in-core metadata of inodes queued on the DIRTY_META list,
 * bounded by the dirty-imeta count sampled at entry.  Returns 0 when
 * done (or the list drains), -EIO on checkpoint error.
 */
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	while (total--) {
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
		fi = list_first_entry(head, struct f2fs_inode_info,
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		if (inode) {
			sync_inode_metadata(inode, 0);

			/* it's on eviction */
			if (is_inode_flag_set(inode, FI_DIRTY_INODE))
				update_inode_page(inode);
			iput(inode);
		}
	}
	return 0;
}

1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039
/*
 * Snapshot the block/node/inode counters and the next free nid into the
 * checkpoint area while everything is frozen by block_operations().
 */
static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	nid_t next_scan = nm_i->next_scan_nid;

	/* advance to the first actually-free nid from the scan cursor */
	next_free_nid(sbi, &next_scan);

	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(next_scan);
}

J
Jaegeuk Kim 已提交
1040
/*
 * Freeze all the FS-operations for checkpoint.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		/* must drop cp lock before writeback; retry from the top */
		f2fs_unlock_all(sbi);
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		cond_resched();
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush. inode->i_blocks can be updated.
	 */
	down_write(&sbi->node_change);

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		/* release in reverse acquisition order, then retry */
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		cond_resched();
		goto retry_flush_dents;
	}

retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		/* node writeback must run without node_write held */
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
		if (err) {
			/* error path still holds node_change and cp lock */
			up_write(&sbi->node_change);
			f2fs_unlock_all(sbi);
			goto out;
		}
		cond_resched();
		goto retry_flush_nodes;
	}

	/*
	 * sbi->node_change is used only for AIO write_begin path which produces
	 * dirty node blocks and some checkpoint values by block allocation.
	 */
	__prepare_cp_block(sbi);
	up_write(&sbi->node_change);
out:
	blk_finish_plug(&plug);
	return err;
}

/*
 * Undo block_operations(): drop node_write and the all-operations lock
 * so normal FS activity can resume after the checkpoint is written.
 */
static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

1115 1116 1117 1118 1119 1120 1121
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

1122
		if (!get_pages(sbi, F2FS_WB_CP_DATA))
1123 1124
			break;

1125
		io_schedule_timeout(5*HZ);
1126 1127 1128 1129
	}
	finish_wait(&sbi->cp_wait, &wait);
}

1130 1131 1132 1133
/*
 * Recompute the ckpt_flags word of the checkpoint pack for this checkpoint,
 * under sbi->cp_lock: TRIMMED/UMOUNT/FASTBOOT/ORPHAN flags mirror the current
 * cpc->reason and orphan count; FSCK is set sticky when a fsck is needed;
 * CRC recovery is always enabled.  Also disables nat_bits on umount if the
 * cp pack has grown too large to leave room for the nat-bits blocks.
 */
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long flags;

	spin_lock_irqsave(&sbi->cp_lock, flags);

	/* no room left at the end of the segment for nat-bits blocks */
	if ((cpc->reason & CP_UMOUNT) &&
			le32_to_cpu(ckpt->cp_pack_total_block_count) >
			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
		disable_nat_bits(sbi, false);

	if (cpc->reason & CP_TRIMMED)
		__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG);

	if (cpc->reason & CP_UMOUNT)
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason & CP_FASTBOOT)
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	/* set only, never cleared here: cleared by fsck tooling */
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);

	spin_unlock_irqrestore(&sbi->cp_lock, flags);
}

1173 1174 1175 1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205
/*
 * Write the final "cp pack 2" page at @blk_addr and submit it with a
 * flush/barrier, committing the checkpoint: once this page is durable,
 * the new checkpoint pack is the valid one.
 *
 * @src must point to one page (PAGE_SIZE) of checkpoint data.
 */
static void commit_checkpoint(struct f2fs_sb_info *sbi,
	void *src, block_t blk_addr)
{
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};

	/*
	 * pagevec_lookup_tag and lock_page again will take
	 * some extra time. Therefore, update_meta_pages and
	 * sync_meta_pages are combined in this function.
	 */
	struct page *page = grab_meta_page(sbi, blk_addr);
	int err;

	memcpy(page_address(page), src, PAGE_SIZE);
	set_page_dirty(page);

	/* the page must be stable (not under writeback) before we write it */
	f2fs_wait_on_page_writeback(page, META, true);
	f2fs_bug_on(sbi, PageWriteback(page));
	if (unlikely(!clear_page_dirty_for_io(page)))
		f2fs_bug_on(sbi, 1);

	/* writeout cp pack 2 page */
	err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
	f2fs_bug_on(sbi, err);

	f2fs_put_page(page, 0);

	/* submit checkpoint (with barrier if NOBARRIER is not set) */
	f2fs_submit_merged_write(sbi, META_FLUSH);
}

C
Chao Yu 已提交
1206
/*
 * Build and write one checkpoint pack.  Runs with all FS operations frozen
 * by block_operations().  On-disk order of the pack: cp page 1, payload
 * blocks, orphan blocks, data summaries, (optional) node summaries,
 * cp page 2 — the statement order below defines that layout, so do not
 * reorder the update_meta_page()/write_* calls.
 *
 * Returns 0 on success, -EIO on checkpoint error, or the error from
 * flushing the device cache.
 */
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	int i;
	int cp_payload_blks = __cp_payload(sbi);
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;
	int err;

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;
	}

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	/* record current node segments/offsets/alloc types */
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	/* record current data segments/offsets/alloc types */
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	/* 2 cp  + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi, false);
	spin_lock_irqsave(&sbi->cp_lock, flags);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock_irqrestore(&sbi->cp_lock, flags);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	if (__remain_node_summaries(cpc->reason))
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	else
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);

	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	/* checksum covers the checkpoint up to checksum_offset */
	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_next_addr(sbi);

	/* write nat bits */
	if (enabled_nat_bits(sbi, cpc)) {
		__u64 cp_ver = cur_cp_version(ckpt);
		block_t blk;

		/* tag nat_bits with (crc32 | cp_ver) to match this checkpoint */
		cp_ver |= ((__u64)crc32 << 32);
		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);

		/* nat-bits blocks live at the tail of the cp segment */
		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
		for (i = 0; i < nm_i->nat_bits_blocks; i++)
			update_meta_page(sbi, nm_i->nat_bits +
					(i << F2FS_BLKSIZE_BITS), blk + i);

		/* Flush all the NAT BITS pages */
		while (get_pages(sbi, F2FS_DIRTY_META)) {
			sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
			if (unlikely(f2fs_cp_error(sbi)))
				return -EIO;
		}
	}

	/* write out checkpoint buffer at block 0 */
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);

	if (__remain_node_summaries(cpc->reason)) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);

	/* Here, we have one bio having CP pack except cp pack 2 page */
	sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);

	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	/* flush all device cache */
	err = f2fs_flush_device_cache(sbi);
	if (err)
		return err;

	/* barrier and flush checkpoint cp pack 2 page if it can */
	commit_checkpoint(sbi, ckpt, start_blk);
	wait_on_all_pages_writeback(sbi);

	release_ino_entry(sbi, false);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	clear_sbi_flag(sbi, SBI_IS_DIRTY);
	clear_sbi_flag(sbi, SBI_NEED_CP);
	/* next checkpoint will go to the other pack */
	__set_cp_next_pack(sbi);

	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

	return 0;
}

J
Jaegeuk Kim 已提交
1379
/*
 * We guarantee that this checkpoint procedure will not fail.
 *
 * Top-level checkpoint entry point, serialized by sbi->cp_mutex:
 * validates preconditions, freezes FS operations, flushes NAT/SIT,
 * delegates the pack write to do_checkpoint(), then unfreezes.
 * Returns 0 on success (including the no-op fast paths), or a
 * negative errno (-EIO on cp error, -EROFS on a read-only fs).
 */
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	mutex_lock(&sbi->cp_mutex);

	/* nothing dirty: FASTBOOT/SYNC/empty-DISCARD requests are no-ops */
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
		((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_flush_merged_writes(sbi);

	/* this is the case of multiple fstrims without any changes */
	if (cpc->reason & CP_DISCARD) {
		if (!exist_trim_candidates(sbi, cpc)) {
			unblock_operations(sbi);
			goto out;
		}

		/* only SIT needs flushing: skip the full checkpoint */
		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
				SIT_I(sbi)->dirty_sentries == 0 &&
				prefree_segments(sbi) == 0) {
			flush_sit_entries(sbi, cpc);
			clear_prefree_segments(sbi, cpc);
			unblock_operations(sbi);
			goto out;
		}
	}

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi, cpc);
	flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	err = do_checkpoint(sbi, cpc);
	if (err)
		release_discard_addrs(sbi);
	else
		clear_prefree_segments(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason & CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	return err;
}

J
Jaegeuk Kim 已提交
1464
void init_ino_entry_info(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
1465
{
J
Jaegeuk Kim 已提交
1466 1467 1468
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
1469 1470 1471 1472 1473 1474
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
J
Jaegeuk Kim 已提交
1475 1476
	}

C
Chao Yu 已提交
1477
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1478 1479
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
J
Jaegeuk Kim 已提交
1480 1481
}

1482
/*
 * Create the two slab caches used by checkpointing: one for ino tracking
 * entries and one for dirty-inode list entries.  Returns 0 on success,
 * -ENOMEM on failure (with any partially-created cache destroyed).
 */
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
						sizeof(struct ino_entry));
	if (!ino_entry_slab)
		goto fail;

	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
						sizeof(struct inode_entry));
	if (!inode_entry_slab)
		goto free_ino_entry;

	return 0;

free_ino_entry:
	kmem_cache_destroy(ino_entry_slab);
fail:
	return -ENOMEM;
}

void destroy_checkpoint_caches(void)
{
J
Jaegeuk Kim 已提交
1499
	kmem_cache_destroy(ino_entry_slab);
J
Jaegeuk Kim 已提交
1500 1501
	kmem_cache_destroy(inode_entry_slab);
}