checkpoint.c 32.2 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
J
Jaegeuk Kim 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

J
Jaegeuk Kim 已提交
26
static struct kmem_cache *ino_entry_slab;
27
struct kmem_cache *inode_entry_slab;
J
Jaegeuk Kim 已提交
28

29 30
/*
 * Mark the checkpoint as failed by setting CP_ERROR_FLAG and switch the
 * superblock to read-only.  The merged bios are flushed only when @end_io
 * is false.
 */
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	sbi->sb->s_flags |= MS_RDONLY;

	if (end_io)
		return;

	f2fs_flush_merged_bios(sbi);
}

J
Jaegeuk Kim 已提交
37
/*
 * Grab the meta-mapping page at @index.  The lookup is retried until a
 * page is obtained, so the returned page is never NULL.  Writeback on the
 * page is waited for and the page is marked uptodate before returning.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;

	for (;;) {
		page = f2fs_grab_cache_page(mapping, index, false);
		if (page)
			break;
		cond_resched();
	}

	f2fs_wait_on_page_writeback(page, META, true);
	if (!PageUptodate(page))
		SetPageUptodate(page);

	return page;
}

J
Jaegeuk Kim 已提交
56
/*
J
Jaegeuk Kim 已提交
57 58
 * We guarantee no failure on the returned page.
 */
59 60
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
J
Jaegeuk Kim 已提交
61
{
G
Gu Zheng 已提交
62
	struct address_space *mapping = META_MAPPING(sbi);
J
Jaegeuk Kim 已提交
63
	struct page *page;
64
	struct f2fs_io_info fio = {
65
		.sbi = sbi,
66
		.type = META,
M
Mike Christie 已提交
67
		.op = REQ_OP_READ,
68
		.op_flags = REQ_META | REQ_PRIO,
69 70
		.old_blkaddr = index,
		.new_blkaddr = index,
71
		.encrypted_page = NULL,
72
	};
73 74

	if (unlikely(!is_meta))
M
Mike Christie 已提交
75
		fio.op_flags &= ~REQ_META;
J
Jaegeuk Kim 已提交
76
repeat:
77
	page = f2fs_grab_cache_page(mapping, index, false);
J
Jaegeuk Kim 已提交
78 79 80 81
	if (!page) {
		cond_resched();
		goto repeat;
	}
82 83 84
	if (PageUptodate(page))
		goto out;

85 86
	fio.page = page;

87 88
	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
J
Jaegeuk Kim 已提交
89
		goto repeat;
90
	}
J
Jaegeuk Kim 已提交
91

92
	lock_page(page);
93
	if (unlikely(page->mapping != mapping)) {
94 95 96
		f2fs_put_page(page, 1);
		goto repeat;
	}
97 98 99 100 101 102 103

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
	if (unlikely(!PageUptodate(page)))
104
		f2fs_stop_checkpoint(sbi, false);
105
out:
J
Jaegeuk Kim 已提交
106 107 108
	return page;
}

109 110 111 112 113 114 115 116 117 118 119
/*
 * Fetch the meta page at @index through __get_meta_page() with the
 * is_meta argument set to true.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	const bool is_meta = true;

	return __get_meta_page(sbi, index, is_meta);
}

/*
 * For POR only: fetch the page at @index through __get_meta_page() with
 * the is_meta argument set to false.
 */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	const bool is_meta = false;

	return __get_meta_page(sbi, index, is_meta);
}

120
/*
 * Check whether @blkaddr lies inside the range accepted for meta area
 * @type.  META_NAT addresses are always accepted; any @type other than
 * the META_* values handled below triggers BUG().
 */
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		return true;
	case META_SIT:
		return likely(blkaddr < SIT_BLK_CNT(sbi));
	case META_SSA:
		return likely(blkaddr < MAIN_BLKADDR(sbi) &&
				blkaddr >= SM_I(sbi)->ssa_blkaddr);
	case META_CP:
		return likely(blkaddr < SIT_I(sbi)->sit_base_addr &&
				blkaddr >= __start_cp_addr(sbi));
	case META_POR:
		return likely(blkaddr < MAX_BLKADDR(sbi) &&
				blkaddr >= MAIN_BLKADDR(sbi));
	default:
		BUG();
	}

	return true;
}

/*
152
 * Readahead CP/NAT/SIT/SSA pages
153
 */
154 155
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
156 157
{
	struct page *page;
158
	block_t blkno = start;
159
	struct f2fs_io_info fio = {
160
		.sbi = sbi,
161
		.type = META,
M
Mike Christie 已提交
162
		.op = REQ_OP_READ,
163
		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
164
		.encrypted_page = NULL,
165
	};
C
Chao Yu 已提交
166
	struct blk_plug plug;
167

168
	if (unlikely(type == META_POR))
M
Mike Christie 已提交
169
		fio.op_flags &= ~REQ_META;
170

C
Chao Yu 已提交
171
	blk_start_plug(&plug);
172 173
	for (; nrpages-- > 0; blkno++) {

174 175 176
		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

177 178
		switch (type) {
		case META_NAT:
179 180
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
181
				blkno = 0;
182
			/* get nat block addr */
183
			fio.new_blkaddr = current_nat_addr(sbi,
184 185 186 187
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
188
			fio.new_blkaddr = current_sit_addr(sbi,
189 190
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
191
		case META_SSA:
192
		case META_CP:
193
		case META_POR:
194
			fio.new_blkaddr = blkno;
195 196 197 198 199
			break;
		default:
			BUG();
		}

200 201
		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
202 203 204 205 206 207 208
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

209
		fio.page = page;
210
		fio.old_blkaddr = fio.new_blkaddr;
211
		f2fs_submit_page_mbio(&fio);
212 213 214 215
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
C
Chao Yu 已提交
216
	blk_finish_plug(&plug);
217 218 219
	return blkno - start;
}

220 221 222 223 224 225
/*
 * Start a META_POR readahead of BIO_MAX_PAGES blocks at @index, but only
 * when the page at @index is missing from the meta mapping or is not
 * uptodate.
 */
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page = find_get_page(META_MAPPING(sbi), index);
	bool need_ra = !page || !PageUptodate(page);

	f2fs_put_page(page, 0);

	if (!need_ra)
		return;

	ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}

J
Jaegeuk Kim 已提交
234 235 236
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
237
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
J
Jaegeuk Kim 已提交
238

239 240
	trace_f2fs_writepage(page, META);

241
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
242
		goto redirty_out;
243
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
244
		goto redirty_out;
245
	if (unlikely(f2fs_cp_error(sbi)))
246
		goto redirty_out;
J
Jaegeuk Kim 已提交
247

248 249
	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);
250 251 252 253

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);

254
	unlock_page(page);
255

256
	if (unlikely(f2fs_cp_error(sbi)))
257
		f2fs_submit_merged_bio(sbi, META, WRITE);
258

259
	return 0;
260 261

redirty_out:
262
	redirty_page_for_writepage(wbc, page);
263
	return AOP_WRITEPAGE_ACTIVATE;
J
Jaegeuk Kim 已提交
264 265 266 267 268
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
269
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
270
	long diff, written;
J
Jaegeuk Kim 已提交
271

272
	/* collect a number of dirty meta pages and write together */
273 274
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
275
		goto skip_write;
J
Jaegeuk Kim 已提交
276

Y
Yunlei He 已提交
277 278
	trace_f2fs_writepages(mapping->host, wbc, META);

J
Jaegeuk Kim 已提交
279 280
	/* if mounting is failed, skip writing node pages */
	mutex_lock(&sbi->cp_mutex);
281 282
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
J
Jaegeuk Kim 已提交
283
	mutex_unlock(&sbi->cp_mutex);
284
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
J
Jaegeuk Kim 已提交
285
	return 0;
286 287 288

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
Y
Yunlei He 已提交
289
	trace_f2fs_writepages(mapping->host, wbc, META);
290
	return 0;
J
Jaegeuk Kim 已提交
291 292 293 294 295
}

long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
G
Gu Zheng 已提交
296
	struct address_space *mapping = META_MAPPING(sbi);
297
	pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
J
Jaegeuk Kim 已提交
298 299 300 301 302
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
C
Chao Yu 已提交
303
	struct blk_plug plug;
J
Jaegeuk Kim 已提交
304 305 306

	pagevec_init(&pvec, 0);

C
Chao Yu 已提交
307 308
	blk_start_plug(&plug);

J
Jaegeuk Kim 已提交
309 310 311 312 313
	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
314
		if (unlikely(nr_pages == 0))
J
Jaegeuk Kim 已提交
315 316 317 318
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
319

320
			if (prev == ULONG_MAX)
321 322 323 324 325 326
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

J
Jaegeuk Kim 已提交
327
			lock_page(page);
328 329 330 331 332 333 334 335 336 337 338

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

339 340 341
			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
342 343 344
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

345
			if (mapping->a_ops->writepage(page, &wbc)) {
346 347 348
				unlock_page(page);
				break;
			}
349
			nwritten++;
350
			prev = page->index;
351
			if (unlikely(nwritten >= nr_to_write))
J
Jaegeuk Kim 已提交
352 353 354 355 356
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
357
stop:
J
Jaegeuk Kim 已提交
358
	if (nwritten)
J
Jaegeuk Kim 已提交
359
		f2fs_submit_merged_bio(sbi, type, WRITE);
J
Jaegeuk Kim 已提交
360

C
Chao Yu 已提交
361 362
	blk_finish_plug(&plug);

J
Jaegeuk Kim 已提交
363 364 365 366 367
	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
368 369
	trace_f2fs_set_page_dirty(page, META);

370 371
	if (!PageUptodate(page))
		SetPageUptodate(page);
J
Jaegeuk Kim 已提交
372
	if (!PageDirty(page)) {
373
		f2fs_set_page_dirty_nobuffers(page);
374
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
375
		SetPagePrivate(page);
J
Jaegeuk Kim 已提交
376
		f2fs_trace_pid(page);
J
Jaegeuk Kim 已提交
377 378 379 380 381 382 383 384 385
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
386 387
	.invalidatepage = f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
388 389 390
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
J
Jaegeuk Kim 已提交
391 392
};

J
Jaegeuk Kim 已提交
393
/*
 * Ensure an ino_entry for @ino exists in the @type inode management set.
 *
 * An entry is allocated up front.  If @ino is already present in the radix
 * tree the spare allocation is freed; otherwise the new entry is inserted,
 * retrying until the insertion succeeds, and appended to the list.  The
 * ino_num counter is not bumped here for ORPHAN_INO.
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *new, *e;

	new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);

	for (;;) {
		radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
		spin_lock(&im->ino_lock);

		e = radix_tree_lookup(&im->ino_root, ino);
		if (e)
			break;

		if (!radix_tree_insert(&im->ino_root, ino, new)) {
			e = new;
			memset(e, 0, sizeof(*e));
			e->ino = ino;

			list_add_tail(&e->list, &im->ino_list);
			if (type != ORPHAN_INO)
				im->ino_num++;
			break;
		}

		/* insertion failed: drop the lock and preload, then retry */
		spin_unlock(&im->ino_lock);
		radix_tree_preload_end();
	}

	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	/* an existing entry was found, so the spare one is unused */
	if (e != new)
		kmem_cache_free(ino_entry_slab, new);
}

J
Jaegeuk Kim 已提交
425
/*
 * Remove the ino_entry for @ino from the @type inode management set, if
 * one exists: unlink it from the list and the radix tree, decrement
 * ino_num, and free it once the lock has been dropped.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *victim;

	spin_lock(&im->ino_lock);
	victim = radix_tree_lookup(&im->ino_root, ino);
	if (victim) {
		list_del(&victim->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
	}
	spin_unlock(&im->ino_lock);

	if (victim)
		kmem_cache_free(ino_entry_slab, victim);
}

443
/*
 * Add a new dirty ino entry for @ino to the @type list; thin wrapper
 * around __add_ino_entry().
 */
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	__add_ino_entry(sbi, ino, type);
}

449
/*
 * Remove the dirty ino entry for @ino from the @type list; thin wrapper
 * around __remove_ino_entry().
 */
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	__remove_ino_entry(sbi, ino, type);
}

/*
 * Report whether @ino has an entry in the @mode inode management set.
 * mode should be APPEND_INO or UPDATE_INO.
 */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	bool found;

	spin_lock(&im->ino_lock);
	found = radix_tree_lookup(&im->ino_root, ino) != NULL;
	spin_unlock(&im->ino_lock);

	return found;
}

467
/*
 * Free every ino_entry of the inode management sets up to UPDATE_INO.
 * The walk starts at ORPHAN_INO when @all is true and at APPEND_INO
 * otherwise.
 */
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	int first = all ? ORPHAN_INO : APPEND_INO;
	int i;

	for (i = first; i <= UPDATE_INO; i++) {
		struct inode_management *im = &sbi->im[i];
		struct ino_entry *cur, *next;

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(cur, next, &im->ino_list, list) {
			list_del(&cur->list);
			radix_tree_delete(&im->ino_root, cur->ino);
			kmem_cache_free(ino_entry_slab, cur);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

J
Jaegeuk Kim 已提交
486
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
487
{
488
	struct inode_management *im = &sbi->im[ORPHAN_INO];
J
Jaegeuk Kim 已提交
489 490
	int err = 0;

491
	spin_lock(&im->ino_lock);
J
Jaegeuk Kim 已提交
492 493

#ifdef CONFIG_F2FS_FAULT_INJECTION
494
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
J
Jaegeuk Kim 已提交
495 496 497 498
		spin_unlock(&im->ino_lock);
		return -ENOSPC;
	}
#endif
499
	if (unlikely(im->ino_num >= sbi->max_orphans))
J
Jaegeuk Kim 已提交
500
		err = -ENOSPC;
J
Jaegeuk Kim 已提交
501
	else
502 503
		im->ino_num++;
	spin_unlock(&im->ino_lock);
504

J
Jaegeuk Kim 已提交
505 506 507
	return err;
}

J
Jaegeuk Kim 已提交
508 509
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
510 511 512 513 514 515
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
J
Jaegeuk Kim 已提交
516 517
}

518
void add_orphan_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
519
{
520
	/* add new orphan ino entry into list */
521 522
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
	update_inode_page(inode);
J
Jaegeuk Kim 已提交
523 524 525 526
}

/*
 * Remove @ino from the orphan ino list; thin wrapper around
 * __remove_ino_entry() for ORPHAN_INO.
 */
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

531
/*
 * Recover a single orphan inode @ino: register it as an orphan, load it,
 * clear its link count and drop the reference so that its data is
 * truncated during iput().
 *
 * Returns 0 on success.  When no orphan slot can be acquired, or when the
 * node still has a block address afterwards (-EIO), SBI_NEED_FSCK is set
 * and a warning is logged.  A failure to load the inode is returned as is.
 */
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct node_info ni;
	struct inode *inode;
	int err;

	err = acquire_orphan_inode(sbi);
	if (err)
		goto need_fsck;

	__add_ino_entry(sbi, ino, ORPHAN_INO);

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * there should be a bug that we can't find the entry
		 * to orphan inode.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	get_node_info(sbi, ino, &ni);

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		err = -EIO;
		goto need_fsck;
	}

	__remove_ino_entry(sbi, ino, ORPHAN_INO);
	return 0;

need_fsck:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
			__func__, ino);
	return err;
}

576
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
577
{
578
	block_t start_blk, orphan_blocks, i, j;
579
	int err;
J
Jaegeuk Kim 已提交
580

581
	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
582
		return 0;
J
Jaegeuk Kim 已提交
583

W
Wanpeng Li 已提交
584
	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
585
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
J
Jaegeuk Kim 已提交
586

587
	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
588

589
	for (i = 0; i < orphan_blocks; i++) {
J
Jaegeuk Kim 已提交
590 591 592 593 594 595
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
596 597 598 599 600
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				return err;
			}
J
Jaegeuk Kim 已提交
601 602 603 604
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
605
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
606
	return 0;
J
Jaegeuk Kim 已提交
607 608 609 610
}

static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
611
	struct list_head *head;
J
Jaegeuk Kim 已提交
612 613
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
C
Chao Yu 已提交
614
	unsigned short index = 1;
615
	unsigned short orphan_blocks;
616
	struct page *page = NULL;
J
Jaegeuk Kim 已提交
617
	struct ino_entry *orphan = NULL;
618
	struct inode_management *im = &sbi->im[ORPHAN_INO];
J
Jaegeuk Kim 已提交
619

620
	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
621

622 623 624 625 626
	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
627
	head = &im->ino_list;
J
Jaegeuk Kim 已提交
628 629

	/* loop for each orphan inode entry and write them in Jornal block */
630 631
	list_for_each_entry(orphan, head, list) {
		if (!page) {
C
Chao Yu 已提交
632
			page = grab_meta_page(sbi, start_blk++);
633 634 635 636
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}
J
Jaegeuk Kim 已提交
637

638
		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
J
Jaegeuk Kim 已提交
639

640
		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
J
Jaegeuk Kim 已提交
641 642 643 644 645 646 647 648 649 650 651 652 653 654
			/*
			 * an orphan block is full of 1020 entries,
			 * then we need to flush current orphan blocks
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
655
	}
J
Jaegeuk Kim 已提交
656

657 658 659 660 661 662
	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
J
Jaegeuk Kim 已提交
663 664 665
	}
}

666 667 668
/*
 * Read the checkpoint block at @cp_addr and verify its CRC.
 *
 * *@cp_page is always set to the page returned by get_meta_page() and
 * *@cp_block to its contents, including when an error is returned; this
 * function never releases the page.
 *
 * Returns 0 and stores the checkpoint version in *@version on success, or
 * -EINVAL when the stored checksum offset or the CRC value is invalid.
 */
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	unsigned long blk_size = sbi->blocksize;
	size_t crc_offset = 0;
	__u32 crc = 0;

	*cp_page = get_meta_page(sbi, cp_addr);
	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	/*
	 * The offset comes from disk and cannot be trusted.  The whole
	 * 32-bit CRC has to fit inside the block, so an offset pointing
	 * into the last three bytes must be rejected as well; otherwise
	 * the load below would read past the end of the block.
	 */
	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset > (blk_size - sizeof(__le32))) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
							+ crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}
J
Jaegeuk Kim 已提交
694

695 696 697 698 699 700 701
/*
 * Validate the checkpoint pack that starts at @cp_addr.
 *
 * The first block of the pack and the block at offset
 * cp_pack_total_block_count - 1 are both read and CRC-checked; the pack is
 * accepted only when both carry the same version.
 *
 * Returns the page of the first block with *@version set on success, or
 * NULL when the pack is invalid.  Pages that are not returned are released
 * here.
 */
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	unsigned int blks_per_seg =
		1U << le32_to_cpu(sbi->raw_super->log_blocks_per_seg);
	unsigned int cp_blocks;
	int err;

	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		goto invalid_cp1;
	pre_version = *version;

	/*
	 * The block count is read from disk and must not be trusted: zero
	 * would wrap to the block in front of the pack, and anything larger
	 * than a segment would point outside the segment this pack lives in.
	 */
	cp_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
	if (cp_blocks == 0 || cp_blocks > blks_per_seg) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid cp_pack_total_block_count:%u", cp_blocks);
		goto invalid_cp1;
	}

	cp_addr += cp_blocks - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp2;
	cur_version = *version;

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
W
Wanpeng Li 已提交
736
	unsigned int cp_blks = 1 + __cp_payload(sbi);
C
Changman Lee 已提交
737 738
	block_t cp_blk_no;
	int i;
J
Jaegeuk Kim 已提交
739

C
Changman Lee 已提交
740
	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
741 742 743 744 745 746 747 748 749 750
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out valid cp block involves read both
	 * sets( cp pack1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
751 752
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
J
Jaegeuk Kim 已提交
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

771 772
	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
773
		goto free_fail_no_cp;
774

775 776 777 778
	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;
779

C
Changman Lee 已提交
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796
	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
J
Jaegeuk Kim 已提交
797 798 799 800
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

801 802 803
free_fail_no_cp:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
J
Jaegeuk Kim 已提交
804 805 806 807 808
fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

809
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
J
Jaegeuk Kim 已提交
810
{
811
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
812
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
J
Jaegeuk Kim 已提交
813

814
	if (is_inode_flag_set(inode, flag))
815
		return;
816

817 818
	set_inode_flag(inode, flag);
	list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
C
Chao Yu 已提交
819
	stat_inc_dirty_inode(sbi, type);
820 821
}

822
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
C
Chao Yu 已提交
823
{
824
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
C
Chao Yu 已提交
825

826
	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
C
Chao Yu 已提交
827 828
		return;

829 830
	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
C
Chao Yu 已提交
831
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
C
Chao Yu 已提交
832 833
}

834
void update_dirty_page(struct inode *inode, struct page *page)
835
{
836
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
837
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
838

839 840
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
841
		return;
842

843 844
	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
845
		__add_dirty_inode(inode, type);
846
	inode_inc_dirty_pages(inode);
847 848
	spin_unlock(&sbi->inode_lock[type]);

849
	SetPagePrivate(page);
J
Jaegeuk Kim 已提交
850
	f2fs_trace_pid(page);
851 852
}

853
void remove_dirty_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
854
{
855
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
856
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
J
Jaegeuk Kim 已提交
857

858 859
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
860 861
		return;

862 863 864
	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

865 866 867
	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
868 869
}

C
Chao Yu 已提交
870
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
J
Jaegeuk Kim 已提交
871
{
872
	struct list_head *head;
J
Jaegeuk Kim 已提交
873
	struct inode *inode;
874
	struct f2fs_inode_info *fi;
875 876 877 878 879
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
J
Jaegeuk Kim 已提交
880
retry:
881
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
882
		return -EIO;
883

884
	spin_lock(&sbi->inode_lock[type]);
885

886
	head = &sbi->inode_list[type];
J
Jaegeuk Kim 已提交
887
	if (list_empty(head)) {
888
		spin_unlock(&sbi->inode_lock[type]);
889 890 891
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
C
Chao Yu 已提交
892
		return 0;
J
Jaegeuk Kim 已提交
893
	}
894
	fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
895
	inode = igrab(&fi->vfs_inode);
896
	spin_unlock(&sbi->inode_lock[type]);
J
Jaegeuk Kim 已提交
897
	if (inode) {
898
		filemap_fdatawrite(inode->i_mapping);
J
Jaegeuk Kim 已提交
899 900 901 902 903 904
		iput(inode);
	} else {
		/*
		 * We should submit bio, since it exists several
		 * wribacking dentry pages in the freeing inode.
		 */
J
Jaegeuk Kim 已提交
905
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
906
		cond_resched();
J
Jaegeuk Kim 已提交
907 908 909 910
	}
	goto retry;
}

911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	while (total--) {
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
927
		fi = list_first_entry(head, struct f2fs_inode_info,
928 929 930 931
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		if (inode) {
932 933 934 935 936
			sync_inode_metadata(inode, 0);

			/* it's on eviction */
			if (is_inode_flag_set(inode, FI_DIRTY_INODE))
				update_inode_page(inode);
937 938 939 940 941 942
			iput(inode);
		}
	};
	return 0;
}

J
Jaegeuk Kim 已提交
943
/*
J
Jaegeuk Kim 已提交
944 945
 * Freeze all the FS-operations for checkpoint.
 */
946
static int block_operations(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
947 948 949 950 951 952
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
953
	struct blk_plug plug;
954
	int err = 0;
955 956 957

	blk_start_plug(&plug);

958
retry_flush_dents:
959
	f2fs_lock_all(sbi);
J
Jaegeuk Kim 已提交
960 961
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
962
		f2fs_unlock_all(sbi);
C
Chao Yu 已提交
963 964
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
965
			goto out;
966
		goto retry_flush_dents;
J
Jaegeuk Kim 已提交
967 968
	}

969 970 971 972 973 974 975 976
	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

J
Jaegeuk Kim 已提交
977
	/*
A
arter97 已提交
978
	 * POR: we should ensure that there are no dirty node pages
J
Jaegeuk Kim 已提交
979 980
	 * until finishing nat/sit flush.
	 */
981
retry_flush_nodes:
982
	down_write(&sbi->node_write);
J
Jaegeuk Kim 已提交
983 984

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
985
		up_write(&sbi->node_write);
986
		err = sync_node_pages(sbi, &wbc);
C
Chao Yu 已提交
987
		if (err) {
988 989 990
			f2fs_unlock_all(sbi);
			goto out;
		}
991
		goto retry_flush_nodes;
J
Jaegeuk Kim 已提交
992
	}
993
out:
994
	blk_finish_plug(&plug);
995
	return err;
J
Jaegeuk Kim 已提交
996 997 998 999
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
1000
	up_write(&sbi->node_write);
1001

1002
	build_free_nids(sbi, false);
1003
	f2fs_unlock_all(sbi);
J
Jaegeuk Kim 已提交
1004 1005
}

1006 1007 1008 1009 1010 1011 1012
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

1013
		if (!get_pages(sbi, F2FS_WB_CP_DATA))
1014 1015
			break;

1016
		io_schedule_timeout(5*HZ);
1017 1018 1019 1020
	}
	finish_wait(&sbi->cp_wait, &wait);
}

1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050 1051
/*
 * Refresh the checkpoint flags under cp_lock:
 *  - CP_UMOUNT_FLAG and CP_FASTBOOT_FLAG follow cpc->reason,
 *  - CP_ORPHAN_PRESENT_FLAG follows the orphan inode count,
 *  - CP_FSCK_FLAG is set (never cleared here) when SBI_NEED_FSCK is set,
 *  - CP_CRC_RECOVERY_FLAG is always set.
 */
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long nr_orphans = sbi->im[ORPHAN_INO].ino_num;

	spin_lock(&sbi->cp_lock);

	if (cpc->reason != CP_UMOUNT)
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason != CP_FASTBOOT)
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (!nr_orphans)
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);

	spin_unlock(&sbi->cp_lock);
}

C
Chao Yu 已提交
1052
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1053 1054
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1055
	struct f2fs_nm_info *nm_i = NM_I(sbi);
1056
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1057
	nid_t last_nid = nm_i->next_scan_nid;
J
Jaegeuk Kim 已提交
1058 1059
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
J
Jaegeuk Kim 已提交
1060
	__u32 crc32 = 0;
J
Jaegeuk Kim 已提交
1061
	int i;
W
Wanpeng Li 已提交
1062
	int cp_payload_blks = __cp_payload(sbi);
1063 1064 1065
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;
J
Jaegeuk Kim 已提交
1066 1067

	/* Flush all the NAT/SIT pages */
1068
	while (get_pages(sbi, F2FS_DIRTY_META)) {
J
Jaegeuk Kim 已提交
1069
		sync_meta_pages(sbi, META, LONG_MAX);
1070
		if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1071
			return -EIO;
1072
	}
J
Jaegeuk Kim 已提交
1073 1074 1075 1076 1077 1078 1079 1080 1081 1082

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
C
Chao Yu 已提交
1083
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
J
Jaegeuk Kim 已提交
1084 1085 1086 1087 1088 1089 1090
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
C
Chao Yu 已提交
1091
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
J
Jaegeuk Kim 已提交
1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp  + n data seg summary + orphan inode blocks */
1105
	data_sum_blocks = npages_for_summary_flush(sbi, false);
1106
	spin_lock(&sbi->cp_lock);
C
Chao Yu 已提交
1107
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
1108
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
J
Jaegeuk Kim 已提交
1109
	else
1110 1111
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock(&sbi->cp_lock);
J
Jaegeuk Kim 已提交
1112

1113
	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
C
Changman Lee 已提交
1114 1115
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);
J
Jaegeuk Kim 已提交
1116

1117
	if (__remain_node_summaries(cpc->reason))
C
Chao Yu 已提交
1118
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
C
Changman Lee 已提交
1119 1120
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
1121
	else
C
Chao Yu 已提交
1122
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
C
Changman Lee 已提交
1123 1124
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);
1125

1126 1127
	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);
1128

J
Jaegeuk Kim 已提交
1129 1130 1131 1132
	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

K
Keith Mok 已提交
1133
	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
J
Jaegeuk Kim 已提交
1134 1135
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
J
Jaegeuk Kim 已提交
1136 1137
				= cpu_to_le32(crc32);

1138
	start_blk = __start_cp_next_addr(sbi);
J
Jaegeuk Kim 已提交
1139

1140 1141 1142
	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1143
		return -EIO;
1144

J
Jaegeuk Kim 已提交
1145
	/* write out checkpoint buffer at block 0 */
C
Chao Yu 已提交
1146 1147 1148 1149 1150
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);
C
Changman Lee 已提交
1151

1152
	if (orphan_num) {
J
Jaegeuk Kim 已提交
1153 1154 1155 1156 1157 1158
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;
1159 1160 1161 1162 1163 1164

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

1165
	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
1166

1167
	if (__remain_node_summaries(cpc->reason)) {
J
Jaegeuk Kim 已提交
1168 1169 1170 1171 1172
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* writeout checkpoint block */
C
Chao Yu 已提交
1173
	update_meta_page(sbi, ckpt, start_blk);
J
Jaegeuk Kim 已提交
1174 1175

	/* wait for previous submitted node/meta pages writeback */
1176
	wait_on_all_pages_writeback(sbi);
J
Jaegeuk Kim 已提交
1177

1178
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1179
		return -EIO;
1180

1181 1182
	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
J
Jaegeuk Kim 已提交
1183 1184 1185

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
1186
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);
J
Jaegeuk Kim 已提交
1187 1188

	/* Here, we only have one bio having CP pack */
1189
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
J
Jaegeuk Kim 已提交
1190

1191 1192 1193
	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

1194
	release_ino_entry(sbi, false);
1195 1196

	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1197
		return -EIO;
1198

1199
	clear_sbi_flag(sbi, SBI_IS_DIRTY);
1200
	clear_sbi_flag(sbi, SBI_NEED_CP);
1201
	__set_cp_next_pack(sbi);
C
Chao Yu 已提交
1202

1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

C
Chao Yu 已提交
1213
	return 0;
J
Jaegeuk Kim 已提交
1214 1215
}

J
Jaegeuk Kim 已提交
1216
/*
A
arter97 已提交
1217
 * We guarantee that this checkpoint procedure will not fail.
J
Jaegeuk Kim 已提交
1218
 */
C
Chao Yu 已提交
1219
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1220 1221 1222
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
C
Chao Yu 已提交
1223
	int err = 0;
J
Jaegeuk Kim 已提交
1224

1225
	mutex_lock(&sbi->cp_mutex);
J
Jaegeuk Kim 已提交
1226

1227
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1228 1229
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
J
Jaegeuk Kim 已提交
1230
		goto out;
C
Chao Yu 已提交
1231 1232
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
1233
		goto out;
C
Chao Yu 已提交
1234 1235 1236
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
1237
		goto out;
C
Chao Yu 已提交
1238
	}
W
Wanpeng Li 已提交
1239 1240 1241

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

C
Chao Yu 已提交
1242 1243
	err = block_operations(sbi);
	if (err)
1244
		goto out;
J
Jaegeuk Kim 已提交
1245

1246
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1247

1248
	f2fs_flush_merged_bios(sbi);
J
Jaegeuk Kim 已提交
1249

1250
	/* this is the case of multiple fstrims without any changes */
1251
	if (cpc->reason == CP_DISCARD) {
1252 1253 1254 1255 1256
		if (!exist_trim_candidates(sbi, cpc)) {
			unblock_operations(sbi);
			goto out;
		}

1257 1258 1259 1260 1261 1262 1263 1264
		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
				SIT_I(sbi)->dirty_sentries == 0 &&
				prefree_segments(sbi) == 0) {
			flush_sit_entries(sbi, cpc);
			clear_prefree_segments(sbi, cpc);
			unblock_operations(sbi);
			goto out;
		}
1265 1266
	}

J
Jaegeuk Kim 已提交
1267 1268 1269 1270 1271
	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
1272
	ckpt_ver = cur_cp_version(ckpt);
J
Jaegeuk Kim 已提交
1273 1274 1275 1276
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
1277
	flush_sit_entries(sbi, cpc);
J
Jaegeuk Kim 已提交
1278 1279

	/* unlock all the fs_lock[] in do_checkpoint() */
C
Chao Yu 已提交
1280
	err = do_checkpoint(sbi, cpc);
1281
	if (err)
1282
		release_discard_addrs(sbi);
1283
	else
1284
		clear_prefree_segments(sbi, cpc);
C
Chao Yu 已提交
1285

J
Jaegeuk Kim 已提交
1286
	unblock_operations(sbi);
1287
	stat_inc_cp_count(sbi->stat_info);
1288 1289 1290 1291

	if (cpc->reason == CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);
1292 1293

	/* do checkpoint periodically */
1294
	f2fs_update_time(sbi, CP_TIME);
1295
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
J
Jaegeuk Kim 已提交
1296 1297
out:
	mutex_unlock(&sbi->cp_mutex);
C
Chao Yu 已提交
1298
	return err;
J
Jaegeuk Kim 已提交
1299 1300
}

J
Jaegeuk Kim 已提交
1301
void init_ino_entry_info(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
1302
{
J
Jaegeuk Kim 已提交
1303 1304 1305
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
1306 1307 1308 1309 1310 1311
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
J
Jaegeuk Kim 已提交
1312 1313
	}

C
Chao Yu 已提交
1314
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1315 1316
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
J
Jaegeuk Kim 已提交
1317 1318
}

1319
/*
 * Create the two slab caches used by this file ("f2fs_ino_entry" and
 * "f2fs_inode_entry").
 *
 * Returns 0 on success or -ENOMEM if either cache cannot be created; in
 * the latter case the first cache is destroyed again before returning.
 */
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		return -ENOMEM;

	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab)
		goto destroy_ino_entry;

	return 0;

destroy_ino_entry:
	kmem_cache_destroy(ino_entry_slab);
	return -ENOMEM;
}

void destroy_checkpoint_caches(void)
{
J
Jaegeuk Kim 已提交
1336
	kmem_cache_destroy(ino_entry_slab);
J
Jaegeuk Kim 已提交
1337 1338
	kmem_cache_destroy(inode_entry_slab);
}