checkpoint.c 31.9 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

J
Jaegeuk Kim 已提交
26
static struct kmem_cache *ino_entry_slab;
27
struct kmem_cache *inode_entry_slab;
J
Jaegeuk Kim 已提交
28

29 30
/*
 * Stop checkpointing after an error: record CP_ERROR_FLAG through
 * set_ckpt_flags() and mark the super block read-only (MS_RDONLY).
 *
 * @sbi:    f2fs super block info
 * @end_io: when true, the merged bios are not flushed here.
 *          NOTE(review): presumably set by callers running in an I/O
 *          completion path -- confirm against the callers.
 */
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	sbi->sb->s_flags |= MS_RDONLY;

	if (end_io)
		return;

	f2fs_flush_merged_bios(sbi);
}

J
Jaegeuk Kim 已提交
37
/*
 * Return the meta-mapping page for @index, marked uptodate.
 *
 * We guarantee no failure on the returned page: the page cache lookup is
 * retried (with cond_resched() in between) until it yields a page.
 * Any writeback on the page is waited for before it is handed back.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;

	/* keep asking until the page cache hands us a page */
	for (;;) {
		page = f2fs_grab_cache_page(mapping, index, false);
		if (page)
			break;
		cond_resched();
	}

	f2fs_wait_on_page_writeback(page, META, true);

	if (!PageUptodate(page))
		SetPageUptodate(page);

	return page;
}

J
Jaegeuk Kim 已提交
56
/*
J
Jaegeuk Kim 已提交
57 58
 * We guarantee no failure on the returned page.
 */
59 60
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
J
Jaegeuk Kim 已提交
61
{
G
Gu Zheng 已提交
62
	struct address_space *mapping = META_MAPPING(sbi);
J
Jaegeuk Kim 已提交
63
	struct page *page;
64
	struct f2fs_io_info fio = {
65
		.sbi = sbi,
66
		.type = META,
M
Mike Christie 已提交
67 68
		.op = REQ_OP_READ,
		.op_flags = READ_SYNC | REQ_META | REQ_PRIO,
69 70
		.old_blkaddr = index,
		.new_blkaddr = index,
71
		.encrypted_page = NULL,
72
	};
73 74

	if (unlikely(!is_meta))
M
Mike Christie 已提交
75
		fio.op_flags &= ~REQ_META;
J
Jaegeuk Kim 已提交
76
repeat:
77
	page = f2fs_grab_cache_page(mapping, index, false);
J
Jaegeuk Kim 已提交
78 79 80 81
	if (!page) {
		cond_resched();
		goto repeat;
	}
82 83 84
	if (PageUptodate(page))
		goto out;

85 86
	fio.page = page;

87 88
	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
J
Jaegeuk Kim 已提交
89
		goto repeat;
90
	}
J
Jaegeuk Kim 已提交
91

92
	lock_page(page);
93
	if (unlikely(page->mapping != mapping)) {
94 95 96
		f2fs_put_page(page, 1);
		goto repeat;
	}
97 98 99 100 101 102 103

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
	if (unlikely(!PageUptodate(page)))
104
		f2fs_stop_checkpoint(sbi, false);
105
out:
J
Jaegeuk Kim 已提交
106 107 108
	return page;
}

109 110 111 112 113 114 115 116 117 118 119
/*
 * Read the meta page at @index; forwards to __get_meta_page() with
 * is_meta = true, so the read request keeps REQ_META set.
 */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

/*
 * for POR only
 *
 * Same as get_meta_page() but forwards is_meta = false, which makes
 * __get_meta_page() drop REQ_META from the read request flags.
 */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

120
/*
 * Check that @blkaddr lies inside the range accepted for meta @type.
 *
 * META_NAT is always accepted here; every other known type is compared
 * against the boundaries listed below.  An unknown @type hits BUG().
 *
 * Returns true when the address is acceptable, false otherwise.
 */
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	bool out_of_range = false;

	switch (type) {
	case META_NAT:
		/* no range restriction is applied to NAT here */
		break;
	case META_SIT:
		out_of_range = blkaddr >= SIT_BLK_CNT(sbi);
		break;
	case META_SSA:
		out_of_range = blkaddr >= MAIN_BLKADDR(sbi) ||
				blkaddr < SM_I(sbi)->ssa_blkaddr;
		break;
	case META_CP:
		out_of_range = blkaddr >= SIT_I(sbi)->sit_base_addr ||
				blkaddr < __start_cp_addr(sbi);
		break;
	case META_POR:
		out_of_range = blkaddr >= MAX_BLKADDR(sbi) ||
				blkaddr < MAIN_BLKADDR(sbi);
		break;
	default:
		BUG();
	}

	if (unlikely(out_of_range))
		return false;
	return true;
}

/*
152
 * Readahead CP/NAT/SIT/SSA pages
153
 */
154 155
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
156 157
{
	struct page *page;
158
	block_t blkno = start;
159
	struct f2fs_io_info fio = {
160
		.sbi = sbi,
161
		.type = META,
M
Mike Christie 已提交
162
		.op = REQ_OP_READ,
163
		.op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
164
		.encrypted_page = NULL,
165
	};
C
Chao Yu 已提交
166
	struct blk_plug plug;
167

168
	if (unlikely(type == META_POR))
M
Mike Christie 已提交
169
		fio.op_flags &= ~REQ_META;
170

C
Chao Yu 已提交
171
	blk_start_plug(&plug);
172 173
	for (; nrpages-- > 0; blkno++) {

174 175 176
		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

177 178
		switch (type) {
		case META_NAT:
179 180
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
181
				blkno = 0;
182
			/* get nat block addr */
183
			fio.new_blkaddr = current_nat_addr(sbi,
184 185 186 187
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
188
			fio.new_blkaddr = current_sit_addr(sbi,
189 190
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
191
		case META_SSA:
192
		case META_CP:
193
		case META_POR:
194
			fio.new_blkaddr = blkno;
195 196 197 198 199
			break;
		default:
			BUG();
		}

200 201
		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
202 203 204 205 206 207 208
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

209
		fio.page = page;
210
		fio.old_blkaddr = fio.new_blkaddr;
211
		f2fs_submit_page_mbio(&fio);
212 213 214 215
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
C
Chao Yu 已提交
216
	blk_finish_plug(&plug);
217 218 219
	return blkno - start;
}

220 221 222 223 224 225
/*
 * Conditional readahead: when the meta page at @index is missing from the
 * page cache or is not uptodate, start a META_POR readahead of
 * MAX_BIO_BLOCKS(sbi) pages from @index with sync = true.
 */
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page = find_get_page(META_MAPPING(sbi), index);
	bool need_ra = !page || !PageUptodate(page);

	/* drop the lookup reference; page may be NULL here */
	f2fs_put_page(page, 0);

	if (!need_ra)
		return;

	ra_meta_pages(sbi, index, MAX_BIO_BLOCKS(sbi), META_POR, true);
}

J
Jaegeuk Kim 已提交
234 235 236
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
237
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
J
Jaegeuk Kim 已提交
238

239 240
	trace_f2fs_writepage(page, META);

241
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
242
		goto redirty_out;
243
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
244
		goto redirty_out;
245
	if (unlikely(f2fs_cp_error(sbi)))
246
		goto redirty_out;
J
Jaegeuk Kim 已提交
247

248 249
	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);
250 251 252 253

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);

254
	unlock_page(page);
255

256
	if (unlikely(f2fs_cp_error(sbi)))
257
		f2fs_submit_merged_bio(sbi, META, WRITE);
258

259
	return 0;
260 261

redirty_out:
262
	redirty_page_for_writepage(wbc, page);
263
	return AOP_WRITEPAGE_ACTIVATE;
J
Jaegeuk Kim 已提交
264 265 266 267 268
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
269
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
270
	long diff, written;
J
Jaegeuk Kim 已提交
271

272
	/* collect a number of dirty meta pages and write together */
273 274
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
275
		goto skip_write;
J
Jaegeuk Kim 已提交
276

Y
Yunlei He 已提交
277 278
	trace_f2fs_writepages(mapping->host, wbc, META);

J
Jaegeuk Kim 已提交
279 280
	/* if mounting is failed, skip writing node pages */
	mutex_lock(&sbi->cp_mutex);
281 282
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
J
Jaegeuk Kim 已提交
283
	mutex_unlock(&sbi->cp_mutex);
284
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
J
Jaegeuk Kim 已提交
285
	return 0;
286 287 288

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
Y
Yunlei He 已提交
289
	trace_f2fs_writepages(mapping->host, wbc, META);
290
	return 0;
J
Jaegeuk Kim 已提交
291 292 293 294 295
}

long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
G
Gu Zheng 已提交
296
	struct address_space *mapping = META_MAPPING(sbi);
297
	pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
J
Jaegeuk Kim 已提交
298 299 300 301 302
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
C
Chao Yu 已提交
303
	struct blk_plug plug;
J
Jaegeuk Kim 已提交
304 305 306

	pagevec_init(&pvec, 0);

C
Chao Yu 已提交
307 308
	blk_start_plug(&plug);

J
Jaegeuk Kim 已提交
309 310 311 312 313
	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
314
		if (unlikely(nr_pages == 0))
J
Jaegeuk Kim 已提交
315 316 317 318
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
319

320
			if (prev == ULONG_MAX)
321 322 323 324 325 326
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

J
Jaegeuk Kim 已提交
327
			lock_page(page);
328 329 330 331 332 333 334 335 336 337 338

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

339 340 341
			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
342 343 344
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

345
			if (mapping->a_ops->writepage(page, &wbc)) {
346 347 348
				unlock_page(page);
				break;
			}
349
			nwritten++;
350
			prev = page->index;
351
			if (unlikely(nwritten >= nr_to_write))
J
Jaegeuk Kim 已提交
352 353 354 355 356
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
357
stop:
J
Jaegeuk Kim 已提交
358
	if (nwritten)
J
Jaegeuk Kim 已提交
359
		f2fs_submit_merged_bio(sbi, type, WRITE);
J
Jaegeuk Kim 已提交
360

C
Chao Yu 已提交
361 362
	blk_finish_plug(&plug);

J
Jaegeuk Kim 已提交
363 364 365 366 367
	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
368 369
	trace_f2fs_set_page_dirty(page, META);

370 371
	if (!PageUptodate(page))
		SetPageUptodate(page);
J
Jaegeuk Kim 已提交
372
	if (!PageDirty(page)) {
373
		f2fs_set_page_dirty_nobuffers(page);
374
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
375
		SetPagePrivate(page);
J
Jaegeuk Kim 已提交
376
		f2fs_trace_pid(page);
J
Jaegeuk Kim 已提交
377 378 379 380 381 382 383 384 385
		return 1;
	}
	return 0;
}

/*
 * Address space operations for f2fs meta pages.  writepage, writepages
 * and set_page_dirty are the meta-specific handlers defined in this file;
 * the remaining callbacks refer to handlers defined outside this file.
 */
const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
	.invalidatepage = f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	/* only wired up when page migration is configured in */
	.migratepage    = f2fs_migrate_page,
#endif
};

J
Jaegeuk Kim 已提交
393
/*
 * Track @ino in the ino set selected by @type (sbi->im[type]) unless it
 * is already there.
 *
 * A candidate entry is allocated up front.  If the radix tree already
 * holds an entry for @ino, the candidate is freed again.  A failed radix
 * tree insertion is retried after dropping the lock and the preload.
 * The per-type counter is bumped on insertion for every type except
 * ORPHAN_INO.
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *new;

	new = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);

	for (;;) {
		radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
		spin_lock(&im->ino_lock);

		e = radix_tree_lookup(&im->ino_root, ino);
		if (e)
			break;

		e = new;
		if (!radix_tree_insert(&im->ino_root, ino, e)) {
			memset(e, 0, sizeof(struct ino_entry));
			e->ino = ino;

			list_add_tail(&e->list, &im->ino_list);
			if (type != ORPHAN_INO)
				im->ino_num++;
			break;
		}

		/* insertion failed: release everything and try again */
		spin_unlock(&im->ino_lock);
		radix_tree_preload_end();
	}

	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	/* the candidate was not needed */
	if (e != new)
		kmem_cache_free(ino_entry_slab, new);
}

J
Jaegeuk Kim 已提交
425
/*
 * Stop tracking @ino in the ino set selected by @type.  When an entry
 * exists it is unlinked from the list and the radix tree, the per-type
 * counter is decremented and the entry is freed once the lock has been
 * dropped.  A missing entry is silently ignored.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		spin_unlock(&im->ino_lock);
		return;
	}

	list_del(&e->list);
	radix_tree_delete(&im->ino_root, ino);
	im->ino_num--;
	spin_unlock(&im->ino_lock);

	kmem_cache_free(ino_entry_slab, e);
}

443
/*
 * Public entry point for tracking @ino in the ino set of @type; forwards
 * to __add_ino_entry() unchanged.
 */
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, type);
}

449
/*
 * Public entry point for dropping @ino from the ino set of @type;
 * forwards to __remove_ino_entry() unchanged.
 */
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/*
 * Report whether @ino is currently tracked in the ino set of @mode.
 * mode should be APPEND_INO or UPDATE_INO.
 *
 * Returns true when an entry for @ino exists, false otherwise.
 */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	bool found;

	spin_lock(&im->ino_lock);
	found = radix_tree_lookup(&im->ino_root, ino) != NULL;
	spin_unlock(&im->ino_lock);

	return found;
}

467
/*
 * Free every tracked ino entry.
 *
 * @all: true starts at ORPHAN_INO, false starts at APPEND_INO; in both
 *       cases the walk runs up to and including UPDATE_INO.
 */
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	int type = all ? ORPHAN_INO : APPEND_INO;

	for (; type <= UPDATE_INO; type++) {
		struct inode_management *im = &sbi->im[type];
		struct ino_entry *cur, *next;

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(cur, next, &im->ino_list, list) {
			list_del(&cur->list);
			/* cur->ino is still needed here, so free afterwards */
			radix_tree_delete(&im->ino_root, cur->ino);
			kmem_cache_free(ino_entry_slab, cur);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

J
Jaegeuk Kim 已提交
486
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
487
{
488
	struct inode_management *im = &sbi->im[ORPHAN_INO];
J
Jaegeuk Kim 已提交
489 490
	int err = 0;

491
	spin_lock(&im->ino_lock);
J
Jaegeuk Kim 已提交
492 493

#ifdef CONFIG_F2FS_FAULT_INJECTION
494
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
J
Jaegeuk Kim 已提交
495 496 497 498
		spin_unlock(&im->ino_lock);
		return -ENOSPC;
	}
#endif
499
	if (unlikely(im->ino_num >= sbi->max_orphans))
J
Jaegeuk Kim 已提交
500
		err = -ENOSPC;
J
Jaegeuk Kim 已提交
501
	else
502 503
		im->ino_num++;
	spin_unlock(&im->ino_lock);
504

J
Jaegeuk Kim 已提交
505 506 507
	return err;
}

J
Jaegeuk Kim 已提交
508 509
/*
 * Give back one orphan slot: decrement the ORPHAN_INO counter under
 * ino_lock.  The counter being zero on entry is reported through
 * f2fs_bug_on().
 */
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

518
void add_orphan_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
519
{
520
	/* add new orphan ino entry into list */
521 522
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
	update_inode_page(inode);
J
Jaegeuk Kim 已提交
523 524 525 526
}

/*
 * Drop @ino from the orphan ino set; forwards to __remove_ino_entry()
 * with type ORPHAN_INO.
 */
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

531
/*
 * Recover a single orphan inode @ino: reserve an orphan slot, track the
 * ino, load the inode, clear its link count and drop the reference so
 * that its data is truncated during iput().
 *
 * Returns 0 when the node address of @ino is NULL_ADDR afterwards,
 * the error from acquire_orphan_inode() or f2fs_iget_retry() when one of
 * them fails, and -EIO (with SBI_NEED_FSCK set) when the node is still
 * allocated after iput().
 */
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err;

	err = acquire_orphan_inode(sbi);
	if (err) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: orphan failed (ino=%x), run fsck to fix.",
				__func__, ino);
		return err;
	}

	__add_ino_entry(sbi, ino, ORPHAN_INO);

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * there should be a bug that we can't find the entry
		 * to orphan inode.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	get_node_info(sbi, ino, &ni);

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr == NULL_ADDR) {
		__remove_ino_entry(sbi, ino, ORPHAN_INO);
		return 0;
	}

	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
		"%s: orphan failed (ino=%x), run fsck to fix.",
			__func__, ino);
	return -EIO;
}

576
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
577
{
578
	block_t start_blk, orphan_blocks, i, j;
579
	int err;
J
Jaegeuk Kim 已提交
580

581
	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
582
		return 0;
J
Jaegeuk Kim 已提交
583

W
Wanpeng Li 已提交
584
	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
585
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
J
Jaegeuk Kim 已提交
586

587
	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
588

589
	for (i = 0; i < orphan_blocks; i++) {
J
Jaegeuk Kim 已提交
590 591 592 593 594 595
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
596 597 598 599 600
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				return err;
			}
J
Jaegeuk Kim 已提交
601 602 603 604
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
605
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
606
	return 0;
J
Jaegeuk Kim 已提交
607 608 609 610
}

static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
611
	struct list_head *head;
J
Jaegeuk Kim 已提交
612 613
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
C
Chao Yu 已提交
614
	unsigned short index = 1;
615
	unsigned short orphan_blocks;
616
	struct page *page = NULL;
J
Jaegeuk Kim 已提交
617
	struct ino_entry *orphan = NULL;
618
	struct inode_management *im = &sbi->im[ORPHAN_INO];
J
Jaegeuk Kim 已提交
619

620
	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
621

622 623 624 625 626
	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
627
	head = &im->ino_list;
J
Jaegeuk Kim 已提交
628 629

	/* loop for each orphan inode entry and write them in Jornal block */
630 631
	list_for_each_entry(orphan, head, list) {
		if (!page) {
C
Chao Yu 已提交
632
			page = grab_meta_page(sbi, start_blk++);
633 634 635 636
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}
J
Jaegeuk Kim 已提交
637

638
		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);
J
Jaegeuk Kim 已提交
639

640
		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
J
Jaegeuk Kim 已提交
641 642 643 644 645 646 647 648 649 650 651 652 653 654
			/*
			 * an orphan block is full of 1020 entries,
			 * then we need to flush current orphan blocks
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
655
	}
J
Jaegeuk Kim 已提交
656

657 658 659 660 661 662
	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
J
Jaegeuk Kim 已提交
663 664 665
	}
}

666 667 668
/*
 * Read the checkpoint block at @cp_addr and validate its checksum.
 *
 * @cp_block: set to the mapped contents of the page that was read
 * @cp_page:  set to the page returned by get_meta_page(); it is NOT
 *            released here, neither on success nor on failure -- the
 *            caller has to put it
 * @version:  set to cur_cp_version() of the block on success
 *
 * Returns 0 on success, -EINVAL when the stored checksum offset or the
 * checksum value is invalid.
 */
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	unsigned long blk_size = sbi->blocksize;
	size_t crc_offset = 0;
	__u32 crc = 0;

	*cp_page = get_meta_page(sbi, cp_addr);
	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	/*
	 * The checksum is a __le32 stored at crc_offset, so all four bytes
	 * must fit inside the block; an offset in the last three bytes would
	 * make the read below run past the end of the block.
	 */
	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset > blk_size - sizeof(__le32)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
							+ crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}
J
Jaegeuk Kim 已提交
694

695 696 697 698 699 700 701
/*
 * Validate the checkpoint pack starting at @cp_addr.
 *
 * The first and the last block of the pack are read and checked with
 * get_checkpoint_version(); the pack is valid when both succeed and carry
 * the same version.
 *
 * @version: receives the pack version on success; it may also have been
 *           overwritten when the pack turns out to be invalid
 *
 * Returns the page of the first checkpoint block on success (the caller
 * has to put it), or NULL when the pack is invalid.
 */
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	unsigned int pack_blocks;
	int err;

	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		goto invalid_cp1;
	pre_version = *version;

	/*
	 * The pack length comes straight from disk.  Refuse a value larger
	 * than one segment instead of using it to compute the address of
	 * the second block to read.
	 */
	pack_blocks = le32_to_cpu(cp_block->cp_pack_total_block_count);
	if (pack_blocks > sbi->blocks_per_seg) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid cp_pack_total_block_count:%u", pack_blocks);
		goto invalid_cp1;
	}

	/* the last block of the pack holds the second copy */
	cp_addr += pack_blocks - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp2;
	cur_version = *version;

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
W
Wanpeng Li 已提交
736
	unsigned int cp_blks = 1 + __cp_payload(sbi);
C
Changman Lee 已提交
737 738
	block_t cp_blk_no;
	int i;
J
Jaegeuk Kim 已提交
739

C
Changman Lee 已提交
740
	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
741 742 743 744 745 746 747 748 749 750
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out valid cp block involves read both
	 * sets( cp pack1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
751 752
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
J
Jaegeuk Kim 已提交
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

771 772 773 774
	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
		goto fail_no_cp;

C
Changman Lee 已提交
775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791
	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
J
Jaegeuk Kim 已提交
792 793 794 795 796 797 798 799 800
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

801
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
J
Jaegeuk Kim 已提交
802
{
803
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
804
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
J
Jaegeuk Kim 已提交
805

806
	if (is_inode_flag_set(inode, flag))
807
		return;
808

809 810
	set_inode_flag(inode, flag);
	list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
C
Chao Yu 已提交
811
	stat_inc_dirty_inode(sbi, type);
812 813
}

814
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
C
Chao Yu 已提交
815
{
816
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
C
Chao Yu 已提交
817

818
	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
C
Chao Yu 已提交
819 820
		return;

821 822
	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
C
Chao Yu 已提交
823
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
C
Chao Yu 已提交
824 825
}

826
void update_dirty_page(struct inode *inode, struct page *page)
827
{
828
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
829
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
830

831 832
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
833
		return;
834

835 836
	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
837
		__add_dirty_inode(inode, type);
838
	inode_inc_dirty_pages(inode);
839 840
	spin_unlock(&sbi->inode_lock[type]);

841
	SetPagePrivate(page);
J
Jaegeuk Kim 已提交
842
	f2fs_trace_pid(page);
843 844
}

845
void remove_dirty_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
846
{
847
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
848
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
J
Jaegeuk Kim 已提交
849

850 851
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
852 853
		return;

854 855 856
	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

857 858 859
	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
860 861
}

C
Chao Yu 已提交
862
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
J
Jaegeuk Kim 已提交
863
{
864
	struct list_head *head;
J
Jaegeuk Kim 已提交
865
	struct inode *inode;
866
	struct f2fs_inode_info *fi;
867 868 869 870 871
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
J
Jaegeuk Kim 已提交
872
retry:
873
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
874
		return -EIO;
875

876
	spin_lock(&sbi->inode_lock[type]);
877

878
	head = &sbi->inode_list[type];
J
Jaegeuk Kim 已提交
879
	if (list_empty(head)) {
880
		spin_unlock(&sbi->inode_lock[type]);
881 882 883
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
C
Chao Yu 已提交
884
		return 0;
J
Jaegeuk Kim 已提交
885
	}
886 887
	fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
888
	spin_unlock(&sbi->inode_lock[type]);
J
Jaegeuk Kim 已提交
889
	if (inode) {
890
		filemap_fdatawrite(inode->i_mapping);
J
Jaegeuk Kim 已提交
891 892 893 894 895 896
		iput(inode);
	} else {
		/*
		 * We should submit bio, since it exists several
		 * wribacking dentry pages in the freeing inode.
		 */
J
Jaegeuk Kim 已提交
897
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
898
		cond_resched();
J
Jaegeuk Kim 已提交
899 900 901 902
	}
	goto retry;
}

903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928 929 930
/*
 * Update the inode page of inodes queued on the DIRTY_META list.
 *
 * The number of rounds is bounded by the F2FS_DIRTY_IMETA count sampled
 * on entry; each round handles the inode at the head of the list.
 *
 * Returns 0 when the budget is used up or the list is empty, -EIO when a
 * checkpoint error is observed.
 */
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	for (; total; total--) {
		struct f2fs_inode_info *fi;
		struct inode *inode;

		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
		fi = list_entry(head->next, struct f2fs_inode_info,
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);

		/* igrab() may fail; such an inode is simply skipped */
		if (!inode)
			continue;

		update_inode_page(inode);
		iput(inode);
	}
	return 0;
}

J
Jaegeuk Kim 已提交
931
/*
 * Freeze all the FS-operations for checkpoint.
 *
 * Dirty dentry pages, dirty inode metadata and dirty node pages are
 * flushed until none are left while the locks are held.
 *
 * Returns 0 with both f2fs_lock_all() and sbi->node_write (write side)
 * held; unblock_operations() releases them.  On error, the error from
 * the failing flush helper is returned and neither lock is held.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		/* flushing is done without the lock; re-check afterwards */
		f2fs_unlock_all(sbi);
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

	/* same pattern for dirty inode metadata */
	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, &wbc);
		if (err) {
			/* f2fs_lock_all() is still held here; drop it */
			f2fs_unlock_all(sbi);
			goto out;
		}
		goto retry_flush_nodes;
	}
out:
	blk_finish_plug(&plug);
	return err;
}

/*
 * Release the locks left held by a successful block_operations():
 * node_write is dropped first, then build_free_nids() is called, and
 * finally f2fs_unlock_all() releases the remaining lock.
 */
static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);

	build_free_nids(sbi);
	f2fs_unlock_all(sbi);
}

994 995 996 997 998 999 1000
/*
 * Sleep uninterruptibly on sbi->cp_wait until sbi->nr_wb_bios reads as
 * zero.  The counter is re-checked after every wakeup and at the latest
 * after each 5 * HZ timeout.
 */
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		/* queue ourselves before testing the condition */
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!atomic_read(&sbi->nr_wb_bios))
			break;

		io_schedule_timeout(5*HZ);
	}
	finish_wait(&sbi->cp_wait, &wait);
}

1009 1010 1011 1012 1013 1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039
/*
 * Bring the checkpoint flags in line with the checkpoint reason and the
 * current state, under sbi->cp_lock:
 *   - CP_UMOUNT_FLAG / CP_FASTBOOT_FLAG follow cpc->reason,
 *   - CP_ORPHAN_PRESENT_FLAG follows the orphan ino count,
 *   - CP_FSCK_FLAG is set (never cleared here) when SBI_NEED_FSCK is set,
 *   - CP_CRC_RECOVERY_FLAG is always set.
 */
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	/* sampled before cp_lock is taken */
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);

	spin_lock(&sbi->cp_lock);

	if (cpc->reason == CP_UMOUNT)
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason == CP_FASTBOOT)
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);

	spin_unlock(&sbi->cp_lock);
}

C
Chao Yu 已提交
1040
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1041 1042
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1043
	struct f2fs_nm_info *nm_i = NM_I(sbi);
1044
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1045
	nid_t last_nid = nm_i->next_scan_nid;
J
Jaegeuk Kim 已提交
1046 1047
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
J
Jaegeuk Kim 已提交
1048
	__u32 crc32 = 0;
J
Jaegeuk Kim 已提交
1049
	int i;
W
Wanpeng Li 已提交
1050
	int cp_payload_blks = __cp_payload(sbi);
1051 1052 1053
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;
J
Jaegeuk Kim 已提交
1054 1055

	/* Flush all the NAT/SIT pages */
1056
	while (get_pages(sbi, F2FS_DIRTY_META)) {
J
Jaegeuk Kim 已提交
1057
		sync_meta_pages(sbi, META, LONG_MAX);
1058
		if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1059
			return -EIO;
1060
	}
J
Jaegeuk Kim 已提交
1061 1062 1063 1064 1065 1066 1067 1068 1069 1070

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
C
Chao Yu 已提交
1071
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
J
Jaegeuk Kim 已提交
1072 1073 1074 1075 1076 1077 1078
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
C
Chao Yu 已提交
1079
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
J
Jaegeuk Kim 已提交
1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1090 1091 1092
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp  + n data seg summary + orphan inode blocks */
1093
	data_sum_blocks = npages_for_summary_flush(sbi, false);
1094
	spin_lock(&sbi->cp_lock);
C
Chao Yu 已提交
1095
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
1096
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
J
Jaegeuk Kim 已提交
1097
	else
1098 1099
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock(&sbi->cp_lock);
J
Jaegeuk Kim 已提交
1100

1101
	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
C
Changman Lee 已提交
1102 1103
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);
J
Jaegeuk Kim 已提交
1104

1105
	if (__remain_node_summaries(cpc->reason))
C
Chao Yu 已提交
1106
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
C
Changman Lee 已提交
1107 1108
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
1109
	else
C
Chao Yu 已提交
1110
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
C
Changman Lee 已提交
1111 1112
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);
1113

1114 1115
	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);
1116

J
Jaegeuk Kim 已提交
1117 1118 1119 1120
	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

K
Keith Mok 已提交
1121
	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
J
Jaegeuk Kim 已提交
1122 1123
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
J
Jaegeuk Kim 已提交
1124 1125 1126 1127
				= cpu_to_le32(crc32);

	start_blk = __start_cp_addr(sbi);

1128 1129 1130
	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1131
		return -EIO;
1132

J
Jaegeuk Kim 已提交
1133
	/* write out checkpoint buffer at block 0 */
C
Chao Yu 已提交
1134 1135 1136 1137 1138
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);
C
Changman Lee 已提交
1139

1140
	if (orphan_num) {
J
Jaegeuk Kim 已提交
1141 1142 1143 1144 1145 1146
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;
1147 1148 1149 1150 1151 1152

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

1153
	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
1154

1155
	if (__remain_node_summaries(cpc->reason)) {
J
Jaegeuk Kim 已提交
1156 1157 1158 1159 1160
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* writeout checkpoint block */
C
Chao Yu 已提交
1161
	update_meta_page(sbi, ckpt, start_blk);
J
Jaegeuk Kim 已提交
1162 1163

	/* wait for previous submitted node/meta pages writeback */
1164
	wait_on_all_pages_writeback(sbi);
J
Jaegeuk Kim 已提交
1165

1166
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1167
		return -EIO;
1168

1169 1170
	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
J
Jaegeuk Kim 已提交
1171 1172 1173

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
1174
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);
J
Jaegeuk Kim 已提交
1175 1176

	/* Here, we only have one bio having CP pack */
1177
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
J
Jaegeuk Kim 已提交
1178

1179 1180 1181
	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

1182
	release_ino_entry(sbi, false);
1183 1184

	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1185
		return -EIO;
1186

1187
	clear_prefree_segments(sbi, cpc);
1188
	clear_sbi_flag(sbi, SBI_IS_DIRTY);
1189
	clear_sbi_flag(sbi, SBI_NEED_CP);
C
Chao Yu 已提交
1190

1191 1192 1193 1194 1195 1196 1197 1198 1199 1200
	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

C
Chao Yu 已提交
1201
	return 0;
J
Jaegeuk Kim 已提交
1202 1203
}

J
Jaegeuk Kim 已提交
1204
/*
A
arter97 已提交
1205
 * We guarantee that this checkpoint procedure will not fail.
J
Jaegeuk Kim 已提交
1206
 */
C
Chao Yu 已提交
1207
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1208 1209 1210
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
C
Chao Yu 已提交
1211
	int err = 0;
J
Jaegeuk Kim 已提交
1212

1213
	mutex_lock(&sbi->cp_mutex);
J
Jaegeuk Kim 已提交
1214

1215
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1216 1217
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
J
Jaegeuk Kim 已提交
1218
		goto out;
C
Chao Yu 已提交
1219 1220
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
1221
		goto out;
C
Chao Yu 已提交
1222 1223 1224
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
1225
		goto out;
C
Chao Yu 已提交
1226
	}
W
Wanpeng Li 已提交
1227 1228 1229

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

C
Chao Yu 已提交
1230 1231
	err = block_operations(sbi);
	if (err)
1232
		goto out;
J
Jaegeuk Kim 已提交
1233

1234
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1235

1236
	f2fs_flush_merged_bios(sbi);
J
Jaegeuk Kim 已提交
1237

1238 1239 1240 1241 1242 1243 1244
	/* this is the case of multiple fstrims without any changes */
	if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
		f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
		f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
		f2fs_bug_on(sbi, prefree_segments(sbi));
		flush_sit_entries(sbi, cpc);
		clear_prefree_segments(sbi, cpc);
C
Chao Yu 已提交
1245
		f2fs_wait_all_discard_bio(sbi);
1246 1247 1248 1249
		unblock_operations(sbi);
		goto out;
	}

J
Jaegeuk Kim 已提交
1250 1251 1252 1253 1254
	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
1255
	ckpt_ver = cur_cp_version(ckpt);
J
Jaegeuk Kim 已提交
1256 1257 1258 1259
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
1260
	flush_sit_entries(sbi, cpc);
J
Jaegeuk Kim 已提交
1261 1262

	/* unlock all the fs_lock[] in do_checkpoint() */
C
Chao Yu 已提交
1263
	err = do_checkpoint(sbi, cpc);
J
Jaegeuk Kim 已提交
1264

C
Chao Yu 已提交
1265 1266
	f2fs_wait_all_discard_bio(sbi);

J
Jaegeuk Kim 已提交
1267
	unblock_operations(sbi);
1268
	stat_inc_cp_count(sbi->stat_info);
1269 1270 1271 1272

	if (cpc->reason == CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);
1273 1274

	/* do checkpoint periodically */
1275
	f2fs_update_time(sbi, CP_TIME);
1276
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
J
Jaegeuk Kim 已提交
1277 1278
out:
	mutex_unlock(&sbi->cp_mutex);
C
Chao Yu 已提交
1279
	return err;
J
Jaegeuk Kim 已提交
1280 1281
}

J
Jaegeuk Kim 已提交
1282
void init_ino_entry_info(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
1283
{
J
Jaegeuk Kim 已提交
1284 1285 1286
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
1287 1288 1289 1290 1291 1292
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
J
Jaegeuk Kim 已提交
1293 1294
	}

C
Chao Yu 已提交
1295
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1296 1297
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
J
Jaegeuk Kim 已提交
1298 1299
}

1300
/*
 * Create the two slab caches used by this file: ino_entry_slab and
 * inode_entry_slab.
 *
 * Returns 0 on success or -ENOMEM if either cache cannot be created;
 * on failure no cache is left allocated by this function.
 */
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		goto fail;

	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab)
		goto destroy_ino_entry;

	return 0;

destroy_ino_entry:
	/* unwind the first cache when the second one fails */
	kmem_cache_destroy(ino_entry_slab);
fail:
	return -ENOMEM;
}

void destroy_checkpoint_caches(void)
{
J
Jaegeuk Kim 已提交
1317
	kmem_cache_destroy(ino_entry_slab);
J
Jaegeuk Kim 已提交
1318 1319
	kmem_cache_destroy(inode_entry_slab);
}