checkpoint.c 32.2 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
J
Jaegeuk Kim 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

J
Jaegeuk Kim 已提交
26
static struct kmem_cache *ino_entry_slab;
27
struct kmem_cache *inode_entry_slab;
J
Jaegeuk Kim 已提交
28

29 30
/*
 * Put the filesystem into an unrecoverable error state: flag the
 * checkpoint as broken and force the superblock read-only.
 *
 * @end_io: true when called from bio end_io context, where flushing
 *          merged bios could deadlock, so it is skipped.
 */
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	sbi->sb->s_flags |= MS_RDONLY;
	if (!end_io)
		f2fs_flush_merged_bios(sbi);
}

J
Jaegeuk Kim 已提交
37
/*
 * We guarantee no failure on the returned page.
 *
 * Grab (and lock) the meta page at @index, retrying allocation forever,
 * waiting out any writeback, and marking it uptodate without reading it
 * from disk — callers intend to overwrite the whole page.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		/* allocation failed; yield and retry until it succeeds */
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META, true);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	return page;
}

J
Jaegeuk Kim 已提交
56
/*
 * We guarantee no failure on the returned page.
 *
 * Read the meta page at @index from disk (unless already cached and
 * uptodate) and return it locked.  @is_meta selects whether the read
 * carries REQ_META; POR (recovery) reads clear it.
 */
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.op = REQ_OP_READ,
		.op_flags = REQ_META | REQ_PRIO,
		.old_blkaddr = index,
		.new_blkaddr = index,
		.encrypted_page = NULL,
	};

	if (unlikely(!is_meta))
		fio.op_flags &= ~REQ_META;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		cond_resched();
		goto repeat;
	}
	if (PageUptodate(page))
		goto out;

	fio.page = page;

	if (f2fs_submit_page_bio(&fio)) {
		/* submission failed; drop and retry from scratch */
		f2fs_put_page(page, 1);
		goto repeat;
	}

	lock_page(page);
	if (unlikely(page->mapping != mapping)) {
		/* page was truncated/migrated while unlocked; retry */
		f2fs_put_page(page, 1);
		goto repeat;
	}

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
	if (unlikely(!PageUptodate(page)))
		f2fs_stop_checkpoint(sbi, false);
out:
	return page;
}

109 110 111 112 113 114 115 116 117 118 119
/* Read a regular meta page (NAT/SIT/SSA/CP) with REQ_META semantics. */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

/* for POR only: read a recovery page without the REQ_META flag */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

120
/*
 * Range-check a block address against the on-disk area it is expected
 * to live in (@type).  Returns false when the address falls outside the
 * legal range for that metadata area; META_NAT performs no check here.
 */
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			return false;
		break;
	case META_SSA:
		/* SSA area lies between ssa_blkaddr and the main area */
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			return false;
		break;
	case META_CP:
		/* CP area lies between the cp start and the SIT base */
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			return false;
		break;
	case META_POR:
		/* recovery addresses must be inside the main area */
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
		BUG();
	}

	return true;
}

/*
 * Readahead CP/NAT/SIT/SSA pages
 *
 * Issue reads for up to @nrpages consecutive meta blocks of @type
 * starting at @start, stopping early at the first invalid address.
 * @sync selects REQ_META|REQ_PRIO vs. plain readahead.
 * Returns the number of block numbers consumed.
 */
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
{
	struct page *page;
	block_t blkno = start;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.op = REQ_OP_READ,
		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
		.encrypted_page = NULL,
	};
	struct blk_plug plug;

	if (unlikely(type == META_POR))
		fio.op_flags &= ~REQ_META;

	blk_start_plug(&plug);
	for (; nrpages-- > 0; blkno++) {

		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

		/* translate the logical meta index into an on-disk address */
		switch (type) {
		case META_NAT:
			/* NAT readahead wraps back to block 0 past max_nid */
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
				blkno = 0;
			/* get nat block addr */
			fio.new_blkaddr = current_nat_addr(sbi,
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
			fio.new_blkaddr = current_sit_addr(sbi,
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
		case META_SSA:
		case META_CP:
		case META_POR:
			/* these areas are addressed directly */
			fio.new_blkaddr = blkno;
			break;
		default:
			BUG();
		}

		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
		if (!page)
			continue;
		if (PageUptodate(page)) {
			/* already cached; nothing to read */
			f2fs_put_page(page, 1);
			continue;
		}

		fio.page = page;
		fio.old_blkaddr = fio.new_blkaddr;
		f2fs_submit_page_mbio(&fio);
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
	blk_finish_plug(&plug);
	return blkno - start;
}

220 221 222 223 224 225
/*
 * Conditionally kick off POR readahead: only when the page at @index is
 * absent or not yet uptodate, read ahead a BIO_MAX_PAGES window.
 */
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	bool readahead = false;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}

J
Jaegeuk Kim 已提交
234 235 236
/*
 * ->writepage for the meta mapping.  Writes one dirty meta page, or
 * redirties it (returning AOP_WRITEPAGE_ACTIVATE) when writeback must
 * be deferred: during recovery, under checkpoint error, or when reclaim
 * targets a pre-summary block.
 */
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);

	trace_f2fs_writepage(page, META);

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
		goto redirty_out;
	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio_cond(sbi, page->mapping->host,
						0, page->index, META, WRITE);

	unlock_page(page);

	/* flush merged IO immediately if the fs just hit a cp error */
	if (unlikely(f2fs_cp_error(sbi)))
		f2fs_submit_merged_bio(sbi, META, WRITE);

	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

/*
 * ->writepages for the meta mapping.  Batches dirty meta pages and
 * writes them together under cp_mutex; skips writeback when too few
 * pages are dirty or for kupdate-style flushes.
 */
static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
	long diff, written;

	/* collect a number of dirty meta pages and write together */
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
		goto skip_write;

	trace_f2fs_writepages(mapping->host, wbc, META);

	/* if mounting is failed, skip writing node pages */
	mutex_lock(&sbi->cp_mutex);
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
	mutex_unlock(&sbi->cp_mutex);
	/* account what was actually written against the wbc budget */
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
	return 0;

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
	trace_f2fs_writepages(mapping->host, wbc, META);
	return 0;
}

/*
 * Write back dirty meta pages, up to @nr_to_write of them, in index
 * order.  When a write budget is set (nr_to_write != LONG_MAX) the walk
 * stops at the first gap in page indices so the flush stays contiguous.
 * Returns the number of pages written.
 */
long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
	struct address_space *mapping = META_MAPPING(sbi);
	pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
	struct blk_plug plug;

	pagevec_init(&pvec, 0);

	blk_start_plug(&plug);

	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
		if (unlikely(nr_pages == 0))
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (prev == ULONG_MAX)
				prev = page->index - 1;
			/* stop on a discontiguity when budget-limited */
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

			lock_page(page);

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			if (mapping->a_ops->writepage(page, &wbc)) {
				unlock_page(page);
				break;
			}
			nwritten++;
			prev = page->index;
			if (unlikely(nwritten >= nr_to_write))
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
stop:
	if (nwritten)
		f2fs_submit_merged_bio(sbi, type, WRITE);

	blk_finish_plug(&plug);

	return nwritten;
}

/*
 * ->set_page_dirty for the meta mapping.  Marks the page dirty (and
 * uptodate) and bumps the dirty-meta counter.  Returns 1 if the page
 * newly became dirty, 0 if it already was.
 */
static int f2fs_set_meta_page_dirty(struct page *page)
{
	trace_f2fs_set_page_dirty(page, META);

	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (!PageDirty(page)) {
		f2fs_set_page_dirty_nobuffers(page);
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
		SetPagePrivate(page);
		f2fs_trace_pid(page);
		return 1;
	}
	return 0;
}

/* address_space operations for the META mapping */
const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
	.invalidatepage = f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
};

J
Jaegeuk Kim 已提交
394
/*
 * Insert @ino into the inode-management radix tree/list for @type,
 * if it is not already there.  The entry is preallocated outside the
 * lock; radix_tree_preload + retry handles insertion races.  ORPHAN_INO
 * does not bump ino_num here — acquire_orphan_inode() accounts it.
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *tmp;

	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);
retry:
	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = tmp;
		if (radix_tree_insert(&im->ino_root, ino, e)) {
			/* raced with another inserter; restart */
			spin_unlock(&im->ino_lock);
			radix_tree_preload_end();
			goto retry;
		}
		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		if (type != ORPHAN_INO)
			im->ino_num++;
	}
	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	/* entry already existed: free the unused preallocation */
	if (e != tmp)
		kmem_cache_free(ino_entry_slab, tmp);
}

J
Jaegeuk Kim 已提交
426
/*
 * Remove @ino's entry from the @type inode-management structures and
 * free it; no-op if the entry is not present.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
		spin_unlock(&im->ino_lock);
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&im->ino_lock);
}

444
/* Record @ino in the dirty-ino set for @type. */
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	__add_ino_entry(sbi, ino, type);
}

450
/* Drop @ino from the dirty-ino set for @type. */
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	struct ino_entry *entry;

	spin_lock(&im->ino_lock);
	entry = radix_tree_lookup(&im->ino_root, ino);
	spin_unlock(&im->ino_lock);

	return entry != NULL;
}

468
/*
 * Free every ino entry in the APPEND/UPDATE lists; when @all is true
 * the ORPHAN list is drained as well.
 */
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = all ? ORPHAN_INO: APPEND_INO; i <= UPDATE_INO; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

J
Jaegeuk Kim 已提交
487
/*
 * Reserve one orphan slot, failing with -ENOSPC when the per-fs orphan
 * limit is reached (or under fault injection).  Paired with
 * release_orphan_inode() / remove_orphan_inode().
 */
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	int err = 0;

	spin_lock(&im->ino_lock);

#ifdef CONFIG_F2FS_FAULT_INJECTION
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
		spin_unlock(&im->ino_lock);
		return -ENOSPC;
	}
#endif
	if (unlikely(im->ino_num >= sbi->max_orphans))
		err = -ENOSPC;
	else
		im->ino_num++;
	spin_unlock(&im->ino_lock);

	return err;
}

J
Jaegeuk Kim 已提交
509 510
/* Give back an orphan slot reserved by acquire_orphan_inode(). */
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
}

519
/*
 * Register @inode as an orphan and persist its inode page so the
 * orphan state survives a crash.
 */
void add_orphan_inode(struct inode *inode)
{
	/* add new orphan ino entry into list */
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
	update_inode_page(inode);
}

/* Drop @ino from the orphan list (also releases its slot count). */
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

532
/*
 * Recover one orphan: re-register it, look the inode up, drop its link
 * count, and let iput() truncate/evict it.  Afterwards the node info
 * must show the inode gone (NULL_ADDR); otherwise the fs needs fsck.
 * Returns 0 on success or a negative errno.
 */
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err = acquire_orphan_inode(sbi);

	if (err) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: orphan failed (ino=%x), run fsck to fix.",
				__func__, ino);
		return err;
	}

	__add_ino_entry(sbi, ino, ORPHAN_INO);

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * there should be a bug that we can't find the entry
		 * to orphan inode.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	get_node_info(sbi, ino, &ni);

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
				__func__, ino);
		return -EIO;
	}
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
	return 0;
}

577
/*
 * Walk the orphan blocks recorded in the checkpoint pack and recover
 * each listed inode.  Clears CP_ORPHAN_PRESENT_FLAG on success.
 * Returns 0, or the first recovery error.
 */
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
{
	block_t start_blk, orphan_blocks, i, j;
	int err;

	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
		return 0;

	/* orphan blocks follow the cp block and its payload */
	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);

	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);

	for (i = 0; i < orphan_blocks; i++) {
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				return err;
			}
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
	return 0;
}

/*
 * Serialize the in-memory orphan list into on-disk orphan blocks
 * starting at @start_blk, F2FS_ORPHANS_PER_BLOCK entries per block.
 */
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index = 1;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
	head = &im->ino_list;

	/* loop for each orphan inode entry and write them in journal block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			/* start a fresh orphan block */
			page = grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * an orphan block is full of 1020 entries,
			 * then we need to flush current orphan blocks
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	/* flush the final, partially-filled block */
	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

667 668 669
/*
 * Read the checkpoint block at @cp_addr, validate its CRC, and return
 * its version through @version.  On success the page is returned via
 * @cp_page (caller puts it); on -EINVAL the page is still returned for
 * the caller to release.
 */
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	unsigned long blk_size = sbi->blocksize;
	size_t crc_offset = 0;
	__u32 crc = 0;

	*cp_page = get_meta_page(sbi, cp_addr);
	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset >= blk_size) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	/* CRC is stored inside the block at checksum_offset */
	crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
							+ crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}
J
Jaegeuk Kim 已提交
695

696 697 698 699 700 701 702
/*
 * Validate one checkpoint pack: its header and footer blocks must both
 * pass CRC checks and carry the same version.  Returns the header page
 * (footer page released) or NULL if the pack is invalid.
 */
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	int err;

	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		goto invalid_cp1;
	pre_version = *version;

	/* footer block is the last block of the pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp2;
	cur_version = *version;

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

/*
 * Load the newest valid checkpoint at mount time.  Both cp packs are
 * validated; the one with the later version wins and is copied (with
 * any payload blocks) into sbi->ckpt.  Returns 0, -ENOMEM, or -EINVAL
 * when no valid pack exists or sanity checks fail.
 */
int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
	unsigned int cp_blks = 1 + __cp_payload(sbi);
	block_t cp_blk_no;
	int i;

	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out valid cp block involves read both
	 * sets( cp pack1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
		goto free_fail_no_cp;

	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;

	if (cp_blks <= 1)
		goto done;

	/* copy the extra cp payload blocks behind the header */
	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

free_fail_no_cp:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

810
/*
 * Link @inode into the per-type dirty inode list and set its dirty
 * flag; no-op if already flagged.  Caller holds inode_lock[type].
 */
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (is_inode_flag_set(inode, flag))
		return;

	set_inode_flag(inode, flag);
	list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
	stat_inc_dirty_inode(sbi, type);
}

823
/*
 * Unlink @inode from the per-type dirty list once it has no dirty
 * pages left; no-op while pages remain or if not flagged.  Caller
 * holds inode_lock[type].
 */
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
{
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;

	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
		return;

	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
}

835
/*
 * Account a newly dirtied data page of @inode: add the inode to the
 * dirty list (dirs always; files only with the DATA_FLUSH option) and
 * bump its dirty-page count.  Only dirs, regular files and symlinks
 * are tracked.
 */
void update_dirty_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
		__add_dirty_inode(inode, type);
	inode_inc_dirty_pages(inode);
	spin_unlock(&sbi->inode_lock[type]);

	SetPagePrivate(page);
	f2fs_trace_pid(page);
}

854
/*
 * Remove @inode from its dirty list under the proper lock; skips
 * untracked inode kinds and files when DATA_FLUSH is off.
 */
void remove_dirty_inode(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;

	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
		return;

	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
}

C
Chao Yu 已提交
871
/*
 * Flush every inode on the dirty list of @type, one at a time: grab a
 * reference, write its pages, then retry until the list is empty.
 * Returns 0, or -EIO on checkpoint error.
 */
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
{
	struct list_head *head;
	struct inode *inode;
	struct f2fs_inode_info *fi;
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
retry:
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	spin_lock(&sbi->inode_lock[type]);

	head = &sbi->inode_list[type];
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[type]);
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
		return 0;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[type]);
	if (inode) {
		filemap_fdatawrite(inode->i_mapping);
		iput(inode);
	} else {
		/*
		 * igrab failed: the inode is being freed.  We should
		 * submit the bio, since there may be several writebacking
		 * dentry pages in the freeing inode.
		 */
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
		cond_resched();
	}
	goto retry;
}

912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	while (total--) {
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
928
		fi = list_first_entry(head, struct f2fs_inode_info,
929 930 931 932
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		if (inode) {
933 934 935 936 937
			sync_inode_metadata(inode, 0);

			/* it's on eviction */
			if (is_inode_flag_set(inode, FI_DIRTY_INODE))
				update_inode_page(inode);
938 939 940 941 942 943
			iput(inode);
		}
	};
	return 0;
}

J
Jaegeuk Kim 已提交
944
/*
 * Freeze all the FS-operations for checkpoint.
 *
 * Takes f2fs_lock_all and node_write, retrying after flushing any
 * remaining dirty dentries, dirty inode metadata, and dirty node pages
 * until the fs is quiescent.  On success the locks are held; the
 * caller releases them via unblock_operations().  Returns 0 or the
 * first flush error (locks dropped on the error paths shown below).
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		f2fs_unlock_all(sbi);
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush.
	 */
retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, &wbc);
		if (err) {
			f2fs_unlock_all(sbi);
			goto out;
		}
		goto retry_flush_nodes;
	}
out:
	blk_finish_plug(&plug);
	return err;
}

/* Release the locks taken by block_operations(). */
static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

1005 1006 1007 1008 1009 1010 1011
/*
 * Sleep until all in-flight checkpoint data writeback completes,
 * polling the F2FS_WB_CP_DATA count with a 5s timeout per wait so we
 * cannot hang forever on a lost wakeup.
 */
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

		if (!get_pages(sbi, F2FS_WB_CP_DATA))
			break;

		io_schedule_timeout(5*HZ);
	}
	finish_wait(&sbi->cp_wait, &wait);
}

1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048 1049 1050
/*
 * Recompute the checkpoint flag word under cp_lock for this checkpoint:
 * umount/fastboot reflect the cp reason, orphan-present tracks the live
 * orphan count, fsck is set sticky when requested, and CRC recovery is
 * always enabled.
 */
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);

	spin_lock(&sbi->cp_lock);

	if (cpc->reason == CP_UMOUNT)
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason == CP_FASTBOOT)
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);

	spin_unlock(&sbi->cp_lock);
}

C
Chao Yu 已提交
1051
/*
 * Write one checkpoint pack to disk.
 *
 * The on-disk layout written here, starting at __start_cp_next_addr():
 *   [cp block][cp payload blocks][orphan blocks][data summaries]
 *   [node summaries (umount/fastboot only)][trailing cp block]
 * The checkpoint only becomes valid once the trailing cp block (with a
 * matching version/CRC) hits the media, so the write/wait ordering below
 * must not be rearranged.
 *
 * Returns 0 on success, -EIO if a checkpoint error was observed at any
 * of the intermediate barriers.
 */
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	nid_t last_nid = nm_i->next_scan_nid;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	int i;
	int cp_payload_blks = __cp_payload(sbi);
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX);
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;
	}

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	/* record the current node segments so recovery can resume allocation */
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	/* same for the current data segments */
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp  + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi, false);
	spin_lock(&sbi->cp_lock);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock(&sbi->cp_lock);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	/* node summaries are kept in the pack only for umount/fastboot */
	if (__remain_node_summaries(cpc->reason))
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	else
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);

	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	/* checksum covers the checkpoint block up to checksum_offset */
	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_next_addr(sbi);

	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	/* write out checkpoint buffer at block 0 */
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);

	if (__remain_node_summaries(cpc->reason)) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* writeout checkpoint block */
	update_meta_page(sbi, ckpt, start_blk);

	/* wait for previous submitted node/meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);

	/* Here, we only have one bio having CP pack */
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);

	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	/* checkpoint is durable now; drop the completed-ino bookkeeping */
	release_ino_entry(sbi, false);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	clear_sbi_flag(sbi, SBI_IS_DIRTY);
	clear_sbi_flag(sbi, SBI_NEED_CP);
	/* flip to the other checkpoint pack for the next round */
	__set_cp_next_pack(sbi);

	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

	return 0;
}

J
Jaegeuk Kim 已提交
1215
/*
 * We guarantee that this checkpoint procedure will not fail.
 *
 * Top-level checkpoint entry: freezes FS operations via
 * block_operations(), flushes cached NAT/SIT entries, then writes the
 * checkpoint pack through do_checkpoint().  Serialized by cp_mutex.
 *
 * Returns 0 on success; -EIO if a prior checkpoint error is pending,
 * -EROFS on a read-only fs, or the error from block_operations().
 */
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	mutex_lock(&sbi->cp_mutex);

	/* nothing to do: clean fs and a reason that tolerates skipping */
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_flush_merged_bios(sbi);

	/* this is the case of multiple fstrims without any changes */
	if (cpc->reason == CP_DISCARD) {
		if (!exist_trim_candidates(sbi, cpc)) {
			unblock_operations(sbi);
			goto out;
		}

		/* nothing else dirty: flush SIT/prefree and skip a full cp */
		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
				SIT_I(sbi)->dirty_sentries == 0 &&
				prefree_segments(sbi) == 0) {
			flush_sit_entries(sbi, cpc);
			clear_prefree_segments(sbi, cpc);
			unblock_operations(sbi);
			goto out;
		}
	}

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
	flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	err = do_checkpoint(sbi, cpc);
	if (err)
		release_discard_addrs(sbi);
	else
		clear_prefree_segments(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason == CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	return err;
}

J
Jaegeuk Kim 已提交
1300
void init_ino_entry_info(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
1301
{
J
Jaegeuk Kim 已提交
1302 1303 1304
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
1305 1306 1307 1308 1309 1310
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
J
Jaegeuk Kim 已提交
1311 1312
	}

C
Chao Yu 已提交
1313
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1314 1315
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
J
Jaegeuk Kim 已提交
1316 1317
}

1318
/*
 * Create the slab caches used by the checkpoint code: one for ino_entry
 * and one for inode_entry objects.  Uses goto-based unwind so a partial
 * setup is fully torn down.  Returns 0 on success, -ENOMEM on failure.
 */
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		goto fail;

	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab)
		goto free_ino_entry;

	return 0;

free_ino_entry:
	kmem_cache_destroy(ino_entry_slab);
fail:
	return -ENOMEM;
}

/*
 * Tear down the slab caches created by create_checkpoint_caches().
 * Caller must ensure no ino_entry/inode_entry objects are still live.
 */
void destroy_checkpoint_caches(void)
{
	kmem_cache_destroy(ino_entry_slab);
	kmem_cache_destroy(inode_entry_slab);
}