/*
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

J
Jaegeuk Kim 已提交
26
/* slab cache backing the struct ino_entry objects tracked in sbi->im[] */
static struct kmem_cache *ino_entry_slab;
/* NOTE(review): not referenced in this part of the file; users live elsewhere */
struct kmem_cache *inode_entry_slab;
J
Jaegeuk Kim 已提交
28

29 30
/*
 * Flag the checkpoint as being in error and turn the superblock read-only.
 * Merged bios are flushed as well, except when @end_io is true.
 */
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	sbi->sb->s_flags |= MS_RDONLY;

	if (end_io)
		return;

	f2fs_flush_merged_bios(sbi);
}

J
Jaegeuk Kim 已提交
37
/*
 * Grab the meta-mapping page at @index without reading it from disk.
 * Allocation is retried until it succeeds, so we guarantee no failure
 * on the returned page.  Pending writeback is waited for and the page
 * is marked uptodate before it is handed back.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page;

	for (;;) {
		page = f2fs_grab_cache_page(mapping, index, false);
		if (page)
			break;
		cond_resched();
	}

	f2fs_wait_on_page_writeback(page, META, true);
	if (!PageUptodate(page))
		SetPageUptodate(page);

	return page;
}

J
Jaegeuk Kim 已提交
56
/*
J
Jaegeuk Kim 已提交
57 58
 * We guarantee no failure on the returned page.
 */
59 60
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
J
Jaegeuk Kim 已提交
61
{
G
Gu Zheng 已提交
62
	struct address_space *mapping = META_MAPPING(sbi);
J
Jaegeuk Kim 已提交
63
	struct page *page;
64
	struct f2fs_io_info fio = {
65
		.sbi = sbi,
66
		.type = META,
M
Mike Christie 已提交
67 68
		.op = REQ_OP_READ,
		.op_flags = READ_SYNC | REQ_META | REQ_PRIO,
69 70
		.old_blkaddr = index,
		.new_blkaddr = index,
71
		.encrypted_page = NULL,
72
	};
73 74

	if (unlikely(!is_meta))
M
Mike Christie 已提交
75
		fio.op_flags &= ~REQ_META;
J
Jaegeuk Kim 已提交
76
repeat:
77
	page = f2fs_grab_cache_page(mapping, index, false);
J
Jaegeuk Kim 已提交
78 79 80 81
	if (!page) {
		cond_resched();
		goto repeat;
	}
82 83 84
	if (PageUptodate(page))
		goto out;

85 86
	fio.page = page;

87 88
	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
J
Jaegeuk Kim 已提交
89
		goto repeat;
90
	}
J
Jaegeuk Kim 已提交
91

92
	lock_page(page);
93
	if (unlikely(page->mapping != mapping)) {
94 95 96
		f2fs_put_page(page, 1);
		goto repeat;
	}
97 98 99 100 101 102 103

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
	if (unlikely(!PageUptodate(page)))
104
		f2fs_stop_checkpoint(sbi, false);
105
out:
J
Jaegeuk Kim 已提交
106 107 108
	return page;
}

109 110 111 112 113 114 115 116 117 118 119
/* Read the meta page at @index with REQ_META kept in the request flags. */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	const bool is_meta = true;

	return __get_meta_page(sbi, index, is_meta);
}

/*
 * for POR only: same as get_meta_page(), but the read is issued
 * without REQ_META.
 */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	const bool is_meta = false;

	return __get_meta_page(sbi, index, is_meta);
}

120
/*
 * Check whether @blkaddr lies inside the range allowed for the given
 * meta @type.  META_NAT is always accepted; any unknown type is a BUG().
 */
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	bool valid = true;

	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			valid = false;
		break;
	case META_SSA:
		/* must fall in [ssa_blkaddr, MAIN_BLKADDR) */
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			valid = false;
		break;
	case META_CP:
		/* must fall in [start of current cp, sit_base_addr) */
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			valid = false;
		break;
	case META_POR:
		/* must fall in [MAIN_BLKADDR, MAX_BLKADDR) */
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			valid = false;
		break;
	default:
		BUG();
	}

	return valid;
}

/*
152
 * Readahead CP/NAT/SIT/SSA pages
153
 */
154 155
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
156 157
{
	struct page *page;
158
	block_t blkno = start;
159
	struct f2fs_io_info fio = {
160
		.sbi = sbi,
161
		.type = META,
M
Mike Christie 已提交
162
		.op = REQ_OP_READ,
163
		.op_flags = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : REQ_RAHEAD,
164
		.encrypted_page = NULL,
165
	};
C
Chao Yu 已提交
166
	struct blk_plug plug;
167

168
	if (unlikely(type == META_POR))
M
Mike Christie 已提交
169
		fio.op_flags &= ~REQ_META;
170

C
Chao Yu 已提交
171
	blk_start_plug(&plug);
172 173
	for (; nrpages-- > 0; blkno++) {

174 175 176
		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

177 178
		switch (type) {
		case META_NAT:
179 180
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
181
				blkno = 0;
182
			/* get nat block addr */
183
			fio.new_blkaddr = current_nat_addr(sbi,
184 185 186 187
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
188
			fio.new_blkaddr = current_sit_addr(sbi,
189 190
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
191
		case META_SSA:
192
		case META_CP:
193
		case META_POR:
194
			fio.new_blkaddr = blkno;
195 196 197 198 199
			break;
		default:
			BUG();
		}

200 201
		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
202 203 204 205 206 207 208
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

209
		fio.page = page;
210
		fio.old_blkaddr = fio.new_blkaddr;
211
		f2fs_submit_page_mbio(&fio);
212 213 214 215
		f2fs_put_page(page, 0);
	}
out:
	f2fs_submit_merged_bio(sbi, META, READ);
C
Chao Yu 已提交
216
	blk_finish_plug(&plug);
217 218 219
	return blkno - start;
}

220 221 222 223 224 225
/*
 * Start a synchronous META_POR readahead of BIO_MAX_PAGES blocks from
 * @index, but only when the page at @index is missing from the meta
 * mapping or not uptodate.
 */
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page = find_get_page(META_MAPPING(sbi), index);
	bool need_ra = !page || !PageUptodate(page);

	f2fs_put_page(page, 0);

	if (need_ra)
		ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}

J
Jaegeuk Kim 已提交
234 235 236
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
237
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
J
Jaegeuk Kim 已提交
238

239 240
	trace_f2fs_writepage(page, META);

241
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
242
		goto redirty_out;
243
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
244
		goto redirty_out;
245
	if (unlikely(f2fs_cp_error(sbi)))
246
		goto redirty_out;
J
Jaegeuk Kim 已提交
247

248 249
	write_meta_page(sbi, page);
	dec_page_count(sbi, F2FS_DIRTY_META);
250 251 252 253

	if (wbc->for_reclaim)
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);

254
	unlock_page(page);
255

256
	if (unlikely(f2fs_cp_error(sbi)))
257
		f2fs_submit_merged_bio(sbi, META, WRITE);
258

259
	return 0;
260 261

redirty_out:
262
	redirty_page_for_writepage(wbc, page);
263
	return AOP_WRITEPAGE_ACTIVATE;
J
Jaegeuk Kim 已提交
264 265 266 267 268
}

static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
269
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
270
	long diff, written;
J
Jaegeuk Kim 已提交
271

272
	/* collect a number of dirty meta pages and write together */
273 274
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
275
		goto skip_write;
J
Jaegeuk Kim 已提交
276

Y
Yunlei He 已提交
277 278
	trace_f2fs_writepages(mapping->host, wbc, META);

J
Jaegeuk Kim 已提交
279 280
	/* if mounting is failed, skip writing node pages */
	mutex_lock(&sbi->cp_mutex);
281 282
	diff = nr_pages_to_write(sbi, META, wbc);
	written = sync_meta_pages(sbi, META, wbc->nr_to_write);
J
Jaegeuk Kim 已提交
283
	mutex_unlock(&sbi->cp_mutex);
284
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
J
Jaegeuk Kim 已提交
285
	return 0;
286 287 288

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
Y
Yunlei He 已提交
289
	trace_f2fs_writepages(mapping->host, wbc, META);
290
	return 0;
J
Jaegeuk Kim 已提交
291 292 293 294 295
}

long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
						long nr_to_write)
{
G
Gu Zheng 已提交
296
	struct address_space *mapping = META_MAPPING(sbi);
297
	pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
J
Jaegeuk Kim 已提交
298 299 300 301 302
	struct pagevec pvec;
	long nwritten = 0;
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
C
Chao Yu 已提交
303
	struct blk_plug plug;
J
Jaegeuk Kim 已提交
304 305 306

	pagevec_init(&pvec, 0);

C
Chao Yu 已提交
307 308
	blk_start_plug(&plug);

J
Jaegeuk Kim 已提交
309 310 311 312 313
	while (index <= end) {
		int i, nr_pages;
		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
				PAGECACHE_TAG_DIRTY,
				min(end - index, (pgoff_t)PAGEVEC_SIZE-1) + 1);
314
		if (unlikely(nr_pages == 0))
J
Jaegeuk Kim 已提交
315 316 317 318
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
319

320
			if (prev == ULONG_MAX)
321 322 323 324 325 326
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

J
Jaegeuk Kim 已提交
327
			lock_page(page);
328 329 330 331 332 333 334 335 336 337 338

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

339 340 341
			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
342 343 344
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

345
			if (mapping->a_ops->writepage(page, &wbc)) {
346 347 348
				unlock_page(page);
				break;
			}
349
			nwritten++;
350
			prev = page->index;
351
			if (unlikely(nwritten >= nr_to_write))
J
Jaegeuk Kim 已提交
352 353 354 355 356
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
357
stop:
J
Jaegeuk Kim 已提交
358
	if (nwritten)
J
Jaegeuk Kim 已提交
359
		f2fs_submit_merged_bio(sbi, type, WRITE);
J
Jaegeuk Kim 已提交
360

C
Chao Yu 已提交
361 362
	blk_finish_plug(&plug);

J
Jaegeuk Kim 已提交
363 364 365 366 367
	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
368 369
	trace_f2fs_set_page_dirty(page, META);

370 371
	if (!PageUptodate(page))
		SetPageUptodate(page);
J
Jaegeuk Kim 已提交
372
	if (!PageDirty(page)) {
373
		f2fs_set_page_dirty_nobuffers(page);
374
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
375
		SetPagePrivate(page);
J
Jaegeuk Kim 已提交
376
		f2fs_trace_pid(page);
J
Jaegeuk Kim 已提交
377 378 379 380 381 382 383 384 385
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
386 387
	.invalidatepage = f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
388 389 390
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
J
Jaegeuk Kim 已提交
391 392
};

J
Jaegeuk Kim 已提交
393
/*
 * Make sure @ino is tracked in sbi->im[@type].
 *
 * An entry is allocated up front; if @ino is already in the radix tree
 * the spare entry is freed again.  A failed radix tree insertion is
 * retried.  ino_num is bumped here for every type except ORPHAN_INO.
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *spare;

	spare = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);

	for (;;) {
		radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

		spin_lock(&im->ino_lock);
		e = radix_tree_lookup(&im->ino_root, ino);
		if (e)
			break;

		e = spare;
		if (!radix_tree_insert(&im->ino_root, ino, e)) {
			memset(e, 0, sizeof(struct ino_entry));
			e->ino = ino;

			list_add_tail(&e->list, &im->ino_list);
			if (type != ORPHAN_INO)
				im->ino_num++;
			break;
		}

		/* insertion failed: drop lock and preload, then try again */
		spin_unlock(&im->ino_lock);
		radix_tree_preload_end();
	}
	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	/* the preallocated entry was not consumed */
	if (e != spare)
		kmem_cache_free(ino_entry_slab, spare);
}

J
Jaegeuk Kim 已提交
425
/*
 * Drop @ino from sbi->im[@type] if it is tracked there: unlink it from
 * the list and radix tree, decrement ino_num and free the entry.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		spin_unlock(&im->ino_lock);
		return;
	}

	list_del(&e->list);
	radix_tree_delete(&im->ino_root, ino);
	im->ino_num--;
	spin_unlock(&im->ino_lock);

	/* free outside of the spinlock */
	kmem_cache_free(ino_entry_slab, e);
}

443
/* add new dirty ino entry into list */
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	__add_ino_entry(sbi, ino, type);
}

449
/* remove dirty ino entry from list */
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	__remove_ino_entry(sbi, ino, type);
}

/*
 * Report whether @ino currently has an entry in sbi->im[@mode].
 * mode should be APPEND_INO or UPDATE_INO
 */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	bool found;

	spin_lock(&im->ino_lock);
	found = radix_tree_lookup(&im->ino_root, ino) != NULL;
	spin_unlock(&im->ino_lock);

	return found;
}

467
/*
 * Free every tracked ino entry for the types APPEND_INO..UPDATE_INO,
 * and additionally for ORPHAN_INO when @all is true.
 */
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	struct ino_entry *cur, *next;
	int type = all ? ORPHAN_INO : APPEND_INO;

	for (; type <= UPDATE_INO; type++) {
		struct inode_management *im = &sbi->im[type];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(cur, next, &im->ino_list, list) {
			list_del(&cur->list);
			radix_tree_delete(&im->ino_root, cur->ino);
			kmem_cache_free(ino_entry_slab, cur);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

J
Jaegeuk Kim 已提交
486
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
487
{
488
	struct inode_management *im = &sbi->im[ORPHAN_INO];
J
Jaegeuk Kim 已提交
489 490
	int err = 0;

491
	spin_lock(&im->ino_lock);
J
Jaegeuk Kim 已提交
492 493

#ifdef CONFIG_F2FS_FAULT_INJECTION
494
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
J
Jaegeuk Kim 已提交
495 496 497 498
		spin_unlock(&im->ino_lock);
		return -ENOSPC;
	}
#endif
499
	if (unlikely(im->ino_num >= sbi->max_orphans))
J
Jaegeuk Kim 已提交
500
		err = -ENOSPC;
J
Jaegeuk Kim 已提交
501
	else
502 503
		im->ino_num++;
	spin_unlock(&im->ino_lock);
504

J
Jaegeuk Kim 已提交
505 506 507
	return err;
}

J
Jaegeuk Kim 已提交
508 509
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
510 511 512 513 514 515
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
J
Jaegeuk Kim 已提交
516 517
}

518
void add_orphan_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
519
{
520
	/* add new orphan ino entry into list */
521 522
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, ORPHAN_INO);
	update_inode_page(inode);
J
Jaegeuk Kim 已提交
523 524 525 526
}

/* remove orphan entry from orphan list */
void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

531
/*
 * Recover one orphan inode: register it as an orphan, load it, clear its
 * link count and let iput() truncate its data.
 *
 * Returns 0 on success.  If no orphan slot can be acquired, or the node
 * still has a block address after iput(), SBI_NEED_FSCK is set, a warning
 * is logged, and an error is returned (-EIO in the latter case).
 */
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err;

	err = acquire_orphan_inode(sbi);
	if (err)
		goto need_fsck;

	__add_ino_entry(sbi, ino, ORPHAN_INO);

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * there should be a bug that we can't find the entry
		 * to orphan inode.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	get_node_info(sbi, ino, &ni);

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		err = -EIO;
		goto need_fsck;
	}

	__remove_ino_entry(sbi, ino, ORPHAN_INO);
	return 0;

need_fsck:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
			__func__, ino);
	return err;
}

576
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
577
{
578
	block_t start_blk, orphan_blocks, i, j;
579
	int err;
J
Jaegeuk Kim 已提交
580

581
	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
582
		return 0;
J
Jaegeuk Kim 已提交
583

W
Wanpeng Li 已提交
584
	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
585
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
J
Jaegeuk Kim 已提交
586

587
	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
588

589
	for (i = 0; i < orphan_blocks; i++) {
J
Jaegeuk Kim 已提交
590 591 592 593 594 595
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
596 597 598 599 600
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
				return err;
			}
J
Jaegeuk Kim 已提交
601 602 603 604
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
605
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
606
	return 0;
J
Jaegeuk Kim 已提交
607 608 609 610
}

/*
 * Serialize the in-memory orphan list into orphan blocks, starting at
 * meta block @start_blk.  Each block is stamped with its 1-based index,
 * the total block count and its entry count, then marked dirty.
 */
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct inode_management *im = &sbi->im[ORPHAN_INO];
	struct f2fs_orphan_block *orphan_blk = NULL;
	struct ino_entry *orphan = NULL;
	struct page *page = NULL;
	unsigned short orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);
	unsigned short index = 1;
	unsigned int nentries = 0;

	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */

	/* loop for each orphan inode entry and write them in Journal block */
	list_for_each_entry(orphan, &im->ino_list, list) {
		if (!page) {
			/* start a fresh, zeroed orphan block */
			page = grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries != F2FS_ORPHANS_PER_BLOCK)
			continue;

		/*
		 * an orphan block is full of 1020 entries,
		 * then we need to flush current orphan blocks
		 * and bring another one in memory
		 */
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
		index++;
		nentries = 0;
		page = NULL;
	}

	/* flush the last, partially filled block */
	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

666 667 668
/*
 * Read the checkpoint block at @cp_addr, verify its CRC and report its
 * version.
 *
 * @cp_block: set to the mapped checkpoint block
 * @cp_page:  set to the page holding it; it is set even on failure, so
 *            the caller is responsible for releasing it in every case
 * @version:  set to the checkpoint version on success only
 *
 * Returns 0 on success, -EINVAL if the checksum offset or CRC is invalid.
 */
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	unsigned long blk_size = sbi->blocksize;
	size_t crc_offset = 0;
	__u32 crc = 0;

	*cp_page = get_meta_page(sbi, cp_addr);
	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	/*
	 * The 4-byte CRC is read at crc_offset, so the whole word has to
	 * fit inside the block; an offset in the last 3 bytes would make
	 * the read below run past the end of the page.
	 */
	if (crc_offset > blk_size - sizeof(__le32)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	crc = le32_to_cpu(*((__le32 *)((unsigned char *)*cp_block
							+ crc_offset)));
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}
J
Jaegeuk Kim 已提交
694

695 696 697 698 699 700 701
/*
 * Validate the checkpoint pack starting at @cp_addr by comparing the
 * version of its first block with that of its last block.
 *
 * Returns the (still held) page of the first block and stores the pack
 * version in @version when both blocks are valid and agree; otherwise
 * releases the pages and returns NULL.
 */
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;

	/* first block of the pack */
	if (get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version))
		goto invalid_cp1;
	pre_version = *version;

	/* last block of the pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	if (get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version))
		goto invalid_cp2;
	cur_version = *version;

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}

invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
W
Wanpeng Li 已提交
736
	unsigned int cp_blks = 1 + __cp_payload(sbi);
C
Changman Lee 已提交
737 738
	block_t cp_blk_no;
	int i;
J
Jaegeuk Kim 已提交
739

C
Changman Lee 已提交
740
	sbi->ckpt = kzalloc(cp_blks * blk_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
741 742 743 744 745 746 747 748 749 750
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out valid cp block involves read both
	 * sets( cp pack1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
751 752
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
J
Jaegeuk Kim 已提交
753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

771 772 773 774
	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
		goto fail_no_cp;

775 776 777 778 779
	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;

C
Changman Lee 已提交
780 781 782 783 784 785 786 787 788 789 790 791 792 793 794 795 796
	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
J
Jaegeuk Kim 已提交
797 798 799 800 801 802 803 804 805
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

806
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
J
Jaegeuk Kim 已提交
807
{
808
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
809
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
J
Jaegeuk Kim 已提交
810

811
	if (is_inode_flag_set(inode, flag))
812
		return;
813

814 815
	set_inode_flag(inode, flag);
	list_add_tail(&F2FS_I(inode)->dirty_list, &sbi->inode_list[type]);
C
Chao Yu 已提交
816
	stat_inc_dirty_inode(sbi, type);
817 818
}

819
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
C
Chao Yu 已提交
820
{
821
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
C
Chao Yu 已提交
822

823
	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
C
Chao Yu 已提交
824 825
		return;

826 827
	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
C
Chao Yu 已提交
828
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
C
Chao Yu 已提交
829 830
}

831
void update_dirty_page(struct inode *inode, struct page *page)
832
{
833
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
834
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
835

836 837
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
838
		return;
839

840 841
	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
842
		__add_dirty_inode(inode, type);
843
	inode_inc_dirty_pages(inode);
844 845
	spin_unlock(&sbi->inode_lock[type]);

846
	SetPagePrivate(page);
J
Jaegeuk Kim 已提交
847
	f2fs_trace_pid(page);
848 849
}

850
void remove_dirty_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
851
{
852
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
853
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
J
Jaegeuk Kim 已提交
854

855 856
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
857 858
		return;

859 860 861
	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

862 863 864
	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
865 866
}

C
Chao Yu 已提交
867
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
J
Jaegeuk Kim 已提交
868
{
869
	struct list_head *head;
J
Jaegeuk Kim 已提交
870
	struct inode *inode;
871
	struct f2fs_inode_info *fi;
872 873 874 875 876
	bool is_dir = (type == DIR_INODE);

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
J
Jaegeuk Kim 已提交
877
retry:
878
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
879
		return -EIO;
880

881
	spin_lock(&sbi->inode_lock[type]);
882

883
	head = &sbi->inode_list[type];
J
Jaegeuk Kim 已提交
884
	if (list_empty(head)) {
885
		spin_unlock(&sbi->inode_lock[type]);
886 887 888
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
C
Chao Yu 已提交
889
		return 0;
J
Jaegeuk Kim 已提交
890
	}
891 892
	fi = list_entry(head->next, struct f2fs_inode_info, dirty_list);
	inode = igrab(&fi->vfs_inode);
893
	spin_unlock(&sbi->inode_lock[type]);
J
Jaegeuk Kim 已提交
894
	if (inode) {
895
		filemap_fdatawrite(inode->i_mapping);
J
Jaegeuk Kim 已提交
896 897 898 899 900 901
		iput(inode);
	} else {
		/*
		 * We should submit bio, since it exists several
		 * wribacking dentry pages in the freeing inode.
		 */
J
Jaegeuk Kim 已提交
902
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
903
		cond_resched();
J
Jaegeuk Kim 已提交
904 905 906 907
	}
	goto retry;
}

908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925 926 927 928
/*
 * Sync the metadata of inodes on the DIRTY_META list.  The number of
 * passes is bounded by the F2FS_DIRTY_IMETA count sampled at entry.
 *
 * Returns 0 normally, or -EIO as soon as a checkpoint error is detected.
 */
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	struct f2fs_inode_info *fi;
	struct inode *inode;
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	while (total--) {
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
		fi = list_entry(head->next, struct f2fs_inode_info,
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);

		if (!inode)
			continue;

		sync_inode_metadata(inode, 0);

		/* it's on eviction */
		if (is_inode_flag_set(inode, FI_DIRTY_INODE))
			update_inode_page(inode);
		iput(inode);
	}
	return 0;
}

J
Jaegeuk Kim 已提交
940
/*
J
Jaegeuk Kim 已提交
941 942
 * Freeze all the FS-operations for checkpoint.
 */
943
static int block_operations(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
944 945 946 947 948 949
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
950
	struct blk_plug plug;
951
	int err = 0;
952 953 954

	blk_start_plug(&plug);

955
retry_flush_dents:
956
	f2fs_lock_all(sbi);
J
Jaegeuk Kim 已提交
957 958
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
959
		f2fs_unlock_all(sbi);
C
Chao Yu 已提交
960 961
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
962
			goto out;
963
		goto retry_flush_dents;
J
Jaegeuk Kim 已提交
964 965
	}

966 967 968 969 970 971 972 973
	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		goto retry_flush_dents;
	}

J
Jaegeuk Kim 已提交
974
	/*
A
arter97 已提交
975
	 * POR: we should ensure that there are no dirty node pages
J
Jaegeuk Kim 已提交
976 977
	 * until finishing nat/sit flush.
	 */
978
retry_flush_nodes:
979
	down_write(&sbi->node_write);
J
Jaegeuk Kim 已提交
980 981

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
982
		up_write(&sbi->node_write);
983
		err = sync_node_pages(sbi, &wbc);
C
Chao Yu 已提交
984
		if (err) {
985 986 987
			f2fs_unlock_all(sbi);
			goto out;
		}
988
		goto retry_flush_nodes;
J
Jaegeuk Kim 已提交
989
	}
990
out:
991
	blk_finish_plug(&plug);
992
	return err;
J
Jaegeuk Kim 已提交
993 994 995 996
}

static void unblock_operations(struct f2fs_sb_info *sbi)
{
997
	up_write(&sbi->node_write);
998

999
	build_free_nids(sbi, false);
1000
	f2fs_unlock_all(sbi);
J
Jaegeuk Kim 已提交
1001 1002
}

1003 1004 1005 1006 1007 1008 1009
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

1010
		if (!get_pages(sbi, F2FS_WB_CP_DATA))
1011 1012
			break;

1013
		io_schedule_timeout(5*HZ);
1014 1015 1016 1017
	}
	finish_wait(&sbi->cp_wait, &wait);
}

1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042 1043 1044 1045 1046 1047 1048
/*
 * Refresh the checkpoint flags under sbi->cp_lock according to the
 * checkpoint reason in @cpc, the current orphan count and the
 * SBI_NEED_FSCK state.  CP_CRC_RECOVERY_FLAG is always set.
 */
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long nr_orphans = sbi->im[ORPHAN_INO].ino_num;

	spin_lock(&sbi->cp_lock);

	if (cpc->reason != CP_UMOUNT)
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

	if (cpc->reason != CP_FASTBOOT)
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (nr_orphans == 0)
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	/* CP_FSCK_FLAG is only ever set here, never cleared */
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);

	spin_unlock(&sbi->cp_lock);
}

C
Chao Yu 已提交
1049
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1050 1051
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1052
	struct f2fs_nm_info *nm_i = NM_I(sbi);
1053
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
1054
	nid_t last_nid = nm_i->next_scan_nid;
J
Jaegeuk Kim 已提交
1055 1056
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
J
Jaegeuk Kim 已提交
1057
	__u32 crc32 = 0;
J
Jaegeuk Kim 已提交
1058
	int i;
W
Wanpeng Li 已提交
1059
	int cp_payload_blks = __cp_payload(sbi);
1060 1061 1062
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;
J
Jaegeuk Kim 已提交
1063 1064

	/* Flush all the NAT/SIT pages */
1065
	while (get_pages(sbi, F2FS_DIRTY_META)) {
J
Jaegeuk Kim 已提交
1066
		sync_meta_pages(sbi, META, LONG_MAX);
1067
		if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1068
			return -EIO;
1069
	}
J
Jaegeuk Kim 已提交
1070 1071 1072 1073 1074 1075 1076 1077 1078 1079

	next_free_nid(sbi, &last_nid);

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
C
Chao Yu 已提交
1080
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
J
Jaegeuk Kim 已提交
1081 1082 1083 1084 1085 1086 1087
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
C
Chao Yu 已提交
1088
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
J
Jaegeuk Kim 已提交
1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);

	/* 2 cp  + n data seg summary + orphan inode blocks */
1102
	data_sum_blocks = npages_for_summary_flush(sbi, false);
1103
	spin_lock(&sbi->cp_lock);
C
Chao Yu 已提交
1104
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
1105
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
J
Jaegeuk Kim 已提交
1106
	else
1107 1108
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock(&sbi->cp_lock);
J
Jaegeuk Kim 已提交
1109

1110
	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
C
Changman Lee 已提交
1111 1112
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);
J
Jaegeuk Kim 已提交
1113

1114
	if (__remain_node_summaries(cpc->reason))
C
Chao Yu 已提交
1115
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
C
Changman Lee 已提交
1116 1117
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
1118
	else
C
Chao Yu 已提交
1119
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
C
Changman Lee 已提交
1120 1121
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);
1122

1123 1124
	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);
1125

J
Jaegeuk Kim 已提交
1126 1127 1128 1129
	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

K
Keith Mok 已提交
1130
	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
J
Jaegeuk Kim 已提交
1131 1132
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
J
Jaegeuk Kim 已提交
1133 1134
				= cpu_to_le32(crc32);

1135
	start_blk = __start_cp_next_addr(sbi);
J
Jaegeuk Kim 已提交
1136

1137 1138 1139
	/* need to wait for end_io results */
	wait_on_all_pages_writeback(sbi);
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1140
		return -EIO;
1141

J
Jaegeuk Kim 已提交
1142
	/* write out checkpoint buffer at block 0 */
C
Chao Yu 已提交
1143 1144 1145 1146 1147
	update_meta_page(sbi, ckpt, start_blk++);

	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);
C
Changman Lee 已提交
1148

1149
	if (orphan_num) {
J
Jaegeuk Kim 已提交
1150 1151 1152 1153 1154 1155
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;
1156 1157 1158 1159 1160 1161

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

1162
	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
1163

1164
	if (__remain_node_summaries(cpc->reason)) {
J
Jaegeuk Kim 已提交
1165 1166 1167 1168 1169
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* writeout checkpoint block */
C
Chao Yu 已提交
1170
	update_meta_page(sbi, ckpt, start_blk);
J
Jaegeuk Kim 已提交
1171 1172

	/* wait for previous submitted node/meta pages writeback */
1173
	wait_on_all_pages_writeback(sbi);
J
Jaegeuk Kim 已提交
1174

1175
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1176
		return -EIO;
1177

1178 1179
	filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
	filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
J
Jaegeuk Kim 已提交
1180 1181 1182

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
1183
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);
J
Jaegeuk Kim 已提交
1184 1185

	/* Here, we only have one bio having CP pack */
1186
	sync_meta_pages(sbi, META_FLUSH, LONG_MAX);
J
Jaegeuk Kim 已提交
1187

1188 1189 1190
	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

1191
	release_ino_entry(sbi, false);
1192 1193

	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
1194
		return -EIO;
1195

1196
	clear_sbi_flag(sbi, SBI_IS_DIRTY);
1197
	clear_sbi_flag(sbi, SBI_NEED_CP);
1198
	__set_cp_next_pack(sbi);
C
Chao Yu 已提交
1199

1200 1201 1202 1203 1204 1205 1206 1207 1208 1209
	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

C
Chao Yu 已提交
1210
	return 0;
J
Jaegeuk Kim 已提交
1211 1212
}

J
Jaegeuk Kim 已提交
1213
/*
A
arter97 已提交
1214
 * We guarantee that this checkpoint procedure will not fail.
J
Jaegeuk Kim 已提交
1215
 */
C
Chao Yu 已提交
1216
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1217 1218 1219
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
C
Chao Yu 已提交
1220
	int err = 0;
J
Jaegeuk Kim 已提交
1221

1222
	mutex_lock(&sbi->cp_mutex);
J
Jaegeuk Kim 已提交
1223

1224
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
1225 1226
		(cpc->reason == CP_FASTBOOT || cpc->reason == CP_SYNC ||
		(cpc->reason == CP_DISCARD && !sbi->discard_blks)))
J
Jaegeuk Kim 已提交
1227
		goto out;
C
Chao Yu 已提交
1228 1229
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
1230
		goto out;
C
Chao Yu 已提交
1231 1232 1233
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
1234
		goto out;
C
Chao Yu 已提交
1235
	}
W
Wanpeng Li 已提交
1236 1237 1238

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

C
Chao Yu 已提交
1239 1240
	err = block_operations(sbi);
	if (err)
1241
		goto out;
J
Jaegeuk Kim 已提交
1242

1243
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
1244

1245
	f2fs_flush_merged_bios(sbi);
J
Jaegeuk Kim 已提交
1246

1247 1248 1249 1250 1251 1252 1253
	/* this is the case of multiple fstrims without any changes */
	if (cpc->reason == CP_DISCARD && !is_sbi_flag_set(sbi, SBI_IS_DIRTY)) {
		f2fs_bug_on(sbi, NM_I(sbi)->dirty_nat_cnt);
		f2fs_bug_on(sbi, SIT_I(sbi)->dirty_sentries);
		f2fs_bug_on(sbi, prefree_segments(sbi));
		flush_sit_entries(sbi, cpc);
		clear_prefree_segments(sbi, cpc);
C
Chao Yu 已提交
1254
		f2fs_wait_all_discard_bio(sbi);
1255 1256 1257 1258
		unblock_operations(sbi);
		goto out;
	}

J
Jaegeuk Kim 已提交
1259 1260 1261 1262 1263
	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
1264
	ckpt_ver = cur_cp_version(ckpt);
J
Jaegeuk Kim 已提交
1265 1266 1267 1268
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi);
1269
	flush_sit_entries(sbi, cpc);
J
Jaegeuk Kim 已提交
1270 1271

	/* unlock all the fs_lock[] in do_checkpoint() */
C
Chao Yu 已提交
1272
	err = do_checkpoint(sbi, cpc);
1273 1274 1275 1276 1277 1278
	if (err) {
		release_discard_addrs(sbi);
	} else {
		clear_prefree_segments(sbi, cpc);
		f2fs_wait_all_discard_bio(sbi);
	}
C
Chao Yu 已提交
1279

J
Jaegeuk Kim 已提交
1280
	unblock_operations(sbi);
1281
	stat_inc_cp_count(sbi->stat_info);
1282 1283 1284 1285

	if (cpc->reason == CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);
1286 1287

	/* do checkpoint periodically */
1288
	f2fs_update_time(sbi, CP_TIME);
1289
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
J
Jaegeuk Kim 已提交
1290 1291
out:
	mutex_unlock(&sbi->cp_mutex);
C
Chao Yu 已提交
1292
	return err;
J
Jaegeuk Kim 已提交
1293 1294
}

J
Jaegeuk Kim 已提交
1295
void init_ino_entry_info(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
1296
{
J
Jaegeuk Kim 已提交
1297 1298 1299
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
1300 1301 1302 1303 1304 1305
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
J
Jaegeuk Kim 已提交
1306 1307
	}

C
Chao Yu 已提交
1308
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1309 1310
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
J
Jaegeuk Kim 已提交
1311 1312
}

1313
/*
 * Create the two slab caches used by this file: ino_entry_slab and
 * inode_entry_slab.
 *
 * Returns 0 on success or -ENOMEM if either cache cannot be created; on
 * failure of the second cache the first one is destroyed again.
 */
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		goto fail;

	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab)
		goto destroy_ino_entry;

	return 0;

destroy_ino_entry:
	kmem_cache_destroy(ino_entry_slab);
fail:
	return -ENOMEM;
}

void destroy_checkpoint_caches(void)
{
J
Jaegeuk Kim 已提交
1330
	kmem_cache_destroy(ino_entry_slab);
J
Jaegeuk Kim 已提交
1331 1332
	kmem_cache_destroy(inode_entry_slab);
}