checkpoint.c 36.3 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
J
Jaegeuk Kim 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22
 * fs/f2fs/checkpoint.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/bio.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/blkdev.h>
#include <linux/f2fs_fs.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

J
Jaegeuk Kim 已提交
26
static struct kmem_cache *ino_entry_slab;
27
struct kmem_cache *inode_entry_slab;
J
Jaegeuk Kim 已提交
28

29 30
/*
 * Put the filesystem into checkpoint-error state: set CP_ERROR_FLAG so no
 * further checkpoints are written.  When not called from IO-completion
 * context (end_io == false) it is safe to flush merged writes as well.
 */
void f2fs_stop_checkpoint(struct f2fs_sb_info *sbi, bool end_io)
{
	set_ckpt_flags(sbi, CP_ERROR_FLAG);
	if (!end_io)
		f2fs_flush_merged_writes(sbi);
}

J
Jaegeuk Kim 已提交
36
/*
 * Grab a locked meta page from the meta address space, retrying until the
 * page-cache allocation succeeds.  We guarantee no failure on the returned
 * page; the page is returned locked and marked uptodate.
 */
struct page *grab_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct address_space *mapping = META_MAPPING(sbi);
	struct page *page = NULL;
repeat:
	page = f2fs_grab_cache_page(mapping, index, false);
	if (!page) {
		/* allocation failed under memory pressure; yield and retry */
		cond_resched();
		goto repeat;
	}
	f2fs_wait_on_page_writeback(page, META, true);
	if (!PageUptodate(page))
		SetPageUptodate(page);
	return page;
}

J
Jaegeuk Kim 已提交
55
/*
J
Jaegeuk Kim 已提交
56 57
 * We guarantee no failure on the returned page.
 */
58 59
static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
							bool is_meta)
J
Jaegeuk Kim 已提交
60
{
G
Gu Zheng 已提交
61
	struct address_space *mapping = META_MAPPING(sbi);
J
Jaegeuk Kim 已提交
62
	struct page *page;
63
	struct f2fs_io_info fio = {
64
		.sbi = sbi,
65
		.type = META,
M
Mike Christie 已提交
66
		.op = REQ_OP_READ,
67
		.op_flags = REQ_META | REQ_PRIO,
68 69
		.old_blkaddr = index,
		.new_blkaddr = index,
70
		.encrypted_page = NULL,
71
		.is_meta = is_meta,
72
	};
73 74

	if (unlikely(!is_meta))
M
Mike Christie 已提交
75
		fio.op_flags &= ~REQ_META;
J
Jaegeuk Kim 已提交
76
repeat:
77
	page = f2fs_grab_cache_page(mapping, index, false);
J
Jaegeuk Kim 已提交
78 79 80 81
	if (!page) {
		cond_resched();
		goto repeat;
	}
82 83 84
	if (PageUptodate(page))
		goto out;

85 86
	fio.page = page;

87 88
	if (f2fs_submit_page_bio(&fio)) {
		f2fs_put_page(page, 1);
J
Jaegeuk Kim 已提交
89
		goto repeat;
90
	}
J
Jaegeuk Kim 已提交
91

92
	lock_page(page);
93
	if (unlikely(page->mapping != mapping)) {
94 95 96
		f2fs_put_page(page, 1);
		goto repeat;
	}
97 98 99 100 101 102

	/*
	 * if there is any IO error when accessing device, make our filesystem
	 * readonly and make sure do not write checkpoint with non-uptodate
	 * meta page.
	 */
103 104
	if (unlikely(!PageUptodate(page))) {
		memset(page_address(page), 0, PAGE_SIZE);
105
		f2fs_stop_checkpoint(sbi, false);
106
	}
107
out:
J
Jaegeuk Kim 已提交
108 109 110
	return page;
}

111 112 113 114 115 116 117 118 119 120 121
/* Read a page from the meta area (NAT/SIT/SSA/CP); never fails. */
struct page *get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, true);
}

/* for POR only: like get_meta_page() but without the REQ_META bio flag */
struct page *get_tmp_page(struct f2fs_sb_info *sbi, pgoff_t index)
{
	return __get_meta_page(sbi, index, false);
}

122
/*
 * Check whether @blkaddr lies inside the on-disk region that @type refers
 * to (NAT/SIT/SSA/CP metadata areas, or the main area for META_POR).
 * META_NAT is unchecked here; NAT addresses wrap in the caller.
 */
bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
{
	switch (type) {
	case META_NAT:
		break;
	case META_SIT:
		if (unlikely(blkaddr >= SIT_BLK_CNT(sbi)))
			return false;
		break;
	case META_SSA:
		if (unlikely(blkaddr >= MAIN_BLKADDR(sbi) ||
			blkaddr < SM_I(sbi)->ssa_blkaddr))
			return false;
		break;
	case META_CP:
		if (unlikely(blkaddr >= SIT_I(sbi)->sit_base_addr ||
			blkaddr < __start_cp_addr(sbi)))
			return false;
		break;
	case META_POR:
		if (unlikely(blkaddr >= MAX_BLKADDR(sbi) ||
			blkaddr < MAIN_BLKADDR(sbi)))
			return false;
		break;
	default:
		BUG();
	}

	return true;
}

/*
154
 * Readahead CP/NAT/SIT/SSA pages
155
 */
156 157
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
							int type, bool sync)
158 159
{
	struct page *page;
160
	block_t blkno = start;
161
	struct f2fs_io_info fio = {
162
		.sbi = sbi,
163
		.type = META,
M
Mike Christie 已提交
164
		.op = REQ_OP_READ,
165
		.op_flags = sync ? (REQ_META | REQ_PRIO) : REQ_RAHEAD,
166
		.encrypted_page = NULL,
167
		.in_list = false,
168
		.is_meta = (type != META_POR),
169
	};
C
Chao Yu 已提交
170
	struct blk_plug plug;
171

172
	if (unlikely(type == META_POR))
M
Mike Christie 已提交
173
		fio.op_flags &= ~REQ_META;
174

C
Chao Yu 已提交
175
	blk_start_plug(&plug);
176 177
	for (; nrpages-- > 0; blkno++) {

178 179 180
		if (!is_valid_blkaddr(sbi, blkno, type))
			goto out;

181 182
		switch (type) {
		case META_NAT:
183 184
			if (unlikely(blkno >=
					NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
185
				blkno = 0;
186
			/* get nat block addr */
187
			fio.new_blkaddr = current_nat_addr(sbi,
188 189 190 191
					blkno * NAT_ENTRY_PER_BLOCK);
			break;
		case META_SIT:
			/* get sit block addr */
192
			fio.new_blkaddr = current_sit_addr(sbi,
193 194
					blkno * SIT_ENTRY_PER_BLOCK);
			break;
195
		case META_SSA:
196
		case META_CP:
197
		case META_POR:
198
			fio.new_blkaddr = blkno;
199 200 201 202 203
			break;
		default:
			BUG();
		}

204 205
		page = f2fs_grab_cache_page(META_MAPPING(sbi),
						fio.new_blkaddr, false);
206 207 208 209 210 211 212
		if (!page)
			continue;
		if (PageUptodate(page)) {
			f2fs_put_page(page, 1);
			continue;
		}

213
		fio.page = page;
214
		f2fs_submit_page_bio(&fio);
215 216 217
		f2fs_put_page(page, 0);
	}
out:
C
Chao Yu 已提交
218
	blk_finish_plug(&plug);
219 220 221
	return blkno - start;
}

222 223 224 225 226 227
/*
 * Conditionally readahead META_POR pages starting at @index: only kick off
 * readahead when the page at @index is absent or not yet uptodate.
 */
void ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
{
	struct page *page;
	bool readahead = false;

	page = find_get_page(META_MAPPING(sbi), index);
	if (!page || !PageUptodate(page))
		readahead = true;
	f2fs_put_page(page, 0);

	if (readahead)
		ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
}

C
Chao Yu 已提交
236 237 238
static int __f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc,
				enum iostat_type io_type)
J
Jaegeuk Kim 已提交
239
{
240
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
J
Jaegeuk Kim 已提交
241

242 243
	trace_f2fs_writepage(page, META);

244 245 246 247 248
	if (unlikely(f2fs_cp_error(sbi))) {
		dec_page_count(sbi, F2FS_DIRTY_META);
		unlock_page(page);
		return 0;
	}
249
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
250
		goto redirty_out;
251
	if (wbc->for_reclaim && page->index < GET_SUM_BLOCK(sbi, 0))
252
		goto redirty_out;
J
Jaegeuk Kim 已提交
253

C
Chao Yu 已提交
254
	write_meta_page(sbi, page, io_type);
255
	dec_page_count(sbi, F2FS_DIRTY_META);
256 257

	if (wbc->for_reclaim)
258 259
		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
						0, page->index, META);
260

261
	unlock_page(page);
262

263
	if (unlikely(f2fs_cp_error(sbi)))
264
		f2fs_submit_merged_write(sbi, META);
265

266
	return 0;
267 268

redirty_out:
269
	redirty_page_for_writepage(wbc, page);
270
	return AOP_WRITEPAGE_ACTIVATE;
J
Jaegeuk Kim 已提交
271 272
}

C
Chao Yu 已提交
273 274 275 276 277 278
/* address_space_operations ->writepage hook for the meta mapping */
static int f2fs_write_meta_page(struct page *page,
				struct writeback_control *wbc)
{
	return __f2fs_write_meta_page(page, wbc, FS_META_IO);
}

J
Jaegeuk Kim 已提交
279 280 281
static int f2fs_write_meta_pages(struct address_space *mapping,
				struct writeback_control *wbc)
{
282
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
283
	long diff, written;
J
Jaegeuk Kim 已提交
284

285 286 287
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

288
	/* collect a number of dirty meta pages and write together */
289 290
	if (wbc->for_kupdate ||
		get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
291
		goto skip_write;
J
Jaegeuk Kim 已提交
292

293 294 295
	/* if locked failed, cp will flush dirty pages instead */
	if (!mutex_trylock(&sbi->cp_mutex))
		goto skip_write;
Y
Yunlei He 已提交
296

297
	trace_f2fs_writepages(mapping->host, wbc, META);
298
	diff = nr_pages_to_write(sbi, META, wbc);
C
Chao Yu 已提交
299
	written = sync_meta_pages(sbi, META, wbc->nr_to_write, FS_META_IO);
J
Jaegeuk Kim 已提交
300
	mutex_unlock(&sbi->cp_mutex);
301
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - written - diff);
J
Jaegeuk Kim 已提交
302
	return 0;
303 304 305

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
Y
Yunlei He 已提交
306
	trace_f2fs_writepages(mapping->host, wbc, META);
307
	return 0;
J
Jaegeuk Kim 已提交
308 309 310
}

long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
C
Chao Yu 已提交
311
				long nr_to_write, enum iostat_type io_type)
J
Jaegeuk Kim 已提交
312
{
G
Gu Zheng 已提交
313
	struct address_space *mapping = META_MAPPING(sbi);
J
Jan Kara 已提交
314
	pgoff_t index = 0, prev = ULONG_MAX;
J
Jaegeuk Kim 已提交
315 316
	struct pagevec pvec;
	long nwritten = 0;
J
Jan Kara 已提交
317
	int nr_pages;
J
Jaegeuk Kim 已提交
318 319 320
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};
C
Chao Yu 已提交
321
	struct blk_plug plug;
J
Jaegeuk Kim 已提交
322

323
	pagevec_init(&pvec);
J
Jaegeuk Kim 已提交
324

C
Chao Yu 已提交
325 326
	blk_start_plug(&plug);

J
Jan Kara 已提交
327
	while ((nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
328
				PAGECACHE_TAG_DIRTY))) {
J
Jan Kara 已提交
329
		int i;
J
Jaegeuk Kim 已提交
330 331 332

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
333

334
			if (prev == ULONG_MAX)
335 336 337 338 339 340
				prev = page->index - 1;
			if (nr_to_write != LONG_MAX && page->index != prev + 1) {
				pagevec_release(&pvec);
				goto stop;
			}

J
Jaegeuk Kim 已提交
341
			lock_page(page);
342 343 344 345 346 347 348 349 350 351 352

			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

353 354 355
			f2fs_wait_on_page_writeback(page, META, true);

			BUG_ON(PageWriteback(page));
356 357 358
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

C
Chao Yu 已提交
359
			if (__f2fs_write_meta_page(page, &wbc, io_type)) {
360 361 362
				unlock_page(page);
				break;
			}
363
			nwritten++;
364
			prev = page->index;
365
			if (unlikely(nwritten >= nr_to_write))
J
Jaegeuk Kim 已提交
366 367 368 369 370
				break;
		}
		pagevec_release(&pvec);
		cond_resched();
	}
371
stop:
J
Jaegeuk Kim 已提交
372
	if (nwritten)
373
		f2fs_submit_merged_write(sbi, type);
J
Jaegeuk Kim 已提交
374

C
Chao Yu 已提交
375 376
	blk_finish_plug(&plug);

J
Jaegeuk Kim 已提交
377 378 379 380 381
	return nwritten;
}

static int f2fs_set_meta_page_dirty(struct page *page)
{
382 383
	trace_f2fs_set_page_dirty(page, META);

384 385
	if (!PageUptodate(page))
		SetPageUptodate(page);
J
Jaegeuk Kim 已提交
386
	if (!PageDirty(page)) {
387
		__set_page_dirty_nobuffers(page);
388
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_META);
389
		SetPagePrivate(page);
J
Jaegeuk Kim 已提交
390
		f2fs_trace_pid(page);
J
Jaegeuk Kim 已提交
391 392 393 394 395 396 397 398 399
		return 1;
	}
	return 0;
}

const struct address_space_operations f2fs_meta_aops = {
	.writepage	= f2fs_write_meta_page,
	.writepages	= f2fs_write_meta_pages,
	.set_page_dirty	= f2fs_set_meta_page_dirty,
400 401
	.invalidatepage = f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
402 403 404
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
J
Jaegeuk Kim 已提交
405 406
};

C
Chao Yu 已提交
407 408
/*
 * Insert @ino into the per-type ino radix tree / list, allocating the entry
 * up front so the spinlocked section never sleeps.  For FLUSH_INO the
 * @devidx bit is additionally set in the entry's dirty_device mask.
 * Duplicate insertions are harmless: the preallocated entry is freed.
 */
static void __add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino,
						unsigned int devidx, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e, *tmp;

	tmp = f2fs_kmem_cache_alloc(ino_entry_slab, GFP_NOFS);

	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (!e) {
		e = tmp;
		if (unlikely(radix_tree_insert(&im->ino_root, ino, e)))
			f2fs_bug_on(sbi, 1);

		memset(e, 0, sizeof(struct ino_entry));
		e->ino = ino;

		list_add_tail(&e->list, &im->ino_list);
		/* orphan count is managed by acquire/release_orphan_inode */
		if (type != ORPHAN_INO)
			im->ino_num++;
	}

	if (type == FLUSH_INO)
		f2fs_set_bit(devidx, (char *)&e->dirty_device);

	spin_unlock(&im->ino_lock);
	radix_tree_preload_end();

	if (e != tmp)
		kmem_cache_free(ino_entry_slab, tmp);
}

J
Jaegeuk Kim 已提交
442
/*
 * Remove @ino from the per-type ino radix tree / list and free its entry.
 * No-op when the ino is not present.
 */
static void __remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e) {
		list_del(&e->list);
		radix_tree_delete(&im->ino_root, ino);
		im->ino_num--;
		spin_unlock(&im->ino_lock);
		kmem_cache_free(ino_entry_slab, e);
		return;
	}
	spin_unlock(&im->ino_lock);
}

460
void add_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* add new dirty ino entry into list */
	__add_ino_entry(sbi, ino, 0, type);
}

466
void remove_ino_entry(struct f2fs_sb_info *sbi, nid_t ino, int type)
{
	/* remove dirty ino entry from list */
	__remove_ino_entry(sbi, ino, type);
}

/* mode should be APPEND_INO or UPDATE_INO */
bool exist_written_data(struct f2fs_sb_info *sbi, nid_t ino, int mode)
{
	struct inode_management *im = &sbi->im[mode];
	struct ino_entry *e;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	spin_unlock(&im->ino_lock);
	return e ? true : false;
}

484
/*
 * Free every queued ino entry of each management type.  When @all is false,
 * the ORPHAN_INO list is skipped (starts from APPEND_INO instead).
 */
void release_ino_entry(struct f2fs_sb_info *sbi, bool all)
{
	struct ino_entry *e, *tmp;
	int i;

	for (i = all ? ORPHAN_INO : APPEND_INO; i < MAX_INO_ENTRY; i++) {
		struct inode_management *im = &sbi->im[i];

		spin_lock(&im->ino_lock);
		list_for_each_entry_safe(e, tmp, &im->ino_list, list) {
			list_del(&e->list);
			radix_tree_delete(&im->ino_root, e->ino);
			kmem_cache_free(ino_entry_slab, e);
			im->ino_num--;
		}
		spin_unlock(&im->ino_lock);
	}
}

C
Chao Yu 已提交
503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523
/* Record that device @devidx has dirty data belonging to @ino. */
void set_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
					unsigned int devidx, int type)
{
	__add_ino_entry(sbi, ino, devidx, type);
}

/* Test whether @ino has the @devidx bit set in its dirty_device mask. */
bool is_dirty_device(struct f2fs_sb_info *sbi, nid_t ino,
					unsigned int devidx, int type)
{
	struct inode_management *im = &sbi->im[type];
	struct ino_entry *e;
	bool is_dirty = false;

	spin_lock(&im->ino_lock);
	e = radix_tree_lookup(&im->ino_root, ino);
	if (e && f2fs_test_bit(devidx, (char *)&e->dirty_device))
		is_dirty = true;
	spin_unlock(&im->ino_lock);
	return is_dirty;
}

J
Jaegeuk Kim 已提交
524
int acquire_orphan_inode(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
525
{
526
	struct inode_management *im = &sbi->im[ORPHAN_INO];
J
Jaegeuk Kim 已提交
527 528
	int err = 0;

529
	spin_lock(&im->ino_lock);
J
Jaegeuk Kim 已提交
530 531

#ifdef CONFIG_F2FS_FAULT_INJECTION
532
	if (time_to_inject(sbi, FAULT_ORPHAN)) {
J
Jaegeuk Kim 已提交
533
		spin_unlock(&im->ino_lock);
534
		f2fs_show_injection_info(FAULT_ORPHAN);
J
Jaegeuk Kim 已提交
535 536 537
		return -ENOSPC;
	}
#endif
538
	if (unlikely(im->ino_num >= sbi->max_orphans))
J
Jaegeuk Kim 已提交
539
		err = -ENOSPC;
J
Jaegeuk Kim 已提交
540
	else
541 542
		im->ino_num++;
	spin_unlock(&im->ino_lock);
543

J
Jaegeuk Kim 已提交
544 545 546
	return err;
}

J
Jaegeuk Kim 已提交
547 548
void release_orphan_inode(struct f2fs_sb_info *sbi)
{
549 550 551 552 553 554
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	spin_lock(&im->ino_lock);
	f2fs_bug_on(sbi, im->ino_num == 0);
	im->ino_num--;
	spin_unlock(&im->ino_lock);
J
Jaegeuk Kim 已提交
555 556
}

557
void add_orphan_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
558
{
559
	/* add new orphan ino entry into list */
C
Chao Yu 已提交
560
	__add_ino_entry(F2FS_I_SB(inode), inode->i_ino, 0, ORPHAN_INO);
561
	update_inode_page(inode);
J
Jaegeuk Kim 已提交
562 563 564 565
}

void remove_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	/* remove orphan entry from orphan list */
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
}

570
/*
 * Recover one orphan inode at mount time: grab it, drop its link count and
 * let iput() truncate/evict it.  On any failure the SBI_NEED_FSCK flag is
 * set and a warning is logged.  Returns 0 on success or a negative errno.
 */
static int recover_orphan_inode(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct node_info ni;
	int err = acquire_orphan_inode(sbi);

	if (err)
		goto err_out;

	__add_ino_entry(sbi, ino, 0, ORPHAN_INO);

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode)) {
		/*
		 * there should be a bug that we can't find the entry
		 * to orphan inode.
		 */
		f2fs_bug_on(sbi, PTR_ERR(inode) == -ENOENT);
		return PTR_ERR(inode);
	}

	err = dquot_initialize(inode);
	if (err) {
		iput(inode);
		goto err_out;
	}

	clear_nlink(inode);

	/* truncate all the data during iput */
	iput(inode);

	get_node_info(sbi, ino, &ni);

	/* ENOMEM was fully retried in f2fs_evict_inode. */
	if (ni.blk_addr != NULL_ADDR) {
		err = -EIO;
		goto err_out;
	}
	__remove_ino_entry(sbi, ino, ORPHAN_INO);
	return 0;

err_out:
	set_sbi_flag(sbi, SBI_NEED_FSCK);
	f2fs_msg(sbi->sb, KERN_WARNING,
			"%s: orphan failed (ino=%x), run fsck to fix.",
			__func__, ino);
	return err;
}

620
int recover_orphan_inodes(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
621
{
622
	block_t start_blk, orphan_blocks, i, j;
C
Chao Yu 已提交
623 624
	unsigned int s_flags = sbi->sb->s_flags;
	int err = 0;
J
Jaegeuk Kim 已提交
625 626 627
#ifdef CONFIG_QUOTA
	int quota_enabled;
#endif
J
Jaegeuk Kim 已提交
628

629
	if (!is_set_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG))
630
		return 0;
J
Jaegeuk Kim 已提交
631

632
	if (s_flags & SB_RDONLY) {
C
Chao Yu 已提交
633
		f2fs_msg(sbi->sb, KERN_INFO, "orphan cleanup on readonly fs");
634
		sbi->sb->s_flags &= ~SB_RDONLY;
C
Chao Yu 已提交
635 636 637 638
	}

#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
639
	sbi->sb->s_flags |= SB_ACTIVE;
J
Jaegeuk Kim 已提交
640

C
Chao Yu 已提交
641
	/* Turn on quotas so that they are updated correctly */
642
	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
C
Chao Yu 已提交
643 644
#endif

W
Wanpeng Li 已提交
645
	start_blk = __start_cp_addr(sbi) + 1 + __cp_payload(sbi);
646
	orphan_blocks = __start_sum_addr(sbi) - 1 - __cp_payload(sbi);
J
Jaegeuk Kim 已提交
647

648
	ra_meta_pages(sbi, start_blk, orphan_blocks, META_CP, true);
649

650
	for (i = 0; i < orphan_blocks; i++) {
J
Jaegeuk Kim 已提交
651 652 653 654 655 656
		struct page *page = get_meta_page(sbi, start_blk + i);
		struct f2fs_orphan_block *orphan_blk;

		orphan_blk = (struct f2fs_orphan_block *)page_address(page);
		for (j = 0; j < le32_to_cpu(orphan_blk->entry_count); j++) {
			nid_t ino = le32_to_cpu(orphan_blk->ino[j]);
657 658 659
			err = recover_orphan_inode(sbi, ino);
			if (err) {
				f2fs_put_page(page, 1);
C
Chao Yu 已提交
660
				goto out;
661
			}
J
Jaegeuk Kim 已提交
662 663 664 665
		}
		f2fs_put_page(page, 1);
	}
	/* clear Orphan Flag */
666
	clear_ckpt_flags(sbi, CP_ORPHAN_PRESENT_FLAG);
C
Chao Yu 已提交
667 668 669
out:
#ifdef CONFIG_QUOTA
	/* Turn quotas off */
J
Jaegeuk Kim 已提交
670 671
	if (quota_enabled)
		f2fs_quota_off_umount(sbi->sb);
C
Chao Yu 已提交
672
#endif
673
	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */
C
Chao Yu 已提交
674 675

	return err;
J
Jaegeuk Kim 已提交
676 677 678 679
}

/*
 * Serialize the in-memory orphan ino list into on-disk orphan blocks
 * starting at @start_blk, packing up to F2FS_ORPHANS_PER_BLOCK entries per
 * block and dirtying each filled block for checkpoint writeback.
 */
static void write_orphan_inodes(struct f2fs_sb_info *sbi, block_t start_blk)
{
	struct list_head *head;
	struct f2fs_orphan_block *orphan_blk = NULL;
	unsigned int nentries = 0;
	unsigned short index = 1;
	unsigned short orphan_blocks;
	struct page *page = NULL;
	struct ino_entry *orphan = NULL;
	struct inode_management *im = &sbi->im[ORPHAN_INO];

	orphan_blocks = GET_ORPHAN_BLOCKS(im->ino_num);

	/*
	 * we don't need to do spin_lock(&im->ino_lock) here, since all the
	 * orphan inode operations are covered under f2fs_lock_op().
	 * And, spin_lock should be avoided due to page operations below.
	 */
	head = &im->ino_list;

	/* loop for each orphan inode entry and write them in journal block */
	list_for_each_entry(orphan, head, list) {
		if (!page) {
			page = grab_meta_page(sbi, start_blk++);
			orphan_blk =
				(struct f2fs_orphan_block *)page_address(page);
			memset(orphan_blk, 0, sizeof(*orphan_blk));
		}

		orphan_blk->ino[nentries++] = cpu_to_le32(orphan->ino);

		if (nentries == F2FS_ORPHANS_PER_BLOCK) {
			/*
			 * an orphan block is full of 1020 entries,
			 * then we need to flush current orphan blocks
			 * and bring another one in memory
			 */
			orphan_blk->blk_addr = cpu_to_le16(index);
			orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
			orphan_blk->entry_count = cpu_to_le32(nentries);
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			index++;
			nentries = 0;
			page = NULL;
		}
	}

	/* flush the final, partially filled block */
	if (page) {
		orphan_blk->blk_addr = cpu_to_le16(index);
		orphan_blk->blk_count = cpu_to_le16(orphan_blocks);
		orphan_blk->entry_count = cpu_to_le32(nentries);
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

735 736 737
/*
 * Load the checkpoint block at @cp_addr and validate its CRC.  On success
 * *cp_block/*cp_page/*version are filled in and 0 is returned; a bad crc
 * offset or crc value yields -EINVAL (the page reference is still held by
 * the caller via *cp_page).
 */
static int get_checkpoint_version(struct f2fs_sb_info *sbi, block_t cp_addr,
		struct f2fs_checkpoint **cp_block, struct page **cp_page,
		unsigned long long *version)
{
	unsigned long blk_size = sbi->blocksize;
	size_t crc_offset = 0;
	__u32 crc = 0;

	*cp_page = get_meta_page(sbi, cp_addr);
	*cp_block = (struct f2fs_checkpoint *)page_address(*cp_page);

	crc_offset = le32_to_cpu((*cp_block)->checksum_offset);
	if (crc_offset > (blk_size - sizeof(__le32))) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"invalid crc_offset: %zu", crc_offset);
		return -EINVAL;
	}

	crc = cur_cp_crc(*cp_block);
	if (!f2fs_crc_valid(sbi, crc, *cp_block, crc_offset)) {
		f2fs_msg(sbi->sb, KERN_WARNING, "invalid crc value");
		return -EINVAL;
	}

	*version = cur_cp_version(*cp_block);
	return 0;
}
J
Jaegeuk Kim 已提交
762

763 764 765 766 767 768 769
/*
 * Validate one checkpoint pack: its header and footer blocks must both pass
 * CRC checks and carry the same version.  Returns the (referenced) header
 * page and sets *version on success, or NULL when the pack is inconsistent.
 */
static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
				block_t cp_addr, unsigned long long *version)
{
	struct page *cp_page_1 = NULL, *cp_page_2 = NULL;
	struct f2fs_checkpoint *cp_block = NULL;
	unsigned long long cur_version = 0, pre_version = 0;
	int err;

	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_1, version);
	if (err)
		goto invalid_cp1;
	pre_version = *version;

	/* footer block sits at the end of the pack */
	cp_addr += le32_to_cpu(cp_block->cp_pack_total_block_count) - 1;
	err = get_checkpoint_version(sbi, cp_addr, &cp_block,
					&cp_page_2, version);
	if (err)
		goto invalid_cp2;
	cur_version = *version;

	if (cur_version == pre_version) {
		*version = cur_version;
		f2fs_put_page(cp_page_2, 1);
		return cp_page_1;
	}
invalid_cp2:
	f2fs_put_page(cp_page_2, 1);
invalid_cp1:
	f2fs_put_page(cp_page_1, 1);
	return NULL;
}

int get_valid_checkpoint(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *cp_block;
	struct f2fs_super_block *fsb = sbi->raw_super;
	struct page *cp1, *cp2, *cur_page;
	unsigned long blk_size = sbi->blocksize;
	unsigned long long cp1_version = 0, cp2_version = 0;
	unsigned long long cp_start_blk_no;
W
Wanpeng Li 已提交
804
	unsigned int cp_blks = 1 + __cp_payload(sbi);
C
Changman Lee 已提交
805 806
	block_t cp_blk_no;
	int i;
J
Jaegeuk Kim 已提交
807

C
Chao Yu 已提交
808
	sbi->ckpt = f2fs_kzalloc(sbi, cp_blks * blk_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
809 810 811 812 813 814 815 816 817 818
	if (!sbi->ckpt)
		return -ENOMEM;
	/*
	 * Finding out valid cp block involves read both
	 * sets( cp pack1 and cp pack 2)
	 */
	cp_start_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	cp1 = validate_checkpoint(sbi, cp_start_blk_no, &cp1_version);

	/* The second checkpoint pack should start at the next segment */
819 820
	cp_start_blk_no += ((unsigned long long)1) <<
				le32_to_cpu(fsb->log_blocks_per_seg);
J
Jaegeuk Kim 已提交
821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836 837 838
	cp2 = validate_checkpoint(sbi, cp_start_blk_no, &cp2_version);

	if (cp1 && cp2) {
		if (ver_after(cp2_version, cp1_version))
			cur_page = cp2;
		else
			cur_page = cp1;
	} else if (cp1) {
		cur_page = cp1;
	} else if (cp2) {
		cur_page = cp2;
	} else {
		goto fail_no_cp;
	}

	cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
	memcpy(sbi->ckpt, cp_block, blk_size);

839 840
	/* Sanity checking of checkpoint */
	if (sanity_check_ckpt(sbi))
841
		goto free_fail_no_cp;
842

843 844 845 846
	if (cur_page == cp1)
		sbi->cur_cp_pack = 1;
	else
		sbi->cur_cp_pack = 2;
847

C
Changman Lee 已提交
848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864
	if (cp_blks <= 1)
		goto done;

	cp_blk_no = le32_to_cpu(fsb->cp_blkaddr);
	if (cur_page == cp2)
		cp_blk_no += 1 << le32_to_cpu(fsb->log_blocks_per_seg);

	for (i = 1; i < cp_blks; i++) {
		void *sit_bitmap_ptr;
		unsigned char *ckpt = (unsigned char *)sbi->ckpt;

		cur_page = get_meta_page(sbi, cp_blk_no + i);
		sit_bitmap_ptr = page_address(cur_page);
		memcpy(ckpt + i * blk_size, sit_bitmap_ptr, blk_size);
		f2fs_put_page(cur_page, 1);
	}
done:
J
Jaegeuk Kim 已提交
865 866 867 868
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
	return 0;

869 870 871
free_fail_no_cp:
	f2fs_put_page(cp1, 1);
	f2fs_put_page(cp2, 1);
J
Jaegeuk Kim 已提交
872 873 874 875 876
fail_no_cp:
	kfree(sbi->ckpt);
	return -EINVAL;
}

877
static void __add_dirty_inode(struct inode *inode, enum inode_type type)
J
Jaegeuk Kim 已提交
878
{
879
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
880
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
J
Jaegeuk Kim 已提交
881

882
	if (is_inode_flag_set(inode, flag))
883
		return;
884

885
	set_inode_flag(inode, flag);
886 887 888
	if (!f2fs_is_volatile_file(inode))
		list_add_tail(&F2FS_I(inode)->dirty_list,
						&sbi->inode_list[type]);
C
Chao Yu 已提交
889
	stat_inc_dirty_inode(sbi, type);
890 891
}

892
static void __remove_dirty_inode(struct inode *inode, enum inode_type type)
C
Chao Yu 已提交
893
{
894
	int flag = (type == DIR_INODE) ? FI_DIRTY_DIR : FI_DIRTY_FILE;
C
Chao Yu 已提交
895

896
	if (get_dirty_pages(inode) || !is_inode_flag_set(inode, flag))
C
Chao Yu 已提交
897 898
		return;

899 900
	list_del_init(&F2FS_I(inode)->dirty_list);
	clear_inode_flag(inode, flag);
C
Chao Yu 已提交
901
	stat_dec_dirty_inode(F2FS_I_SB(inode), type);
C
Chao Yu 已提交
902 903
}

904
void update_dirty_page(struct inode *inode, struct page *page)
905
{
906
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
907
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
908

909 910
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
911
		return;
912

913 914
	spin_lock(&sbi->inode_lock[type]);
	if (type != FILE_INODE || test_opt(sbi, DATA_FLUSH))
915
		__add_dirty_inode(inode, type);
916
	inode_inc_dirty_pages(inode);
917 918
	spin_unlock(&sbi->inode_lock[type]);

919
	SetPagePrivate(page);
J
Jaegeuk Kim 已提交
920
	f2fs_trace_pid(page);
921 922
}

923
void remove_dirty_inode(struct inode *inode)
J
Jaegeuk Kim 已提交
924
{
925
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
926
	enum inode_type type = S_ISDIR(inode->i_mode) ? DIR_INODE : FILE_INODE;
J
Jaegeuk Kim 已提交
927

928 929
	if (!S_ISDIR(inode->i_mode) && !S_ISREG(inode->i_mode) &&
			!S_ISLNK(inode->i_mode))
J
Jaegeuk Kim 已提交
930 931
		return;

932 933 934
	if (type == FILE_INODE && !test_opt(sbi, DATA_FLUSH))
		return;

935 936 937
	spin_lock(&sbi->inode_lock[type]);
	__remove_dirty_inode(inode, type);
	spin_unlock(&sbi->inode_lock[type]);
938 939
}

C
Chao Yu 已提交
940
int sync_dirty_inodes(struct f2fs_sb_info *sbi, enum inode_type type)
J
Jaegeuk Kim 已提交
941
{
942
	struct list_head *head;
J
Jaegeuk Kim 已提交
943
	struct inode *inode;
944
	struct f2fs_inode_info *fi;
945
	bool is_dir = (type == DIR_INODE);
J
Jaegeuk Kim 已提交
946
	unsigned long ino = 0;
947 948 949 950

	trace_f2fs_sync_dirty_inodes_enter(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
J
Jaegeuk Kim 已提交
951
retry:
952
	if (unlikely(f2fs_cp_error(sbi)))
C
Chao Yu 已提交
953
		return -EIO;
954

955
	spin_lock(&sbi->inode_lock[type]);
956

957
	head = &sbi->inode_list[type];
J
Jaegeuk Kim 已提交
958
	if (list_empty(head)) {
959
		spin_unlock(&sbi->inode_lock[type]);
960 961 962
		trace_f2fs_sync_dirty_inodes_exit(sbi->sb, is_dir,
				get_pages(sbi, is_dir ?
				F2FS_DIRTY_DENTS : F2FS_DIRTY_DATA));
C
Chao Yu 已提交
963
		return 0;
J
Jaegeuk Kim 已提交
964
	}
965
	fi = list_first_entry(head, struct f2fs_inode_info, dirty_list);
966
	inode = igrab(&fi->vfs_inode);
967
	spin_unlock(&sbi->inode_lock[type]);
J
Jaegeuk Kim 已提交
968
	if (inode) {
J
Jaegeuk Kim 已提交
969 970
		unsigned long cur_ino = inode->i_ino;

C
Chao Yu 已提交
971 972 973
		if (is_dir)
			F2FS_I(inode)->cp_task = current;

974
		filemap_fdatawrite(inode->i_mapping);
C
Chao Yu 已提交
975 976 977 978

		if (is_dir)
			F2FS_I(inode)->cp_task = NULL;

J
Jaegeuk Kim 已提交
979
		iput(inode);
J
Jaegeuk Kim 已提交
980 981 982 983 984 985 986
		/* We need to give cpu to another writers. */
		if (ino == cur_ino) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			cond_resched();
		} else {
			ino = cur_ino;
		}
J
Jaegeuk Kim 已提交
987 988 989 990 991
	} else {
		/*
		 * We should submit bio, since it exists several
		 * wribacking dentry pages in the freeing inode.
		 */
992
		f2fs_submit_merged_write(sbi, DATA);
993
		cond_resched();
J
Jaegeuk Kim 已提交
994 995 996 997
	}
	goto retry;
}

998 999 1000 1001 1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013
int f2fs_sync_inode_meta(struct f2fs_sb_info *sbi)
{
	struct list_head *head = &sbi->inode_list[DIRTY_META];
	struct inode *inode;
	struct f2fs_inode_info *fi;
	s64 total = get_pages(sbi, F2FS_DIRTY_IMETA);

	while (total--) {
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;

		spin_lock(&sbi->inode_lock[DIRTY_META]);
		if (list_empty(head)) {
			spin_unlock(&sbi->inode_lock[DIRTY_META]);
			return 0;
		}
1014
		fi = list_first_entry(head, struct f2fs_inode_info,
1015 1016 1017 1018
							gdirty_list);
		inode = igrab(&fi->vfs_inode);
		spin_unlock(&sbi->inode_lock[DIRTY_META]);
		if (inode) {
1019 1020 1021 1022 1023
			sync_inode_metadata(inode, 0);

			/* it's on eviction */
			if (is_inode_flag_set(inode, FI_DIRTY_INODE))
				update_inode_page(inode);
1024 1025
			iput(inode);
		}
C
Chao Yu 已提交
1026
	}
1027 1028 1029
	return 0;
}

1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
static void __prepare_cp_block(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	nid_t last_nid = nm_i->next_scan_nid;

	next_free_nid(sbi, &last_nid);
	ckpt->valid_block_count = cpu_to_le64(valid_user_blocks(sbi));
	ckpt->valid_node_count = cpu_to_le32(valid_node_count(sbi));
	ckpt->valid_inode_count = cpu_to_le32(valid_inode_count(sbi));
	ckpt->next_free_nid = cpu_to_le32(last_nid);
}

J
Jaegeuk Kim 已提交
1043
/*
 * Freeze all the FS-operations for checkpoint.
 *
 * On success, returns with f2fs_lock_all() and sbi->node_write held;
 * unblock_operations() releases them after the checkpoint is written.
 * On error, all locks are dropped before returning.
 */
static int block_operations(struct f2fs_sb_info *sbi)
{
	struct writeback_control wbc = {
		.sync_mode = WB_SYNC_ALL,
		.nr_to_write = LONG_MAX,
		.for_reclaim = 0,
	};
	struct blk_plug plug;
	int err = 0;

	blk_start_plug(&plug);

retry_flush_dents:
	f2fs_lock_all(sbi);
	/* write all the dirty dentry pages */
	if (get_pages(sbi, F2FS_DIRTY_DENTS)) {
		/* drop the lock for writeback, then start over from scratch */
		f2fs_unlock_all(sbi);
		err = sync_dirty_inodes(sbi, DIR_INODE);
		if (err)
			goto out;
		cond_resched();
		goto retry_flush_dents;
	}

	/*
	 * POR: we should ensure that there are no dirty node pages
	 * until finishing nat/sit flush. inode->i_blocks can be updated.
	 */
	down_write(&sbi->node_change);

	if (get_pages(sbi, F2FS_DIRTY_IMETA)) {
		/* release locks in reverse order before syncing inode meta */
		up_write(&sbi->node_change);
		f2fs_unlock_all(sbi);
		err = f2fs_sync_inode_meta(sbi);
		if (err)
			goto out;
		cond_resched();
		goto retry_flush_dents;
	}

retry_flush_nodes:
	down_write(&sbi->node_write);

	if (get_pages(sbi, F2FS_DIRTY_NODES)) {
		up_write(&sbi->node_write);
		err = sync_node_pages(sbi, &wbc, false, FS_CP_NODE_IO);
		if (err) {
			/* on failure, unwind node_change and the FS lock too */
			up_write(&sbi->node_change);
			f2fs_unlock_all(sbi);
			goto out;
		}
		cond_resched();
		goto retry_flush_nodes;
	}

	/*
	 * sbi->node_change is used only for AIO write_begin path which produces
	 * dirty node blocks and some checkpoint values by block allocation.
	 */
	__prepare_cp_block(sbi);
	up_write(&sbi->node_change);
out:
	blk_finish_plug(&plug);
	return err;
}

/*
 * Undo block_operations(): release node_write and resume FS operations.
 * Release order is the reverse of the acquisition order above.
 */
static void unblock_operations(struct f2fs_sb_info *sbi)
{
	up_write(&sbi->node_write);
	f2fs_unlock_all(sbi);
}

1118 1119 1120 1121 1122 1123 1124
static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
{
	DEFINE_WAIT(wait);

	for (;;) {
		prepare_to_wait(&sbi->cp_wait, &wait, TASK_UNINTERRUPTIBLE);

1125
		if (!get_pages(sbi, F2FS_WB_CP_DATA))
1126 1127
			break;

1128
		io_schedule_timeout(5*HZ);
1129 1130 1131 1132
	}
	finish_wait(&sbi->cp_wait, &wait);
}

1133 1134 1135 1136
static void update_ckpt_flags(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num;
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
1137
	unsigned long flags;
1138

1139
	spin_lock_irqsave(&sbi->cp_lock, flags);
1140

1141
	if ((cpc->reason & CP_UMOUNT) &&
1142
			le32_to_cpu(ckpt->cp_pack_total_block_count) >
1143 1144 1145
			sbi->blocks_per_seg - NM_I(sbi)->nat_bits_blocks)
		disable_nat_bits(sbi, false);

1146 1147
	if (cpc->reason & CP_TRIMMED)
		__set_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
C
Chao Yu 已提交
1148 1149
	else
		__clear_ckpt_flags(ckpt, CP_TRIMMED_FLAG);
1150

1151
	if (cpc->reason & CP_UMOUNT)
1152 1153 1154 1155
		__set_ckpt_flags(ckpt, CP_UMOUNT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_UMOUNT_FLAG);

1156
	if (cpc->reason & CP_FASTBOOT)
1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170
		__set_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_FASTBOOT_FLAG);

	if (orphan_num)
		__set_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG);

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
		__set_ckpt_flags(ckpt, CP_FSCK_FLAG);

	/* set this flag to activate crc|cp_ver for recovery */
	__set_ckpt_flags(ckpt, CP_CRC_RECOVERY_FLAG);
1171
	__clear_ckpt_flags(ckpt, CP_NOCRC_RECOVERY_FLAG);
1172

1173
	spin_unlock_irqrestore(&sbi->cp_lock, flags);
1174 1175
}

1176 1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208
/*
 * Write the cp pack 2 page at @blk_addr and submit it with a flush
 * barrier, which makes the whole checkpoint pack valid on disk.
 */
static void commit_checkpoint(struct f2fs_sb_info *sbi,
	void *src, block_t blk_addr)
{
	struct writeback_control wbc = {
		.for_reclaim = 0,
	};

	/*
	 * pagevec_lookup_tag and lock_page again will take
	 * some extra time. Therefore, update_meta_pages and
	 * sync_meta_pages are combined in this function.
	 */
	struct page *page = grab_meta_page(sbi, blk_addr);
	int err;

	/* copy the checkpoint block into the meta page and dirty it */
	memcpy(page_address(page), src, PAGE_SIZE);
	set_page_dirty(page);

	/* the page must be stable (not under writeback) before writeout */
	f2fs_wait_on_page_writeback(page, META, true);
	f2fs_bug_on(sbi, PageWriteback(page));
	if (unlikely(!clear_page_dirty_for_io(page)))
		f2fs_bug_on(sbi, 1);

	/* writeout cp pack 2 page */
	err = __f2fs_write_meta_page(page, &wbc, FS_CP_META_IO);
	f2fs_bug_on(sbi, err);

	f2fs_put_page(page, 0);

	/* submit checkpoint (with barrier if NOBARRIER is not set) */
	f2fs_submit_merged_write(sbi, META_FLUSH);
}

C
Chao Yu 已提交
1209
/*
 * Write checkpoint pack 1 for this cycle: flush dirty meta pages, fill
 * in the checkpoint block (cursegs, counters, bitmaps, CRC), write the
 * payload/orphan/summary blocks, then commit cp pack 2 with a flush
 * barrier.  Called from write_checkpoint() under cp_mutex with FS
 * operations blocked.  Returns 0 or a negative errno.
 */
static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned long orphan_num = sbi->im[ORPHAN_INO].ino_num, flags;
	block_t start_blk;
	unsigned int data_sum_blocks, orphan_blocks;
	__u32 crc32 = 0;
	int i;
	int cp_payload_blks = __cp_payload(sbi);
	struct super_block *sb = sbi->sb;
	struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
	u64 kbytes_written;
	int err;

	/* Flush all the NAT/SIT pages */
	while (get_pages(sbi, F2FS_DIRTY_META)) {
		sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
		if (unlikely(f2fs_cp_error(sbi)))
			return -EIO;
	}

	/*
	 * modify checkpoint
	 * version number is already updated
	 */
	ckpt->elapsed_time = cpu_to_le64(get_mtime(sbi));
	ckpt->free_segment_count = cpu_to_le32(free_segments(sbi));
	/* record current node segments, block offsets and alloc types */
	for (i = 0; i < NR_CURSEG_NODE_TYPE; i++) {
		ckpt->cur_node_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_NODE));
		ckpt->cur_node_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_NODE));
		ckpt->alloc_type[i + CURSEG_HOT_NODE] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_NODE);
	}
	/* record current data segments, block offsets and alloc types */
	for (i = 0; i < NR_CURSEG_DATA_TYPE; i++) {
		ckpt->cur_data_segno[i] =
			cpu_to_le32(curseg_segno(sbi, i + CURSEG_HOT_DATA));
		ckpt->cur_data_blkoff[i] =
			cpu_to_le16(curseg_blkoff(sbi, i + CURSEG_HOT_DATA));
		ckpt->alloc_type[i + CURSEG_HOT_DATA] =
				curseg_alloc_type(sbi, i + CURSEG_HOT_DATA);
	}

	/* 2 cp  + n data seg summary + orphan inode blocks */
	data_sum_blocks = npages_for_summary_flush(sbi, false);
	spin_lock_irqsave(&sbi->cp_lock, flags);
	if (data_sum_blocks < NR_CURSEG_DATA_TYPE)
		__set_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	else
		__clear_ckpt_flags(ckpt, CP_COMPACT_SUM_FLAG);
	spin_unlock_irqrestore(&sbi->cp_lock, flags);

	orphan_blocks = GET_ORPHAN_BLOCKS(orphan_num);
	ckpt->cp_pack_start_sum = cpu_to_le32(1 + cp_payload_blks +
			orphan_blocks);

	/* total pack size depends on whether node summaries are kept */
	if (__remain_node_summaries(cpc->reason))
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS+
				cp_payload_blks + data_sum_blocks +
				orphan_blocks + NR_CURSEG_NODE_TYPE);
	else
		ckpt->cp_pack_total_block_count = cpu_to_le32(F2FS_CP_PACKS +
				cp_payload_blks + data_sum_blocks +
				orphan_blocks);

	/* update ckpt flag for checkpoint */
	update_ckpt_flags(sbi, cpc);

	/* update SIT/NAT bitmap */
	get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
	get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));

	/* checksum covers the checkpoint block up to checksum_offset */
	crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
	*((__le32 *)((unsigned char *)ckpt +
				le32_to_cpu(ckpt->checksum_offset)))
				= cpu_to_le32(crc32);

	start_blk = __start_cp_next_addr(sbi);

	/* write nat bits */
	if (enabled_nat_bits(sbi, cpc)) {
		__u64 cp_ver = cur_cp_version(ckpt);
		block_t blk;

		/* nat_bits carry version|crc so they can be validated later */
		cp_ver |= ((__u64)crc32 << 32);
		*(__le64 *)nm_i->nat_bits = cpu_to_le64(cp_ver);

		/* nat_bits live at the tail of the cp pack segment */
		blk = start_blk + sbi->blocks_per_seg - nm_i->nat_bits_blocks;
		for (i = 0; i < nm_i->nat_bits_blocks; i++)
			update_meta_page(sbi, nm_i->nat_bits +
					(i << F2FS_BLKSIZE_BITS), blk + i);

		/* Flush all the NAT BITS pages */
		while (get_pages(sbi, F2FS_DIRTY_META)) {
			sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);
			if (unlikely(f2fs_cp_error(sbi)))
				return -EIO;
		}
	}

	/* write out checkpoint buffer at block 0 */
	update_meta_page(sbi, ckpt, start_blk++);

	/* then the cp payload blocks that follow the header */
	for (i = 1; i < 1 + cp_payload_blks; i++)
		update_meta_page(sbi, (char *)ckpt + i * F2FS_BLKSIZE,
							start_blk++);

	if (orphan_num) {
		write_orphan_inodes(sbi, start_blk);
		start_blk += orphan_blocks;
	}

	write_data_summaries(sbi, start_blk);
	start_blk += data_sum_blocks;

	/* Record write statistics in the hot node summary */
	kbytes_written = sbi->kbytes_written;
	if (sb->s_bdev->bd_part)
		kbytes_written += BD_PART_WRITTEN(sbi);

	seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);

	if (__remain_node_summaries(cpc->reason)) {
		write_node_summaries(sbi, start_blk);
		start_blk += NR_CURSEG_NODE_TYPE;
	}

	/* update user_block_counts */
	sbi->last_valid_block_count = sbi->total_valid_block_count;
	percpu_counter_set(&sbi->alloc_valid_block_count, 0);

	/* Here, we have one bio having CP pack except cp pack 2 page */
	sync_meta_pages(sbi, META, LONG_MAX, FS_CP_META_IO);

	/* wait for previous submitted meta pages writeback */
	wait_on_all_pages_writeback(sbi);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	/* flush all device cache */
	err = f2fs_flush_device_cache(sbi);
	if (err)
		return err;

	/* barrier and flush checkpoint cp pack 2 page if it can */
	commit_checkpoint(sbi, ckpt, start_blk);
	wait_on_all_pages_writeback(sbi);

	release_ino_entry(sbi, false);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;

	clear_sbi_flag(sbi, SBI_IS_DIRTY);
	clear_sbi_flag(sbi, SBI_NEED_CP);
	/* flip to the other cp pack address for the next checkpoint */
	__set_cp_next_pack(sbi);

	/*
	 * redirty superblock if metadata like node page or inode cache is
	 * updated during writing checkpoint.
	 */
	if (get_pages(sbi, F2FS_DIRTY_NODES) ||
			get_pages(sbi, F2FS_DIRTY_IMETA))
		set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_bug_on(sbi, get_pages(sbi, F2FS_DIRTY_DENTS));

	return 0;
}

J
Jaegeuk Kim 已提交
1382
/*
 * We guarantee that this checkpoint procedure will not fail.
 *
 * Entry point for writing a checkpoint, serialized by cp_mutex.
 * @cpc->reason selects the trigger (umount/sync/fastboot/discard/...)
 * and shapes the fast paths below.  Returns 0 or a negative errno.
 */
int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	unsigned long long ckpt_ver;
	int err = 0;

	mutex_lock(&sbi->cp_mutex);

	/* nothing to do: clean fs and a non-mandatory checkpoint reason */
	if (!is_sbi_flag_set(sbi, SBI_IS_DIRTY) &&
		((cpc->reason & CP_FASTBOOT) || (cpc->reason & CP_SYNC) ||
		((cpc->reason & CP_DISCARD) && !sbi->discard_blks)))
		goto out;
	if (unlikely(f2fs_cp_error(sbi))) {
		err = -EIO;
		goto out;
	}
	if (f2fs_readonly(sbi->sb)) {
		err = -EROFS;
		goto out;
	}

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "start block_ops");

	/* freeze FS operations; flushes dents, inode meta and nodes */
	err = block_operations(sbi);
	if (err)
		goto out;

	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");

	f2fs_flush_merged_writes(sbi);

	/* this is the case of multiple fstrims without any changes */
	if (cpc->reason & CP_DISCARD) {
		if (!exist_trim_candidates(sbi, cpc)) {
			unblock_operations(sbi);
			goto out;
		}

		/* with nothing dirty, flushing SIT/prefree alone suffices */
		if (NM_I(sbi)->dirty_nat_cnt == 0 &&
				SIT_I(sbi)->dirty_sentries == 0 &&
				prefree_segments(sbi) == 0) {
			flush_sit_entries(sbi, cpc);
			clear_prefree_segments(sbi, cpc);
			unblock_operations(sbi);
			goto out;
		}
	}

	/*
	 * update checkpoint pack index
	 * Increase the version number so that
	 * SIT entries and seg summaries are written at correct place
	 */
	ckpt_ver = cur_cp_version(ckpt);
	ckpt->checkpoint_ver = cpu_to_le64(++ckpt_ver);

	/* write cached NAT/SIT entries to NAT/SIT area */
	flush_nat_entries(sbi, cpc);
	flush_sit_entries(sbi, cpc);

	/* unlock all the fs_lock[] in do_checkpoint() */
	err = do_checkpoint(sbi, cpc);
	if (err)
		release_discard_addrs(sbi);
	else
		clear_prefree_segments(sbi, cpc);

	unblock_operations(sbi);
	stat_inc_cp_count(sbi->stat_info);

	if (cpc->reason & CP_RECOVERY)
		f2fs_msg(sbi->sb, KERN_NOTICE,
			"checkpoint: version = %llx", ckpt_ver);

	/* do checkpoint periodically */
	f2fs_update_time(sbi, CP_TIME);
	trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish checkpoint");
out:
	mutex_unlock(&sbi->cp_mutex);
	return err;
}

J
Jaegeuk Kim 已提交
1467
void init_ino_entry_info(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
1468
{
J
Jaegeuk Kim 已提交
1469 1470 1471
	int i;

	for (i = 0; i < MAX_INO_ENTRY; i++) {
1472 1473 1474 1475 1476 1477
		struct inode_management *im = &sbi->im[i];

		INIT_RADIX_TREE(&im->ino_root, GFP_ATOMIC);
		spin_lock_init(&im->ino_lock);
		INIT_LIST_HEAD(&im->ino_list);
		im->ino_num = 0;
J
Jaegeuk Kim 已提交
1478 1479
	}

C
Chao Yu 已提交
1480
	sbi->max_orphans = (sbi->blocks_per_seg - F2FS_CP_PACKS -
1481 1482
			NR_CURSEG_TYPE - __cp_payload(sbi)) *
				F2FS_ORPHANS_PER_BLOCK;
J
Jaegeuk Kim 已提交
1483 1484
}

1485
/*
 * Create the slab caches backing ino_entry and inode_entry objects.
 * Returns 0 on success, -ENOMEM if either cache cannot be created.
 */
int __init create_checkpoint_caches(void)
{
	ino_entry_slab = f2fs_kmem_cache_create("f2fs_ino_entry",
			sizeof(struct ino_entry));
	if (!ino_entry_slab)
		goto fail;

	inode_entry_slab = f2fs_kmem_cache_create("f2fs_inode_entry",
			sizeof(struct inode_entry));
	if (!inode_entry_slab)
		goto free_ino_entry;

	return 0;

free_ino_entry:
	kmem_cache_destroy(ino_entry_slab);
fail:
	return -ENOMEM;
}

void destroy_checkpoint_caches(void)
{
J
Jaegeuk Kim 已提交
1502
	kmem_cache_destroy(ino_entry_slab);
J
Jaegeuk Kim 已提交
1503 1504
	kmem_cache_destroy(inode_entry_slab);
}