recovery.c 18.0 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

48 49
/*
 * Slab cache that struct fsync_inode_entry objects are allocated from and
 * freed to. f2fs_recover_fsync_data() creates it on entry and destroys it
 * before returning.
 */
static struct kmem_cache *fsync_entry_slab;

C
Chao Yu 已提交
50
bool f2fs_space_for_roll_forward(struct f2fs_sb_info *sbi)
51
{
52 53 54
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
55 56 57 58 59 60 61 62 63
		return false;
	return true;
}

/*
 * Look up the fsync entry whose inode number equals @ino on the list @head.
 * Returns the matching entry, or NULL when the list holds none.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *cur;

	list_for_each_entry(cur, head, list) {
		if (cur->inode->i_ino != ino)
			continue;
		return cur;
	}

	return NULL;
}

71
/*
 * Grab inode @ino, initialize its quota state and append a new fsync entry
 * for it to the tail of @head.
 *
 * When @quota_inode is true the inode is additionally charged through
 * dquot_alloc_inode(). On success the new entry owns the inode reference.
 * On failure the inode reference is dropped and an ERR_PTR carrying the
 * error code is returned.
 */
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
			struct list_head *head, nid_t ino, bool quota_inode)
{
	struct fsync_inode_entry *new_entry;
	struct inode *inode;
	int err;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	err = dquot_initialize(inode);
	if (!err && quota_inode)
		err = dquot_alloc_inode(inode);
	if (err) {
		iput(inode);
		return ERR_PTR(err);
	}

	new_entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	new_entry->inode = inode;
	list_add_tail(&new_entry->list, head);

	return new_entry;
}

102
static void del_fsync_inode(struct fsync_inode_entry *entry, int drop)
103
{
104 105 106 107
	if (drop) {
		/* inode should not be recovered, drop it */
		f2fs_inode_synced(entry->inode);
	}
108 109 110 111 112
	iput(entry->inode);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

C
Chao Yu 已提交
113 114
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
115
{
116
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
117
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
118
	struct f2fs_dir_entry *de;
119
	struct fscrypt_name fname;
120
	struct page *page;
J
Jaegeuk Kim 已提交
121
	struct inode *dir, *einode;
C
Chao Yu 已提交
122
	struct fsync_inode_entry *entry;
123
	int err = 0;
124
	char *name;
125

C
Chao Yu 已提交
126 127
	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
C
Chao Yu 已提交
128 129
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list,
							pino, false);
130 131 132
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
C
Chao Yu 已提交
133 134
			goto out;
		}
135 136
	}

C
Chao Yu 已提交
137 138
	dir = entry->inode;

139 140 141
	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;
142

143
	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
144 145
		WARN_ON(1);
		err = -ENAMETOOLONG;
C
Chao Yu 已提交
146
		goto out;
147
	}
J
Jaegeuk Kim 已提交
148
retry:
149
	de = __f2fs_find_entry(dir, &fname, &page);
150
	if (de && inode->i_ino == le32_to_cpu(de->ino))
151
		goto out_put;
152

J
Jaegeuk Kim 已提交
153
	if (de) {
154
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
J
Jaegeuk Kim 已提交
155 156
		if (IS_ERR(einode)) {
			WARN_ON(1);
157 158
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
159
				err = -EEXIST;
160
			goto out_put;
161
		}
C
Chao Yu 已提交
162 163 164 165

		err = dquot_initialize(einode);
		if (err) {
			iput(einode);
166
			goto out_put;
C
Chao Yu 已提交
167 168
		}

C
Chao Yu 已提交
169
		err = f2fs_acquire_orphan_inode(F2FS_I_SB(inode));
170 171
		if (err) {
			iput(einode);
172
			goto out_put;
J
Jaegeuk Kim 已提交
173
		}
174
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
175 176
		iput(einode);
		goto retry;
177 178 179
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
C
Chao Yu 已提交
180
		err = f2fs_add_dentry(dir, &fname, inode,
181
					inode->i_ino, inode->i_mode);
182
	}
183 184
	if (err == -ENOMEM)
		goto retry;
185 186
	goto out;

187
out_put:
188
	f2fs_put_page(page, 0);
189
out:
190 191 192 193
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
C
Chris Fries 已提交
194 195
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
196
			__func__, ino_of_node(ipage), name,
D
Dan Carpenter 已提交
197
			IS_ERR(dir) ? 0 : dir->i_ino, err);
198 199 200
	return err;
}

J
Jaegeuk Kim 已提交
201 202 203 204 205 206 207 208 209 210 211 212
static void recover_inline_flags(struct inode *inode, struct f2fs_inode *ri)
{
	if (ri->i_inline & F2FS_PIN_FILE)
		set_inode_flag(inode, FI_PIN_FILE);
	else
		clear_inode_flag(inode, FI_PIN_FILE);
	if (ri->i_inline & F2FS_DATA_EXIST)
		set_inode_flag(inode, FI_DATA_EXIST);
	else
		clear_inode_flag(inode, FI_DATA_EXIST);
}

213
static void recover_inode(struct inode *inode, struct page *page)
214
{
215
	struct f2fs_inode *raw = F2FS_INODE(page);
216
	char *name;
217 218

	inode->i_mode = le16_to_cpu(raw->i_mode);
219 220
	i_uid_write(inode, le32_to_cpu(raw->i_uid));
	i_gid_write(inode, le32_to_cpu(raw->i_gid));
221 222 223 224 225 226 227 228 229 230 231 232 233

	if (raw->i_inline & F2FS_EXTRA_ATTR) {
		if (f2fs_sb_has_project_quota(F2FS_I_SB(inode)->sb) &&
			F2FS_FITS_IN_INODE(raw, le16_to_cpu(raw->i_extra_isize),
								i_projid)) {
			projid_t i_projid;

			i_projid = (projid_t)le32_to_cpu(raw->i_projid);
			F2FS_I(inode)->i_projid =
				make_kprojid(&init_user_ns, i_projid);
		}
	}

234
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
C
Chao Yu 已提交
235
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
236 237
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
C
Chao Yu 已提交
238
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
239 240
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
241

242
	F2FS_I(inode)->i_advise = raw->i_advise;
243
	F2FS_I(inode)->i_flags = le32_to_cpu(raw->i_flags);
244
	f2fs_set_inode_flags(inode);
245 246
	F2FS_I(inode)->i_gc_failures[GC_FAILURE_PIN] =
				le16_to_cpu(raw->i_gc_failures);
247

J
Jaegeuk Kim 已提交
248 249
	recover_inline_flags(inode, raw);

250 251
	f2fs_mark_inode_dirty_sync(inode, true);

252 253 254 255 256
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

J
Jaegeuk Kim 已提交
257 258 259
	f2fs_msg(inode->i_sb, KERN_NOTICE,
		"recover_inode: ino = %x, name = %s, inline = %x",
			ino_of_node(page), name, raw->i_inline);
260 261
}

262 263
/*
 * Step 1 of roll-forward recovery: follow the node chain that starts at the
 * next free block of the warm-node current segment and collect, on @head,
 * one fsync entry per inode that owns an fsync-marked dnode.
 *
 * For each entry, ->blkaddr records the last fsync-marked block seen for the
 * inode and ->last_dentry the last one that is an inode block carrying the
 * dentry mark. When @check_only is false, such a dentry-marked inode block
 * that has no entry yet is first passed to f2fs_recover_inode_page().
 *
 * Returns 0 when the chain ends normally, -EINVAL when the chain appears to
 * loop, or the negative errno reported by a helper.
 */
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
				bool check_only)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	unsigned int loop_cnt = 0;
	unsigned int free_blocks = MAIN_SEGS(sbi) * sbi->blocks_per_seg -
						valid_user_blocks(sbi);
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		page = f2fs_get_tmp_page(sbi, blkaddr);
		/*
		 * Return directly instead of breaking out of the loop: the
		 * common exit path calls f2fs_put_page() on 'page', and an
		 * ERR_PTR is not a page. The page of the previous iteration
		 * has already been released, so nothing is leaked.
		 */
		if (IS_ERR(page))
			return PTR_ERR(page);

		if (!is_recoverable_dnode(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry) {
			bool quota_inode = false;

			if (!check_only &&
					IS_INODE(page) && is_dent_dnode(page)) {
				err = f2fs_recover_inode_page(sbi, page);
				if (err)
					break;
				quota_inode = true;
			}

			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry = add_fsync_inode(sbi, head, ino_of_node(page),
								quota_inode);
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
				if (err == -ENOENT) {
					err = 0;
					goto next;
				}
				break;
			}
		}
		entry->blkaddr = blkaddr;

		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
next:
		/* sanity check in order to detect looped node chain */
		if (++loop_cnt >= free_blocks ||
			blkaddr == next_blkaddr_of_node(page)) {
			f2fs_msg(sbi->sb, KERN_NOTICE,
				"%s: detect looped node chain, "
				"blkaddr:%u, next:%u",
				__func__, blkaddr, next_blkaddr_of_node(page));
			err = -EINVAL;
			break;
		}

		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);

		f2fs_ra_meta_pages_cond(sbi, blkaddr);
	}
	/* every 'break' above leaves a valid page that is still held */
	f2fs_put_page(page, 1);
	return err;
}

348
static void destroy_fsync_dnodes(struct list_head *head, int drop)
349
{
350 351
	struct fsync_inode_entry *entry, *tmp;

352
	list_for_each_entry_safe(entry, tmp, head, list)
353
		del_fsync_inode(entry, drop);
354 355
}

356
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
357
			block_t blkaddr, struct dnode_of_data *dn)
358 359 360
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
361
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
362
	struct f2fs_summary_block *sum_node;
363
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
364
	struct page *sum_page, *node_page;
365
	struct dnode_of_data tdn = *dn;
366
	nid_t ino, nid;
367
	struct inode *inode;
368
	unsigned int offset;
369 370 371 372 373
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
374
		return 0;
375 376

	/* Get the previous summary */
377
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
378 379 380
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
381
			goto got_it;
382 383 384
		}
	}

C
Chao Yu 已提交
385
	sum_page = f2fs_get_sum_page(sbi, segno);
J
Jaegeuk Kim 已提交
386 387 388 389
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
390 391 392 393
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
394 395
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
396
		tdn.node_page = dn->inode_page;
397
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
398
		goto truncate_out;
399
	} else if (dn->nid == nid) {
400
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
401
		goto truncate_out;
402 403
	}

404
	/* Get the node page */
C
Chao Yu 已提交
405
	node_page = f2fs_get_node_page(sbi, nid);
406 407
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
408 409

	offset = ofs_of_node(node_page);
410 411 412
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

413
	if (ino != dn->inode->i_ino) {
C
Chao Yu 已提交
414 415
		int ret;

416
		/* Deallocate previous index in the node page */
417
		inode = f2fs_iget_retry(sbi->sb, ino);
418 419
		if (IS_ERR(inode))
			return PTR_ERR(inode);
C
Chao Yu 已提交
420 421 422 423 424 425

		ret = dquot_initialize(inode);
		if (ret) {
			iput(inode);
			return ret;
		}
426 427 428
	} else {
		inode = dn->inode;
	}
429

C
Chao Yu 已提交
430 431
	bidx = f2fs_start_bidx_of_node(offset, inode) +
				le16_to_cpu(sum.ofs_in_node);
432

433 434 435 436 437 438 439 440
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
C
Chao Yu 已提交
441
	if (f2fs_get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
442 443 444
		goto out;

	if (tdn.data_blkaddr == blkaddr)
C
Chao Yu 已提交
445
		f2fs_truncate_data_blocks_range(&tdn, 1);
446 447 448 449

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
450
		iput(inode);
451 452 453 454 455
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
456 457
	if (datablock_addr(tdn.inode, tdn.node_page,
					tdn.ofs_in_node) == blkaddr)
C
Chao Yu 已提交
458
		f2fs_truncate_data_blocks_range(&tdn, 1);
459 460
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
461
	return 0;
462 463
}

464
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
S
Sheng Yong 已提交
465
					struct page *page)
466 467 468
{
	struct dnode_of_data dn;
	struct node_info ni;
469
	unsigned int start, end;
470
	int err = 0, recovered = 0;
471

472 473
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
C
Chao Yu 已提交
474
		f2fs_recover_inline_xattr(inode, page);
475
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
C
Chao Yu 已提交
476
		err = f2fs_recover_xattr_data(inode, page);
477 478
		if (!err)
			recovered++;
479
		goto out;
480
	}
481

482
	/* step 2: recover inline data */
C
Chao Yu 已提交
483
	if (f2fs_recover_inline_data(inode, page))
484 485
		goto out;

486
	/* step 3: recover data indices */
C
Chao Yu 已提交
487
	start = f2fs_start_bidx_of_node(ofs_of_node(page), inode);
488
	end = start + ADDRS_PER_PAGE(page, inode);
489 490

	set_new_dnode(&dn, inode, NULL, NULL, 0);
491
retry_dn:
C
Chao Yu 已提交
492
	err = f2fs_get_dnode_of_data(&dn, start, ALLOC_NODE);
493 494 495 496 497
	if (err) {
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
498
		goto out;
499
	}
500

501
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
502

503 504 505 506
	err = f2fs_get_node_info(sbi, dn.nid, &ni);
	if (err)
		goto err;

507
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
508 509 510 511 512 513

	if (ofs_of_node(dn.node_page) != ofs_of_node(page)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"Inconsistent ofs_of_node, ino:%lu, ofs:%u, %u",
			inode->i_ino, ofs_of_node(dn.node_page),
			ofs_of_node(page));
C
Chao Yu 已提交
514
		err = -EFSCORRUPTED;
515 516
		goto err;
	}
517

518
	for (; start < end; start++, dn.ofs_in_node++) {
519 520
		block_t src, dest;

521 522
		src = datablock_addr(dn.inode, dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(dn.inode, page, dn.ofs_in_node);
523

524 525 526 527 528 529
		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
C
Chao Yu 已提交
530
			f2fs_truncate_data_blocks_range(&dn, 1);
531 532 533
			continue;
		}

534
		if (!file_keep_isize(inode) &&
535 536 537
			(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
			f2fs_i_size_write(inode,
				(loff_t)(start + 1) << PAGE_SHIFT);
538

539 540 541 542 543
		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
C
Chao Yu 已提交
544 545
			f2fs_truncate_data_blocks_range(&dn, 1);
			f2fs_reserve_new_block(&dn);
546 547 548 549
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
550
		if (f2fs_is_valid_blkaddr(sbi, dest, META_POR)) {
551

552
			if (src == NULL_ADDR) {
C
Chao Yu 已提交
553
				err = f2fs_reserve_new_block(&dn);
554 555
				while (err &&
				       IS_ENABLED(CONFIG_F2FS_FAULT_INJECTION))
C
Chao Yu 已提交
556
					err = f2fs_reserve_new_block(&dn);
557
				/* We should not get -ENOSPC */
558
				f2fs_bug_on(sbi, err);
559 560
				if (err)
					goto err;
561
			}
562
retry_prev:
563
			/* Check the previous node page having this index */
564
			err = check_index_in_prev_nodes(sbi, dest, &dn);
565 566 567 568 569
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
570
				goto err;
571
			}
572 573

			/* write dummy data page */
574
			f2fs_replace_block(sbi, &dn, src, dest,
575
						ni.version, false, false);
576
			recovered++;
577 578 579 580 581 582 583
		}
	}

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
584
err:
585
	f2fs_put_dnode(&dn);
586
out:
C
Chris Fries 已提交
587
	f2fs_msg(sbi->sb, KERN_NOTICE,
588 589 590 591
		"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
		inode->i_ino,
		file_keep_isize(inode) ? "keep" : "recover",
		recovered, err);
592
	return err;
593 594
}

C
Chao Yu 已提交
595
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
596
		struct list_head *tmp_inode_list, struct list_head *dir_list)
597 598
{
	struct curseg_info *curseg;
599
	struct page *page = NULL;
600
	int err = 0;
601 602 603
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
604
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
605 606 607 608 609
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

610
		if (!f2fs_is_valid_blkaddr(sbi, blkaddr, META_POR))
611
			break;
612

C
Chao Yu 已提交
613
		f2fs_ra_meta_pages_cond(sbi, blkaddr);
614

C
Chao Yu 已提交
615
		page = f2fs_get_tmp_page(sbi, blkaddr);
616 617 618 619
		if (IS_ERR(page)) {
			err = PTR_ERR(page);
			break;
		}
620

621
		if (!is_recoverable_dnode(page)) {
622
			f2fs_put_page(page, 1);
623
			break;
624
		}
625

C
Chao Yu 已提交
626
		entry = get_fsync_inode(inode_list, ino_of_node(page));
627 628
		if (!entry)
			goto next;
629 630 631
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
632
		 * So, call recover_inode for the inode update.
633
		 */
634
		if (IS_INODE(page))
635 636
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
C
Chao Yu 已提交
637
			err = recover_dentry(entry->inode, page, dir_list);
638 639 640 641 642
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
S
Sheng Yong 已提交
643
		err = do_recover_data(sbi, entry->inode, page);
644 645
		if (err) {
			f2fs_put_page(page, 1);
646
			break;
647
		}
648

649
		if (entry->blkaddr == blkaddr)
650
			list_move_tail(&entry->list, tmp_inode_list);
651 652 653
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
654
		f2fs_put_page(page, 1);
655
	}
656
	if (!err)
C
Chao Yu 已提交
657
		f2fs_allocate_new_segments(sbi);
658
	return err;
659 660
}

C
Chao Yu 已提交
661
/*
 * Entry point of roll-forward recovery.
 *
 * With @check_only set, only report whether fsynced inodes exist: returns 1
 * if any were found, otherwise 0 or the error from the scan. Without it,
 * the fsynced data is recovered and, on success, a CP_RECOVERY checkpoint
 * is written; returns 0 or a negative errno.
 *
 * SB_RDONLY is cleared while recovery runs and the original s_flags are
 * restored before returning. With CONFIG_QUOTA, SB_ACTIVE is set and quota
 * files are enabled for the duration of the call.
 */
int f2fs_recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list, tmp_inode_list, dir_list;
	unsigned long s_flags = sbi->sb->s_flags;
	bool need_writecp = false;
	int found = 0;
	int err;
#ifdef CONFIG_QUOTA
	int quota_enabled;
#endif

	if (s_flags & SB_RDONLY) {
		f2fs_msg(sbi->sb, KERN_INFO,
				"recover fsync data on readonly fs");
		sbi->sb->s_flags &= ~SB_RDONLY;
	}

#ifdef CONFIG_QUOTA
	/* Needed for iput() to work correctly and not trash data */
	sbi->sb->s_flags |= SB_ACTIVE;
	/* Turn on quotas so that they are updated correctly */
	quota_enabled = f2fs_enable_quota_files(sbi, s_flags & SB_RDONLY);
#endif

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab) {
		err = -ENOMEM;
		goto out;
	}

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&tmp_inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list, check_only);
	if (err || list_empty(&inode_list))
		goto skip;

	if (check_only) {
		found = 1;
		goto skip;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &tmp_inode_list, &dir_list);
	if (err) {
		/* restore s_flags to let iput() trash data */
		sbi->sb->s_flags = s_flags;
	} else {
		f2fs_bug_on(sbi, !list_empty(&inode_list));
	}
skip:
	destroy_fsync_dnodes(&inode_list, err);
	destroy_fsync_dnodes(&tmp_inode_list, err);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (!err) {
		clear_sbi_flag(sbi, SBI_POR_DOING);
	} else {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list, err);

	if (need_writecp) {
		set_sbi_flag(sbi, SBI_IS_RECOVERED);

		if (!err) {
			struct cp_control cpc = {
				.reason = CP_RECOVERY,
			};

			err = f2fs_write_checkpoint(sbi, &cpc);
		}
	}

	kmem_cache_destroy(fsync_entry_slab);
out:
#ifdef CONFIG_QUOTA
	/* Turn quotas off */
	if (quota_enabled)
		f2fs_quota_off_umount(sbi->sb);
#endif
	sbi->sb->s_flags = s_flags; /* Restore SB_RDONLY status */

	if (found)
		return found;
	return err;
}