/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/* slab cache backing the struct fsync_inode_entry objects used in this file */
static struct kmem_cache *fsync_entry_slab;

/*
 * Return true when last_valid_block_count plus alloc_valid_block_count
 * does not exceed user_block_count, false otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
			<= sbi->user_block_count;
}

/*
 * Look up the fsync inode entry on @head whose inode number is @ino.
 *
 * Returns the matching entry, or NULL when no entry on the list refers
 * to that inode number.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list) {
		if (entry->inode->i_ino == ino)
			return entry;
	}

	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87 88 89 90
	if (file_enc_name(inode)) {
		iput(dir);
		return 0;
	}

91 92
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
93 94 95 96

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
97
		goto out_err;
98
	}
J
Jaegeuk Kim 已提交
99 100
retry:
	de = f2fs_find_entry(dir, &name, &page);
101
	if (de && inode->i_ino == le32_to_cpu(de->ino))
102
		goto out_unmap_put;
103

J
Jaegeuk Kim 已提交
104 105 106 107
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
108 109
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
110
				err = -EEXIST;
111 112
			goto out_unmap_put;
		}
113
		err = acquire_orphan_inode(F2FS_I_SB(inode));
114 115 116
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
117
		}
118
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
119 120
		iput(einode);
		goto retry;
121
	}
122
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
123 124 125 126 127 128 129 130 131 132
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

133 134 135
	goto out;

out_unmap_put:
136
	f2fs_dentry_kunmap(dir, page);
137
	f2fs_put_page(page, 0);
138 139
out_err:
	iput(dir);
140
out:
C
Chris Fries 已提交
141 142 143
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
144
			IS_ERR(dir) ? 0 : dir->i_ino, err);
145 146 147
	return err;
}

148
static void recover_inode(struct inode *inode, struct page *page)
149
{
150
	struct f2fs_inode *raw = F2FS_INODE(page);
151
	char *name;
152 153 154 155 156 157 158 159 160

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
161

162 163 164 165 166
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

167
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
168
			ino_of_node(page), name);
169 170
}

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *ri = F2FS_INODE(ipage);
	struct timespec disk;

	if (!IS_INODE(ipage))
		return true;

	disk.tv_sec = le64_to_cpu(ri->i_ctime);
	disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	if (timespec_compare(&inode->i_ctime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_atime);
	disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	if (timespec_compare(&inode->i_atime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_mtime);
	disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	if (timespec_compare(&inode->i_mtime, &disk) > 0)
		return false;

	return true;
}

197 198
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
199
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
200
	struct curseg_info *curseg;
201
	struct page *page = NULL;
202 203 204 205 206
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
207
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
208

209
	ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
210

211 212 213
	while (1) {
		struct fsync_inode_entry *entry;

214
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
215
			return 0;
216

217
		page = get_tmp_page(sbi, blkaddr);
218

219
		if (cp_ver != cpver_of_node(page))
220
			break;
221 222 223 224 225

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
226 227 228 229
		if (entry) {
			if (!is_same_inode(entry->inode, page))
				goto next;
		} else {
230
			if (IS_INODE(page) && is_dent_dnode(page)) {
231 232
				err = recover_inode_page(sbi, page);
				if (err)
233
					break;
234 235 236
			}

			/* add this fsync inode to the list */
237
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
238 239
			if (!entry) {
				err = -ENOMEM;
240
				break;
241
			}
242 243 244 245
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
246 247 248
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
249
				kmem_cache_free(fsync_entry_slab, entry);
250 251
				if (err == -ENOENT) {
					err = 0;
252
					goto next;
253
				}
254
				break;
255
			}
256
			list_add_tail(&entry->list, head);
257
		}
J
Jaegeuk Kim 已提交
258 259
		entry->blkaddr = blkaddr;

260 261 262 263 264
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
265 266 267
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
268
		f2fs_put_page(page, 1);
269 270

		ra_meta_pages_cond(sbi, blkaddr);
271
	}
272
	f2fs_put_page(page, 1);
273 274 275
	return err;
}

276
static void destroy_fsync_dnodes(struct list_head *head)
277
{
278 279 280
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
281 282 283 284 285 286
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

287
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
288
			block_t blkaddr, struct dnode_of_data *dn)
289 290 291
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
292
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
293
	struct f2fs_summary_block *sum_node;
294
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
295
	struct page *sum_page, *node_page;
296
	struct dnode_of_data tdn = *dn;
297
	nid_t ino, nid;
298
	struct inode *inode;
299
	unsigned int offset;
300 301 302 303 304
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
305
		return 0;
306 307 308 309 310 311

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
312
			goto got_it;
313 314 315
		}
	}

J
Jaegeuk Kim 已提交
316 317 318 319 320
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
321 322 323 324
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
325 326
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
327
		tdn.node_page = dn->inode_page;
328
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
329
		goto truncate_out;
330
	} else if (dn->nid == nid) {
331
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
332
		goto truncate_out;
333 334
	}

335
	/* Get the node page */
336
	node_page = get_node_page(sbi, nid);
337 338
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
339 340

	offset = ofs_of_node(node_page);
341 342 343
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

344 345 346 347 348 349 350 351
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
352

353
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
354
			le16_to_cpu(sum.ofs_in_node);
355

356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
373
		iput(inode);
374 375 376 377 378 379 380 381 382
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
383
	return 0;
384 385
}

386
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
387 388
					struct page *page, block_t blkaddr)
{
389
	struct f2fs_inode_info *fi = F2FS_I(inode);
390 391 392
	unsigned int start, end;
	struct dnode_of_data dn;
	struct node_info ni;
393
	int err = 0, recovered = 0;
394

395 396 397 398
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
399 400 401 402
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
403
		recover_xattr_data(inode, page, blkaddr);
404
		goto out;
405
	}
406

407 408
	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
409 410
		goto out;

411
	/* step 3: recover data indices */
412
	start = start_bidx_of_node(ofs_of_node(page), fi);
413
	end = start + ADDRS_PER_PAGE(page, fi);
414 415

	set_new_dnode(&dn, inode, NULL, NULL, 0);
416

417
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
418
	if (err)
419
		goto out;
420

421
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
422 423

	get_node_info(sbi, dn.nid, &ni);
424 425
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
426

427
	for (; start < end; start++, dn.ofs_in_node++) {
428 429 430 431 432
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455
		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			err = reserve_new_block(&dn);
			f2fs_bug_on(sbi, err);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {
456

457
			if (src == NULL_ADDR) {
458
				err = reserve_new_block(&dn);
459
				/* We should not get -ENOSPC */
460
				f2fs_bug_on(sbi, err);
461 462 463
			}

			/* Check the previous node page having this index */
464 465 466
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;
467 468

			/* write dummy data page */
469 470
			f2fs_replace_block(sbi, &dn, src, dest,
							ni.version, false);
471
			recovered++;
472 473 474 475 476 477 478 479 480 481
		}
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
482
err:
483
	f2fs_put_dnode(&dn);
484
out:
C
Chris Fries 已提交
485 486 487
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
488
	return err;
489 490
}

C
Chao Yu 已提交
491
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
492
{
493
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
494
	struct curseg_info *curseg;
495
	struct page *page = NULL;
496
	int err = 0;
497 498 499
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
500
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
501 502 503 504 505
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

506
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
507
			break;
508

509 510
		ra_meta_pages_cond(sbi, blkaddr);

511
		page = get_tmp_page(sbi, blkaddr);
512

513 514
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
515
			break;
516
		}
517 518 519 520

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
521 522 523
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
524
		 * So, call recover_inode for the inode update.
525
		 */
526 527 528 529 530 531 532 533 534
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
535
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
536 537
		if (err) {
			f2fs_put_page(page, 1);
538
			break;
539
		}
540 541 542 543 544 545 546 547 548

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
549
		f2fs_put_page(page, 1);
550
	}
551 552 553
	if (!err)
		allocate_new_segments(sbi);
	return err;
554 555
}

556
int recover_fsync_data(struct f2fs_sb_info *sbi)
557
{
558
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
559
	struct list_head inode_list;
560
	block_t blkaddr;
561
	int err;
H
Haicheng Li 已提交
562
	bool need_writecp = false;
563 564

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
565
			sizeof(struct fsync_inode_entry));
566
	if (!fsync_entry_slab)
567
		return -ENOMEM;
568 569 570

	INIT_LIST_HEAD(&inode_list);

571 572 573
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

574 575
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

576
	/* step #1: find fsynced inode numbers */
577 578
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
579 580 581 582 583
		goto out;

	if (list_empty(&inode_list))
		goto out;

H
Haicheng Li 已提交
584
	need_writecp = true;
585

586
	/* step #2: recover data */
C
Chao Yu 已提交
587
	err = recover_data(sbi, &inode_list);
588
	if (!err)
589
		f2fs_bug_on(sbi, !list_empty(&inode_list));
590
out:
591
	destroy_fsync_dnodes(&inode_list);
592
	kmem_cache_destroy(fsync_entry_slab);
593

594 595
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
C
Chao Yu 已提交
596
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
597

598 599 600 601 602
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

603
	clear_sbi_flag(sbi, SBI_POR_DOING);
604
	if (err) {
C
Chao Yu 已提交
605 606 607 608
		bool invalidate = false;

		if (discard_next_dnode(sbi, blkaddr))
			invalidate = true;
609 610 611 612

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
C
Chao Yu 已提交
613 614 615 616 617 618

		/* invalidate temporary meta page */
		if (invalidate)
			invalidate_mapping_pages(META_MAPPING(sbi),
							blkaddr, blkaddr);

619 620
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
621
	} else if (need_writecp) {
622
		struct cp_control cpc = {
623
			.reason = CP_RECOVERY,
624
		};
625
		mutex_unlock(&sbi->cp_mutex);
C
Chao Yu 已提交
626
		err = write_checkpoint(sbi, &cpc);
627 628
	} else {
		mutex_unlock(&sbi->cp_mutex);
629
	}
630
	return err;
631
}