/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache backing struct fsync_inode_entry objects. It is created at
 * the start of recover_fsync_data() and destroyed before that function
 * returns, so it is only valid while a recovery pass is running.
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Tell whether the block accounting leaves room for roll-forward recovery.
 *
 * Returns true when last_valid_block_count plus alloc_valid_block_count
 * does not exceed user_block_count, false otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
						<= sbi->user_block_count;
}

/*
 * Look up the fsync inode entry for @ino on the list @head.
 *
 * Returns the first entry whose inode number equals @ino, or NULL when
 * the list holds no such entry.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list) {
		if (entry->inode->i_ino == ino)
			return entry;
	}

	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87 88 89 90
	if (file_enc_name(inode)) {
		iput(dir);
		return 0;
	}

91 92
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
93 94 95 96

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
97
		goto out_err;
98
	}
J
Jaegeuk Kim 已提交
99 100
retry:
	de = f2fs_find_entry(dir, &name, &page);
101
	if (de && inode->i_ino == le32_to_cpu(de->ino))
102
		goto out_unmap_put;
103

J
Jaegeuk Kim 已提交
104 105 106 107
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
108 109
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
110
				err = -EEXIST;
111 112
			goto out_unmap_put;
		}
113
		err = acquire_orphan_inode(F2FS_I_SB(inode));
114 115 116
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
117
		}
118
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
119 120
		iput(einode);
		goto retry;
121
	}
122
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
123 124 125 126 127 128 129 130 131 132
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

133 134 135
	goto out;

out_unmap_put:
136
	f2fs_dentry_kunmap(dir, page);
137
	f2fs_put_page(page, 0);
138 139
out_err:
	iput(dir);
140
out:
C
Chris Fries 已提交
141 142 143
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
144
			IS_ERR(dir) ? 0 : dir->i_ino, err);
145 146 147
	return err;
}

148
static void recover_inode(struct inode *inode, struct page *page)
149
{
150
	struct f2fs_inode *raw = F2FS_INODE(page);
151
	char *name;
152 153 154 155 156 157 158 159 160

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
161

162 163 164 165 166
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

167
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
168
			ino_of_node(page), name);
169 170
}

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *ri = F2FS_INODE(ipage);
	struct timespec disk;

	if (!IS_INODE(ipage))
		return true;

	disk.tv_sec = le64_to_cpu(ri->i_ctime);
	disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	if (timespec_compare(&inode->i_ctime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_atime);
	disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	if (timespec_compare(&inode->i_atime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_mtime);
	disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	if (timespec_compare(&inode->i_mtime, &disk) > 0)
		return false;

	return true;
}

197 198
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
199
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
200
	struct curseg_info *curseg;
201
	struct page *page = NULL;
202 203 204 205 206
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
207
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
208

209
	ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
210

211 212 213
	while (1) {
		struct fsync_inode_entry *entry;

214
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
215
			return 0;
216

217
		page = get_tmp_page(sbi, blkaddr);
218

219
		if (cp_ver != cpver_of_node(page))
220
			break;
221 222 223 224 225

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
226 227 228 229
		if (entry) {
			if (!is_same_inode(entry->inode, page))
				goto next;
		} else {
230
			if (IS_INODE(page) && is_dent_dnode(page)) {
231 232
				err = recover_inode_page(sbi, page);
				if (err)
233
					break;
234 235 236
			}

			/* add this fsync inode to the list */
237
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
238 239
			if (!entry) {
				err = -ENOMEM;
240
				break;
241
			}
242 243 244 245
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
246 247 248
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
249
				kmem_cache_free(fsync_entry_slab, entry);
250 251
				if (err == -ENOENT) {
					err = 0;
252
					goto next;
253
				}
254
				break;
255
			}
256
			list_add_tail(&entry->list, head);
257
		}
J
Jaegeuk Kim 已提交
258 259
		entry->blkaddr = blkaddr;

260 261
		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
262 263 264
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
265
		f2fs_put_page(page, 1);
266 267

		ra_meta_pages_cond(sbi, blkaddr);
268
	}
269
	f2fs_put_page(page, 1);
270 271 272
	return err;
}

273
static void destroy_fsync_dnodes(struct list_head *head)
274
{
275 276 277
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
278 279 280 281 282 283
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

284
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
285
			block_t blkaddr, struct dnode_of_data *dn)
286 287 288
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
289
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
290
	struct f2fs_summary_block *sum_node;
291
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
292
	struct page *sum_page, *node_page;
293
	struct dnode_of_data tdn = *dn;
294
	nid_t ino, nid;
295
	struct inode *inode;
296
	unsigned int offset;
297 298 299 300 301
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
302
		return 0;
303 304 305 306 307 308

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
309
			goto got_it;
310 311 312
		}
	}

J
Jaegeuk Kim 已提交
313 314 315 316 317
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
318 319 320 321
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
322 323
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
324
		tdn.node_page = dn->inode_page;
325
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
326
		goto truncate_out;
327
	} else if (dn->nid == nid) {
328
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
329
		goto truncate_out;
330 331
	}

332
	/* Get the node page */
333
	node_page = get_node_page(sbi, nid);
334 335
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
336 337

	offset = ofs_of_node(node_page);
338 339 340
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

341 342 343 344 345 346 347 348
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
349

350
	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
351

352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
369
		iput(inode);
370 371 372 373 374 375 376 377 378
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
379
	return 0;
380 381
}

382
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
383 384 385 386
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
387
	unsigned int start, end;
388
	int err = 0, recovered = 0;
389

390 391 392 393
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
394 395 396 397
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
398
		recover_xattr_data(inode, page, blkaddr);
399
		goto out;
400
	}
401

402 403
	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
404 405
		goto out;

406
	/* step 3: recover data indices */
407 408
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);
409 410

	set_new_dnode(&dn, inode, NULL, NULL, 0);
411

412
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
413
	if (err)
414
		goto out;
415

416
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
417 418

	get_node_info(sbi, dn.nid, &ni);
419 420
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
421

422
	for (; start < end; start++, dn.ofs_in_node++) {
423 424 425 426 427
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450
		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			err = reserve_new_block(&dn);
			f2fs_bug_on(sbi, err);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {
451

452
			if (src == NULL_ADDR) {
453
				err = reserve_new_block(&dn);
454
				/* We should not get -ENOSPC */
455
				f2fs_bug_on(sbi, err);
456 457 458
			}

			/* Check the previous node page having this index */
459 460 461
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;
462 463

			/* write dummy data page */
464
			f2fs_replace_block(sbi, &dn, src, dest,
465
						ni.version, false, false);
466
			recovered++;
467 468 469 470 471 472 473 474 475 476
		}
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
477
err:
478
	f2fs_put_dnode(&dn);
479
out:
C
Chris Fries 已提交
480 481 482
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
483
	return err;
484 485
}

C
Chao Yu 已提交
486
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
487
{
488
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
489
	struct curseg_info *curseg;
490
	struct page *page = NULL;
491
	int err = 0;
492 493 494
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
495
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
496 497 498 499 500
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

501
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
502
			break;
503

504 505
		ra_meta_pages_cond(sbi, blkaddr);

506
		page = get_tmp_page(sbi, blkaddr);
507

508 509
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
510
			break;
511
		}
512 513 514 515

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
516 517 518
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
519
		 * So, call recover_inode for the inode update.
520
		 */
521
		if (IS_INODE(page))
522 523 524 525 526 527 528 529
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
530
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
531 532
		if (err) {
			f2fs_put_page(page, 1);
533
			break;
534
		}
535 536 537 538 539 540 541 542 543

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
544
		f2fs_put_page(page, 1);
545
	}
546 547 548
	if (!err)
		allocate_new_segments(sbi);
	return err;
549 550
}

551
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
552
{
553
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
554
	struct list_head inode_list;
555
	block_t blkaddr;
556
	int err;
557
	int ret = 0;
H
Haicheng Li 已提交
558
	bool need_writecp = false;
559 560

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
561
			sizeof(struct fsync_inode_entry));
562
	if (!fsync_entry_slab)
563
		return -ENOMEM;
564 565 566

	INIT_LIST_HEAD(&inode_list);

567 568 569
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

570 571
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

572
	/* step #1: find fsynced inode numbers */
573
	err = find_fsync_dnodes(sbi, &inode_list);
574
	if (err || list_empty(&inode_list))
575 576
		goto out;

577 578
	if (check_only) {
		ret = 1;
579
		goto out;
580
	}
581

H
Haicheng Li 已提交
582
	need_writecp = true;
583

584
	/* step #2: recover data */
C
Chao Yu 已提交
585
	err = recover_data(sbi, &inode_list);
586
	if (!err)
587
		f2fs_bug_on(sbi, !list_empty(&inode_list));
588
out:
589
	destroy_fsync_dnodes(&inode_list);
590
	kmem_cache_destroy(fsync_entry_slab);
591

592 593
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
594
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
595

596 597 598 599 600
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

601
	clear_sbi_flag(sbi, SBI_POR_DOING);
602
	if (err) {
C
Chao Yu 已提交
603 604 605 606
		bool invalidate = false;

		if (discard_next_dnode(sbi, blkaddr))
			invalidate = true;
607 608 609 610

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
C
Chao Yu 已提交
611 612 613 614 615 616

		/* invalidate temporary meta page */
		if (invalidate)
			invalidate_mapping_pages(META_MAPPING(sbi),
							blkaddr, blkaddr);

617 618
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
619
	} else if (need_writecp) {
620
		struct cp_control cpc = {
621
			.reason = CP_RECOVERY,
622
		};
623
		mutex_unlock(&sbi->cp_mutex);
C
Chao Yu 已提交
624
		err = write_checkpoint(sbi, &cpc);
625 626
	} else {
		mutex_unlock(&sbi->cp_mutex);
627
	}
628
	return ret ? ret: err;
629
}