/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache for struct fsync_inode_entry objects; created and destroyed
 * inside recover_fsync_data().
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Tell whether roll-forward recovery still fits: returns true as long as
 * last_valid_block_count + alloc_valid_block_count does not exceed
 * user_block_count, false otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
						<= sbi->user_block_count;
}

/*
 * Search the fsync inode list @head for the entry whose inode number is
 * @ino.  Returns that entry, or NULL when the list holds no such inode.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *cur;

	list_for_each_entry(cur, head, list) {
		if (cur->inode->i_ino != ino)
			continue;
		return cur;
	}

	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87 88 89 90
	if (file_enc_name(inode)) {
		iput(dir);
		return 0;
	}

91 92
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
93 94 95 96

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
97
		goto out_err;
98
	}
J
Jaegeuk Kim 已提交
99 100
retry:
	de = f2fs_find_entry(dir, &name, &page);
101
	if (de && inode->i_ino == le32_to_cpu(de->ino))
102
		goto out_unmap_put;
103

J
Jaegeuk Kim 已提交
104 105 106 107
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
108 109
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
110
				err = -EEXIST;
111 112
			goto out_unmap_put;
		}
113
		err = acquire_orphan_inode(F2FS_I_SB(inode));
114 115 116
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
117
		}
118
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
119 120
		iput(einode);
		goto retry;
121
	}
122
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
123 124 125 126 127 128 129 130 131 132
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

133 134 135
	goto out;

out_unmap_put:
136
	f2fs_dentry_kunmap(dir, page);
137
	f2fs_put_page(page, 0);
138 139
out_err:
	iput(dir);
140
out:
C
Chris Fries 已提交
141 142 143
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
144
			IS_ERR(dir) ? 0 : dir->i_ino, err);
145 146 147
	return err;
}

148
static void recover_inode(struct inode *inode, struct page *page)
149
{
150
	struct f2fs_inode *raw = F2FS_INODE(page);
151
	char *name;
152 153 154 155 156 157 158 159 160

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
161

162 163 164 165 166
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

167
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
168
			ino_of_node(page), name);
169 170
}

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *ri = F2FS_INODE(ipage);
	struct timespec disk;

	if (!IS_INODE(ipage))
		return true;

	disk.tv_sec = le64_to_cpu(ri->i_ctime);
	disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	if (timespec_compare(&inode->i_ctime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_atime);
	disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	if (timespec_compare(&inode->i_atime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_mtime);
	disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	if (timespec_compare(&inode->i_mtime, &disk) > 0)
		return false;

	return true;
}

197 198
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
199
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
200
	struct curseg_info *curseg;
201
	struct page *page = NULL;
202 203 204 205 206
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
207
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
208

209
	ra_meta_pages(sbi, blkaddr, 1, META_POR, true);
210

211 212 213
	while (1) {
		struct fsync_inode_entry *entry;

214
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
215
			return 0;
216

217
		page = get_tmp_page(sbi, blkaddr);
218

219
		if (cp_ver != cpver_of_node(page))
220
			break;
221 222 223 224 225

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
226 227 228 229
		if (entry) {
			if (!is_same_inode(entry->inode, page))
				goto next;
		} else {
230
			if (IS_INODE(page) && is_dent_dnode(page)) {
231 232
				err = recover_inode_page(sbi, page);
				if (err)
233
					break;
234 235 236
			}

			/* add this fsync inode to the list */
237
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
238 239
			if (!entry) {
				err = -ENOMEM;
240
				break;
241
			}
242 243 244 245
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
246 247 248
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
249
				kmem_cache_free(fsync_entry_slab, entry);
250 251
				if (err == -ENOENT) {
					err = 0;
252
					goto next;
253
				}
254
				break;
255
			}
256
			list_add_tail(&entry->list, head);
257
		}
J
Jaegeuk Kim 已提交
258 259
		entry->blkaddr = blkaddr;

260 261 262 263 264
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
265 266 267
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
268
		f2fs_put_page(page, 1);
269 270

		ra_meta_pages_cond(sbi, blkaddr);
271
	}
272
	f2fs_put_page(page, 1);
273 274 275
	return err;
}

276
static void destroy_fsync_dnodes(struct list_head *head)
277
{
278 279 280
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
281 282 283 284 285 286
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

287
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
288
			block_t blkaddr, struct dnode_of_data *dn)
289 290 291
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
292
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
293
	struct f2fs_summary_block *sum_node;
294
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
295
	struct page *sum_page, *node_page;
296
	struct dnode_of_data tdn = *dn;
297
	nid_t ino, nid;
298
	struct inode *inode;
299
	unsigned int offset;
300 301 302 303 304
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
305
		return 0;
306 307 308 309 310 311

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
312
			goto got_it;
313 314 315
		}
	}

J
Jaegeuk Kim 已提交
316 317 318 319 320
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
321 322 323 324
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
325 326
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
327
		tdn.node_page = dn->inode_page;
328
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
329
		goto truncate_out;
330
	} else if (dn->nid == nid) {
331
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
332
		goto truncate_out;
333 334
	}

335
	/* Get the node page */
336
	node_page = get_node_page(sbi, nid);
337 338
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
339 340

	offset = ofs_of_node(node_page);
341 342 343
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

344 345 346 347 348 349 350 351
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
352

353
	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
354

355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
372
		iput(inode);
373 374 375 376 377 378 379 380 381
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
382
	return 0;
383 384
}

385
/*
 * Replay one fsynced node page (@page, read from @blkaddr) onto @inode.
 *
 * Step 1 recovers xattrs (inline ones for inode pages, or a whole xattr
 * node block), step 2 recovers inline data, and step 3 walks every data
 * index of the node and brings the live dnode in line with the recovered
 * one.  The number of blocks re-pointed with f2fs_replace_block() and the
 * result are logged before returning.
 *
 * Returns 0 on success or a negative errno.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int bidx, last;
	int nr_recovered = 0;
	int err = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	bidx = start_bidx_of_node(ofs_of_node(page), inode);
	last = bidx + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, bidx, ALLOC_NODE);
	if (err)
		goto out;

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; bidx < last; bidx++, dn.ofs_in_node++) {
		block_t src = datablock_addr(dn.node_page, dn.ofs_in_node);
		block_t dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		if (dest == NULL_ADDR) {
			/* dest is invalid, just invalidate src block */
			truncate_data_blocks_range(&dn, 1);
		} else if (dest == NEW_ADDR) {
			/*
			 * dest is reserved block, invalidate src block
			 * and then reserve one new block in dnode page.
			 */
			truncate_data_blocks_range(&dn, 1);
			err = reserve_new_block(&dn);
			f2fs_bug_on(sbi, err);
		} else if (is_valid_blkaddr(sbi, dest, META_POR)) {
			/* dest is valid block, try to recover from src to dest */
			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
			}

			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto put_dnode;

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			nr_recovered++;
		}
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	/* carry the recovered page's footer over to the live node page */
	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
put_dnode:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, nr_recovered, err);
	return err;
}

C
Chao Yu 已提交
489
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *head)
490
{
491
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
492
	struct curseg_info *curseg;
493
	struct page *page = NULL;
494
	int err = 0;
495 496 497
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
498
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
499 500 501 502 503
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

504
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
505
			break;
506

507 508
		ra_meta_pages_cond(sbi, blkaddr);

509
		page = get_tmp_page(sbi, blkaddr);
510

511 512
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
513
			break;
514
		}
515 516 517 518

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
519 520 521
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
522
		 * So, call recover_inode for the inode update.
523
		 */
524 525 526 527 528 529 530 531 532
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
533
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
534 535
		if (err) {
			f2fs_put_page(page, 1);
536
			break;
537
		}
538 539 540 541 542 543 544 545 546

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
547
		f2fs_put_page(page, 1);
548
	}
549 550 551
	if (!err)
		allocate_new_segments(sbi);
	return err;
552 553
}

554
/*
 * Entry point of roll-forward recovery.
 *
 * With cp_mutex held, the fsynced inodes are collected first; when
 * @check_only is set the function merely reports (by returning 1) that
 * there is something to recover.  Otherwise the data is replayed and, on
 * success, a CP_RECOVERY checkpoint is written.  On failure the node and
 * meta mappings are truncated, dirty meta pages are flushed and
 * CP_ERROR_FLAG is set on the checkpoint.
 *
 * Returns 1 for a positive @check_only probe, 0 on success, or a negative
 * errno (-ENOMEM if the entry slab cannot be created).
 */
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	LIST_HEAD(inode_list);
	bool need_writecp = false;
	block_t blkaddr;
	int ret = 0;
	int err;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list);
	if (!err && !list_empty(&inode_list)) {
		if (check_only) {
			ret = 1;
		} else {
			need_writecp = true;

			/* step #2: recover data */
			err = recover_data(sbi, &inode_list);
			if (!err)
				f2fs_bug_on(sbi, !list_empty(&inode_list));
		}
	}

	destroy_fsync_dnodes(&inode_list);
	kmem_cache_destroy(fsync_entry_slab);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);

	if (err) {
		bool invalidate = discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);

		/* invalidate temporary meta page */
		if (invalidate)
			invalidate_mapping_pages(META_MAPPING(sbi),
							blkaddr, blkaddr);

		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
	} else if (need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};

		/* the mutex is released before write_checkpoint() is called */
		mutex_unlock(&sbi->cp_mutex);
		err = write_checkpoint(sbi, &cpc);
	} else {
		mutex_unlock(&sbi->cp_mutex);
	}

	if (ret)
		return ret;
	return err;
}