/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x).
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/* Slab cache for struct fsync_inode_entry; created in recover_fsync_data(). */
static struct kmem_cache *fsync_entry_slab;

/*
 * Tell whether there is still room for roll-forward recovery.
 *
 * Returns true while last_valid_block_count plus alloc_valid_block_count
 * does not exceed user_block_count, false once the sum is larger.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
			<= sbi->user_block_count;
}

/*
 * Look up the entry whose inode number equals @ino in the list @head.
 *
 * Returns the matching entry, or NULL when no entry in the list refers to
 * an inode with that number.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91
static struct fsync_inode_entry *add_fsync_inode(struct list_head *head,
							struct inode *inode)
{
	struct fsync_inode_entry *entry;

	entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	if (!entry)
		return NULL;

	entry->inode = inode;
	list_add_tail(&entry->list, head);

	return entry;
}

/*
 * Tear down one list entry: drop the inode reference it holds, unlink it
 * from its list and hand the memory back to fsync_entry_slab.
 */
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	struct inode *held = entry->inode;

	iput(held);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

C
Chao Yu 已提交
92 93
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
94
{
95
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
96
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
97
	struct f2fs_dir_entry *de;
98
	struct qstr name;
99
	struct page *page;
J
Jaegeuk Kim 已提交
100
	struct inode *dir, *einode;
C
Chao Yu 已提交
101
	struct fsync_inode_entry *entry;
102 103
	int err = 0;

C
Chao Yu 已提交
104 105 106 107 108 109 110 111 112 113 114 115 116 117
	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
		dir = f2fs_iget(inode->i_sb, pino);
		if (IS_ERR(dir)) {
			err = PTR_ERR(dir);
			goto out;
		}

		entry = add_fsync_inode(dir_list, dir);
		if (!entry) {
			err = -ENOMEM;
			iput(dir);
			goto out;
		}
118 119
	}

C
Chao Yu 已提交
120 121 122
	dir = entry->inode;

	if (file_enc_name(inode))
123 124
		return 0;

125 126
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
127 128 129 130

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
C
Chao Yu 已提交
131
		goto out;
132
	}
J
Jaegeuk Kim 已提交
133 134
retry:
	de = f2fs_find_entry(dir, &name, &page);
135
	if (de && inode->i_ino == le32_to_cpu(de->ino))
136
		goto out_unmap_put;
137

J
Jaegeuk Kim 已提交
138 139 140 141
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
142 143
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
144
				err = -EEXIST;
145 146
			goto out_unmap_put;
		}
147
		err = acquire_orphan_inode(F2FS_I_SB(inode));
148 149 150
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
151
		}
152
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
153 154
		iput(einode);
		goto retry;
155
	}
156
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
157

158 159 160
	goto out;

out_unmap_put:
161
	f2fs_dentry_kunmap(dir, page);
162
	f2fs_put_page(page, 0);
163
out:
C
Chris Fries 已提交
164 165 166
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
167
			IS_ERR(dir) ? 0 : dir->i_ino, err);
168 169 170
	return err;
}

171
static void recover_inode(struct inode *inode, struct page *page)
172
{
173
	struct f2fs_inode *raw = F2FS_INODE(page);
174
	char *name;
175 176 177 178 179 180 181 182 183

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
184

185 186 187 188 189
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

190
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
191
			ino_of_node(page), name);
192 193
}

194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
static bool is_same_inode(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *ri = F2FS_INODE(ipage);
	struct timespec disk;

	if (!IS_INODE(ipage))
		return true;

	disk.tv_sec = le64_to_cpu(ri->i_ctime);
	disk.tv_nsec = le32_to_cpu(ri->i_ctime_nsec);
	if (timespec_compare(&inode->i_ctime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_atime);
	disk.tv_nsec = le32_to_cpu(ri->i_atime_nsec);
	if (timespec_compare(&inode->i_atime, &disk) > 0)
		return false;

	disk.tv_sec = le64_to_cpu(ri->i_mtime);
	disk.tv_nsec = le32_to_cpu(ri->i_mtime_nsec);
	if (timespec_compare(&inode->i_mtime, &disk) > 0)
		return false;

	return true;
}

220 221
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
222
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
223
	struct curseg_info *curseg;
224
	struct inode *inode;
225
	struct page *page = NULL;
226 227 228 229 230
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
231
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
232 233 234 235

	while (1) {
		struct fsync_inode_entry *entry;

236
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
237
			return 0;
238

239
		page = get_tmp_page(sbi, blkaddr);
240

241
		if (cp_ver != cpver_of_node(page))
242
			break;
243 244 245 246 247

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
248 249 250 251
		if (entry) {
			if (!is_same_inode(entry->inode, page))
				goto next;
		} else {
252
			if (IS_INODE(page) && is_dent_dnode(page)) {
253 254
				err = recover_inode_page(sbi, page);
				if (err)
255
					break;
256 257
			}

258 259 260 261
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
262 263 264
			inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(inode)) {
				err = PTR_ERR(inode);
265 266
				if (err == -ENOENT) {
					err = 0;
267
					goto next;
268
				}
269
				break;
270
			}
271 272 273 274 275 276 277 278

			/* add this fsync inode to the list */
			entry = add_fsync_inode(head, inode);
			if (!entry) {
				err = -ENOMEM;
				iput(inode);
				break;
			}
279
		}
J
Jaegeuk Kim 已提交
280 281
		entry->blkaddr = blkaddr;

282 283
		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
284 285 286
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
287
		f2fs_put_page(page, 1);
288 289

		ra_meta_pages_cond(sbi, blkaddr);
290
	}
291
	f2fs_put_page(page, 1);
292 293 294
	return err;
}

295
static void destroy_fsync_dnodes(struct list_head *head)
296
{
297 298
	struct fsync_inode_entry *entry, *tmp;

299 300
	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
301 302
}

303
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
304
			block_t blkaddr, struct dnode_of_data *dn)
305 306 307
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
308
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
309
	struct f2fs_summary_block *sum_node;
310
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
311
	struct page *sum_page, *node_page;
312
	struct dnode_of_data tdn = *dn;
313
	nid_t ino, nid;
314
	struct inode *inode;
315
	unsigned int offset;
316 317 318 319 320
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
321
		return 0;
322 323 324 325 326 327

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
328
			goto got_it;
329 330 331
		}
	}

J
Jaegeuk Kim 已提交
332 333 334 335 336
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
337 338 339 340
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
341 342
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
343
		tdn.node_page = dn->inode_page;
344
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
345
		goto truncate_out;
346
	} else if (dn->nid == nid) {
347
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
348
		goto truncate_out;
349 350
	}

351
	/* Get the node page */
352
	node_page = get_node_page(sbi, nid);
353 354
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
355 356

	offset = ofs_of_node(node_page);
357 358 359
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

360 361 362 363 364 365 366 367
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
368

369
	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
370

371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
388
		iput(inode);
389 390 391 392 393 394 395 396 397
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
398
	return 0;
399 400
}

401
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
402 403 404 405
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
406
	unsigned int start, end;
407
	int err = 0, recovered = 0;
408

409 410 411 412
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
413 414 415 416
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
417
		recover_xattr_data(inode, page, blkaddr);
418
		goto out;
419
	}
420

421 422
	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
423 424
		goto out;

425
	/* step 3: recover data indices */
426 427
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);
428 429

	set_new_dnode(&dn, inode, NULL, NULL, 0);
430

431
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
432
	if (err)
433
		goto out;
434

435
	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
436 437

	get_node_info(sbi, dn.nid, &ni);
438 439
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
440

441
	for (; start < end; start++, dn.ofs_in_node++) {
442 443 444 445 446
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462
		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
463
			reserve_new_block(&dn);
464 465 466 467 468
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {
469

470
			if (src == NULL_ADDR) {
471
				err = reserve_new_block(&dn);
472
				/* We should not get -ENOSPC */
473
				f2fs_bug_on(sbi, err);
474 475 476
			}

			/* Check the previous node page having this index */
477 478 479
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;
480 481

			/* write dummy data page */
482
			f2fs_replace_block(sbi, &dn, src, dest,
483
						ni.version, false, false);
484
			recovered++;
485 486 487 488 489 490 491 492 493 494
		}
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
495
err:
496
	f2fs_put_dnode(&dn);
497
out:
C
Chris Fries 已提交
498 499 500
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
501
	return err;
502 503
}

C
Chao Yu 已提交
504 505
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
506
{
507
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
508
	struct curseg_info *curseg;
509
	struct page *page = NULL;
510
	int err = 0;
511 512 513
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
514
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
515 516 517 518 519
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

520
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
521
			break;
522

523 524
		ra_meta_pages_cond(sbi, blkaddr);

525
		page = get_tmp_page(sbi, blkaddr);
526

527 528
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
529
			break;
530
		}
531

C
Chao Yu 已提交
532
		entry = get_fsync_inode(inode_list, ino_of_node(page));
533 534
		if (!entry)
			goto next;
535 536 537
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
538
		 * So, call recover_inode for the inode update.
539
		 */
540
		if (IS_INODE(page))
541 542
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
C
Chao Yu 已提交
543
			err = recover_dentry(entry->inode, page, dir_list);
544 545 546 547 548
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
549
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
550 551
		if (err) {
			f2fs_put_page(page, 1);
552
			break;
553
		}
554

555 556
		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
557 558 559
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
560
		f2fs_put_page(page, 1);
561
	}
562 563 564
	if (!err)
		allocate_new_segments(sbi);
	return err;
565 566
}

567
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
568
{
569
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
570
	struct list_head inode_list;
C
Chao Yu 已提交
571
	struct list_head dir_list;
572
	block_t blkaddr;
573
	int err;
574
	int ret = 0;
H
Haicheng Li 已提交
575
	bool need_writecp = false;
576 577

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
578
			sizeof(struct fsync_inode_entry));
579
	if (!fsync_entry_slab)
580
		return -ENOMEM;
581 582

	INIT_LIST_HEAD(&inode_list);
C
Chao Yu 已提交
583
	INIT_LIST_HEAD(&dir_list);
584

585 586 587
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

588 589
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

590
	/* step #1: find fsynced inode numbers */
591
	err = find_fsync_dnodes(sbi, &inode_list);
592
	if (err || list_empty(&inode_list))
593 594
		goto out;

595 596
	if (check_only) {
		ret = 1;
597
		goto out;
598
	}
599

H
Haicheng Li 已提交
600
	need_writecp = true;
601

602
	/* step #2: recover data */
C
Chao Yu 已提交
603
	err = recover_data(sbi, &inode_list, &dir_list);
604
	if (!err)
605
		f2fs_bug_on(sbi, !list_empty(&inode_list));
606
out:
607
	destroy_fsync_dnodes(&inode_list);
608

609 610
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
611
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
612

613 614 615 616 617
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

618
	clear_sbi_flag(sbi, SBI_POR_DOING);
619
	if (err) {
C
Chao Yu 已提交
620 621 622 623
		bool invalidate = false;

		if (discard_next_dnode(sbi, blkaddr))
			invalidate = true;
624 625 626 627

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
C
Chao Yu 已提交
628 629 630 631 632 633

		/* invalidate temporary meta page */
		if (invalidate)
			invalidate_mapping_pages(META_MAPPING(sbi),
							blkaddr, blkaddr);

634 635
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
636
	} else if (need_writecp) {
637
		struct cp_control cpc = {
638
			.reason = CP_RECOVERY,
639
		};
640
		mutex_unlock(&sbi->cp_mutex);
C
Chao Yu 已提交
641
		err = write_checkpoint(sbi, &cpc);
642 643
	} else {
		mutex_unlock(&sbi->cp_mutex);
644
	}
C
Chao Yu 已提交
645 646 647

	destroy_fsync_dnodes(&dir_list);
	kmem_cache_destroy(fsync_entry_slab);
648
	return ret ? ret: err;
649
}