/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

static struct kmem_cache *fsync_entry_slab;

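/*
 * Check whether enough free blocks remain to replay the logged data
 * without exceeding the user-visible block count.
 */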
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
		return false;
	return true;
}

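/* Look up an entry in the fsync inode list by inode number. */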
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

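/*
 * Grab the in-memory inode for @ino and append a new entry for it to
 * the fsync inode list; returns an ERR_PTR if the inode cannot be read.
 */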
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
					struct list_head *head, nid_t ino)
{
	struct inode *inode;
	struct fsync_inode_entry *entry;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	entry->inode = inode;
	list_add_tail(&entry->list, head);

	return entry;
}

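/* Unlink an entry from the fsync inode list and drop its references. */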
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	iput(entry->inode);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

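/*
 * Re-link the recovered inode into its parent directory. If a stale
 * dentry with the same name already exists, it is removed (handling its
 * inode as an orphan) and the lookup is retried.
 */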
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
{
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct fscrypt_name fname;
	struct page *page;
	struct inode *dir, *einode;
	struct fsync_inode_entry *entry;
	int err = 0;
	char *name;

	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
			goto out;
		}
	}

	dir = entry->inode;

	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;

	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
		goto out;
	}
retry:
	de = __f2fs_find_entry(dir, &fname, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino))
		goto out_unmap_put;

	if (de) {
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
			err = PTR_ERR(einode);
			if (err == -ENOENT)
				err = -EEXIST;
			goto out_unmap_put;
		}
		err = acquire_orphan_inode(F2FS_I_SB(inode));
		if (err) {
			iput(einode);
			goto out_unmap_put;
		}
		f2fs_delete_entry(de, page, dir, einode);
		iput(einode);
		goto retry;
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
		err = __f2fs_do_add_link(dir, &fname, inode,
					inode->i_ino, inode->i_mode);
	}
	if (err == -ENOMEM)
		goto retry;
	goto out;

out_unmap_put:
	f2fs_dentry_kunmap(dir, page);
	f2fs_put_page(page, 0);
out:
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

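/* Restore the inode metadata (mode, size, times) from the node page. */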
static void recover_inode(struct inode *inode, struct page *page)
{
	struct f2fs_inode *raw = F2FS_INODE(page);
	char *name;

	inode->i_mode = le16_to_cpu(raw->i_mode);
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);

	F2FS_I(inode)->i_advise = raw->i_advise;

	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
			ino_of_node(page), name);
}

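/*
 * First recovery pass: walk the warm node chain written after the last
 * checkpoint and build a list of inodes that have fsync'd dnodes.
 */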
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry) {
			if (IS_INODE(page) && is_dent_dnode(page)) {
				err = recover_inode_page(sbi, page);
				if (err)
					break;
			}

			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry = add_fsync_inode(sbi, head, ino_of_node(page));
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
				if (err == -ENOENT) {
					err = 0;
					goto next;
				}
				break;
			}
		}
		entry->blkaddr = blkaddr;

		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);

		ra_meta_pages_cond(sbi, blkaddr);
	}
	f2fs_put_page(page, 1);
	return err;
}

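/* Release all remaining entries of an fsync inode list. */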
static void destroy_fsync_dnodes(struct list_head *head)
{
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
}

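/*
 * @blkaddr is about to be reused for recovered data, so look up its
 * previous owner via the segment summary and truncate the stale index
 * that still points at it.
 */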
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct page *sum_page, *node_page;
	struct dnode_of_data tdn = *dn;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset;
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
		tdn.node_page = dn->inode_page;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	} else if (dn->nid == nid) {
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	}

	/* Get the node page */
	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	offset = ofs_of_node(node_page);
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget_retry(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}

	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);

	/*
	 * If the inode page is locked, unlock it temporarily; the held
	 * reference keeps the inode alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
		iput(inode);
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
	return 0;
}

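/*
 * Replay one fsync'd dnode: recover xattr and inline data first, then
 * make the block addresses in the on-disk dnode match those logged in
 * @page at @blkaddr.
 */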
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start, end;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found in the cold log.
		 * This path is kept for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		if (!file_keep_isize(inode) &&
				(i_size_read(inode) <= (start << PAGE_SHIFT)))
			f2fs_i_size_write(inode, (start + 1) << PAGE_SHIFT);

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			reserve_new_block(&dn);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {

			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
				while (err)
					err = reserve_new_block(&dn);
#endif
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
		inode->i_ino,
		file_keep_isize(inode) ? "keep" : "recover",
		recovered, err);
	return err;
}

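/*
 * Second recovery pass: walk the node chain again and, for each inode
 * collected in the first pass, replay inode metadata, dentries, and
 * data indices in log order.
 */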
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	int err = 0;
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			break;

		ra_meta_pages_cond(sbi, blkaddr);

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page)) {
			f2fs_put_page(page, 1);
			break;
		}

		entry = get_fsync_inode(inode_list, ino_of_node(page));
		if (!entry)
			goto next;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call recover_inode for the inode update.
		 */
		if (IS_INODE(page))
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page, dir_list);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
		if (err) {
			f2fs_put_page(page, 1);
			break;
		}

		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	if (!err)
		allocate_new_segments(sbi);
	return err;
}

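/*
 * Entry point of roll-forward recovery: returns 1 if @check_only is set
 * and there is recoverable data, 0 on success, and a negative errno on
 * failure. A checkpoint is written when any data was actually recovered.
 */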
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list;
	struct list_head dir_list;
	int err;
	int ret = 0;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err || list_empty(&inode_list))
		goto out;

	if (check_only) {
		ret = 1;
		goto out;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);

	/* truncate the meta pages that were used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err)
		set_ckpt_flags(sbi, CP_ERROR_FLAG);
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list);

	if (!err && need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};
		err = write_checkpoint(sbi, &cpc);
	}

	kmem_cache_destroy(fsync_entry_slab);
	return ret ? ret : err;
}