/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

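/*
 * Recovery runs in two passes over the warm-node log written after the
 * last checkpoint: find_fsync_dnodes() builds the list of fsynced
 * inodes (step #1), then recover_data() replays their dnode blocks
 * (step #2).  Both passes are driven from recover_fsync_data() below.
 */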
static struct kmem_cache *fsync_entry_slab;

bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
		return false;
	return true;
}

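/* Find an already-listed recovery entry for @ino, if any. */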
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

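/*
 * Pin the inode behind @ino (f2fs_iget_retry() keeps retrying on
 * -ENOMEM) and append a new recovery entry for it to @head.
 */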
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
					struct list_head *head, nid_t ino)
{
	struct inode *inode;
	struct fsync_inode_entry *entry;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	entry->inode = inode;
	list_add_tail(&entry->list, head);

	return entry;
}

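/* Release the inode reference taken in add_fsync_inode() and free the entry. */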
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	iput(entry->inode);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

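/*
 * Re-link @inode under its parent (i_pino) using the name recorded in
 * the fsynced inode page.  If the name now points at a different
 * inode, that stale entry is deleted (the victim goes through the
 * orphan list) and the link is retried.
 */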
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
{
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct fscrypt_name fname;
	struct page *page;
	struct inode *dir, *einode;
	struct fsync_inode_entry *entry;
	int err = 0;
	char *name;

	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
			goto out;
		}
	}

	dir = entry->inode;

	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;

	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
		goto out;
	}
retry:
	de = __f2fs_find_entry(dir, &fname, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino))
		goto out_unmap_put;

	if (de) {
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
			err = PTR_ERR(einode);
			if (err == -ENOENT)
				err = -EEXIST;
			goto out_unmap_put;
		}
		err = acquire_orphan_inode(F2FS_I_SB(inode));
		if (err) {
			iput(einode);
			goto out_unmap_put;
		}
		f2fs_delete_entry(de, page, dir, einode);
		iput(einode);
		goto retry;
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
		err = __f2fs_do_add_link(dir, &fname, inode,
					inode->i_ino, inode->i_mode);
	}
	if (err == -ENOMEM)
		goto retry;
	goto out;

out_unmap_put:
	f2fs_dentry_kunmap(dir, page);
	f2fs_put_page(page, 0);
out:
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

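/*
 * Pull the latest generic attributes (mode, size, times, i_advise)
 * out of the fsynced inode page; see scenario 1 in the comment at the
 * top of this file.
 */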
static void recover_inode(struct inode *inode, struct page *page)
{
	struct f2fs_inode *raw = F2FS_INODE(page);
	char *name;

	inode->i_mode = le16_to_cpu(raw->i_mode);
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);

	F2FS_I(inode)->i_advise = raw->i_advise;

	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
			ino_of_node(page), name);
}

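/*
 * Step #1: follow next_blkaddr_of_node() links from the block right
 * after the last checkpointed warm-node position, collecting every
 * inode that has a fsync-marked dnode.  With @check_only set, the
 * caller only wants to know whether such data exists.
 */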
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head,
				bool check_only)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry) {
			if (!check_only &&
					IS_INODE(page) && is_dent_dnode(page)) {
				err = recover_inode_page(sbi, page);
				if (err)
					break;
			}

			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry = add_fsync_inode(sbi, head, ino_of_node(page));
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
				if (err == -ENOENT) {
					err = 0;
					goto next;
				}
				break;
			}
		}
		entry->blkaddr = blkaddr;

		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);

		ra_meta_pages_cond(sbi, blkaddr);
	}
	f2fs_put_page(page, 1);
	return err;
}

static void destroy_fsync_dnodes(struct list_head *head)
{
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
}

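/*
 * @blkaddr is about to be reused for recovered data.  Look up which
 * node currently claims it (via the segment summary) and truncate
 * that stale index, so the block does not end up referenced twice.
 */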
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct page *sum_page, *node_page;
	struct dnode_of_data tdn = *dn;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset;
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
		tdn.node_page = dn->inode_page;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	} else if (dn->nid == nid) {
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	}

	/* Get the node page */
	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	offset = ofs_of_node(node_page);
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget_retry(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}

	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);

	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
		iput(inode);
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
	return 0;
}

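/*
 * Replay a single fsynced node page: xattrs first, then inline data,
 * then each data block index, reserving or replacing blocks so the
 * on-disk dnode ends up matching the logged one.
 */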
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start, end;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		err = recover_xattr_data(inode, page, blkaddr);
		if (!err)
			recovered++;
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		if (!file_keep_isize(inode) &&
			(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
			f2fs_i_size_write(inode,
				(loff_t)(start + 1) << PAGE_SHIFT);

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			reserve_new_block(&dn);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {

			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
				while (err)
					err = reserve_new_block(&dn);
#endif
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
		inode->i_ino,
		file_keep_isize(inode) ? "keep" : "recover",
		recovered, err);
	return err;
}

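/*
 * Step #2: walk the warm-node log again and apply every logged page
 * that belongs to an inode found in step #1.  Dentry work is queued
 * on @dir_list so the directory inodes can be dropped in one go
 * before the final checkpoint.
 */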
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	int err = 0;
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			break;

		ra_meta_pages_cond(sbi, blkaddr);

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page)) {
			f2fs_put_page(page, 1);
			break;
		}

		entry = get_fsync_inode(inode_list, ino_of_node(page));
		if (!entry)
			goto next;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call recover_inode for the inode update.
		 */
		if (IS_INODE(page))
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page, dir_list);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
		if (err) {
			f2fs_put_page(page, 1);
			break;
		}

		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	if (!err)
		allocate_new_segments(sbi);
	return err;
}

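/*
 * Entry point.  Returns 1 when @check_only finds recoverable data,
 * 0 on success or when there is nothing to replay, and a negative
 * errno on failure (in which case CP_ERROR_FLAG is also set).
 */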
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list;
	struct list_head dir_list;
	int err;
	int ret = 0;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list, check_only);
	if (err || list_empty(&inode_list))
		goto out;

	if (check_only) {
		ret = 1;
		goto out;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err)
		set_ckpt_flags(sbi, CP_ERROR_FLAG);
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list);

	if (!err && need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};
		err = write_checkpoint(sbi, &cpc);
	}

	kmem_cache_destroy(fsync_entry_slab);
	return ret ? ret : err;
}