/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache used for struct fsync_inode_entry objects. It is created and
 * destroyed by recover_fsync_data(), so it is only valid while that
 * function is running.
 */
static struct kmem_cache *fsync_entry_slab;

bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
52 53 54
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
55 56 57 58 59 60 61 62 63
		return false;
	return true;
}

/*
 * Look up the entry for inode number @ino on the list @head.
 *
 * Returns the first entry whose inode has a matching i_ino, or NULL when
 * no entry on the list matches.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

/*
 * Read inode @ino and append a new entry for it to the tail of @head.
 *
 * The inode reference obtained from f2fs_iget_retry() is stored in the
 * entry; del_fsync_inode() drops it again with iput().
 *
 * Returns the new entry, or the error pointer from f2fs_iget_retry() when
 * the inode cannot be read.
 */
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
					struct list_head *head, nid_t ino)
{
	struct inode *inode;
	struct fsync_inode_entry *entry;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	/*
	 * NOTE(review): the allocation result is used unchecked; presumably
	 * f2fs_kmem_cache_alloc() cannot return NULL -- confirm in f2fs.h.
	 */
	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	entry->inode = inode;
	list_add_tail(&entry->list, head);

	return entry;
}

/*
 * Release one fsync inode entry: drop the inode reference held by the
 * entry, unlink the entry from its list and return it to
 * fsync_entry_slab.
 */
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	iput(entry->inode);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
97
{
98
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
99
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
100
	struct f2fs_dir_entry *de;
101
	struct fscrypt_name fname;
102
	struct page *page;
J
Jaegeuk Kim 已提交
103
	struct inode *dir, *einode;
C
Chao Yu 已提交
104
	struct fsync_inode_entry *entry;
105
	int err = 0;
106
	char *name;
107

C
Chao Yu 已提交
108 109
	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
110 111 112 113
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
C
Chao Yu 已提交
114 115
			goto out;
		}
116 117
	}

C
Chao Yu 已提交
118 119
	dir = entry->inode;

120 121 122
	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;
123

124
	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
125 126
		WARN_ON(1);
		err = -ENAMETOOLONG;
C
Chao Yu 已提交
127
		goto out;
128
	}
J
Jaegeuk Kim 已提交
129
retry:
130
	de = __f2fs_find_entry(dir, &fname, &page);
131
	if (de && inode->i_ino == le32_to_cpu(de->ino))
132
		goto out_unmap_put;
133

J
Jaegeuk Kim 已提交
134
	if (de) {
135
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
J
Jaegeuk Kim 已提交
136 137
		if (IS_ERR(einode)) {
			WARN_ON(1);
138 139
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
140
				err = -EEXIST;
141 142
			goto out_unmap_put;
		}
143
		err = acquire_orphan_inode(F2FS_I_SB(inode));
144 145 146
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
147
		}
148
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
149 150
		iput(einode);
		goto retry;
151 152 153
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
154
		err = __f2fs_do_add_link(dir, &fname, inode,
155
					inode->i_ino, inode->i_mode);
156
	}
157 158
	if (err == -ENOMEM)
		goto retry;
159 160 161
	goto out;

out_unmap_put:
162
	f2fs_dentry_kunmap(dir, page);
163
	f2fs_put_page(page, 0);
164
out:
165 166 167 168
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
C
Chris Fries 已提交
169 170
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
171
			__func__, ino_of_node(ipage), name,
D
Dan Carpenter 已提交
172
			IS_ERR(dir) ? 0 : dir->i_ino, err);
173 174 175
	return err;
}

176
static void recover_inode(struct inode *inode, struct page *page)
177
{
178
	struct f2fs_inode *raw = F2FS_INODE(page);
179
	char *name;
180 181

	inode->i_mode = le16_to_cpu(raw->i_mode);
182
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
C
Chao Yu 已提交
183
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
184 185
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
C
Chao Yu 已提交
186
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
187 188
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
189

190 191
	F2FS_I(inode)->i_advise = raw->i_advise;

192 193 194 195 196
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

197
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
198
			ino_of_node(page), name);
199 200 201 202 203
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	struct curseg_info *curseg;
204
	struct page *page = NULL;
205 206 207 208 209
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
210
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
211 212 213 214

	while (1) {
		struct fsync_inode_entry *entry;

215
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
216
			return 0;
217

218
		page = get_tmp_page(sbi, blkaddr);
219

220
		if (!is_recoverable_dnode(page))
221
			break;
222 223 224 225 226

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
227
		if (!entry) {
228
			if (IS_INODE(page) && is_dent_dnode(page)) {
229 230
				err = recover_inode_page(sbi, page);
				if (err)
231
					break;
232 233
			}

234 235 236 237
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
238 239 240
			entry = add_fsync_inode(sbi, head, ino_of_node(page));
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
241 242
				if (err == -ENOENT) {
					err = 0;
243
					goto next;
244
				}
245
				break;
246 247
			}
		}
J
Jaegeuk Kim 已提交
248 249
		entry->blkaddr = blkaddr;

250 251
		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
252 253 254
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
255
		f2fs_put_page(page, 1);
256 257

		ra_meta_pages_cond(sbi, blkaddr);
258
	}
259
	f2fs_put_page(page, 1);
260 261 262
	return err;
}

263
static void destroy_fsync_dnodes(struct list_head *head)
264
{
265 266
	struct fsync_inode_entry *entry, *tmp;

267 268
	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
269 270
}

271
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
272
			block_t blkaddr, struct dnode_of_data *dn)
273 274 275
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
276
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
277
	struct f2fs_summary_block *sum_node;
278
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
279
	struct page *sum_page, *node_page;
280
	struct dnode_of_data tdn = *dn;
281
	nid_t ino, nid;
282
	struct inode *inode;
283
	unsigned int offset;
284 285 286 287 288
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
289
		return 0;
290 291 292 293 294 295

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
296
			goto got_it;
297 298 299
		}
	}

J
Jaegeuk Kim 已提交
300 301 302 303 304
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
305 306 307 308
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
309 310
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
311
		tdn.node_page = dn->inode_page;
312
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
313
		goto truncate_out;
314
	} else if (dn->nid == nid) {
315
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
316
		goto truncate_out;
317 318
	}

319
	/* Get the node page */
320
	node_page = get_node_page(sbi, nid);
321 322
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
323 324

	offset = ofs_of_node(node_page);
325 326 327
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

328 329
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
330
		inode = f2fs_iget_retry(sbi->sb, ino);
331 332 333 334 335
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
336

337
	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
338

339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
356
		iput(inode);
357 358 359 360 361 362 363 364 365
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
366
	return 0;
367 368
}

369
/*
 * Replay one fsynced node page onto the current state of @inode.
 *
 * @sbi:     filesystem instance
 * @inode:   inode the node page belongs to
 * @page:    fsynced node page read from the log
 * @blkaddr: block address @page was read from
 *
 * Step 1 recovers xattrs, step 2 inline data, and step 3 walks every data
 * index covered by @page and brings the live dnode in line with it:
 * indices that became NULL_ADDR are truncated, NEW_ADDR indices are
 * re-reserved, and valid destination addresses are installed with
 * f2fs_replace_block() after any older owner of that block has been
 * dropped by check_index_in_prev_nodes().
 *
 * Logs a summary line and returns 0 or a negative errno.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start, end;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		/* memory pressure is treated as transient: wait and retry */
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src, dest;

		/* src: address in the live dnode, dest: address in the log */
		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/* grow i_size to cover this block unless it must be kept */
		if (!file_keep_isize(inode) &&
			(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
			f2fs_i_size_write(inode,
				(loff_t)(start + 1) << PAGE_SHIFT);

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			/*
			 * NOTE(review): the return value is ignored here,
			 * unlike the src == NULL_ADDR case below -- confirm
			 * this cannot fail after the truncate above.
			 */
			reserve_new_block(&dn);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {

			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
				while (err)
					err = reserve_new_block(&dn);
#endif
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	/* make the live dnode carry the footer of the recovered page */
	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
		inode->i_ino,
		file_keep_isize(inode) ? "keep" : "recover",
		recovered, err);
	return err;
}

static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
494 495
{
	struct curseg_info *curseg;
496
	struct page *page = NULL;
497
	int err = 0;
498 499 500
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
501
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
502 503 504 505 506
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

507
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
508
			break;
509

510 511
		ra_meta_pages_cond(sbi, blkaddr);

512
		page = get_tmp_page(sbi, blkaddr);
513

514
		if (!is_recoverable_dnode(page)) {
515
			f2fs_put_page(page, 1);
516
			break;
517
		}
518

C
Chao Yu 已提交
519
		entry = get_fsync_inode(inode_list, ino_of_node(page));
520 521
		if (!entry)
			goto next;
522 523 524
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
525
		 * So, call recover_inode for the inode update.
526
		 */
527
		if (IS_INODE(page))
528 529
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
C
Chao Yu 已提交
530
			err = recover_dentry(entry->inode, page, dir_list);
531 532 533 534 535
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
536
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
537 538
		if (err) {
			f2fs_put_page(page, 1);
539
			break;
540
		}
541

542 543
		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
544 545 546
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
547
		f2fs_put_page(page, 1);
548
	}
549 550 551
	if (!err)
		allocate_new_segments(sbi);
	return err;
552 553
}

554
/*
 * Entry point for roll-forward recovery.
 *
 * @sbi:        filesystem instance
 * @check_only: when true, only detect whether there is anything to
 *              recover; no data is replayed
 *
 * Runs with sbi->cp_mutex held so that no checkpoint can happen while the
 * log is being scanned and replayed. After a successful replay a
 * CP_RECOVERY checkpoint is written. On failure the node and meta page
 * caches are truncated and CP_ERROR_FLAG is set. SBI_POR_DOING is cleared
 * in every case that gets past slab creation.
 *
 * Returns 1 when @check_only is set and fsynced data was found, otherwise
 * 0 on success or a negative errno.
 */
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list;
	struct list_head dir_list;
	int err;
	int ret = 0;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err || list_empty(&inode_list))
		goto out;

	if (check_only) {
		ret = 1;
		goto out;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err)
		set_ckpt_flags(sbi, CP_ERROR_FLAG);
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list);

	if (!err && need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};
		err = write_checkpoint(sbi, &cpc);
	}

	kmem_cache_destroy(fsync_entry_slab);
	return ret ? ret: err;
}