/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache backing struct fsync_inode_entry objects.  It is created at the
 * start of recover_fsync_data() and destroyed again before that function
 * returns.
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Tell whether the block accounting still leaves room for roll-forward
 * recovery.  Returns false once last_valid_block_count plus
 * alloc_valid_block_count exceeds user_block_count, true otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
			<= sbi->user_block_count;
}

/*
 * Look up the fsync entry for inode number @ino on the list @head.
 * Returns the first entry whose inode carries that number, or NULL when the
 * list holds no such entry.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *cur;
	struct fsync_inode_entry *found = NULL;

	list_for_each_entry(cur, head, list) {
		if (cur->inode->i_ino != ino)
			continue;
		found = cur;
		break;
	}

	return found;
}

static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
88 89 90 91

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
92
		goto out_err;
93
	}
J
Jaegeuk Kim 已提交
94 95
retry:
	de = f2fs_find_entry(dir, &name, &page);
96 97
	if (de && inode->i_ino == le32_to_cpu(de->ino)) {
		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98
		goto out_unmap_put;
99
	}
J
Jaegeuk Kim 已提交
100 101 102 103
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
104 105
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
106
				err = -EEXIST;
107 108
			goto out_unmap_put;
		}
109
		err = acquire_orphan_inode(F2FS_I_SB(inode));
110 111 112
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
113
		}
114
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
115 116
		iput(einode);
		goto retry;
117
	}
118
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
119 120 121 122 123 124 125 126 127 128
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

129 130 131
	goto out;

out_unmap_put:
132
	f2fs_dentry_kunmap(dir, page);
133
	f2fs_put_page(page, 0);
134 135
out_err:
	iput(dir);
136
out:
C
Chris Fries 已提交
137 138 139
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
140
			IS_ERR(dir) ? 0 : dir->i_ino, err);
141 142 143
	return err;
}

static void recover_inode(struct inode *inode, struct page *page)
145
{
146 147 148 149 150 151 152 153 154 155
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
156 157

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
158
			ino_of_node(page), F2FS_INODE(page)->i_name);
159 160 161 162
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
163
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
164
	struct curseg_info *curseg;
165
	struct page *page = NULL;
166 167 168 169 170
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
171
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
172

173 174
	ra_meta_pages(sbi, blkaddr, 1, META_POR);

175 176 177
	while (1) {
		struct fsync_inode_entry *entry;

178
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
179
			return 0;
180

181
		page = get_meta_page(sbi, blkaddr);
182

183
		if (cp_ver != cpver_of_node(page))
184
			break;
185 186 187 188 189 190 191 192 193 194 195

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (IS_INODE(page) && is_dent_dnode(page))
				set_inode_flag(F2FS_I(entry->inode),
							FI_INC_LINK);
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
196 197
				err = recover_inode_page(sbi, page);
				if (err)
198
					break;
199 200 201
			}

			/* add this fsync inode to the list */
202
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
203 204
			if (!entry) {
				err = -ENOMEM;
205
				break;
206
			}
207 208 209 210
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
211 212 213
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
214
				kmem_cache_free(fsync_entry_slab, entry);
215 216
				if (err == -ENOENT) {
					err = 0;
217
					goto next;
218
				}
219
				break;
220
			}
221
			list_add_tail(&entry->list, head);
222
		}
J
Jaegeuk Kim 已提交
223 224
		entry->blkaddr = blkaddr;

225 226 227 228 229
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
230 231 232
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
233
		f2fs_put_page(page, 1);
234 235

		ra_meta_pages_cond(sbi, blkaddr);
236
	}
237
	f2fs_put_page(page, 1);
238 239 240
	return err;
}

static void destroy_fsync_dnodes(struct list_head *head)
242
{
243 244 245
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
246 247 248 249 250 251
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
253
			block_t blkaddr, struct dnode_of_data *dn)
254 255 256
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
257
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
258
	struct f2fs_summary_block *sum_node;
259
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
260
	struct page *sum_page, *node_page;
261
	struct dnode_of_data tdn = *dn;
262
	nid_t ino, nid;
263
	struct inode *inode;
264
	unsigned int offset;
265 266 267 268 269
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
270
		return 0;
271 272 273 274 275 276

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
277
			goto got_it;
278 279 280
		}
	}

J
Jaegeuk Kim 已提交
281 282 283 284 285
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
286 287 288 289
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
290 291
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
292
		tdn.node_page = dn->inode_page;
293
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
294
		goto truncate_out;
295
	} else if (dn->nid == nid) {
296
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
297
		goto truncate_out;
298 299
	}

300
	/* Get the node page */
301
	node_page = get_node_page(sbi, nid);
302 303
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
304 305

	offset = ofs_of_node(node_page);
306 307 308
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

309 310 311 312 313 314 315 316
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
317

318
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
319
			le16_to_cpu(sum.ofs_in_node);
320

321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
338
		iput(inode);
339 340 341 342 343 344 345 346 347
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
348
	return 0;
349 350
}

/*
 * Replay one fsynced node block @page (found at @blkaddr) onto @inode.
 *
 * Step 1 recovers xattrs, step 2 inline data, step 3 the data block indices:
 * every index of @page that differs from the current dnode and is neither
 * NEW_ADDR nor NULL_ADDR is installed in the current dnode, after any older
 * owner of that block has been truncated.  Finally the node footer of @page
 * is copied over and the dnode page is marked dirty.
 *
 * A notice with the number of recovered blocks is logged on every path.
 * Returns 0 on success or a negative errno.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	unsigned int idx, last;
	int nr_recovered = 0;
	int err = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto report;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto report;

	/* step 3: recover data indices */
	idx = start_bidx_of_node(ofs_of_node(page), fi);
	last = idx + ADDRS_PER_PAGE(page, fi);

	f2fs_lock_op(sbi);

	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, idx, ALLOC_NODE);
	if (err) {
		f2fs_unlock_op(sbi);
		goto report;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; idx < last; idx++, dn.ofs_in_node++) {
		block_t cur_addr = datablock_addr(dn.node_page, dn.ofs_in_node);
		block_t new_addr = datablock_addr(page, dn.ofs_in_node);

		/* skip indices that are unchanged or carry no real block */
		if (cur_addr == new_addr || new_addr == NEW_ADDR ||
						new_addr == NULL_ADDR)
			continue;

		if (cur_addr == NULL_ADDR) {
			err = reserve_new_block(&dn);
			/* We should not get -ENOSPC */
			f2fs_bug_on(sbi, err);
		}

		/* Check the previous node page having this index */
		err = check_index_in_prev_nodes(sbi, new_addr, &dn);
		if (err)
			goto put_dnode;

		set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

		/* write dummy data page */
		recover_data_page(sbi, NULL, &sum, cur_addr, new_addr);
		dn.data_blkaddr = new_addr;
		set_data_blkaddr(&dn);
		f2fs_update_extent_cache(&dn);
		nr_recovered++;
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
put_dnode:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);
report:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, nr_recovered, err);
	return err;
}

static int recover_data(struct f2fs_sb_info *sbi,
445 446
				struct list_head *head, int type)
{
447
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
448
	struct curseg_info *curseg;
449
	struct page *page = NULL;
450
	int err = 0;
451 452 453 454 455 456 457 458 459
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

460
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
461
			break;
462

463 464 465
		ra_meta_pages_cond(sbi, blkaddr);

		page = get_meta_page(sbi, blkaddr);
466

467 468
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
469
			break;
470
		}
471 472 473 474

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
475 476 477
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
478
		 * So, call recover_inode for the inode update.
479
		 */
480 481 482 483 484 485 486 487 488
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
489
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
490 491
		if (err) {
			f2fs_put_page(page, 1);
492
			break;
493
		}
494 495 496 497 498 499 500 501 502

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
503
		f2fs_put_page(page, 1);
504
	}
505 506 507
	if (!err)
		allocate_new_segments(sbi);
	return err;
508 509
}

int recover_fsync_data(struct f2fs_sb_info *sbi)
511
{
512
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
513
	struct list_head inode_list;
514
	block_t blkaddr;
515
	int err;
H
Haicheng Li 已提交
516
	bool need_writecp = false;
517 518

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
519
			sizeof(struct fsync_inode_entry));
520
	if (!fsync_entry_slab)
521
		return -ENOMEM;
522 523 524 525

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
526
	set_sbi_flag(sbi, SBI_POR_DOING);
527

528 529 530
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

531 532
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

533 534
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
535 536 537 538 539
		goto out;

	if (list_empty(&inode_list))
		goto out;

H
Haicheng Li 已提交
540
	need_writecp = true;
541

542
	/* step #2: recover data */
543
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
544
	if (!err)
545
		f2fs_bug_on(sbi, !list_empty(&inode_list));
546
out:
547
	destroy_fsync_dnodes(&inode_list);
548
	kmem_cache_destroy(fsync_entry_slab);
549

550 551
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
552
			MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
553

554 555 556 557 558
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

559
	clear_sbi_flag(sbi, SBI_POR_DOING);
560 561 562 563 564 565
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
566 567
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
568
	} else if (need_writecp) {
569 570 571
		struct cp_control cpc = {
			.reason = CP_SYNC,
		};
572
		mutex_unlock(&sbi->cp_mutex);
573
		write_checkpoint(sbi, &cpc);
574 575
	} else {
		mutex_unlock(&sbi->cp_mutex);
576
	}
577
	return err;
578
}