/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

static struct kmem_cache *fsync_entry_slab;

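/*
 * Roll forward recovery may allocate new blocks while replaying fsynced
 * dnodes.  Report whether the blocks valid at the last checkpoint plus
 * the blocks allocated since then still fit within user_block_count.
 */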
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
			> sbi->user_block_count)
		return false;
	return true;
}

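/* Find the fsync inode entry collected for @ino, or NULL if none exists. */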
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

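/*
 * Re-link the recovered inode under its parent (i_pino) with the name
 * stored in the raw inode.  If a stale dentry with the same name points
 * to a different inode, that inode is made an orphan, the dentry is
 * deleted, and the lookup is retried.
 */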
static int recover_dentry(struct inode *inode, struct page *ipage)
{
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct qstr name;
	struct page *page;
	struct inode *dir, *einode;
	int err = 0;

	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
		goto out_err;
	}
retry:
	de = f2fs_find_entry(dir, &name, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino)) {
		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
		goto out_unmap_put;
	}
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
			err = PTR_ERR(einode);
			if (err == -ENOENT)
				err = -EEXIST;
			goto out_unmap_put;
		}
		err = acquire_orphan_inode(F2FS_I_SB(inode));
		if (err) {
			iput(einode);
			goto out_unmap_put;
		}
		f2fs_delete_entry(de, page, dir, einode);
		iput(einode);
		goto retry;
	}
	err = __f2fs_add_link(dir, &name, inode);
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

	goto out;

out_unmap_put:
	f2fs_dentry_kunmap(dir, page);
	f2fs_put_page(page, 0);
out_err:
	iput(dir);
out:
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

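/* Roll forward the inode metadata (mode, size and timestamps) from @page. */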
static void recover_inode(struct inode *inode, struct page *page)
{
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
			ino_of_node(page), F2FS_INODE(page)->i_name);
}

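/*
 * Step #1 of recovery: follow the warm node chain written after the last
 * checkpoint and collect every inode that owns a dnode carrying a fsync
 * mark.  The walk stops at the first node page stamped with a different
 * checkpoint version.
 */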
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	ra_meta_pages(sbi, blkaddr, 1, META_POR);

	while (1) {
		struct fsync_inode_entry *entry;

		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
			return 0;

		page = get_meta_page(sbi, blkaddr);

		if (cp_ver != cpver_of_node(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (IS_INODE(page) && is_dent_dnode(page))
				set_inode_flag(F2FS_I(entry->inode),
							FI_INC_LINK);
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
				err = recover_inode_page(sbi, page);
				if (err)
					break;
			}

			/* add this fsync inode to the list */
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
			if (!entry) {
				err = -ENOMEM;
				break;
			}
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
				kmem_cache_free(fsync_entry_slab, entry);
				if (err == -ENOENT)
					goto next;
				break;
			}
			list_add_tail(&entry->list, head);
		}
		entry->blkaddr = blkaddr;

		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);

		ra_meta_pages_cond(sbi, blkaddr);
	}
	f2fs_put_page(page, 1);
	return err;
}

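/* Drop every collected fsync inode entry along with its inode reference. */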
static void destroy_fsync_dnodes(struct list_head *head)
{
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

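/*
 * @blkaddr may still be indexed by a node page written before the one
 * being replayed.  Find the previous owner through the segment summary
 * (or the in-memory current segment) and invalidate its stale index, so
 * that the block can be safely reassigned to the recovered file.
 */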
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct page *sum_page, *node_page;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset;
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.nid = nid;
		tdn.node_page = dn->inode_page;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		truncate_data_blocks_range(&tdn, 1);
		return 0;
	} else if (dn->nid == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		truncate_data_blocks_range(&tdn, 1);
		return 0;
	}

	/* Get the node page */
	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	offset = ofs_of_node(node_page);
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}

	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
			le16_to_cpu(sum.ofs_in_node);

	if (ino != dn->inode->i_ino) {
		truncate_hole(inode, bidx, bidx + 1);
		iput(inode);
	} else {
		struct dnode_of_data tdn;
		set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
		if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
			return 0;
		if (tdn.data_blkaddr != NULL_ADDR)
			truncate_data_blocks_range(&tdn, 1);
		f2fs_put_page(tdn.node_page, 1);
	}
	return 0;
}

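/*
 * Replay one fsynced node page: recover xattr and inline data first,
 * then copy every changed block address from the logged node page into
 * the live dnode, reserving new blocks where none were allocated.
 */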
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	unsigned int start, end;
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found from the cold log.
		 * But we keep this for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), fi);
	end = start + ADDRS_PER_PAGE(page, fi);

	f2fs_lock_op(sbi);

	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		f2fs_unlock_op(sbi);
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
			}

			/* Check the previous node page holding this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;

			set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

			/* write dummy data page */
			recover_data_page(sbi, NULL, &sum, src, dest);
			dn.data_blkaddr = dest;
			f2fs_update_extent_cache(&dn);
			recovered++;
		}
		dn.ofs_in_node++;
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
	return err;
}

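/*
 * Step #2 of recovery: walk the node chain again and, for each inode
 * collected in step #1, replay its inode block, dentry and data blocks
 * up to the last dnode logged for it.
 */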
static int recover_data(struct f2fs_sb_info *sbi,
				struct list_head *head, int type)
{
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
	struct curseg_info *curseg;
	struct page *page = NULL;
	int err = 0;
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
			break;

		ra_meta_pages_cond(sbi, blkaddr);

		page = get_meta_page(sbi, blkaddr);

		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
			break;
		}

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call recover_inode for the inode update.
		 */
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
		if (err) {
			f2fs_put_page(page, 1);
			break;
		}

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	if (!err)
		allocate_new_segments(sbi);
	return err;
}

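/*
 * Entry point of roll forward recovery, typically invoked at mount time
 * when the checkpoint indicates an unclean shutdown (the exact call site
 * depends on the kernel version).  On success, a checkpoint is written
 * back if anything was recovered; on failure, the checkpoint is marked
 * with CP_ERROR_FLAG and dirty meta pages are flushed.
 */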
int recover_fsync_data(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	struct list_head inode_list;
	block_t blkaddr;
	int err;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
	set_sbi_flag(sbi, SBI_POR_DOING);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
		goto out;

	if (list_empty(&inode_list))
		goto out;

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);
	kmem_cache_destroy(fsync_entry_slab);

	/* truncate meta pages which were used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
	} else if (need_writecp) {
		struct cp_control cpc = {
			.reason = CP_SYNC,
		};
		mutex_unlock(&sbi->cp_mutex);
		write_checkpoint(sbi, &cpc);
	} else {
		mutex_unlock(&sbi->cp_mutex);
	}
	return err;
}