/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache for struct fsync_inode_entry objects. It is created at the start
 * of recover_fsync_data() and destroyed before that function returns.
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Tell the caller whether roll-forward recovery still fits.
 *
 * Returns false when sbi->last_valid_block_count plus
 * sbi->alloc_valid_block_count is greater than sbi->user_block_count,
 * and true otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
			<= sbi->user_block_count;
}

/*
 * Search the fsync inode list @head for the entry whose inode number is @ino.
 *
 * Returns the matching entry, or NULL when no entry on the list refers to
 * that inode number.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
88 89 90 91

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
92
		goto out_err;
93
	}
J
Jaegeuk Kim 已提交
94 95
retry:
	de = f2fs_find_entry(dir, &name, &page);
96
	if (de && inode->i_ino == le32_to_cpu(de->ino))
97
		goto out_unmap_put;
98

J
Jaegeuk Kim 已提交
99 100 101 102
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
103 104
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
105
				err = -EEXIST;
106 107
			goto out_unmap_put;
		}
108
		err = acquire_orphan_inode(F2FS_I_SB(inode));
109 110 111
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
112
		}
113
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
114 115
		iput(einode);
		goto retry;
116
	}
117
	err = __f2fs_add_link(dir, &name, inode, inode->i_ino, inode->i_mode);
118 119 120 121 122 123 124 125 126 127
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

128 129 130
	goto out;

out_unmap_put:
131
	f2fs_dentry_kunmap(dir, page);
132
	f2fs_put_page(page, 0);
133 134
out_err:
	iput(dir);
135
out:
C
Chris Fries 已提交
136 137 138
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
139
			IS_ERR(dir) ? 0 : dir->i_ino, err);
140 141 142
	return err;
}

143
static void recover_inode(struct inode *inode, struct page *page)
144
{
145 146 147 148 149 150 151 152 153 154
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
155 156

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
157
			ino_of_node(page), F2FS_INODE(page)->i_name);
158 159 160 161
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
162
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
163
	struct curseg_info *curseg;
164
	struct page *page = NULL;
165 166 167 168 169
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
170
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
171

172 173
	ra_meta_pages(sbi, blkaddr, 1, META_POR);

174 175 176
	while (1) {
		struct fsync_inode_entry *entry;

177
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
178
			return 0;
179

180
		page = get_meta_page(sbi, blkaddr);
181

182
		if (cp_ver != cpver_of_node(page))
183
			break;
184 185 186 187 188

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
189
		if (!entry) {
190
			if (IS_INODE(page) && is_dent_dnode(page)) {
191 192
				err = recover_inode_page(sbi, page);
				if (err)
193
					break;
194 195 196
			}

			/* add this fsync inode to the list */
197
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
198 199
			if (!entry) {
				err = -ENOMEM;
200
				break;
201
			}
202 203 204 205
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
206 207 208
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
209
				kmem_cache_free(fsync_entry_slab, entry);
210 211
				if (err == -ENOENT) {
					err = 0;
212
					goto next;
213
				}
214
				break;
215
			}
216
			list_add_tail(&entry->list, head);
217
		}
J
Jaegeuk Kim 已提交
218 219
		entry->blkaddr = blkaddr;

220 221 222 223 224
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
225 226 227
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
228
		f2fs_put_page(page, 1);
229 230

		ra_meta_pages_cond(sbi, blkaddr);
231
	}
232
	f2fs_put_page(page, 1);
233 234 235
	return err;
}

236
static void destroy_fsync_dnodes(struct list_head *head)
237
{
238 239 240
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
241 242 243 244 245 246
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

247
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
248
			block_t blkaddr, struct dnode_of_data *dn)
249 250 251
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
252
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
253
	struct f2fs_summary_block *sum_node;
254
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
255
	struct page *sum_page, *node_page;
256
	struct dnode_of_data tdn = *dn;
257
	nid_t ino, nid;
258
	struct inode *inode;
259
	unsigned int offset;
260 261 262 263 264
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
265
		return 0;
266 267 268 269 270 271

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
272
			goto got_it;
273 274 275
		}
	}

J
Jaegeuk Kim 已提交
276 277 278 279 280
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
281 282 283 284
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
285 286
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
287
		tdn.node_page = dn->inode_page;
288
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
289
		goto truncate_out;
290
	} else if (dn->nid == nid) {
291
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
292
		goto truncate_out;
293 294
	}

295
	/* Get the node page */
296
	node_page = get_node_page(sbi, nid);
297 298
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
299 300

	offset = ofs_of_node(node_page);
301 302 303
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

304 305 306 307 308 309 310 311
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
312

313
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
314
			le16_to_cpu(sum.ofs_in_node);
315

316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
333
		iput(inode);
334 335 336 337 338 339 340 341 342
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
343
	return 0;
344 345
}

346
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
347 348
					struct page *page, block_t blkaddr)
{
349
	struct f2fs_inode_info *fi = F2FS_I(inode);
350 351 352 353
	unsigned int start, end;
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
354
	int err = 0, recovered = 0;
355

356 357 358 359
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
360 361 362 363
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
364
		recover_xattr_data(inode, page, blkaddr);
365
		goto out;
366
	}
367

368 369
	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
370 371
		goto out;

372
	/* step 3: recover data indices */
373
	start = start_bidx_of_node(ofs_of_node(page), fi);
374
	end = start + ADDRS_PER_PAGE(page, fi);
375

376
	f2fs_lock_op(sbi);
377

378
	set_new_dnode(&dn, inode, NULL, NULL, 0);
379

380
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
381
	if (err) {
382
		f2fs_unlock_op(sbi);
383
		goto out;
384
	}
385

386
	f2fs_wait_on_page_writeback(dn.node_page, NODE);
387 388

	get_node_info(sbi, dn.nid, &ni);
389 390
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
391 392 393 394 395 396 397

	for (; start < end; start++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

398 399 400
		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR &&
			dest >= MAIN_BLKADDR(sbi) && dest < MAX_BLKADDR(sbi)) {

401
			if (src == NULL_ADDR) {
402
				err = reserve_new_block(&dn);
403
				/* We should not get -ENOSPC */
404
				f2fs_bug_on(sbi, err);
405 406 407
			}

			/* Check the previous node page having this index */
408 409 410
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;
411 412 413 414 415

			set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

			/* write dummy data page */
			recover_data_page(sbi, NULL, &sum, src, dest);
J
Jaegeuk Kim 已提交
416
			dn.data_blkaddr = dest;
417
			set_data_blkaddr(&dn);
418
			f2fs_update_extent_cache(&dn);
419
			recovered++;
420 421 422 423 424 425 426 427 428 429 430
		}
		dn.ofs_in_node++;
	}

	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
431
err:
432
	f2fs_put_dnode(&dn);
433
	f2fs_unlock_op(sbi);
434
out:
C
Chris Fries 已提交
435 436 437
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
438
	return err;
439 440
}

441
static int recover_data(struct f2fs_sb_info *sbi,
442 443
				struct list_head *head, int type)
{
444
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
445
	struct curseg_info *curseg;
446
	struct page *page = NULL;
447
	int err = 0;
448 449 450 451 452 453 454 455 456
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

457
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
458
			break;
459

460 461 462
		ra_meta_pages_cond(sbi, blkaddr);

		page = get_meta_page(sbi, blkaddr);
463

464 465
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
466
			break;
467
		}
468 469 470 471

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
472 473 474
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
475
		 * So, call recover_inode for the inode update.
476
		 */
477 478 479 480 481 482 483 484 485
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
486
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
487 488
		if (err) {
			f2fs_put_page(page, 1);
489
			break;
490
		}
491 492 493 494 495 496 497 498 499

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
500
		f2fs_put_page(page, 1);
501
	}
502 503 504
	if (!err)
		allocate_new_segments(sbi);
	return err;
505 506
}

507
int recover_fsync_data(struct f2fs_sb_info *sbi)
508
{
509
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
510
	struct list_head inode_list;
511
	block_t blkaddr;
512
	int err;
H
Haicheng Li 已提交
513
	bool need_writecp = false;
514 515

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
516
			sizeof(struct fsync_inode_entry));
517
	if (!fsync_entry_slab)
518
		return -ENOMEM;
519 520 521 522

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
523
	set_sbi_flag(sbi, SBI_POR_DOING);
524

525 526 527
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

528 529
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

530 531
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
532 533 534 535 536
		goto out;

	if (list_empty(&inode_list))
		goto out;

H
Haicheng Li 已提交
537
	need_writecp = true;
538

539
	/* step #2: recover data */
540
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
541
	if (!err)
542
		f2fs_bug_on(sbi, !list_empty(&inode_list));
543
out:
544
	destroy_fsync_dnodes(&inode_list);
545
	kmem_cache_destroy(fsync_entry_slab);
546

547 548
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
549
			MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
550

551 552 553 554 555
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

556
	clear_sbi_flag(sbi, SBI_POR_DOING);
557 558 559 560 561 562
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
563 564
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
565
	} else if (need_writecp) {
566
		struct cp_control cpc = {
567
			.reason = CP_RECOVERY,
568
		};
569
		mutex_unlock(&sbi->cp_mutex);
570
		write_checkpoint(sbi, &cpc);
571 572
	} else {
		mutex_unlock(&sbi->cp_mutex);
573
	}
574
	return err;
575
}