/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

static struct kmem_cache *fsync_entry_slab;

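/*
 * Quick space check before roll-forward: the blocks that were valid at
 * the last checkpoint plus those allocated since must still fit within
 * the user-visible block count, or replaying the log could overcommit
 * the volume.
 */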
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	if (sbi->last_valid_block_count + sbi->alloc_valid_block_count
			> sbi->user_block_count)
		return false;
	return true;
}

static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

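/*
 * Re-link a recovered inode under its parent directory.  A stale entry
 * with the same name but a different ino is turned into an orphan and
 * deleted, and the lookup is retried before the new link is added.
 */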
static int recover_dentry(struct page *ipage, struct inode *inode)
{
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct qstr name;
	struct page *page;
	struct inode *dir, *einode;
	int err = 0;

	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
		goto out_err;
	}
retry:
	de = f2fs_find_entry(dir, &name, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino)) {
		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
		goto out_unmap_put;
	}
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
			err = PTR_ERR(einode);
			if (err == -ENOENT)
				err = -EEXIST;
			goto out_unmap_put;
		}
		err = acquire_orphan_inode(F2FS_I_SB(inode));
		if (err) {
			iput(einode);
			goto out_unmap_put;
		}
		f2fs_delete_entry(de, page, einode);
		iput(einode);
		goto retry;
	}
	err = __f2fs_add_link(dir, &name, inode);
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

	goto out;

out_unmap_put:
	kunmap(page);
	f2fs_put_page(page, 0);
out_err:
	iput(dir);
out:
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

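/*
 * Pull the attributes that fsync() must persist (mode, i_size and the
 * timestamps) out of the on-disk inode image into the VFS inode.
 */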
static void __recover_inode(struct inode *inode, struct page *page)
{
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
}
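
/*
 * Apply an inode update found in the log: refresh the in-core inode
 * and, if the node page carries a dentry mark, recover its directory
 * entry as well.
 */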
static int recover_inode(struct inode *inode, struct page *node_page)
{
	if (!IS_INODE(node_page))
		return 0;

	__recover_inode(inode, node_page);

	if (is_dent_dnode(node_page))
		return recover_dentry(node_page, inode);

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
			ino_of_node(node_page), F2FS_INODE(node_page)->i_name);
	return 0;
}

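/*
 * Recovery pass #1: walk the chain of warm node blocks written after
 * the last checkpoint and collect every inode that carries an fsync
 * mark, creating missing inode pages along the way.
 */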
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (blkaddr < SM_I(sbi)->main_blkaddr ||
			blkaddr >= (SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi)))
			return 0;

		page = get_meta_page_ra(sbi, blkaddr);

		if (cp_ver != cpver_of_node(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (IS_INODE(page) && is_dent_dnode(page))
				set_inode_flag(F2FS_I(entry->inode),
							FI_INC_LINK);
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
				err = recover_inode_page(sbi, page);
				if (err)
					break;
			}

			/* add this fsync inode to the list */
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_NOFS);
			if (!entry) {
				err = -ENOMEM;
				break;
			}

			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
				kmem_cache_free(fsync_entry_slab, entry);
				if (err == -ENOENT)
					goto next;
				break;
			}
			list_add_tail(&entry->list, head);
		}
		entry->blkaddr = blkaddr;

		err = recover_inode(entry->inode, page);
		if (err && err != -ENOENT)
			break;
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	f2fs_put_page(page, 1);
	return err;
}

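/* Drop every entry gathered by find_fsync_dnodes(). */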
static void destroy_fsync_dnodes(struct list_head *head)
{
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

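/*
 * A block about to be recovered at @blkaddr may still be referenced by
 * an older node page.  Find that page through the segment summary and
 * invalidate the stale index, so the same block is not mapped twice.
 */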
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct page *sum_page, *node_page;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset;
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.nid = nid;
		tdn.node_page = dn->inode_page;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		truncate_data_blocks_range(&tdn, 1);
		return 0;
	} else if (dn->nid == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		truncate_data_blocks_range(&tdn, 1);
		return 0;
	}

	/* Get the node page */
	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	offset = ofs_of_node(node_page);
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}

	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
			le16_to_cpu(sum.ofs_in_node);

	if (ino != dn->inode->i_ino) {
		truncate_hole(inode, bidx, bidx + 1);
		iput(inode);
	} else {
		struct dnode_of_data tdn;
		set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
		if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
			return 0;
		if (tdn.data_blkaddr != NULL_ADDR)
			truncate_data_blocks_range(&tdn, 1);
		f2fs_put_page(tdn.node_page, 1);
	}
	return 0;
}

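/*
 * Replay one fsync'd node block: restore any xattr or inline data it
 * holds, then walk its data pointers and write every block address that
 * moved since the checkpoint back into place.
 */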
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	unsigned int start, end;
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), fi);
	end = start + ADDRS_PER_PAGE(page, fi);

	f2fs_lock_op(sbi);

	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		f2fs_unlock_op(sbi);
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
			}

			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;

			set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

			/* write dummy data page */
			recover_data_page(sbi, NULL, &sum, src, dest);
			update_extent_cache(dest, &dn);
			recovered++;
		}
		dn.ofs_in_node++;
	}

	/* write node page in place */
	set_summary(&sum, dn.nid, 0, 0);
	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
	return err;
}

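/*
 * Recovery pass #2: rescan the node chain and, for every block that
 * belongs to an inode on the fsync list, replay its data in place.
 */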
static int recover_data(struct f2fs_sb_info *sbi,
				struct list_head *head, int type)
{
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
	struct curseg_info *curseg;
	struct page *page = NULL;
	int err = 0;
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (blkaddr < SM_I(sbi)->main_blkaddr ||
			blkaddr >= (SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi)))
			break;

		page = get_meta_page_ra(sbi, blkaddr);

		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
			break;
		}

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call __recover_inode for the inode update.
		 */
		if (IS_INODE(page))
			__recover_inode(entry->inode, page);

		err = do_recover_data(sbi, entry->inode, page, blkaddr);
		if (err) {
			f2fs_put_page(page, 1);
			break;
		}

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	if (!err)
		allocate_new_segments(sbi);
	return err;
}

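/*
 * Entry point for roll-forward recovery at mount time: pass #1 builds
 * the fsync inode list, pass #2 replays the data, and a checkpoint is
 * written once everything has been recovered.
 */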
int recover_fsync_data(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	struct list_head inode_list;
	block_t blkaddr;
	int err;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
	sbi->por_doing = true;

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
		goto out;

	if (list_empty(&inode_list))
		goto out;

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);
	kmem_cache_destroy(fsync_entry_slab);

	/* truncate meta pages which were used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
		(loff_t)SM_I(sbi)->main_blkaddr << PAGE_CACHE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	sbi->por_doing = false;
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
	} else if (need_writecp) {
		mutex_unlock(&sbi->cp_mutex);
		write_checkpoint(sbi, false);
	} else {
		mutex_unlock(&sbi->cp_mutex);
	}
	return err;
}