/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
/*
 * Slab cache from which struct fsync_inode_entry objects are allocated.
 * It is created at the start of recover_fsync_data() and destroyed before
 * that function returns.
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Tell whether there is still room for roll-forward recovery.
 *
 * Returns false once last_valid_block_count plus alloc_valid_block_count
 * exceeds user_block_count, and true otherwise.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
						<= sbi->user_block_count;
}

/*
 * Look up the fsync inode entry on @head whose inode number equals @ino.
 *
 * Returns the first matching entry, or NULL when no entry matches.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *found = NULL;
	struct fsync_inode_entry *cur;

	list_for_each_entry(cur, head, list) {
		if (cur->inode->i_ino == ino) {
			found = cur;
			break;
		}
	}

	return found;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
88 89 90 91

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
92
		goto out_err;
93
	}
J
Jaegeuk Kim 已提交
94 95
retry:
	de = f2fs_find_entry(dir, &name, &page);
96 97
	if (de && inode->i_ino == le32_to_cpu(de->ino)) {
		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98
		goto out_unmap_put;
99
	}
J
Jaegeuk Kim 已提交
100 101 102 103
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
104 105
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
106
				err = -EEXIST;
107 108
			goto out_unmap_put;
		}
109
		err = acquire_orphan_inode(F2FS_I_SB(inode));
110 111 112
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
113 114 115 116
		}
		f2fs_delete_entry(de, page, einode);
		iput(einode);
		goto retry;
117
	}
J
Jaegeuk Kim 已提交
118
	err = __f2fs_add_link(dir, &name, inode);
119 120 121 122 123 124 125 126 127 128
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

129 130 131 132 133
	goto out;

out_unmap_put:
	kunmap(page);
	f2fs_put_page(page, 0);
134 135
out_err:
	iput(dir);
136
out:
C
Chris Fries 已提交
137 138 139
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
140
			IS_ERR(dir) ? 0 : dir->i_ino, err);
141 142 143
	return err;
}

144
static void recover_inode(struct inode *inode, struct page *page)
145
{
146 147 148 149 150 151 152 153 154 155
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
156 157

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
158
			ino_of_node(page), F2FS_INODE(page)->i_name);
159 160 161 162
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
163
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
164
	struct curseg_info *curseg;
165
	struct page *page = NULL;
166 167 168 169 170
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
171
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
172 173 174 175

	while (1) {
		struct fsync_inode_entry *entry;

176
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
177
			return 0;
178

179
		page = get_meta_page_ra(sbi, blkaddr);
180

181
		if (cp_ver != cpver_of_node(page))
182
			break;
183 184 185 186 187 188 189 190 191 192 193

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (IS_INODE(page) && is_dent_dnode(page))
				set_inode_flag(F2FS_I(entry->inode),
							FI_INC_LINK);
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
194 195
				err = recover_inode_page(sbi, page);
				if (err)
196
					break;
197 198 199
			}

			/* add this fsync inode to the list */
200
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
201 202
			if (!entry) {
				err = -ENOMEM;
203
				break;
204
			}
205 206 207 208
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
209 210 211
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
212
				kmem_cache_free(fsync_entry_slab, entry);
213 214
				if (err == -ENOENT)
					goto next;
215
				break;
216
			}
217
			list_add_tail(&entry->list, head);
218
		}
J
Jaegeuk Kim 已提交
219 220
		entry->blkaddr = blkaddr;

221 222 223 224 225
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
226 227 228
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
229
		f2fs_put_page(page, 1);
230
	}
231
	f2fs_put_page(page, 1);
232 233 234
	return err;
}

235
static void destroy_fsync_dnodes(struct list_head *head)
236
{
237 238 239
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
240 241 242 243 244 245
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

246
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
247
			block_t blkaddr, struct dnode_of_data *dn)
248 249 250
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
251
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
252
	struct f2fs_summary_block *sum_node;
253
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
254
	struct page *sum_page, *node_page;
255
	nid_t ino, nid;
256
	struct inode *inode;
257
	unsigned int offset;
258 259 260 261 262
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
263
		return 0;
264 265 266 267 268 269

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
270
			goto got_it;
271 272 273
		}
	}

J
Jaegeuk Kim 已提交
274 275 276 277 278
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
279 280 281 282 283 284
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.nid = nid;
		tdn.node_page = dn->inode_page;
285
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
286
		truncate_data_blocks_range(&tdn, 1);
287
		return 0;
288 289
	} else if (dn->nid == nid) {
		struct dnode_of_data tdn = *dn;
290
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
291
		truncate_data_blocks_range(&tdn, 1);
292
		return 0;
293 294
	}

295
	/* Get the node page */
296
	node_page = get_node_page(sbi, nid);
297 298
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
299 300

	offset = ofs_of_node(node_page);
301 302 303
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

304 305 306 307 308 309 310 311
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
312

313
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
314
			le16_to_cpu(sum.ofs_in_node);
315

316 317 318 319 320 321 322 323 324 325 326 327
	if (ino != dn->inode->i_ino) {
		truncate_hole(inode, bidx, bidx + 1);
		iput(inode);
	} else {
		struct dnode_of_data tdn;
		set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
		if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
			return 0;
		if (tdn.data_blkaddr != NULL_ADDR)
			truncate_data_blocks_range(&tdn, 1);
		f2fs_put_page(tdn.node_page, 1);
	}
328
	return 0;
329 330
}

331
/*
 * Replay one fsync-marked node @page (located at @blkaddr) onto @inode.
 *
 * Inline xattrs or the xattr block are recovered first, then inline data.
 * If neither step finishes the job, every data index of @page is compared
 * with the matching index of the current node page; an index that differs
 * and holds a real block address is detached from any previous owner and
 * re-attached here.  Finally the node footer is copied over and the node
 * page is marked dirty.
 *
 * Returns 0 on success or a negative errno; the result is always logged.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	unsigned int bidx, last;
	int recovered = 0;
	int err = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		recover_xattr_data(inode, page, blkaddr);
		goto report;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto report;

	/* step 3: recover data indices */
	bidx = start_bidx_of_node(ofs_of_node(page), fi);
	last = bidx + ADDRS_PER_PAGE(page, fi);

	f2fs_lock_op(sbi);

	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, bidx, ALLOC_NODE);
	if (err) {
		f2fs_unlock_op(sbi);
		goto report;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE);

	/* the node we found must be the same node the fsynced page describes */
	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; bidx < last; bidx++, dn.ofs_in_node++) {
		block_t src = datablock_addr(dn.node_page, dn.ofs_in_node);
		block_t dest = datablock_addr(page, dn.ofs_in_node);

		/* nothing to replay for unchanged, new or empty indices */
		if (src == dest || dest == NEW_ADDR || dest == NULL_ADDR)
			continue;

		if (src == NULL_ADDR) {
			err = reserve_new_block(&dn);
			/* We should not get -ENOSPC */
			f2fs_bug_on(sbi, err);
		}

		/* Check the previous node page having this index */
		err = check_index_in_prev_nodes(sbi, dest, &dn);
		if (err)
			goto release;

		set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

		/* write dummy data page */
		recover_data_page(sbi, NULL, &sum, src, dest);
		update_extent_cache(dest, &dn);
		recovered++;
	}

	/* write node page in place */
	set_summary(&sum, dn.nid, 0, 0);
	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
release:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);
report:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
	return err;
}

420
static int recover_data(struct f2fs_sb_info *sbi,
421 422
				struct list_head *head, int type)
{
423
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
424
	struct curseg_info *curseg;
425
	struct page *page = NULL;
426
	int err = 0;
427 428 429 430 431 432 433 434 435
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

436
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
437
			break;
438

439
		page = get_meta_page_ra(sbi, blkaddr);
440

441 442
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
443
			break;
444
		}
445 446 447 448

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
449 450 451
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
452
		 * So, call recover_inode for the inode update.
453
		 */
454 455 456 457 458 459 460 461 462
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
463
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
464 465
		if (err) {
			f2fs_put_page(page, 1);
466
			break;
467
		}
468 469 470 471 472 473 474 475 476

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
477
		f2fs_put_page(page, 1);
478
	}
479 480 481
	if (!err)
		allocate_new_segments(sbi);
	return err;
482 483
}

484
int recover_fsync_data(struct f2fs_sb_info *sbi)
485
{
486
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
487
	struct list_head inode_list;
488
	block_t blkaddr;
489
	int err;
H
Haicheng Li 已提交
490
	bool need_writecp = false;
491 492

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
493
			sizeof(struct fsync_inode_entry));
494
	if (!fsync_entry_slab)
495
		return -ENOMEM;
496 497 498 499

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
H
Haicheng Li 已提交
500
	sbi->por_doing = true;
501

502 503 504
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

505 506
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

507 508
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
509 510 511 512 513
		goto out;

	if (list_empty(&inode_list))
		goto out;

H
Haicheng Li 已提交
514
	need_writecp = true;
515

516
	/* step #2: recover data */
517
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
518
	if (!err)
519
		f2fs_bug_on(sbi, !list_empty(&inode_list));
520
out:
521
	destroy_fsync_dnodes(&inode_list);
522
	kmem_cache_destroy(fsync_entry_slab);
523

524 525
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
526
			MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
527

528 529 530 531 532
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

H
Haicheng Li 已提交
533
	sbi->por_doing = false;
534 535 536 537 538 539
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
540 541
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
542
	} else if (need_writecp) {
543 544 545
		struct cp_control cpc = {
			.reason = CP_SYNC,
		};
546
		mutex_unlock(&sbi->cp_mutex);
547
		write_checkpoint(sbi, &cpc);
548 549
	} else {
		mutex_unlock(&sbi->cp_mutex);
550
	}
551
	return err;
552
}