/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

/*
 * Slab cache for struct fsync_inode_entry objects. It is created at the
 * start of recover_fsync_data() and destroyed before that function returns.
 */
static struct kmem_cache *fsync_entry_slab;

/*
 * Report whether the block accounting still leaves room for roll-forward
 * recovery: true as long as last_valid_block_count plus
 * alloc_valid_block_count does not exceed user_block_count.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count <=
						sbi->user_block_count;
}

/*
 * Search the list @head for the fsync entry whose inode number equals @ino.
 *
 * Returns the matching entry, or NULL when no entry on the list matches.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
88 89 90 91

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
92
		goto out_err;
93
	}
J
Jaegeuk Kim 已提交
94 95
retry:
	de = f2fs_find_entry(dir, &name, &page);
96 97
	if (de && inode->i_ino == le32_to_cpu(de->ino)) {
		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98
		goto out_unmap_put;
99
	}
J
Jaegeuk Kim 已提交
100 101 102 103
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
104 105
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
106
				err = -EEXIST;
107 108
			goto out_unmap_put;
		}
109
		err = acquire_orphan_inode(F2FS_I_SB(inode));
110 111 112
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
113 114 115 116
		}
		f2fs_delete_entry(de, page, einode);
		iput(einode);
		goto retry;
117
	}
J
Jaegeuk Kim 已提交
118
	err = __f2fs_add_link(dir, &name, inode);
119 120 121 122 123 124 125 126 127 128
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

129 130 131 132 133
	goto out;

out_unmap_put:
	kunmap(page);
	f2fs_put_page(page, 0);
134 135
out_err:
	iput(dir);
136
out:
C
Chris Fries 已提交
137 138 139
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
140
			IS_ERR(dir) ? 0 : dir->i_ino, err);
141 142 143
	return err;
}

144
static void recover_inode(struct inode *inode, struct page *page)
145
{
146 147 148 149 150 151 152 153 154 155
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
156 157

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
158
			ino_of_node(page), F2FS_INODE(page)->i_name);
159 160 161 162
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
163
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
164
	struct curseg_info *curseg;
165
	struct page *page = NULL;
166 167 168 169 170
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
171
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
172 173 174 175

	while (1) {
		struct fsync_inode_entry *entry;

176 177 178
		if (blkaddr < SM_I(sbi)->main_blkaddr ||
			blkaddr >= (SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi)))
			return 0;
179

180
		page = get_meta_page_ra(sbi, blkaddr);
181

182
		if (cp_ver != cpver_of_node(page))
183
			break;
184 185 186 187 188 189 190 191 192 193 194

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (IS_INODE(page) && is_dent_dnode(page))
				set_inode_flag(F2FS_I(entry->inode),
							FI_INC_LINK);
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
195 196
				err = recover_inode_page(sbi, page);
				if (err)
197
					break;
198 199 200
			}

			/* add this fsync inode to the list */
201
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
202 203
			if (!entry) {
				err = -ENOMEM;
204
				break;
205
			}
206 207 208 209
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
210 211 212
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
213
				kmem_cache_free(fsync_entry_slab, entry);
214 215
				if (err == -ENOENT)
					goto next;
216
				break;
217
			}
218
			list_add_tail(&entry->list, head);
219
		}
J
Jaegeuk Kim 已提交
220 221
		entry->blkaddr = blkaddr;

222 223 224 225 226
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
227 228 229
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
230
		f2fs_put_page(page, 1);
231
	}
232
	f2fs_put_page(page, 1);
233 234 235
	return err;
}

236
static void destroy_fsync_dnodes(struct list_head *head)
237
{
238 239 240
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
241 242 243 244 245 246
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

247
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
248
			block_t blkaddr, struct dnode_of_data *dn)
249 250 251
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
252
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
253
	struct f2fs_summary_block *sum_node;
254
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
255
	struct page *sum_page, *node_page;
256
	nid_t ino, nid;
257
	struct inode *inode;
258
	unsigned int offset;
259 260 261 262 263
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
264
		return 0;
265 266 267 268 269 270

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
271
			goto got_it;
272 273 274
		}
	}

J
Jaegeuk Kim 已提交
275 276 277 278 279
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
280 281 282 283 284 285
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.nid = nid;
		tdn.node_page = dn->inode_page;
286
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
287
		truncate_data_blocks_range(&tdn, 1);
288
		return 0;
289 290
	} else if (dn->nid == nid) {
		struct dnode_of_data tdn = *dn;
291
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
292
		truncate_data_blocks_range(&tdn, 1);
293
		return 0;
294 295
	}

296
	/* Get the node page */
297
	node_page = get_node_page(sbi, nid);
298 299
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
300 301

	offset = ofs_of_node(node_page);
302 303 304
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

305 306 307 308 309 310 311 312
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
313

314
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
315
			le16_to_cpu(sum.ofs_in_node);
316

317 318 319 320 321 322 323 324 325 326 327 328
	if (ino != dn->inode->i_ino) {
		truncate_hole(inode, bidx, bidx + 1);
		iput(inode);
	} else {
		struct dnode_of_data tdn;
		set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
		if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
			return 0;
		if (tdn.data_blkaddr != NULL_ADDR)
			truncate_data_blocks_range(&tdn, 1);
		f2fs_put_page(tdn.node_page, 1);
	}
329
	return 0;
330 331
}

332
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
333 334
					struct page *page, block_t blkaddr)
{
335
	struct f2fs_inode_info *fi = F2FS_I(inode);
336 337 338 339
	unsigned int start, end;
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
340
	int err = 0, recovered = 0;
341

342 343 344 345 346
	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		recover_xattr_data(inode, page, blkaddr);
347
		goto out;
348
	}
349

350 351
	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
352 353
		goto out;

354
	/* step 3: recover data indices */
355
	start = start_bidx_of_node(ofs_of_node(page), fi);
356
	end = start + ADDRS_PER_PAGE(page, fi);
357

358
	f2fs_lock_op(sbi);
359

360
	set_new_dnode(&dn, inode, NULL, NULL, 0);
361

362
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
363
	if (err) {
364
		f2fs_unlock_op(sbi);
365
		goto out;
366
	}
367

368
	f2fs_wait_on_page_writeback(dn.node_page, NODE);
369 370

	get_node_info(sbi, dn.nid, &ni);
371 372
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));
373 374 375 376 377 378 379 380 381

	for (; start < end; start++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
			if (src == NULL_ADDR) {
382
				err = reserve_new_block(&dn);
383
				/* We should not get -ENOSPC */
384
				f2fs_bug_on(sbi, err);
385 386 387
			}

			/* Check the previous node page having this index */
388 389 390
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;
391 392 393 394 395 396

			set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

			/* write dummy data page */
			recover_data_page(sbi, NULL, &sum, src, dest);
			update_extent_cache(dest, &dn);
397
			recovered++;
398 399 400 401 402 403 404 405 406 407 408 409 410
		}
		dn.ofs_in_node++;
	}

	/* write node page in place */
	set_summary(&sum, dn.nid, 0, 0);
	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
411
err:
412
	f2fs_put_dnode(&dn);
413
	f2fs_unlock_op(sbi);
414
out:
C
Chris Fries 已提交
415 416 417
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
418
	return err;
419 420
}

421
static int recover_data(struct f2fs_sb_info *sbi,
422 423
				struct list_head *head, int type)
{
424
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
425
	struct curseg_info *curseg;
426
	struct page *page = NULL;
427
	int err = 0;
428 429 430 431 432 433 434 435 436
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

437 438 439
		if (blkaddr < SM_I(sbi)->main_blkaddr ||
			blkaddr >= (SM_I(sbi)->seg0_blkaddr + TOTAL_BLKS(sbi)))
			break;
440

441
		page = get_meta_page_ra(sbi, blkaddr);
442

443 444
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
445
			break;
446
		}
447 448 449 450

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
451 452 453
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
454
		 * So, call recover_inode for the inode update.
455
		 */
456 457 458 459 460 461 462 463 464
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
465
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
466 467
		if (err) {
			f2fs_put_page(page, 1);
468
			break;
469
		}
470 471 472 473 474 475 476 477 478

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
479
		f2fs_put_page(page, 1);
480
	}
481 482 483
	if (!err)
		allocate_new_segments(sbi);
	return err;
484 485
}

486
int recover_fsync_data(struct f2fs_sb_info *sbi)
487
{
488
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
489
	struct list_head inode_list;
490
	block_t blkaddr;
491
	int err;
H
Haicheng Li 已提交
492
	bool need_writecp = false;
493 494

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
495
			sizeof(struct fsync_inode_entry));
496
	if (!fsync_entry_slab)
497
		return -ENOMEM;
498 499 500 501

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
H
Haicheng Li 已提交
502
	sbi->por_doing = true;
503

504 505 506
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

507 508
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

509 510
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
511 512 513 514 515
		goto out;

	if (list_empty(&inode_list))
		goto out;

H
Haicheng Li 已提交
516
	need_writecp = true;
517

518
	/* step #2: recover data */
519
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
520
	if (!err)
521
		f2fs_bug_on(sbi, !list_empty(&inode_list));
522
out:
523
	destroy_fsync_dnodes(&inode_list);
524
	kmem_cache_destroy(fsync_entry_slab);
525

526 527 528 529
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
		SM_I(sbi)->main_blkaddr << PAGE_CACHE_SHIFT, -1);

530 531 532 533 534
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

H
Haicheng Li 已提交
535
	sbi->por_doing = false;
536 537 538 539 540 541
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
542 543
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
544
	} else if (need_writecp) {
545 546 547
		struct cp_control cpc = {
			.reason = CP_SYNC,
		};
548
		mutex_unlock(&sbi->cp_mutex);
549
		write_checkpoint(sbi, &cpc);
550 551
	} else {
		mutex_unlock(&sbi->cp_mutex);
552
	}
553
	return err;
554
}