recovery.c 13.3 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
static struct kmem_cache *fsync_entry_slab;

/*
 * Report whether enough unreserved space remains to replay fsync'd data.
 * Roll-forward recovery may allocate new blocks, so it is only attempted
 * while the already-valid plus in-flight allocations fit in user space.
 */
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	return sbi->last_valid_block_count + sbi->alloc_valid_block_count
						<= sbi->user_block_count;
}

/*
 * Find the queued fsync entry for inode number @ino on @head.
 * Returns NULL when no entry for that inode has been collected yet.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *e;

	list_for_each_entry(e, head, list) {
		if (e->inode->i_ino == ino)
			return e;
	}
	return NULL;
}

70
static int recover_dentry(struct inode *inode, struct page *ipage)
71
{
72
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
73
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
74
	struct f2fs_dir_entry *de;
75
	struct qstr name;
76
	struct page *page;
J
Jaegeuk Kim 已提交
77
	struct inode *dir, *einode;
78 79
	int err = 0;

80 81 82 83 84 85
	dir = f2fs_iget(inode->i_sb, pino);
	if (IS_ERR(dir)) {
		err = PTR_ERR(dir);
		goto out;
	}

86 87
	name.len = le32_to_cpu(raw_inode->i_namelen);
	name.name = raw_inode->i_name;
88 89 90 91

	if (unlikely(name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
92
		goto out_err;
93
	}
J
Jaegeuk Kim 已提交
94 95
retry:
	de = f2fs_find_entry(dir, &name, &page);
96 97
	if (de && inode->i_ino == le32_to_cpu(de->ino)) {
		clear_inode_flag(F2FS_I(inode), FI_INC_LINK);
98
		goto out_unmap_put;
99
	}
J
Jaegeuk Kim 已提交
100 101 102 103
	if (de) {
		einode = f2fs_iget(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
104 105
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
106
				err = -EEXIST;
107 108
			goto out_unmap_put;
		}
109
		err = acquire_orphan_inode(F2FS_I_SB(inode));
110 111 112
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
113
		}
114
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
115 116
		iput(einode);
		goto retry;
117
	}
J
Jaegeuk Kim 已提交
118
	err = __f2fs_add_link(dir, &name, inode);
119 120 121 122 123 124 125 126 127 128
	if (err)
		goto out_err;

	if (is_inode_flag_set(F2FS_I(dir), FI_DELAY_IPUT)) {
		iput(dir);
	} else {
		add_dirty_dir_inode(dir);
		set_inode_flag(F2FS_I(dir), FI_DELAY_IPUT);
	}

129 130 131
	goto out;

out_unmap_put:
C
Chao Yu 已提交
132 133
	if (!f2fs_has_inline_dentry(dir))
		kunmap(page);
134
	f2fs_put_page(page, 0);
135 136
out_err:
	iput(dir);
137
out:
C
Chris Fries 已提交
138 139 140
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), raw_inode->i_name,
D
Dan Carpenter 已提交
141
			IS_ERR(dir) ? 0 : dir->i_ino, err);
142 143 144
	return err;
}

145
static void recover_inode(struct inode *inode, struct page *page)
146
{
147 148 149 150 151 152 153 154 155 156
	struct f2fs_inode *raw = F2FS_INODE(page);

	inode->i_mode = le16_to_cpu(raw->i_mode);
	i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
157 158

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
159
			ino_of_node(page), F2FS_INODE(page)->i_name);
160 161 162 163
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
164
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
165
	struct curseg_info *curseg;
166
	struct page *page = NULL;
167 168 169 170 171
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
172
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
173 174 175 176

	while (1) {
		struct fsync_inode_entry *entry;

177
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
178
			return 0;
179

180
		page = get_meta_page_ra(sbi, blkaddr);
181

182
		if (cp_ver != cpver_of_node(page))
183
			break;
184 185 186 187 188 189 190 191 192 193 194

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (entry) {
			if (IS_INODE(page) && is_dent_dnode(page))
				set_inode_flag(F2FS_I(entry->inode),
							FI_INC_LINK);
		} else {
			if (IS_INODE(page) && is_dent_dnode(page)) {
195 196
				err = recover_inode_page(sbi, page);
				if (err)
197
					break;
198 199 200
			}

			/* add this fsync inode to the list */
201
			entry = kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
202 203
			if (!entry) {
				err = -ENOMEM;
204
				break;
205
			}
206 207 208 209
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
210 211 212
			entry->inode = f2fs_iget(sbi->sb, ino_of_node(page));
			if (IS_ERR(entry->inode)) {
				err = PTR_ERR(entry->inode);
213
				kmem_cache_free(fsync_entry_slab, entry);
214 215
				if (err == -ENOENT)
					goto next;
216
				break;
217
			}
218
			list_add_tail(&entry->list, head);
219
		}
J
Jaegeuk Kim 已提交
220 221
		entry->blkaddr = blkaddr;

222 223 224 225 226
		if (IS_INODE(page)) {
			entry->last_inode = blkaddr;
			if (is_dent_dnode(page))
				entry->last_dentry = blkaddr;
		}
227 228 229
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
230
		f2fs_put_page(page, 1);
231
	}
232
	f2fs_put_page(page, 1);
233 234 235
	return err;
}

236
static void destroy_fsync_dnodes(struct list_head *head)
237
{
238 239 240
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list) {
241 242 243 244 245 246
		iput(entry->inode);
		list_del(&entry->list);
		kmem_cache_free(fsync_entry_slab, entry);
	}
}

247
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
248
			block_t blkaddr, struct dnode_of_data *dn)
249 250 251
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
252
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
253
	struct f2fs_summary_block *sum_node;
254
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
255
	struct page *sum_page, *node_page;
256
	nid_t ino, nid;
257
	struct inode *inode;
258
	unsigned int offset;
259 260 261 262 263
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
264
		return 0;
265 266 267 268 269 270

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
271
			goto got_it;
272 273 274
		}
	}

J
Jaegeuk Kim 已提交
275 276 277 278 279
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
280 281 282 283 284 285
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		struct dnode_of_data tdn = *dn;
		tdn.nid = nid;
		tdn.node_page = dn->inode_page;
286
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
287
		truncate_data_blocks_range(&tdn, 1);
288
		return 0;
289 290
	} else if (dn->nid == nid) {
		struct dnode_of_data tdn = *dn;
291
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
292
		truncate_data_blocks_range(&tdn, 1);
293
		return 0;
294 295
	}

296
	/* Get the node page */
297
	node_page = get_node_page(sbi, nid);
298 299
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
300 301

	offset = ofs_of_node(node_page);
302 303 304
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

305 306 307 308 309 310 311 312
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
313

314
	bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
315
			le16_to_cpu(sum.ofs_in_node);
316

317 318 319 320 321 322 323 324 325 326 327 328
	if (ino != dn->inode->i_ino) {
		truncate_hole(inode, bidx, bidx + 1);
		iput(inode);
	} else {
		struct dnode_of_data tdn;
		set_new_dnode(&tdn, inode, dn->inode_page, NULL, 0);
		if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
			return 0;
		if (tdn.data_blkaddr != NULL_ADDR)
			truncate_data_blocks_range(&tdn, 1);
		f2fs_put_page(tdn.node_page, 1);
	}
329
	return 0;
330 331
}

332
/*
 * Replay one fsync'd node page @page (located at @blkaddr) onto @inode:
 * restore xattrs and inline data, then copy every recovered data-block
 * address into the live dnode, reclaiming stale owners along the way.
 * Returns 0 on success or a negative errno.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	unsigned int start, end;
	struct dnode_of_data dn;
	struct f2fs_summary sum;
	struct node_info ni;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		recover_xattr_data(inode, page, blkaddr);
		/* an xattr node carries no data indices */
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), fi);
	end = start + ADDRS_PER_PAGE(page, fi);

	f2fs_lock_op(sbi);

	set_new_dnode(&dn, inode, NULL, NULL, 0);

	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		f2fs_unlock_op(sbi);
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* only replay slots that changed to a real block address */
		if (src != dest && dest != NEW_ADDR && dest != NULL_ADDR) {
			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
			}

			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err)
				goto err;

			set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);

			/* write dummy data page */
			recover_data_page(sbi, NULL, &sum, src, dest);
			update_extent_cache(dest, &dn);
			recovered++;
		}
		dn.ofs_in_node++;
	}

	/* write node page in place */
	set_summary(&sum, dn.nid, 0, 0);
	if (IS_INODE(dn.node_page))
		sync_inode_page(&dn);

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
	f2fs_unlock_op(sbi);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
	return err;
}

421
static int recover_data(struct f2fs_sb_info *sbi,
422 423
				struct list_head *head, int type)
{
424
	unsigned long long cp_ver = cur_cp_version(F2FS_CKPT(sbi));
425
	struct curseg_info *curseg;
426
	struct page *page = NULL;
427
	int err = 0;
428 429 430 431 432 433 434 435 436
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, type);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

437
		if (blkaddr < MAIN_BLKADDR(sbi) || blkaddr >= MAX_BLKADDR(sbi))
438
			break;
439

440
		page = get_meta_page_ra(sbi, blkaddr);
441

442 443
		if (cp_ver != cpver_of_node(page)) {
			f2fs_put_page(page, 1);
444
			break;
445
		}
446 447 448 449

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry)
			goto next;
450 451 452
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
453
		 * So, call recover_inode for the inode update.
454
		 */
455 456 457 458 459 460 461 462 463
		if (entry->last_inode == blkaddr)
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
464
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
465 466
		if (err) {
			f2fs_put_page(page, 1);
467
			break;
468
		}
469 470 471 472 473 474 475 476 477

		if (entry->blkaddr == blkaddr) {
			iput(entry->inode);
			list_del(&entry->list);
			kmem_cache_free(fsync_entry_slab, entry);
		}
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
478
		f2fs_put_page(page, 1);
479
	}
480 481 482
	if (!err)
		allocate_new_segments(sbi);
	return err;
483 484
}

485
int recover_fsync_data(struct f2fs_sb_info *sbi)
486
{
487
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
488
	struct list_head inode_list;
489
	block_t blkaddr;
490
	int err;
H
Haicheng Li 已提交
491
	bool need_writecp = false;
492 493

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
494
			sizeof(struct fsync_inode_entry));
495
	if (!fsync_entry_slab)
496
		return -ENOMEM;
497 498 499 500

	INIT_LIST_HEAD(&inode_list);

	/* step #1: find fsynced inode numbers */
H
Haicheng Li 已提交
501
	sbi->por_doing = true;
502

503 504 505
	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

506 507
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

508 509
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err)
510 511 512 513 514
		goto out;

	if (list_empty(&inode_list))
		goto out;

H
Haicheng Li 已提交
515
	need_writecp = true;
516

517
	/* step #2: recover data */
518
	err = recover_data(sbi, &inode_list, CURSEG_WARM_NODE);
519
	if (!err)
520
		f2fs_bug_on(sbi, !list_empty(&inode_list));
521
out:
522
	destroy_fsync_dnodes(&inode_list);
523
	kmem_cache_destroy(fsync_entry_slab);
524

525 526
	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
527
			MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
528

529 530 531 532 533
	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

H
Haicheng Li 已提交
534
	sbi->por_doing = false;
535 536 537 538 539 540
	if (err) {
		discard_next_dnode(sbi, blkaddr);

		/* Flush all the NAT/SIT pages */
		while (get_pages(sbi, F2FS_DIRTY_META))
			sync_meta_pages(sbi, META, LONG_MAX);
541 542
		set_ckpt_flags(sbi->ckpt, CP_ERROR_FLAG);
		mutex_unlock(&sbi->cp_mutex);
543
	} else if (need_writecp) {
544 545 546
		struct cp_control cpc = {
			.reason = CP_SYNC,
		};
547
		mutex_unlock(&sbi->cp_mutex);
548
		write_checkpoint(sbi, &cpc);
549 550
	} else {
		mutex_unlock(&sbi->cp_mutex);
551
	}
552
	return err;
553
}