/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x)
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) was missing. Should drop this dnode(F).
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then goto next to find inode(DF).
 *    But it will fail due to no inode(DF).
 */

static struct kmem_cache *fsync_entry_slab;

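/*
 * Roll forward recovery will allocate new blocks; check that the blocks
 * written since the last checkpoint still fit in the user-visible space.
 */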
bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
		return false;
	return true;
}

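/* Find the entry tracking @ino on @head, or NULL if it is not listed. */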
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *entry;

	list_for_each_entry(entry, head, list)
		if (entry->inode->i_ino == ino)
			return entry;

	return NULL;
}

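/*
 * Grab a reference to the inode behind @ino and start tracking it on
 * @head.  Returns the new entry, or an ERR_PTR() if the inode cannot
 * be read.
 */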
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
					struct list_head *head, nid_t ino)
{
	struct inode *inode;
	struct fsync_inode_entry *entry;

	inode = f2fs_iget_retry(sbi->sb, ino);
	if (IS_ERR(inode))
		return ERR_CAST(inode);

	entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	entry->inode = inode;
	list_add_tail(&entry->list, head);

	return entry;
}

static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	iput(entry->inode);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

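/*
 * Re-link a fsynced inode into its parent directory, using the parent ino
 * and name recorded in the raw inode.  A stale entry that already owns the
 * name is made an orphan and deleted before the lookup is retried.
 */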
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
{
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
	struct f2fs_dir_entry *de;
	struct fscrypt_name fname;
	struct page *page;
	struct inode *dir, *einode;
	struct fsync_inode_entry *entry;
	int err = 0;
	char *name;

	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
			goto out;
		}
	}

	dir = entry->inode;

	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;

	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
		WARN_ON(1);
		err = -ENAMETOOLONG;
		goto out;
	}
retry:
	de = __f2fs_find_entry(dir, &fname, &page);
	if (de && inode->i_ino == le32_to_cpu(de->ino))
		goto out_unmap_put;

	if (de) {
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
		if (IS_ERR(einode)) {
			WARN_ON(1);
			err = PTR_ERR(einode);
			if (err == -ENOENT)
				err = -EEXIST;
			goto out_unmap_put;
		}
		err = acquire_orphan_inode(F2FS_I_SB(inode));
		if (err) {
			iput(einode);
			goto out_unmap_put;
		}
		f2fs_delete_entry(de, page, dir, einode);
		iput(einode);
		goto retry;
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
		err = __f2fs_do_add_link(dir, &fname, inode,
					inode->i_ino, inode->i_mode);
	}
	if (err == -ENOMEM)
		goto retry;
	goto out;

out_unmap_put:
	f2fs_dentry_kunmap(dir, page);
	f2fs_put_page(page, 0);
out:
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
			__func__, ino_of_node(ipage), name,
			IS_ERR(dir) ? 0 : dir->i_ino, err);
	return err;
}

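/*
 * Copy the latest metadata (mode, size, timestamps, advise hints) from
 * the on-disk inode in the node page into the in-memory inode.
 */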
static void recover_inode(struct inode *inode, struct page *page)
{
	struct f2fs_inode *raw = F2FS_INODE(page);
	char *name;

	inode->i_mode = le16_to_cpu(raw->i_mode);
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);

	F2FS_I(inode)->i_advise = raw->i_advise;

	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
			ino_of_node(page), name);
}

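/*
 * Pass #1: follow the chain of warm node blocks written after the last
 * checkpoint and collect every inode that has a fsync mark.
 */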
static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			return 0;

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page))
			break;

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
		if (!entry) {
			if (IS_INODE(page) && is_dent_dnode(page)) {
				err = recover_inode_page(sbi, page);
				if (err)
					break;
			}

			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
			entry = add_fsync_inode(sbi, head, ino_of_node(page));
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
				if (err == -ENOENT) {
					err = 0;
					goto next;
				}
				break;
			}
		}
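		/* remember the last fsync dnode block seen for this inode */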
		entry->blkaddr = blkaddr;

		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);

		ra_meta_pages_cond(sbi, blkaddr);
	}
	f2fs_put_page(page, 1);
	return err;
}

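/* Drop all tracked inodes on @head and release their references. */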
static void destroy_fsync_dnodes(struct list_head *head)
{
	struct fsync_inode_entry *entry, *tmp;

	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
}

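/*
 * @blkaddr was chosen as the destination of a recovered block, but a node
 * written before the checkpoint may still reference it.  Locate that node
 * through the segment summary and truncate the stale index so the block
 * can be reused safely.
 */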
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
			block_t blkaddr, struct dnode_of_data *dn)
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
	struct f2fs_summary_block *sum_node;
	struct f2fs_summary sum;
	struct page *sum_page, *node_page;
	struct dnode_of_data tdn = *dn;
	nid_t ino, nid;
	struct inode *inode;
	unsigned int offset;
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
		return 0;

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
			goto got_it;
		}
	}

	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
		tdn.node_page = dn->inode_page;
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	} else if (dn->nid == nid) {
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
		goto truncate_out;
	}

	/* Get the node page */
	node_page = get_node_page(sbi, nid);
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);

	offset = ofs_of_node(node_page);
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
		inode = f2fs_iget_retry(sbi->sb, ino);
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}

	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);

	/*
	 * If the inode page is locked, unlock it temporarily, but keep
	 * its reference count held.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
		iput(inode);
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
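	/* the stale index lives in a dnode page we already hold locked */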
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
	return 0;
}

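/*
 * Replay a single fsynced node page against @inode: recover xattrs first,
 * then inline data, and finally every data block index the page covers.
 */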
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start, end;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		err = recover_xattr_data(inode, page, blkaddr);
		if (!err)
			recovered++;
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src, dest;

		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		if (!file_keep_isize(inode) &&
			(i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT)))
			f2fs_i_size_write(inode,
				(loff_t)(start + 1) << PAGE_SHIFT);

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			reserve_new_block(&dn);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {

			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
				while (err)
					err = reserve_new_block(&dn);
#endif
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx (i_size: %s) recovered = %d, err = %d",
		inode->i_ino,
		file_keep_isize(inode) ? "keep" : "recover",
		recovered, err);
	return err;
}

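/*
 * Pass #2: walk the node chain again and, for each block that belongs to
 * a tracked inode, recover the inode metadata, its directory entry and
 * its data blocks in order.
 */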
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
{
	struct curseg_info *curseg;
	struct page *page = NULL;
	int err = 0;
	block_t blkaddr;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
			break;

		ra_meta_pages_cond(sbi, blkaddr);

		page = get_tmp_page(sbi, blkaddr);

		if (!is_recoverable_dnode(page)) {
			f2fs_put_page(page, 1);
			break;
		}

		entry = get_fsync_inode(inode_list, ino_of_node(page));
		if (!entry)
			goto next;
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
		 * So, call recover_inode for the inode update.
		 */
		if (IS_INODE(page))
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
			err = recover_dentry(entry->inode, page, dir_list);
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
		if (err) {
			f2fs_put_page(page, 1);
			break;
		}

		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
		f2fs_put_page(page, 1);
	}
	if (!err)
		allocate_new_segments(sbi);
	return err;
}

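/*
 * Mount-time entry point for roll forward recovery.  Returns 1 when
 * @check_only is set and there is something to recover, 0 on success or
 * when nothing needs recovery, and a negative errno on failure.
 */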
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list;
	struct list_head dir_list;
	int err;
	int ret = 0;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err || list_empty(&inode_list))
		goto out;

	if (check_only) {
		ret = 1;
		goto out;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);

	/* truncate the meta pages which were used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err)
		set_ckpt_flags(sbi, CP_ERROR_FLAG);
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list);

	if (!err && need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};
		err = write_checkpoint(sbi, &cpc);
	}

	kmem_cache_destroy(fsync_entry_slab);
	return ret ? ret : err;
}