recovery.c 14.7 KB
Newer Older
J
Jaegeuk Kim 已提交
1
/*
 * fs/f2fs/recovery.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include "f2fs.h"
#include "node.h"
#include "segment.h"

/*
 * Roll forward recovery scenarios.
 *
 * [Term] F: fsync_mark, D: dentry_mark
 *
 * 1. inode(x) | CP | inode(x) | dnode(F)
 * -> Update the latest inode(x).
 *
 * 2. inode(x) | CP | inode(F) | dnode(F)
 * -> No problem.
 *
 * 3. inode(x) | CP | dnode(F) | inode(x)
 * -> Recover to the latest dnode(F), and drop the last inode(x).
 *
 * 4. inode(x) | CP | dnode(F) | inode(F)
 * -> No problem.
 *
 * 5. CP | inode(x) | dnode(F)
 * -> The inode(DF) is missing, so this dnode(F) should be dropped.
 *
 * 6. CP | inode(DF) | dnode(F)
 * -> No problem.
 *
 * 7. CP | dnode(F) | inode(DF)
 * -> If f2fs_iget fails, then go to the next block to find inode(DF).
 *
 * 8. CP | dnode(F) | inode(x)
 * -> If f2fs_iget fails, then go to the next block to find inode(DF).
 *    But it will fail because there is no inode(DF).
 */

48 49 50 51
/*
 * Slab cache backing struct fsync_inode_entry objects.  It is created at the
 * start of recover_fsync_data() and destroyed before that function returns.
 */
static struct kmem_cache *fsync_entry_slab;

bool space_for_roll_forward(struct f2fs_sb_info *sbi)
{
52 53 54
	s64 nalloc = percpu_counter_sum_positive(&sbi->alloc_valid_block_count);

	if (sbi->last_valid_block_count + nalloc > sbi->user_block_count)
55 56 57 58 59 60 61 62 63
		return false;
	return true;
}

/*
 * Search @head for the fsync entry whose inode number equals @ino.
 *
 * Returns the first matching entry, or NULL when no entry matches.
 */
static struct fsync_inode_entry *get_fsync_inode(struct list_head *head,
								nid_t ino)
{
	struct fsync_inode_entry *cur;

	list_for_each_entry(cur, head, list) {
		if (cur->inode->i_ino != ino)
			continue;
		return cur;
	}

	return NULL;
}

71 72
/*
 * Get the inode @ino, wrap it in a freshly allocated fsync entry and append
 * that entry to the tail of @head.
 *
 * Returns the new entry, or the ERR_PTR produced by f2fs_iget_retry() when the
 * inode cannot be obtained.  The inode reference is held by the entry until
 * del_fsync_inode() drops it.
 *
 * NOTE(review): the allocation result is dereferenced without a NULL check;
 * presumably f2fs_kmem_cache_alloc() never returns NULL -- confirm.
 */
static struct fsync_inode_entry *add_fsync_inode(struct f2fs_sb_info *sbi,
					struct list_head *head, nid_t ino)
{
	struct fsync_inode_entry *new_entry;
	struct inode *target = f2fs_iget_retry(sbi->sb, ino);

	if (IS_ERR(target))
		return ERR_CAST(target);

	new_entry = f2fs_kmem_cache_alloc(fsync_entry_slab, GFP_F2FS_ZERO);
	new_entry->inode = target;
	list_add_tail(&new_entry->list, head);

	return new_entry;
}

/*
 * Tear down one fsync entry: drop its inode reference, unlink it from the
 * list it is on, and return it to fsync_entry_slab.
 */
static void del_fsync_inode(struct fsync_inode_entry *entry)
{
	struct inode *held = entry->inode;

	iput(held);
	list_del(&entry->list);
	kmem_cache_free(fsync_entry_slab, entry);
}

C
Chao Yu 已提交
95 96
static int recover_dentry(struct inode *inode, struct page *ipage,
						struct list_head *dir_list)
97
{
98
	struct f2fs_inode *raw_inode = F2FS_INODE(ipage);
99
	nid_t pino = le32_to_cpu(raw_inode->i_pino);
J
Jaegeuk Kim 已提交
100
	struct f2fs_dir_entry *de;
101
	struct fscrypt_name fname;
102
	struct page *page;
J
Jaegeuk Kim 已提交
103
	struct inode *dir, *einode;
C
Chao Yu 已提交
104
	struct fsync_inode_entry *entry;
105
	int err = 0;
106
	char *name;
107

C
Chao Yu 已提交
108 109
	entry = get_fsync_inode(dir_list, pino);
	if (!entry) {
110 111 112 113
		entry = add_fsync_inode(F2FS_I_SB(inode), dir_list, pino);
		if (IS_ERR(entry)) {
			dir = ERR_CAST(entry);
			err = PTR_ERR(entry);
C
Chao Yu 已提交
114 115
			goto out;
		}
116 117
	}

C
Chao Yu 已提交
118 119
	dir = entry->inode;

120 121 122
	memset(&fname, 0, sizeof(struct fscrypt_name));
	fname.disk_name.len = le32_to_cpu(raw_inode->i_namelen);
	fname.disk_name.name = raw_inode->i_name;
123

124
	if (unlikely(fname.disk_name.len > F2FS_NAME_LEN)) {
125 126
		WARN_ON(1);
		err = -ENAMETOOLONG;
C
Chao Yu 已提交
127
		goto out;
128
	}
J
Jaegeuk Kim 已提交
129
retry:
130
	de = __f2fs_find_entry(dir, &fname, &page);
131
	if (de && inode->i_ino == le32_to_cpu(de->ino))
132
		goto out_unmap_put;
133

J
Jaegeuk Kim 已提交
134
	if (de) {
135
		einode = f2fs_iget_retry(inode->i_sb, le32_to_cpu(de->ino));
J
Jaegeuk Kim 已提交
136 137
		if (IS_ERR(einode)) {
			WARN_ON(1);
138 139
			err = PTR_ERR(einode);
			if (err == -ENOENT)
J
Jaegeuk Kim 已提交
140
				err = -EEXIST;
141 142
			goto out_unmap_put;
		}
143
		err = acquire_orphan_inode(F2FS_I_SB(inode));
144 145 146
		if (err) {
			iput(einode);
			goto out_unmap_put;
J
Jaegeuk Kim 已提交
147
		}
148
		f2fs_delete_entry(de, page, dir, einode);
J
Jaegeuk Kim 已提交
149 150
		iput(einode);
		goto retry;
151 152 153
	} else if (IS_ERR(page)) {
		err = PTR_ERR(page);
	} else {
154
		err = __f2fs_do_add_link(dir, &fname, inode,
155
					inode->i_ino, inode->i_mode);
156
	}
157 158
	if (err == -ENOMEM)
		goto retry;
159 160 161
	goto out;

out_unmap_put:
162
	f2fs_dentry_kunmap(dir, page);
163
	f2fs_put_page(page, 0);
164
out:
165 166 167 168
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = raw_inode->i_name;
C
Chris Fries 已提交
169 170
	f2fs_msg(inode->i_sb, KERN_NOTICE,
			"%s: ino = %x, name = %s, dir = %lx, err = %d",
171
			__func__, ino_of_node(ipage), name,
D
Dan Carpenter 已提交
172
			IS_ERR(dir) ? 0 : dir->i_ino, err);
173 174 175
	return err;
}

176
static void recover_inode(struct inode *inode, struct page *page)
177
{
178
	struct f2fs_inode *raw = F2FS_INODE(page);
179
	char *name;
180 181

	inode->i_mode = le16_to_cpu(raw->i_mode);
182
	f2fs_i_size_write(inode, le64_to_cpu(raw->i_size));
C
Chao Yu 已提交
183
	inode->i_atime.tv_sec = le64_to_cpu(raw->i_atime);
184 185
	inode->i_ctime.tv_sec = le64_to_cpu(raw->i_ctime);
	inode->i_mtime.tv_sec = le64_to_cpu(raw->i_mtime);
C
Chao Yu 已提交
186
	inode->i_atime.tv_nsec = le32_to_cpu(raw->i_atime_nsec);
187 188
	inode->i_ctime.tv_nsec = le32_to_cpu(raw->i_ctime_nsec);
	inode->i_mtime.tv_nsec = le32_to_cpu(raw->i_mtime_nsec);
189

190 191 192 193 194
	if (file_enc_name(inode))
		name = "<encrypted>";
	else
		name = F2FS_INODE(page)->i_name;

195
	f2fs_msg(inode->i_sb, KERN_NOTICE, "recover_inode: ino = %x, name = %s",
196
			ino_of_node(page), name);
197 198 199 200 201
}

static int find_fsync_dnodes(struct f2fs_sb_info *sbi, struct list_head *head)
{
	struct curseg_info *curseg;
202
	struct page *page = NULL;
203 204 205 206 207
	block_t blkaddr;
	int err = 0;

	/* get node pages in the current segment */
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
208
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);
209 210 211 212

	while (1) {
		struct fsync_inode_entry *entry;

213
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
214
			return 0;
215

216
		page = get_tmp_page(sbi, blkaddr);
217

218
		if (!is_recoverable_dnode(page))
219
			break;
220 221 222 223 224

		if (!is_fsync_dnode(page))
			goto next;

		entry = get_fsync_inode(head, ino_of_node(page));
225
		if (!entry) {
226
			if (IS_INODE(page) && is_dent_dnode(page)) {
227 228
				err = recover_inode_page(sbi, page);
				if (err)
229
					break;
230 231
			}

232 233 234 235
			/*
			 * CP | dnode(F) | inode(DF)
			 * For this case, we should not give up now.
			 */
236 237 238
			entry = add_fsync_inode(sbi, head, ino_of_node(page));
			if (IS_ERR(entry)) {
				err = PTR_ERR(entry);
239 240
				if (err == -ENOENT) {
					err = 0;
241
					goto next;
242
				}
243
				break;
244 245
			}
		}
J
Jaegeuk Kim 已提交
246 247
		entry->blkaddr = blkaddr;

248 249
		if (IS_INODE(page) && is_dent_dnode(page))
			entry->last_dentry = blkaddr;
250 251 252
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
253
		f2fs_put_page(page, 1);
254 255

		ra_meta_pages_cond(sbi, blkaddr);
256
	}
257
	f2fs_put_page(page, 1);
258 259 260
	return err;
}

261
static void destroy_fsync_dnodes(struct list_head *head)
262
{
263 264
	struct fsync_inode_entry *entry, *tmp;

265 266
	list_for_each_entry_safe(entry, tmp, head, list)
		del_fsync_inode(entry);
267 268
}

269
static int check_index_in_prev_nodes(struct f2fs_sb_info *sbi,
270
			block_t blkaddr, struct dnode_of_data *dn)
271 272 273
{
	struct seg_entry *sentry;
	unsigned int segno = GET_SEGNO(sbi, blkaddr);
J
Jaegeuk Kim 已提交
274
	unsigned short blkoff = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
275
	struct f2fs_summary_block *sum_node;
276
	struct f2fs_summary sum;
J
Jaegeuk Kim 已提交
277
	struct page *sum_page, *node_page;
278
	struct dnode_of_data tdn = *dn;
279
	nid_t ino, nid;
280
	struct inode *inode;
281
	unsigned int offset;
282 283 284 285 286
	block_t bidx;
	int i;

	sentry = get_seg_entry(sbi, segno);
	if (!f2fs_test_bit(blkoff, sentry->cur_valid_map))
287
		return 0;
288 289 290 291 292 293

	/* Get the previous summary */
	for (i = CURSEG_WARM_DATA; i <= CURSEG_COLD_DATA; i++) {
		struct curseg_info *curseg = CURSEG_I(sbi, i);
		if (curseg->segno == segno) {
			sum = curseg->sum_blk->entries[blkoff];
J
Jaegeuk Kim 已提交
294
			goto got_it;
295 296 297
		}
	}

J
Jaegeuk Kim 已提交
298 299 300 301 302
	sum_page = get_sum_page(sbi, segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	sum = sum_node->entries[blkoff];
	f2fs_put_page(sum_page, 1);
got_it:
303 304 305 306
	/* Use the locked dnode page and inode */
	nid = le32_to_cpu(sum.nid);
	if (dn->inode->i_ino == nid) {
		tdn.nid = nid;
307 308
		if (!dn->inode_page_locked)
			lock_page(dn->inode_page);
309
		tdn.node_page = dn->inode_page;
310
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
311
		goto truncate_out;
312
	} else if (dn->nid == nid) {
313
		tdn.ofs_in_node = le16_to_cpu(sum.ofs_in_node);
314
		goto truncate_out;
315 316
	}

317
	/* Get the node page */
318
	node_page = get_node_page(sbi, nid);
319 320
	if (IS_ERR(node_page))
		return PTR_ERR(node_page);
321 322

	offset = ofs_of_node(node_page);
323 324 325
	ino = ino_of_node(node_page);
	f2fs_put_page(node_page, 1);

326 327
	if (ino != dn->inode->i_ino) {
		/* Deallocate previous index in the node page */
328
		inode = f2fs_iget_retry(sbi->sb, ino);
329 330 331 332 333
		if (IS_ERR(inode))
			return PTR_ERR(inode);
	} else {
		inode = dn->inode;
	}
334

335
	bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
336

337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353
	/*
	 * if inode page is locked, unlock temporarily, but its reference
	 * count keeps alive.
	 */
	if (ino == dn->inode->i_ino && dn->inode_page_locked)
		unlock_page(dn->inode_page);

	set_new_dnode(&tdn, inode, NULL, NULL, 0);
	if (get_dnode_of_data(&tdn, bidx, LOOKUP_NODE))
		goto out;

	if (tdn.data_blkaddr == blkaddr)
		truncate_data_blocks_range(&tdn, 1);

	f2fs_put_dnode(&tdn);
out:
	if (ino != dn->inode->i_ino)
354
		iput(inode);
355 356 357 358 359 360 361 362 363
	else if (dn->inode_page_locked)
		lock_page(dn->inode_page);
	return 0;

truncate_out:
	if (datablock_addr(tdn.node_page, tdn.ofs_in_node) == blkaddr)
		truncate_data_blocks_range(&tdn, 1);
	if (dn->inode->i_ino == nid && !dn->inode_page_locked)
		unlock_page(dn->inode_page);
364
	return 0;
365 366
}

367
/*
 * Replay one fsynced node block (@page, read from @blkaddr) onto @inode.
 *
 * Step 1 recovers xattrs (inline xattrs for an inode block, or a whole xattr
 * node block, in which case nothing else is done).  Step 2 recovers inline
 * data and stops if recover_inline_data() reports a non-zero result.  Step 3
 * walks every data index of the node block and brings the live dnode in line
 * with it: identical indices are skipped, NULL_ADDR truncates, NEW_ADDR
 * re-reserves, and a valid address is taken over after any stale reference
 * to it has been removed by check_index_in_prev_nodes().
 *
 * -ENOMEM from get_dnode_of_data() and check_index_in_prev_nodes() is retried
 * after a short congestion wait.
 *
 * Returns 0 on success or a negative errno; the result is always logged.
 */
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
					struct page *page, block_t blkaddr)
{
	struct dnode_of_data dn;
	struct node_info ni;
	unsigned int start, end;
	int err = 0, recovered = 0;

	/* step 1: recover xattr */
	if (IS_INODE(page)) {
		recover_inline_xattr(inode, page);
	} else if (f2fs_has_xattr_block(ofs_of_node(page))) {
		/*
		 * Deprecated; xattr blocks should be found from cold log.
		 * But, we should remain this for backward compatibility.
		 */
		recover_xattr_data(inode, page, blkaddr);
		goto out;
	}

	/* step 2: recover inline data */
	if (recover_inline_data(inode, page))
		goto out;

	/* step 3: recover data indices */
	start = start_bidx_of_node(ofs_of_node(page), inode);
	end = start + ADDRS_PER_PAGE(page, inode);

	set_new_dnode(&dn, inode, NULL, NULL, 0);
retry_dn:
	err = get_dnode_of_data(&dn, start, ALLOC_NODE);
	if (err) {
		if (err == -ENOMEM) {
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto retry_dn;
		}
		goto out;
	}

	f2fs_wait_on_page_writeback(dn.node_page, NODE, true);

	/* the live dnode must describe the same node as the fsynced block */
	get_node_info(sbi, dn.nid, &ni);
	f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
	f2fs_bug_on(sbi, ofs_of_node(dn.node_page) != ofs_of_node(page));

	for (; start < end; start++, dn.ofs_in_node++) {
		block_t src, dest;

		/* src: what the live dnode maps; dest: what was fsynced */
		src = datablock_addr(dn.node_page, dn.ofs_in_node);
		dest = datablock_addr(page, dn.ofs_in_node);

		/* skip recovering if dest is the same as src */
		if (src == dest)
			continue;

		/* dest is invalid, just invalidate src block */
		if (dest == NULL_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			continue;
		}

		/*
		 * Extend i_size to cover this block.  start is a 32-bit block
		 * index, so widen it to loff_t before shifting; otherwise the
		 * byte offset wraps for blocks at or beyond 4 GiB.
		 */
		if (i_size_read(inode) <= ((loff_t)start << PAGE_SHIFT))
			f2fs_i_size_write(inode,
					(loff_t)(start + 1) << PAGE_SHIFT);

		/*
		 * dest is reserved block, invalidate src block
		 * and then reserve one new block in dnode page.
		 */
		if (dest == NEW_ADDR) {
			truncate_data_blocks_range(&dn, 1);
			reserve_new_block(&dn);
			continue;
		}

		/* dest is valid block, try to recover from src to dest */
		if (is_valid_blkaddr(sbi, dest, META_POR)) {

			if (src == NULL_ADDR) {
				err = reserve_new_block(&dn);
#ifdef CONFIG_F2FS_FAULT_INJECTION
				/* injected failures are simply retried */
				while (err)
					err = reserve_new_block(&dn);
#endif
				/* We should not get -ENOSPC */
				f2fs_bug_on(sbi, err);
				if (err)
					goto err;
			}
retry_prev:
			/* Check the previous node page having this index */
			err = check_index_in_prev_nodes(sbi, dest, &dn);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					goto retry_prev;
				}
				goto err;
			}

			/* write dummy data page */
			f2fs_replace_block(sbi, &dn, src, dest,
						ni.version, false, false);
			recovered++;
		}
	}

	/* make the live dnode carry the footer of the fsynced block */
	copy_node_footer(dn.node_page, page);
	fill_node_footer(dn.node_page, dn.nid, ni.ino,
					ofs_of_node(page), false);
	set_page_dirty(dn.node_page);
err:
	f2fs_put_dnode(&dn);
out:
	f2fs_msg(sbi->sb, KERN_NOTICE,
		"recover_data: ino = %lx, recovered = %d blocks, err = %d",
		inode->i_ino, recovered, err);
	return err;
}

C
Chao Yu 已提交
486 487
static int recover_data(struct f2fs_sb_info *sbi, struct list_head *inode_list,
						struct list_head *dir_list)
488 489
{
	struct curseg_info *curseg;
490
	struct page *page = NULL;
491
	int err = 0;
492 493 494
	block_t blkaddr;

	/* get node pages in the current segment */
C
Chao Yu 已提交
495
	curseg = CURSEG_I(sbi, CURSEG_WARM_NODE);
496 497 498 499 500
	blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	while (1) {
		struct fsync_inode_entry *entry;

501
		if (!is_valid_blkaddr(sbi, blkaddr, META_POR))
502
			break;
503

504 505
		ra_meta_pages_cond(sbi, blkaddr);

506
		page = get_tmp_page(sbi, blkaddr);
507

508
		if (!is_recoverable_dnode(page)) {
509
			f2fs_put_page(page, 1);
510
			break;
511
		}
512

C
Chao Yu 已提交
513
		entry = get_fsync_inode(inode_list, ino_of_node(page));
514 515
		if (!entry)
			goto next;
516 517 518
		/*
		 * inode(x) | CP | inode(x) | dnode(F)
		 * In this case, we can lose the latest inode(x).
519
		 * So, call recover_inode for the inode update.
520
		 */
521
		if (IS_INODE(page))
522 523
			recover_inode(entry->inode, page);
		if (entry->last_dentry == blkaddr) {
C
Chao Yu 已提交
524
			err = recover_dentry(entry->inode, page, dir_list);
525 526 527 528 529
			if (err) {
				f2fs_put_page(page, 1);
				break;
			}
		}
530
		err = do_recover_data(sbi, entry->inode, page, blkaddr);
531 532
		if (err) {
			f2fs_put_page(page, 1);
533
			break;
534
		}
535

536 537
		if (entry->blkaddr == blkaddr)
			del_fsync_inode(entry);
538 539 540
next:
		/* check next segment */
		blkaddr = next_blkaddr_of_node(page);
541
		f2fs_put_page(page, 1);
542
	}
543 544 545
	if (!err)
		allocate_new_segments(sbi);
	return err;
546 547
}

548
/*
 * Entry point of roll-forward recovery.
 *
 * Creates the fsync entry slab, then, with cp_mutex held, collects the
 * fsynced inodes (step #1) and, unless @check_only is set, replays their node
 * blocks (step #2).  Afterwards the meta pages used by recovery are
 * truncated, SBI_POR_DOING is cleared, and on success a CP_RECOVERY
 * checkpoint is written if any data was replayed.  On failure the node and
 * meta mappings are truncated and CP_ERROR_FLAG is set.
 *
 * Returns 1 when @check_only is set and fsynced inodes were found; otherwise
 * 0 on success or a negative errno (-ENOMEM if the slab cannot be created).
 */
int recover_fsync_data(struct f2fs_sb_info *sbi, bool check_only)
{
	struct list_head inode_list;
	struct list_head dir_list;
	int err;
	int ret = 0;
	bool need_writecp = false;

	fsync_entry_slab = f2fs_kmem_cache_create("f2fs_fsync_inode_entry",
			sizeof(struct fsync_inode_entry));
	if (!fsync_entry_slab)
		return -ENOMEM;

	INIT_LIST_HEAD(&inode_list);
	INIT_LIST_HEAD(&dir_list);

	/* prevent checkpoint */
	mutex_lock(&sbi->cp_mutex);

	/* step #1: find fsynced inode numbers */
	err = find_fsync_dnodes(sbi, &inode_list);
	if (err || list_empty(&inode_list))
		goto out;

	if (check_only) {
		/* only report that there is something to recover */
		ret = 1;
		goto out;
	}

	need_writecp = true;

	/* step #2: recover data */
	err = recover_data(sbi, &inode_list, &dir_list);
	if (!err)
		f2fs_bug_on(sbi, !list_empty(&inode_list));
out:
	destroy_fsync_dnodes(&inode_list);

	/* truncate meta pages to be used by the recovery */
	truncate_inode_pages_range(META_MAPPING(sbi),
			(loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);

	if (err) {
		truncate_inode_pages_final(NODE_MAPPING(sbi));
		truncate_inode_pages_final(META_MAPPING(sbi));
	}

	clear_sbi_flag(sbi, SBI_POR_DOING);
	if (err)
		set_ckpt_flags(sbi, CP_ERROR_FLAG);
	mutex_unlock(&sbi->cp_mutex);

	/* let's drop all the directory inodes for clean checkpoint */
	destroy_fsync_dnodes(&dir_list);

	if (!err && need_writecp) {
		struct cp_control cpc = {
			.reason = CP_RECOVERY,
		};
		err = write_checkpoint(sbi, &cpc);
	}

	kmem_cache_destroy(fsync_entry_slab);
	return ret ? ret : err;
}