/*
 * fs/f2fs/node.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/mpage.h>
#include <linux/backing-dev.h>
#include <linux/blkdev.h>
#include <linux/pagevec.h>
#include <linux/swap.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "xattr.h"
#include "trace.h"
#include <trace/events/f2fs.h>

#define on_f2fs_build_free_nids(nm_i) mutex_is_locked(&(nm_i)->build_lock)

static struct kmem_cache *nat_entry_slab;
static struct kmem_cache *free_nid_slab;
static struct kmem_cache *nat_entry_set_slab;
static struct kmem_cache *fsync_node_entry_slab;

/*
 * Check whether the given nid is within node id range.
 */
int f2fs_check_nid_range(struct f2fs_sb_info *sbi, nid_t nid)
{
	if (unlikely(nid < F2FS_ROOT_INO(sbi) || nid >= NM_I(sbi)->max_nid)) {
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		f2fs_msg(sbi->sb, KERN_WARNING,
				"%s: out-of-range nid=%x, run fsck to fix.",
				__func__, nid);
		return -EINVAL;
	}
	return 0;
}

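/*
 * Check whether there is enough free low memory to keep growing the
 * in-memory cache of the given @type, relative to nm_i->ram_thresh.
 */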
bool f2fs_available_free_memory(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct sysinfo val;
	unsigned long avail_ram;
	unsigned long mem_size = 0;
	bool res = false;

	si_meminfo(&val);

	/* only uses low memory */
	avail_ram = val.totalram - val.totalhigh;

	/*
 * give 25%, 25%, 50%, 50%, 50% of memory to each component, respectively
	 */
	if (type == FREE_NIDS) {
		mem_size = (nm_i->nid_cnt[FREE_NID] *
				sizeof(struct free_nid)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
	} else if (type == NAT_ENTRIES) {
		mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
							PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
		if (excess_cached_nats(sbi))
			res = false;
	} else if (type == DIRTY_DENTS) {
		if (sbi->sb->s_bdi->wb.dirty_exceeded)
			return false;
		mem_size = get_pages(sbi, F2FS_DIRTY_DENTS);
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INO_ENTRIES) {
		int i;

		for (i = 0; i < MAX_INO_ENTRY; i++)
			mem_size += sbi->im[i].ino_num *
						sizeof(struct ino_entry);
		mem_size >>= PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == EXTENT_CACHE) {
		mem_size = (atomic_read(&sbi->total_ext_tree) *
				sizeof(struct extent_tree) +
				atomic_read(&sbi->total_ext_node) *
				sizeof(struct extent_node)) >> PAGE_SHIFT;
		res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
	} else if (type == INMEM_PAGES) {
		/* it allows 20% / total_ram for inmemory pages */
		mem_size = get_pages(sbi, F2FS_INMEM_PAGES);
		res = mem_size < (val.totalram / 5);
	} else {
		if (!sbi->sb->s_bdi->wb.dirty_exceeded)
			return true;
	}
	return res;
}

static void clear_node_page_dirty(struct page *page)
{
	if (PageDirty(page)) {
		f2fs_clear_radix_tree_dirty_tag(page);
		clear_page_dirty_for_io(page);
		dec_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
	}
	ClearPageUptodate(page);
}

static struct page *get_current_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	return f2fs_get_meta_page_nofail(sbi, current_nat_addr(sbi, nid));
}

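/*
 * Copy the current NAT block of @nid to its alternate location, mark the
 * destination page dirty and flip the NAT version bitmap to point at it.
 */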
static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *src_page;
	struct page *dst_page;
	pgoff_t dst_off;
	void *src_addr;
	void *dst_addr;
	struct f2fs_nm_info *nm_i = NM_I(sbi);

	dst_off = next_nat_addr(sbi, current_nat_addr(sbi, nid));

	/* get current nat block page with lock */
	src_page = get_current_nat_page(sbi, nid);
	dst_page = f2fs_grab_meta_page(sbi, dst_off);
	f2fs_bug_on(sbi, PageDirty(src_page));

	src_addr = page_address(src_page);
	dst_addr = page_address(dst_page);
	memcpy(dst_addr, src_addr, PAGE_SIZE);
	set_page_dirty(dst_page);
	f2fs_put_page(src_page, 1);

	set_to_next_nat(nm_i, nid);

	return dst_page;
}

static struct nat_entry *__alloc_nat_entry(nid_t nid, bool no_fail)
{
	struct nat_entry *new;

	if (no_fail)
		new = f2fs_kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
	else
		new = kmem_cache_alloc(nat_entry_slab, GFP_F2FS_ZERO);
	if (new) {
		nat_set_nid(new, nid);
		nat_reset_flag(new);
	}
	return new;
}

static void __free_nat_entry(struct nat_entry *e)
{
	kmem_cache_free(nat_entry_slab, e);
}

/* must be locked by nat_tree_lock */
static struct nat_entry *__init_nat_entry(struct f2fs_nm_info *nm_i,
	struct nat_entry *ne, struct f2fs_nat_entry *raw_ne, bool no_fail)
{
	if (no_fail)
		f2fs_radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne);
	else if (radix_tree_insert(&nm_i->nat_root, nat_get_nid(ne), ne))
		return NULL;

	if (raw_ne)
		node_info_from_raw_nat(&ne->ni, raw_ne);

	spin_lock(&nm_i->nat_list_lock);
	list_add_tail(&ne->list, &nm_i->nat_entries);
	spin_unlock(&nm_i->nat_list_lock);

	nm_i->nat_cnt++;
	return ne;
}

static struct nat_entry *__lookup_nat_cache(struct f2fs_nm_info *nm_i, nid_t n)
{
	struct nat_entry *ne;

	ne = radix_tree_lookup(&nm_i->nat_root, n);

	/* for recent accessed nat entry, move it to tail of lru list */
	if (ne && !get_nat_flag(ne, IS_DIRTY)) {
		spin_lock(&nm_i->nat_list_lock);
		if (!list_empty(&ne->list))
			list_move_tail(&ne->list, &nm_i->nat_entries);
		spin_unlock(&nm_i->nat_list_lock);
	}

	return ne;
}

static unsigned int __gang_lookup_nat_cache(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_root, (void **)ep, start, nr);
}

static void __del_from_nat_cache(struct f2fs_nm_info *nm_i, struct nat_entry *e)
{
	radix_tree_delete(&nm_i->nat_root, nat_get_nid(e));
	nm_i->nat_cnt--;
	__free_nat_entry(e);
}

static struct nat_entry_set *__grab_nat_entry_set(struct f2fs_nm_info *nm_i,
							struct nat_entry *ne)
{
	nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
	struct nat_entry_set *head;

	head = radix_tree_lookup(&nm_i->nat_set_root, set);
	if (!head) {
		head = f2fs_kmem_cache_alloc(nat_entry_set_slab, GFP_NOFS);

		INIT_LIST_HEAD(&head->entry_list);
		INIT_LIST_HEAD(&head->set_list);
		head->set = set;
		head->entry_cnt = 0;
		f2fs_radix_tree_insert(&nm_i->nat_set_root, set, head);
	}
	return head;
}

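/*
 * Mark a cached nat entry dirty; unless it still points at NEW_ADDR, it is
 * attached to the nat_entry_set of its NAT block so that it can be written
 * back in batch at checkpoint time.
 */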
static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
						struct nat_entry *ne)
{
	struct nat_entry_set *head;
	bool new_ne = nat_get_blkaddr(ne) == NEW_ADDR;

	if (!new_ne)
		head = __grab_nat_entry_set(nm_i, ne);

	/*
	 * update entry_cnt in below condition:
	 * 1. update NEW_ADDR to valid block address;
	 * 2. update old block address to new one;
	 */
	if (!new_ne && (get_nat_flag(ne, IS_PREALLOC) ||
				!get_nat_flag(ne, IS_DIRTY)))
		head->entry_cnt++;

	set_nat_flag(ne, IS_PREALLOC, new_ne);

	if (get_nat_flag(ne, IS_DIRTY))
		goto refresh_list;

	nm_i->dirty_nat_cnt++;
	set_nat_flag(ne, IS_DIRTY, true);
refresh_list:
	spin_lock(&nm_i->nat_list_lock);
	if (new_ne)
		list_del_init(&ne->list);
	else
		list_move_tail(&ne->list, &head->entry_list);
	spin_unlock(&nm_i->nat_list_lock);
}

static void __clear_nat_cache_dirty(struct f2fs_nm_info *nm_i,
		struct nat_entry_set *set, struct nat_entry *ne)
{
	spin_lock(&nm_i->nat_list_lock);
	list_move_tail(&ne->list, &nm_i->nat_entries);
	spin_unlock(&nm_i->nat_list_lock);

	set_nat_flag(ne, IS_DIRTY, false);
	set->entry_cnt--;
	nm_i->dirty_nat_cnt--;
}

static unsigned int __gang_lookup_nat_set(struct f2fs_nm_info *nm_i,
		nid_t start, unsigned int nr, struct nat_entry_set **ep)
{
	return radix_tree_gang_lookup(&nm_i->nat_set_root, (void **)ep,
							start, nr);
}

bool f2fs_in_warm_node_list(struct f2fs_sb_info *sbi, struct page *page)
{
	return NODE_MAPPING(sbi) == page->mapping &&
			IS_DNODE(page) && is_cold_node(page);
}

void f2fs_init_fsync_node_info(struct f2fs_sb_info *sbi)
{
	spin_lock_init(&sbi->fsync_node_lock);
	INIT_LIST_HEAD(&sbi->fsync_node_list);
	sbi->fsync_seg_id = 0;
	sbi->fsync_node_num = 0;
}

static unsigned int f2fs_add_fsync_node_entry(struct f2fs_sb_info *sbi,
							struct page *page)
{
	struct fsync_node_entry *fn;
	unsigned long flags;
	unsigned int seq_id;

	fn = f2fs_kmem_cache_alloc(fsync_node_entry_slab, GFP_NOFS);

	get_page(page);
	fn->page = page;
	INIT_LIST_HEAD(&fn->list);

	spin_lock_irqsave(&sbi->fsync_node_lock, flags);
	list_add_tail(&fn->list, &sbi->fsync_node_list);
	fn->seq_id = sbi->fsync_seg_id++;
	seq_id = fn->seq_id;
	sbi->fsync_node_num++;
	spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);

	return seq_id;
}

void f2fs_del_fsync_node_entry(struct f2fs_sb_info *sbi, struct page *page)
{
	struct fsync_node_entry *fn;
	unsigned long flags;

	spin_lock_irqsave(&sbi->fsync_node_lock, flags);
	list_for_each_entry(fn, &sbi->fsync_node_list, list) {
		if (fn->page == page) {
			list_del(&fn->list);
			sbi->fsync_node_num--;
			spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
			kmem_cache_free(fsync_node_entry_slab, fn);
			put_page(page);
			return;
		}
	}
	spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
	f2fs_bug_on(sbi, 1);
}

void f2fs_reset_fsync_node_info(struct f2fs_sb_info *sbi)
{
	unsigned long flags;

	spin_lock_irqsave(&sbi->fsync_node_lock, flags);
	sbi->fsync_seg_id = 0;
	spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
}

int f2fs_need_dentry_mark(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need = false;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		if (!get_nat_flag(e, IS_CHECKPOINTED) &&
				!get_nat_flag(e, HAS_FSYNCED_INODE))
			need = true;
	}
	up_read(&nm_i->nat_tree_lock);
	return need;
}

bool f2fs_is_checkpointed_node(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool is_cp = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e && !get_nat_flag(e, IS_CHECKPOINTED))
		is_cp = false;
	up_read(&nm_i->nat_tree_lock);
	return is_cp;
}

bool f2fs_need_inode_block_update(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	bool need_update = true;

	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ino);
	if (e && get_nat_flag(e, HAS_LAST_FSYNC) &&
			(get_nat_flag(e, IS_CHECKPOINTED) ||
			 get_nat_flag(e, HAS_FSYNCED_INODE)))
		need_update = false;
	up_read(&nm_i->nat_tree_lock);
	return need_update;
}

/* nat_tree_lock is taken inside; the caller must not already hold it */
static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
						struct f2fs_nat_entry *ne)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *new, *e;

	new = __alloc_nat_entry(nid, false);
	if (!new)
		return;

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (!e)
		e = __init_nat_entry(nm_i, new, ne, false);
	else
		f2fs_bug_on(sbi, nat_get_ino(e) != le32_to_cpu(ne->ino) ||
				nat_get_blkaddr(e) !=
					le32_to_cpu(ne->block_addr) ||
				nat_get_version(e) != ne->version);
	up_write(&nm_i->nat_tree_lock);
	if (e != new)
		__free_nat_entry(new);
}

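/*
 * Update the cached nat entry of @ni->nid with @new_blkaddr and refresh the
 * checkpoint/fsync related flags of the entry (and of its inode entry).
 */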
static void set_node_addr(struct f2fs_sb_info *sbi, struct node_info *ni,
			block_t new_blkaddr, bool fsync_done)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct nat_entry *e;
	struct nat_entry *new = __alloc_nat_entry(ni->nid, true);

	down_write(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, ni->nid);
	if (!e) {
		e = __init_nat_entry(nm_i, new, NULL, true);
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr == NEW_ADDR);
	} else if (new_blkaddr == NEW_ADDR) {
		/*
		 * when nid is reallocated,
		 * previous nat entry can be remained in nat cache.
		 * So, reinitialize it with new information.
		 */
		copy_node_info(&e->ni, ni);
		f2fs_bug_on(sbi, ni->blk_addr != NULL_ADDR);
	}
	/* let's free early to reduce memory consumption */
	if (e != new)
		__free_nat_entry(new);

	/* sanity check */
	f2fs_bug_on(sbi, nat_get_blkaddr(e) != ni->blk_addr);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NULL_ADDR &&
			new_blkaddr == NULL_ADDR);
	f2fs_bug_on(sbi, nat_get_blkaddr(e) == NEW_ADDR &&
			new_blkaddr == NEW_ADDR);
	f2fs_bug_on(sbi, is_valid_data_blkaddr(sbi, nat_get_blkaddr(e)) &&
			new_blkaddr == NEW_ADDR);

	/* increment version no as node is removed */
	if (nat_get_blkaddr(e) != NEW_ADDR && new_blkaddr == NULL_ADDR) {
		unsigned char version = nat_get_version(e);
		nat_set_version(e, inc_node_version(version));
	}

	/* change address */
	nat_set_blkaddr(e, new_blkaddr);
	if (!is_valid_data_blkaddr(sbi, new_blkaddr))
		set_nat_flag(e, IS_CHECKPOINTED, false);
	__set_nat_cache_dirty(nm_i, e);

	/* update fsync_mark if its inode nat entry is still alive */
	if (ni->nid != ni->ino)
		e = __lookup_nat_cache(nm_i, ni->ino);
	if (e) {
		if (fsync_done && ni->nid == ni->ino)
			set_nat_flag(e, HAS_FSYNCED_INODE, true);
		set_nat_flag(e, HAS_LAST_FSYNC, fsync_done);
	}
	up_write(&nm_i->nat_tree_lock);
}

int f2fs_try_to_free_nats(struct f2fs_sb_info *sbi, int nr_shrink)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	int nr = nr_shrink;

	if (!down_write_trylock(&nm_i->nat_tree_lock))
		return 0;

	spin_lock(&nm_i->nat_list_lock);
	while (nr_shrink) {
		struct nat_entry *ne;

		if (list_empty(&nm_i->nat_entries))
			break;

		ne = list_first_entry(&nm_i->nat_entries,
					struct nat_entry, list);
		list_del(&ne->list);
		spin_unlock(&nm_i->nat_list_lock);

		__del_from_nat_cache(nm_i, ne);
		nr_shrink--;

		spin_lock(&nm_i->nat_list_lock);
	}
	spin_unlock(&nm_i->nat_list_lock);

	up_write(&nm_i->nat_tree_lock);
	return nr - nr_shrink;
}

/*
 * Fill @ni with the node information of @nid; returns 0 on success or a
 * negative errno on failure.
 */
int f2fs_get_node_info(struct f2fs_sb_info *sbi, nid_t nid,
						struct node_info *ni)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
	nid_t start_nid = START_NID(nid);
	struct f2fs_nat_block *nat_blk;
	struct page *page = NULL;
	struct f2fs_nat_entry ne;
	struct nat_entry *e;
	pgoff_t index;
	int i;

	ni->nid = nid;

	/* Check nat cache */
	down_read(&nm_i->nat_tree_lock);
	e = __lookup_nat_cache(nm_i, nid);
	if (e) {
		ni->ino = nat_get_ino(e);
		ni->blk_addr = nat_get_blkaddr(e);
		ni->version = nat_get_version(e);
		up_read(&nm_i->nat_tree_lock);
		return 0;
	}

	memset(&ne, 0, sizeof(struct f2fs_nat_entry));

	/* Check current segment summary */
	down_read(&curseg->journal_rwsem);
	i = f2fs_lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
	if (i >= 0) {
		ne = nat_in_journal(journal, i);
		node_info_from_raw_nat(ni, &ne);
	}
	up_read(&curseg->journal_rwsem);
	if (i >= 0) {
		up_read(&nm_i->nat_tree_lock);
		goto cache;
	}

	/* Fill node_info from nat page */
	index = current_nat_addr(sbi, nid);
	up_read(&nm_i->nat_tree_lock);

	page = f2fs_get_meta_page(sbi, index);
	if (IS_ERR(page))
		return PTR_ERR(page);

	nat_blk = (struct f2fs_nat_block *)page_address(page);
	ne = nat_blk->entries[nid - start_nid];
	node_info_from_raw_nat(ni, &ne);
	f2fs_put_page(page, 1);
cache:
	/* cache nat entry */
	cache_nat_entry(sbi, nid, &ne);
	return 0;
}

/*
 * readahead MAX_RA_NODE number of node pages.
 */
static void f2fs_ra_node_pages(struct page *parent, int start, int n)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	struct blk_plug plug;
	int i, end;
	nid_t nid;

	blk_start_plug(&plug);

	/* Then, try readahead for siblings of the desired node */
	end = start + n;
	end = min(end, NIDS_PER_BLOCK);
	for (i = start; i < end; i++) {
		nid = get_nid(parent, i, false);
		f2fs_ra_node_page(sbi, nid);
	}

	blk_finish_plug(&plug);
}

pgoff_t f2fs_get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
{
	const long direct_index = ADDRS_PER_INODE(dn->inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	unsigned int skipped_unit = ADDRS_PER_BLOCK;
	int cur_level = dn->cur_level;
	int max_level = dn->max_level;
	pgoff_t base = 0;

	if (!dn->max_level)
		return pgofs + 1;

	while (max_level-- > cur_level)
		skipped_unit *= NIDS_PER_BLOCK;

	switch (dn->max_level) {
	case 3:
		base += 2 * indirect_blks;
	case 2:
		base += 2 * direct_blks;
	case 1:
		base += direct_index;
		break;
	default:
		f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
	}

	return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
}

/*
 * The maximum depth is four.
 * Offset[0] will have raw inode offset.
 */
static int get_node_path(struct inode *inode, long block,
				int offset[4], unsigned int noffset[4])
{
	const long direct_index = ADDRS_PER_INODE(inode);
	const long direct_blks = ADDRS_PER_BLOCK;
	const long dptrs_per_blk = NIDS_PER_BLOCK;
	const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
	const long dindirect_blks = indirect_blks * NIDS_PER_BLOCK;
	int n = 0;
	int level = 0;

	noffset[0] = 0;

	if (block < direct_index) {
		offset[n] = block;
		goto got;
	}
	block -= direct_index;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR1_BLOCK;
		noffset[n] = 1;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < direct_blks) {
		offset[n++] = NODE_DIR2_BLOCK;
		noffset[n] = 2;
		offset[n] = block;
		level = 1;
		goto got;
	}
	block -= direct_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND1_BLOCK;
		noffset[n] = 3;
		offset[n++] = block / direct_blks;
		noffset[n] = 4 + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < indirect_blks) {
		offset[n++] = NODE_IND2_BLOCK;
		noffset[n] = 4 + dptrs_per_blk;
		offset[n++] = block / direct_blks;
		noffset[n] = 5 + dptrs_per_blk + offset[n - 1];
		offset[n] = block % direct_blks;
		level = 2;
		goto got;
	}
	block -= indirect_blks;
	if (block < dindirect_blks) {
		offset[n++] = NODE_DIND_BLOCK;
		noffset[n] = 5 + (dptrs_per_blk * 2);
		offset[n++] = block / indirect_blks;
		noffset[n] = 6 + (dptrs_per_blk * 2) +
			      offset[n - 1] * (dptrs_per_blk + 1);
		offset[n++] = (block / direct_blks) % dptrs_per_blk;
		noffset[n] = 7 + (dptrs_per_blk * 2) +
			      offset[n - 2] * (dptrs_per_blk + 1) +
			      offset[n - 1];
		offset[n] = block % direct_blks;
		level = 3;
		goto got;
	} else {
		return -E2BIG;
	}
got:
	return level;
}

/*
 * Caller should call f2fs_put_dnode(dn).
 * Also, it should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op() only if ro is not set RDONLY_NODE.
 * In the case of RDONLY_NODE, we don't need to care about mutex.
 */
int f2fs_get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct page *npage[4];
	struct page *parent = NULL;
	int offset[4];
	unsigned int noffset[4];
	nid_t nids[4];
	int level, i = 0;
	int err = 0;

	level = get_node_path(dn->inode, index, offset, noffset);
	if (level < 0)
		return level;

	nids[0] = dn->inode->i_ino;
	npage[0] = dn->inode_page;

	if (!npage[0]) {
		npage[0] = f2fs_get_node_page(sbi, nids[0]);
		if (IS_ERR(npage[0]))
			return PTR_ERR(npage[0]);
	}

	/* if inline_data is set, should not report any block indices */
	if (f2fs_has_inline_data(dn->inode) && index) {
		err = -ENOENT;
		f2fs_put_page(npage[0], 1);
		goto release_out;
	}

	parent = npage[0];
	if (level != 0)
		nids[1] = get_nid(parent, offset[0], true);
	dn->inode_page = npage[0];
	dn->inode_page_locked = true;

	/* get indirect or direct nodes */
	for (i = 1; i <= level; i++) {
		bool done = false;

		if (!nids[i] && mode == ALLOC_NODE) {
			/* alloc new node */
			if (!f2fs_alloc_nid(sbi, &(nids[i]))) {
				err = -ENOSPC;
				goto release_pages;
			}

			dn->nid = nids[i];
			npage[i] = f2fs_new_node_page(dn, noffset[i]);
			if (IS_ERR(npage[i])) {
				f2fs_alloc_nid_failed(sbi, nids[i]);
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}

			set_nid(parent, offset[i - 1], nids[i], i == 1);
			f2fs_alloc_nid_done(sbi, nids[i]);
			done = true;
		} else if (mode == LOOKUP_NODE_RA && i == level && level > 1) {
			npage[i] = f2fs_get_node_page_ra(parent, offset[i - 1]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				goto release_pages;
			}
			done = true;
		}
		if (i == 1) {
			dn->inode_page_locked = false;
			unlock_page(parent);
		} else {
			f2fs_put_page(parent, 1);
		}

		if (!done) {
			npage[i] = f2fs_get_node_page(sbi, nids[i]);
			if (IS_ERR(npage[i])) {
				err = PTR_ERR(npage[i]);
				f2fs_put_page(npage[0], 0);
				goto release_out;
			}
		}
		if (i < level) {
			parent = npage[i];
			nids[i + 1] = get_nid(parent, offset[i], false);
		}
	}
	dn->nid = nids[level];
	dn->ofs_in_node = offset[level];
	dn->node_page = npage[level];
	dn->data_blkaddr = datablock_addr(dn->inode,
				dn->node_page, dn->ofs_in_node);
	return 0;

release_pages:
	f2fs_put_page(parent, 1);
	if (i > 1)
		f2fs_put_page(npage[0], 0);
release_out:
	dn->inode_page = NULL;
	dn->node_page = NULL;
	if (err == -ENOENT) {
		dn->cur_level = i;
		dn->max_level = level;
		dn->ofs_in_node = offset[level];
	}
	return err;
}

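/*
 * Release the block behind dn->node_page: invalidate its block address,
 * drop the node count and, for an inode node, the orphan and inode counts,
 * then free the page itself.
 */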
static int truncate_node(struct dnode_of_data *dn)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info ni;
	int err;

	err = f2fs_get_node_info(sbi, dn->nid, &ni);
	if (err)
		return err;

	/* Deallocate node address */
	f2fs_invalidate_blocks(sbi, ni.blk_addr);
	dec_valid_node_count(sbi, dn->inode, dn->nid == dn->inode->i_ino);
	set_node_addr(sbi, &ni, NULL_ADDR, false);

	if (dn->nid == dn->inode->i_ino) {
		f2fs_remove_orphan_inode(sbi, dn->nid);
		dec_valid_inode_count(sbi);
		f2fs_inode_synced(dn->inode);
	}

	clear_node_page_dirty(dn->node_page);
	set_sbi_flag(sbi, SBI_IS_DIRTY);

	f2fs_put_page(dn->node_page, 1);

	invalidate_mapping_pages(NODE_MAPPING(sbi),
			dn->node_page->index, dn->node_page->index);

	dn->node_page = NULL;
	trace_f2fs_truncate_node(dn->inode, dn->nid, ni.blk_addr);

	return 0;
}

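/*
 * Truncate a direct node together with all the data blocks it addresses.
 * Returns 1 when the node has been freed (or did not exist).
 */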
static int truncate_dnode(struct dnode_of_data *dn)
{
	struct page *page;
	int err;

	if (dn->nid == 0)
		return 1;

	/* get direct node */
	page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page) && PTR_ERR(page) == -ENOENT)
		return 1;
	else if (IS_ERR(page))
		return PTR_ERR(page);

	/* Make dnode_of_data for parameter */
	dn->node_page = page;
	dn->ofs_in_node = 0;
	f2fs_truncate_data_blocks(dn);
	err = truncate_node(dn);
	if (err)
		return err;

	return 1;
}

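/*
 * Recursively truncate the (double) indirect node at dn->nid, starting from
 * child index @ofs. Returns the number of freed nodes or a negative errno.
 */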
static int truncate_nodes(struct dnode_of_data *dn, unsigned int nofs,
						int ofs, int depth)
{
	struct dnode_of_data rdn = *dn;
	struct page *page;
	struct f2fs_node *rn;
	nid_t child_nid;
	unsigned int child_nofs;
	int freed = 0;
	int i, ret;

	if (dn->nid == 0)
		return NIDS_PER_BLOCK + 1;

	trace_f2fs_truncate_nodes_enter(dn->inode, dn->nid, dn->data_blkaddr);

	page = f2fs_get_node_page(F2FS_I_SB(dn->inode), dn->nid);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_nodes_exit(dn->inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	f2fs_ra_node_pages(page, ofs, NIDS_PER_BLOCK);

	rn = F2FS_NODE(page);
	if (depth < 3) {
		for (i = ofs; i < NIDS_PER_BLOCK; i++, freed++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0)
				continue;
			rdn.nid = child_nid;
			ret = truncate_dnode(&rdn);
			if (ret < 0)
				goto out_err;
			if (set_nid(page, i, 0, false))
				dn->node_changed = true;
		}
	} else {
		child_nofs = nofs + ofs * (NIDS_PER_BLOCK + 1) + 1;
		for (i = ofs; i < NIDS_PER_BLOCK; i++) {
			child_nid = le32_to_cpu(rn->in.nid[i]);
			if (child_nid == 0) {
				child_nofs += NIDS_PER_BLOCK + 1;
				continue;
			}
			rdn.nid = child_nid;
			ret = truncate_nodes(&rdn, child_nofs, 0, depth - 1);
			if (ret == (NIDS_PER_BLOCK + 1)) {
				if (set_nid(page, i, 0, false))
					dn->node_changed = true;
				child_nofs += ret;
			} else if (ret < 0 && ret != -ENOENT) {
				goto out_err;
			}
		}
		freed = child_nofs;
	}

	if (!ofs) {
		/* remove current indirect node */
		dn->node_page = page;
		ret = truncate_node(dn);
		if (ret)
			goto out_err;
		freed++;
	} else {
		f2fs_put_page(page, 1);
	}
	trace_f2fs_truncate_nodes_exit(dn->inode, freed);
	return freed;

out_err:
	f2fs_put_page(page, 1);
	trace_f2fs_truncate_nodes_exit(dn->inode, ret);
	return ret;
}

static int truncate_partial_nodes(struct dnode_of_data *dn,
			struct f2fs_inode *ri, int *offset, int depth)
{
	struct page *pages[2];
	nid_t nid[3];
	nid_t child_nid;
	int err = 0;
	int i;
	int idx = depth - 2;

	nid[0] = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
	if (!nid[0])
		return 0;

	/* get indirect nodes in the path */
	for (i = 0; i < idx + 1; i++) {
		/* reference count'll be increased */
		pages[i] = f2fs_get_node_page(F2FS_I_SB(dn->inode), nid[i]);
		if (IS_ERR(pages[i])) {
			err = PTR_ERR(pages[i]);
			idx = i - 1;
			goto fail;
		}
		nid[i + 1] = get_nid(pages[i], offset[i + 1], false);
	}

	f2fs_ra_node_pages(pages[idx], offset[idx + 1], NIDS_PER_BLOCK);

	/* free direct nodes linked to a partial indirect node */
	for (i = offset[idx + 1]; i < NIDS_PER_BLOCK; i++) {
		child_nid = get_nid(pages[idx], i, false);
		if (!child_nid)
			continue;
		dn->nid = child_nid;
		err = truncate_dnode(dn);
		if (err < 0)
			goto fail;
		if (set_nid(pages[idx], i, 0, false))
			dn->node_changed = true;
	}

	if (offset[idx + 1] == 0) {
		dn->node_page = pages[idx];
		dn->nid = nid[idx];
		err = truncate_node(dn);
		if (err)
			goto fail;
	} else {
		f2fs_put_page(pages[idx], 1);
	}
	offset[idx]++;
	offset[idx + 1] = 0;
	idx--;
fail:
	for (i = idx; i >= 0; i--)
		f2fs_put_page(pages[i], 1);

	trace_f2fs_truncate_partial_nodes(dn->inode, nid, depth, err);

	return err;
}

/*
 * All the block addresses of data and nodes should be nullified.
 */
int f2fs_truncate_inode_blocks(struct inode *inode, pgoff_t from)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	int err = 0, cont = 1;
	int level, offset[4], noffset[4];
	unsigned int nofs = 0;
	struct f2fs_inode *ri;
	struct dnode_of_data dn;
	struct page *page;

	trace_f2fs_truncate_inode_blocks_enter(inode, from);

	level = get_node_path(inode, from, offset, noffset);
	if (level < 0)
		return level;

	page = f2fs_get_node_page(sbi, inode->i_ino);
	if (IS_ERR(page)) {
		trace_f2fs_truncate_inode_blocks_exit(inode, PTR_ERR(page));
		return PTR_ERR(page);
	}

	set_new_dnode(&dn, inode, page, NULL, 0);
	unlock_page(page);

	ri = F2FS_INODE(page);
	switch (level) {
	case 0:
	case 1:
		nofs = noffset[1];
		break;
	case 2:
		nofs = noffset[1];
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		nofs += 1 + NIDS_PER_BLOCK;
		break;
	case 3:
		nofs = 5 + 2 * NIDS_PER_BLOCK;
		if (!offset[level - 1])
			goto skip_partial;
		err = truncate_partial_nodes(&dn, ri, offset, level);
		if (err < 0 && err != -ENOENT)
			goto fail;
		break;
	default:
		BUG();
	}

skip_partial:
	while (cont) {
		dn.nid = le32_to_cpu(ri->i_nid[offset[0] - NODE_DIR1_BLOCK]);
		switch (offset[0]) {
		case NODE_DIR1_BLOCK:
		case NODE_DIR2_BLOCK:
			err = truncate_dnode(&dn);
			break;

		case NODE_IND1_BLOCK:
		case NODE_IND2_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 2);
			break;

		case NODE_DIND_BLOCK:
			err = truncate_nodes(&dn, nofs, offset[1], 3);
			cont = 0;
			break;

		default:
			BUG();
		}
		if (err < 0 && err != -ENOENT)
			goto fail;
		if (offset[1] == 0 &&
				ri->i_nid[offset[0] - NODE_DIR1_BLOCK]) {
			lock_page(page);
			BUG_ON(page->mapping != NODE_MAPPING(sbi));
			f2fs_wait_on_page_writeback(page, NODE, true);
			ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
			set_page_dirty(page);
			unlock_page(page);
		}
		offset[1] = 0;
		offset[0]++;
		nofs += err;
	}
fail:
	f2fs_put_page(page, 0);
	trace_f2fs_truncate_inode_blocks_exit(inode, err);
	return err > 0 ? 0 : err;
}

/* caller must lock inode page */
int f2fs_truncate_xattr_node(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	nid_t nid = F2FS_I(inode)->i_xattr_nid;
	struct dnode_of_data dn;
	struct page *npage;
	int err;

	if (!nid)
		return 0;

	npage = f2fs_get_node_page(sbi, nid);
	if (IS_ERR(npage))
		return PTR_ERR(npage);

	set_new_dnode(&dn, inode, NULL, npage, nid);
	err = truncate_node(&dn);
	if (err) {
		f2fs_put_page(npage, 1);
		return err;
	}

	f2fs_i_xnid_write(inode, 0);

	return 0;
}

/*
 * Caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
 */
int f2fs_remove_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;
	int err;

	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);
	err = f2fs_get_dnode_of_data(&dn, 0, LOOKUP_NODE);
	if (err)
		return err;

	err = f2fs_truncate_xattr_node(inode);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}

	/* remove potential inline_data blocks */
	if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
				S_ISLNK(inode->i_mode))
		f2fs_truncate_data_blocks_range(&dn, 1);

	/* 0 is possible, after f2fs_new_inode() has failed */
	if (unlikely(f2fs_cp_error(F2FS_I_SB(inode)))) {
		f2fs_put_dnode(&dn);
		return -EIO;
	}
	f2fs_bug_on(F2FS_I_SB(inode),
			inode->i_blocks != 0 && inode->i_blocks != 8);

	/* will put inode & node pages */
	err = truncate_node(&dn);
	if (err) {
		f2fs_put_dnode(&dn);
		return err;
	}
	return 0;
}

struct page *f2fs_new_inode_page(struct inode *inode)
{
	struct dnode_of_data dn;

	/* allocate inode page for new inode */
	set_new_dnode(&dn, inode, NULL, NULL, inode->i_ino);

	/* caller should f2fs_put_page(page, 1); */
	return f2fs_new_node_page(&dn, 0);
}

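/*
 * Allocate a node page for dn->nid at node offset @ofs, reserve a NEW_ADDR
 * nat entry for it and return the locked, dirty page.
 */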
struct page *f2fs_new_node_page(struct dnode_of_data *dn, unsigned int ofs)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
	struct node_info new_ni;
	struct page *page;
	int err;

	if (unlikely(is_inode_flag_set(dn->inode, FI_NO_ALLOC)))
		return ERR_PTR(-EPERM);

	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), dn->nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	if (unlikely((err = inc_valid_node_count(sbi, dn->inode, !ofs))))
		goto fail;

#ifdef CONFIG_F2FS_CHECK_FS
	err = f2fs_get_node_info(sbi, dn->nid, &new_ni);
	if (err) {
		dec_valid_node_count(sbi, dn->inode, !ofs);
		goto fail;
	}
	f2fs_bug_on(sbi, new_ni.blk_addr != NULL_ADDR);
#endif
	new_ni.nid = dn->nid;
	new_ni.ino = dn->inode->i_ino;
	new_ni.blk_addr = NULL_ADDR;
	new_ni.flag = 0;
	new_ni.version = 0;
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);

	f2fs_wait_on_page_writeback(page, NODE, true);
	fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
	set_cold_node(page, S_ISDIR(dn->inode->i_mode));
	if (!PageUptodate(page))
		SetPageUptodate(page);
	if (set_page_dirty(page))
		dn->node_changed = true;

	if (f2fs_has_xattr_block(ofs))
		f2fs_i_xnid_write(dn->inode, dn->nid);

	if (ofs == 0)
		inc_valid_inode_count(sbi);
	return page;

fail:
	clear_node_page_dirty(page);
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
}

/*
 * Caller should put the page depending on the return value:
 * 0: f2fs_put_page(page, 0)
 * LOCKED_PAGE or error: f2fs_put_page(page, 1)
 */
static int read_node_page(struct page *page, int op_flags)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = NODE,
		.op = REQ_OP_READ,
		.op_flags = op_flags,
		.page = page,
		.encrypted_page = NULL,
	};
	int err;

	if (PageUptodate(page)) {
#ifdef CONFIG_F2FS_CHECK_FS
		f2fs_bug_on(sbi, !f2fs_inode_chksum_verify(sbi, page));
#endif
		return LOCKED_PAGE;
	}

	err = f2fs_get_node_info(sbi, page->index, &ni);
	if (err)
		return err;

	if (unlikely(ni.blk_addr == NULL_ADDR) ||
			is_sbi_flag_set(sbi, SBI_IS_SHUTDOWN)) {
		ClearPageUptodate(page);
		return -ENOENT;
	}

	fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
	return f2fs_submit_page_bio(&fio);
}

/*
 * Readahead a node page
 */
void f2fs_ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
{
	struct page *apage;
	int err;

	if (!nid)
		return;
	if (f2fs_check_nid_range(sbi, nid))
		return;

	rcu_read_lock();
	apage = radix_tree_lookup(&NODE_MAPPING(sbi)->i_pages, nid);
	rcu_read_unlock();
	if (apage)
		return;

	apage = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!apage)
		return;

	err = read_node_page(apage, REQ_RAHEAD);
	f2fs_put_page(apage, err ? 1 : 0);
}

static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
					struct page *parent, int start)
{
	struct page *page;
	int err;

	if (!nid)
		return ERR_PTR(-ENOENT);
	if (f2fs_check_nid_range(sbi, nid))
		return ERR_PTR(-EINVAL);
repeat:
	page = f2fs_grab_cache_page(NODE_MAPPING(sbi), nid, false);
	if (!page)
		return ERR_PTR(-ENOMEM);

	err = read_node_page(page, 0);
	if (err < 0) {
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	} else if (err == LOCKED_PAGE) {
		err = 0;
		goto page_hit;
	}

	if (parent)
		f2fs_ra_node_pages(parent, start + 1, MAX_RA_NODE);

	lock_page(page);

	if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
		f2fs_put_page(page, 1);
		goto repeat;
	}

	if (unlikely(!PageUptodate(page))) {
		err = -EIO;
		goto out_err;
	}

	if (!f2fs_inode_chksum_verify(sbi, page)) {
		err = -EBADMSG;
		goto out_err;
	}
page_hit:
	if (unlikely(nid != nid_of_node(page))) {
		f2fs_msg(sbi->sb, KERN_WARNING, "inconsistent node block, "
			"nid:%lu, node_footer[nid:%u,ino:%u,ofs:%u,cpver:%llu,blkaddr:%u]",
			nid, nid_of_node(page), ino_of_node(page),
			ofs_of_node(page), cpver_of_node(page),
			next_blkaddr_of_node(page));
		err = -EINVAL;
out_err:
		ClearPageUptodate(page);
		f2fs_put_page(page, 1);
		return ERR_PTR(err);
	}
	return page;
}

struct page *f2fs_get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid)
{
	return __get_node_page(sbi, nid, NULL, 0);
}

struct page *f2fs_get_node_page_ra(struct page *parent, int start)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
	nid_t nid = get_nid(parent, start, false);

	return __get_node_page(sbi, nid, parent, start);
}

static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct inode *inode;
	struct page *page;
	int ret;

	/* should flush inline_data before evict_inode */
	inode = ilookup(sbi->sb, ino);
	if (!inode)
		return;

	page = f2fs_pagecache_get_page(inode->i_mapping, 0,
					FGP_LOCK|FGP_NOWAIT, 0);
	if (!page)
		goto iput_out;

	if (!PageUptodate(page))
		goto page_out;

	if (!PageDirty(page))
		goto page_out;

	if (!clear_page_dirty_for_io(page))
		goto page_out;

	ret = f2fs_write_inline_data(inode, page);
	inode_dec_dirty_pages(inode);
	f2fs_remove_dirty_inode(inode);
	if (ret)
		set_page_dirty(page);
page_out:
	f2fs_put_page(page, 1);
iput_out:
	iput(inode);
}

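/*
 * Scan the dirty node pages of @ino and return the last direct node page,
 * which fsync will need to mark with the fsync flag.
 */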
static struct page *last_fsync_dnode(struct f2fs_sb_info *sbi, nid_t ino)
{
	pgoff_t index;
	struct pagevec pvec;
	struct page *last_page = NULL;
	int nr_pages;

	pagevec_init(&pvec);
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				return ERR_PTR(-EIO);
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			if (last_page)
				f2fs_put_page(last_page, 0);

			get_page(page);
			last_page = page;
			unlock_page(page);
		}
		pagevec_release(&pvec);
		cond_resched();
	}
	return last_page;
}

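/*
 * Write back one dirty node page. Returns 0, or AOP_WRITEPAGE_ACTIVATE when
 * the page has been redirtied and must be retried later.
 */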
static int __write_node_page(struct page *page, bool atomic, bool *submitted,
				struct writeback_control *wbc, bool do_balance,
				enum iostat_type io_type, unsigned int *seq_id)
{
	struct f2fs_sb_info *sbi = F2FS_P_SB(page);
	nid_t nid;
	struct node_info ni;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = ino_of_node(page),
		.type = NODE,
		.op = REQ_OP_WRITE,
		.op_flags = wbc_to_write_flags(wbc),
		.page = page,
		.encrypted_page = NULL,
		.submitted = false,
		.io_type = io_type,
		.io_wbc = wbc,
	};
	unsigned int seq;

	trace_f2fs_writepage(page, NODE);

	if (unlikely(f2fs_cp_error(sbi)))
		goto redirty_out;

	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto redirty_out;

	if (wbc->sync_mode == WB_SYNC_NONE &&
			IS_DNODE(page) && is_cold_node(page))
		goto redirty_out;

	/* get old block addr of this node page */
	nid = nid_of_node(page);
	f2fs_bug_on(sbi, page->index != nid);

	if (f2fs_get_node_info(sbi, nid, &ni))
		goto redirty_out;

	if (wbc->for_reclaim) {
		if (!down_read_trylock(&sbi->node_write))
			goto redirty_out;
	} else {
		down_read(&sbi->node_write);
	}

	/* This page is already truncated */
	if (unlikely(ni.blk_addr == NULL_ADDR)) {
		ClearPageUptodate(page);
		dec_page_count(sbi, F2FS_DIRTY_NODES);
		up_read(&sbi->node_write);
		unlock_page(page);
		return 0;
	}

	if (__is_valid_data_blkaddr(ni.blk_addr) &&
		!f2fs_is_valid_blkaddr(sbi, ni.blk_addr, DATA_GENERIC))
		goto redirty_out;

	if (atomic && !test_opt(sbi, NOBARRIER))
		fio.op_flags |= REQ_PREFLUSH | REQ_FUA;

	set_page_writeback(page);
	ClearPageError(page);

	if (f2fs_in_warm_node_list(sbi, page)) {
		seq = f2fs_add_fsync_node_entry(sbi, page);
		if (seq_id)
			*seq_id = seq;
	}

	fio.old_blkaddr = ni.blk_addr;
	f2fs_do_write_node_page(nid, &fio);
	set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
	dec_page_count(sbi, F2FS_DIRTY_NODES);
	up_read(&sbi->node_write);

	if (wbc->for_reclaim) {
		f2fs_submit_merged_write_cond(sbi, page->mapping->host, 0,
						page->index, NODE);
		submitted = NULL;
	}

	unlock_page(page);

	if (unlikely(f2fs_cp_error(sbi))) {
		f2fs_submit_merged_write(sbi, NODE);
		submitted = NULL;
	}
	if (submitted)
		*submitted = fio.submitted;

	if (do_balance)
		f2fs_balance_fs(sbi, false);
	return 0;

redirty_out:
	redirty_page_for_writepage(wbc, page);
	return AOP_WRITEPAGE_ACTIVATE;
}

void f2fs_move_node_page(struct page *node_page, int gc_type)
{
	if (gc_type == FG_GC) {
		struct writeback_control wbc = {
			.sync_mode = WB_SYNC_ALL,
			.nr_to_write = 1,
			.for_reclaim = 0,
		};

		set_page_dirty(node_page);
		f2fs_wait_on_page_writeback(node_page, NODE, true);

		f2fs_bug_on(F2FS_P_SB(node_page), PageWriteback(node_page));
		if (!clear_page_dirty_for_io(node_page))
			goto out_page;

		if (__write_node_page(node_page, false, NULL,
					&wbc, false, FS_GC_NODE_IO, NULL))
			unlock_page(node_page);
		goto release_page;
	} else {
		/* set page dirty and write it */
		if (!PageWriteback(node_page))
			set_page_dirty(node_page);
	}
out_page:
	unlock_page(node_page);
release_page:
	f2fs_put_page(node_page, 0);
}

static int f2fs_write_node_page(struct page *page,
				struct writeback_control *wbc)
{
	return __write_node_page(page, false, NULL, wbc, false,
						FS_NODE_IO, NULL);
}

int f2fs_fsync_node_pages(struct f2fs_sb_info *sbi, struct inode *inode,
			struct writeback_control *wbc, bool atomic,
			unsigned int *seq_id)
{
	pgoff_t index;
	pgoff_t last_idx = ULONG_MAX;
	struct pagevec pvec;
	int ret = 0;
	struct page *last_page = NULL;
	bool marked = false;
	nid_t ino = inode->i_ino;
	int nr_pages;

	if (atomic) {
		last_page = last_fsync_dnode(sbi, ino);
		if (IS_ERR_OR_NULL(last_page))
			return PTR_ERR_OR_ZERO(last_page);
	}
retry:
	pagevec_init(&pvec);
	index = 0;

	while ((nr_pages = pagevec_lookup_tag(&pvec, NODE_MAPPING(sbi), &index,
				PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			if (unlikely(f2fs_cp_error(sbi))) {
				f2fs_put_page(last_page, 0);
				pagevec_release(&pvec);
				ret = -EIO;
				goto out;
			}

			if (!IS_DNODE(page) || !is_cold_node(page))
				continue;
			if (ino_of_node(page) != ino)
				continue;

			lock_page(page);

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}
			if (ino_of_node(page) != ino)
				goto continue_unlock;

			if (!PageDirty(page) && page != last_page) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);
			BUG_ON(PageWriteback(page));

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			if (!atomic || page == last_page) {
				set_fsync_mark(page, 1);
				if (IS_INODE(page)) {
					if (is_inode_flag_set(inode,
								FI_DIRTY_INODE))
						f2fs_update_inode(inode, page);
					set_dentry_mark(page,
						f2fs_need_dentry_mark(sbi, ino));
				}
				/*  may be written by other thread */
				if (!PageDirty(page))
					set_page_dirty(page);
			}

			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = __write_node_page(page, atomic &&
						page == last_page,
						&submitted, wbc, true,
						FS_NODE_IO, seq_id);
			if (ret) {
				unlock_page(page);
				f2fs_put_page(last_page, 0);
				break;
			} else if (submitted) {
				last_idx = page->index;
			}

			if (page == last_page) {
				f2fs_put_page(page, 0);
				marked = true;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();

		if (ret || marked)
			break;
	}
	if (!ret && atomic && !marked) {
		f2fs_msg(sbi->sb, KERN_DEBUG,
			"Retry to write fsync mark: ino=%u, idx=%lx",
					ino, last_page->index);
		lock_page(last_page);
		f2fs_wait_on_page_writeback(last_page, NODE, true);
		set_page_dirty(last_page);
		unlock_page(last_page);
		goto retry;
	}
out:
	if (last_idx != ULONG_MAX)
		f2fs_submit_merged_write_cond(sbi, NULL, ino, last_idx, NODE);
	return ret ? -EIO : 0;
}

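/*
 * Flush dirty node pages in three passes: indirect nodes first, then dentry
 * dnodes, then file dnodes (see the flushing sequence comment below).
 */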
int f2fs_sync_node_pages(struct f2fs_sb_info *sbi,
				struct writeback_control *wbc,
				bool do_balance, enum iostat_type io_type)
{
	pgoff_t index;
	struct pagevec pvec;
	int step = 0;
	int nwritten = 0;
	int ret = 0;
	int nr_pages, done = 0;

	pagevec_init(&pvec);

next_step:
	index = 0;

	while (!done && (nr_pages = pagevec_lookup_tag(&pvec,
			NODE_MAPPING(sbi), &index, PAGECACHE_TAG_DIRTY))) {
		int i;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			bool submitted = false;

			/* give a priority to WB_SYNC threads */
			if (atomic_read(&sbi->wb_sync_req[NODE]) &&
					wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}

			/*
			 * flushing sequence with step:
			 * 0. indirect nodes
			 * 1. dentry dnodes
			 * 2. file dnodes
			 */
			if (step == 0 && IS_DNODE(page))
				continue;
			if (step == 1 && (!IS_DNODE(page) ||
						is_cold_node(page)))
				continue;
			if (step == 2 && (!IS_DNODE(page) ||
						!is_cold_node(page)))
				continue;
lock_node:
			if (wbc->sync_mode == WB_SYNC_ALL)
				lock_page(page);
			else if (!trylock_page(page))
				continue;

			if (unlikely(page->mapping != NODE_MAPPING(sbi))) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

			/* flush inline_data */
			if (is_inline_node(page)) {
				clear_inline_node(page);
				unlock_page(page);
				flush_inline_data(sbi, ino_of_node(page));
				goto lock_node;
			}

			f2fs_wait_on_page_writeback(page, NODE, true);

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			set_fsync_mark(page, 0);
			set_dentry_mark(page, 0);

			ret = __write_node_page(page, false, &submitted,
						wbc, do_balance, io_type, NULL);
			if (ret)
				unlock_page(page);
			else if (submitted)
				nwritten++;

			if (--wbc->nr_to_write == 0)
				break;
		}
		pagevec_release(&pvec);
		cond_resched();

		if (wbc->nr_to_write == 0) {
			step = 2;
			break;
		}
	}

	if (step < 2) {
		if (wbc->sync_mode == WB_SYNC_NONE && step == 1)
			goto out;
		step++;
		goto next_step;
	}
out:
	if (nwritten)
		f2fs_submit_merged_write(sbi, NODE);

	if (unlikely(f2fs_cp_error(sbi)))
		return -EIO;
	return ret;
}

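/*
 * Wait for writeback of the fsync node pages whose sequence ids are not
 * greater than @seq_id, and return any I/O error that was recorded.
 */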
int f2fs_wait_on_node_pages_writeback(struct f2fs_sb_info *sbi,
						unsigned int seq_id)
1863
{
1864 1865 1866 1867 1868
	struct fsync_node_entry *fn;
	struct page *page;
	struct list_head *head = &sbi->fsync_node_list;
	unsigned long flags;
	unsigned int cur_seq_id = 0;
1869
	int ret2, ret = 0;
1870

1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885
	while (seq_id && cur_seq_id < seq_id) {
		spin_lock_irqsave(&sbi->fsync_node_lock, flags);
		if (list_empty(head)) {
			spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
			break;
		}
		fn = list_first_entry(head, struct fsync_node_entry, list);
		if (fn->seq_id > seq_id) {
			spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
			break;
		}
		cur_seq_id = fn->seq_id;
		page = fn->page;
		get_page(page);
		spin_unlock_irqrestore(&sbi->fsync_node_lock, flags);
1886

1887 1888 1889
		f2fs_wait_on_page_writeback(page, NODE, true);
		if (TestClearPageError(page))
			ret = -EIO;
1890

1891
		put_page(page);
1892

1893 1894
		if (ret)
			break;
1895 1896
	}

1897
	ret2 = filemap_check_errors(NODE_MAPPING(sbi));
1898 1899
	if (!ret)
		ret = ret2;
1900

1901 1902 1903
	return ret;
}

J
Jaegeuk Kim 已提交
1904 1905 1906
static int f2fs_write_node_pages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
1907
	struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
1908
	struct blk_plug plug;
1909
	long diff;
J
Jaegeuk Kim 已提交
1910

1911 1912 1913
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

1914 1915
	/* balancing f2fs's metadata in background */
	f2fs_balance_fs_bg(sbi);
J
Jaegeuk Kim 已提交
1916

1917
	/* collect a number of dirty node pages and write together */
1918
	if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
1919
		goto skip_write;
1920

1921 1922 1923 1924 1925
	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_inc(&sbi->wb_sync_req[NODE]);
	else if (atomic_read(&sbi->wb_sync_req[NODE]))
		goto skip_write;

Y
Yunlei He 已提交
1926 1927
	trace_f2fs_writepages(mapping->host, wbc, NODE);

1928
	diff = nr_pages_to_write(sbi, NODE, wbc);
1929
	blk_start_plug(&plug);
C
Chao Yu 已提交
1930
	f2fs_sync_node_pages(sbi, wbc, true, FS_NODE_IO);
1931
	blk_finish_plug(&plug);
1932
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1933 1934 1935

	if (wbc->sync_mode == WB_SYNC_ALL)
		atomic_dec(&sbi->wb_sync_req[NODE]);
J
Jaegeuk Kim 已提交
1936
	return 0;
1937 1938 1939

skip_write:
	wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
Y
Yunlei He 已提交
1940
	trace_f2fs_writepages(mapping->host, wbc, NODE);
1941
	return 0;
J
Jaegeuk Kim 已提交
1942 1943 1944 1945
}

static int f2fs_set_node_page_dirty(struct page *page)
{
1946 1947
	trace_f2fs_set_page_dirty(page, NODE);

1948 1949
	if (!PageUptodate(page))
		SetPageUptodate(page);
1950 1951 1952 1953
#ifdef CONFIG_F2FS_CHECK_FS
	if (IS_INODE(page))
		f2fs_inode_chksum_set(F2FS_P_SB(page), page);
#endif
J
Jaegeuk Kim 已提交
1954
	if (!PageDirty(page)) {
1955
		__set_page_dirty_nobuffers(page);
1956
		inc_page_count(F2FS_P_SB(page), F2FS_DIRTY_NODES);
J
Jaegeuk Kim 已提交
1957
		SetPagePrivate(page);
J
Jaegeuk Kim 已提交
1958
		f2fs_trace_pid(page);
J
Jaegeuk Kim 已提交
1959 1960 1961 1962 1963
		return 1;
	}
	return 0;
}

J
Jaegeuk Kim 已提交
1964
/*
J
Jaegeuk Kim 已提交
1965 1966 1967 1968 1969 1970
 * Structure of the f2fs node operations
 */
const struct address_space_operations f2fs_node_aops = {
	.writepage	= f2fs_write_node_page,
	.writepages	= f2fs_write_node_pages,
	.set_page_dirty	= f2fs_set_node_page_dirty,
1971 1972
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
1973 1974 1975
#ifdef CONFIG_MIGRATION
	.migratepage    = f2fs_migrate_page,
#endif
J
Jaegeuk Kim 已提交
1976 1977
};

1978 1979
static struct free_nid *__lookup_free_nid_list(struct f2fs_nm_info *nm_i,
						nid_t n)
J
Jaegeuk Kim 已提交
1980
{
1981
	return radix_tree_lookup(&nm_i->free_nid_root, n);
J
Jaegeuk Kim 已提交
1982 1983
}

C
Chao Yu 已提交
1984
static int __insert_free_nid(struct f2fs_sb_info *sbi,
F
Fan Li 已提交
1985
			struct free_nid *i, enum nid_state state)
J
Jaegeuk Kim 已提交
1986
{
C
Chao Yu 已提交
1987 1988
	struct f2fs_nm_info *nm_i = NM_I(sbi);

F
Fan Li 已提交
1989 1990 1991
	int err = radix_tree_insert(&nm_i->free_nid_root, i->nid, i);
	if (err)
		return err;
1992

C
Chao Yu 已提交
1993 1994 1995 1996
	f2fs_bug_on(sbi, state != i->state);
	nm_i->nid_cnt[state]++;
	if (state == FREE_NID)
		list_add_tail(&i->list, &nm_i->free_nid_list);
1997
	return 0;
C
Chao Yu 已提交
1998 1999
}

C
Chao Yu 已提交
2000
static void __remove_free_nid(struct f2fs_sb_info *sbi,
F
Fan Li 已提交
2001
			struct free_nid *i, enum nid_state state)
C
Chao Yu 已提交
2002 2003 2004
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

C
Chao Yu 已提交
2005 2006 2007 2008
	f2fs_bug_on(sbi, state != i->state);
	nm_i->nid_cnt[state]--;
	if (state == FREE_NID)
		list_del(&i->list);
F
Fan Li 已提交
2009 2010 2011 2012 2013
	radix_tree_delete(&nm_i->free_nid_root, i->nid);
}

static void __move_free_nid(struct f2fs_sb_info *sbi, struct free_nid *i,
			enum nid_state org_state, enum nid_state dst_state)
C
Chao Yu 已提交
2014 2015 2016
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);

F
Fan Li 已提交
2017 2018 2019 2020 2021 2022 2023 2024 2025 2026 2027 2028 2029 2030 2031
	f2fs_bug_on(sbi, org_state != i->state);
	i->state = dst_state;
	nm_i->nid_cnt[org_state]--;
	nm_i->nid_cnt[dst_state]++;

	switch (dst_state) {
	case PREALLOC_NID:
		list_del(&i->list);
		break;
	case FREE_NID:
		list_add_tail(&i->list, &nm_i->free_nid_list);
		break;
	default:
		BUG_ON(1);
	}
J
Jaegeuk Kim 已提交
2032 2033
}

2034 2035 2036 2037 2038 2039 2040 2041 2042 2043 2044 2045 2046 2047 2048 2049 2050 2051 2052 2053 2054 2055 2056 2057
static void update_free_nid_bitmap(struct f2fs_sb_info *sbi, nid_t nid,
							bool set, bool build)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(nid);
	unsigned int nid_ofs = nid - START_NID(nid);

	if (!test_bit_le(nat_ofs, nm_i->nat_block_bitmap))
		return;

	if (set) {
		if (test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
			return;
		__set_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
		nm_i->free_nid_count[nat_ofs]++;
	} else {
		if (!test_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]))
			return;
		__clear_bit_le(nid_ofs, nm_i->free_nid_bitmap[nat_ofs]);
		if (!build)
			nm_i->free_nid_count[nat_ofs]--;
	}
}

C
Chao Yu 已提交
2058
/* return if the nid is recognized as free */
2059 2060
static bool add_free_nid(struct f2fs_sb_info *sbi,
				nid_t nid, bool build, bool update)
J
Jaegeuk Kim 已提交
2061
{
2062
	struct f2fs_nm_info *nm_i = NM_I(sbi);
2063
	struct free_nid *i, *e;
2064
	struct nat_entry *ne;
2065 2066
	int err = -EINVAL;
	bool ret = false;
2067 2068

	/* 0 nid should not be used */
2069
	if (unlikely(nid == 0))
C
Chao Yu 已提交
2070
		return false;
2071

2072
	i = f2fs_kmem_cache_alloc(free_nid_slab, GFP_NOFS);
J
Jaegeuk Kim 已提交
2073
	i->nid = nid;
C
Chao Yu 已提交
2074
	i->state = FREE_NID;
J
Jaegeuk Kim 已提交
2075

2076
	radix_tree_preload(GFP_NOFS | __GFP_NOFAIL);
2077

C
Chao Yu 已提交
2078
	spin_lock(&nm_i->nid_list_lock);
2079 2080 2081 2082 2083 2084

	if (build) {
		/*
		 *   Thread A             Thread B
		 *  - f2fs_create
		 *   - f2fs_new_inode
C
Chao Yu 已提交
2085
		 *    - f2fs_alloc_nid
C
Chao Yu 已提交
2086
		 *     - __insert_nid_to_list(PREALLOC_NID)
2087
		 *                     - f2fs_balance_fs_bg
C
Chao Yu 已提交
2088 2089
		 *                      - f2fs_build_free_nids
		 *                       - __f2fs_build_free_nids
2090 2091 2092 2093
		 *                        - scan_nat_page
		 *                         - add_free_nid
		 *                          - __lookup_nat_cache
		 *  - f2fs_add_link
C
Chao Yu 已提交
2094 2095 2096
		 *   - f2fs_init_inode_metadata
		 *    - f2fs_new_inode_page
		 *     - f2fs_new_node_page
2097
		 *      - set_node_addr
C
Chao Yu 已提交
2098
		 *  - f2fs_alloc_nid_done
C
Chao Yu 已提交
2099 2100
		 *   - __remove_nid_from_list(PREALLOC_NID)
		 *                         - __insert_nid_to_list(FREE_NID)
2101 2102 2103 2104 2105 2106 2107 2108
		 */
		ne = __lookup_nat_cache(nm_i, nid);
		if (ne && (!get_nat_flag(ne, IS_CHECKPOINTED) ||
				nat_get_blkaddr(ne) != NULL_ADDR))
			goto err_out;

		e = __lookup_free_nid_list(nm_i, nid);
		if (e) {
C
Chao Yu 已提交
2109
			if (e->state == FREE_NID)
2110 2111 2112 2113 2114
				ret = true;
			goto err_out;
		}
	}
	ret = true;
F
Fan Li 已提交
2115
	err = __insert_free_nid(sbi, i, FREE_NID);
2116
err_out:
2117 2118 2119 2120 2121
	if (update) {
		update_free_nid_bitmap(sbi, nid, ret, build);
		if (!build)
			nm_i->available_nids++;
	}
2122 2123
	spin_unlock(&nm_i->nid_list_lock);
	radix_tree_preload_end();
2124

2125
	if (err)
J
Jaegeuk Kim 已提交
2126
		kmem_cache_free(free_nid_slab, i);
2127
	return ret;
J
Jaegeuk Kim 已提交
2128 2129
}

C
Chao Yu 已提交
2130
static void remove_free_nid(struct f2fs_sb_info *sbi, nid_t nid)
J
Jaegeuk Kim 已提交
2131
{
C
Chao Yu 已提交
2132
	struct f2fs_nm_info *nm_i = NM_I(sbi);
J
Jaegeuk Kim 已提交
2133
	struct free_nid *i;
2134 2135
	bool need_free = false;

C
Chao Yu 已提交
2136
	spin_lock(&nm_i->nid_list_lock);
2137
	i = __lookup_free_nid_list(nm_i, nid);
C
Chao Yu 已提交
2138
	if (i && i->state == FREE_NID) {
F
Fan Li 已提交
2139
		__remove_free_nid(sbi, i, FREE_NID);
2140
		need_free = true;
J
Jaegeuk Kim 已提交
2141
	}
C
Chao Yu 已提交
2142
	spin_unlock(&nm_i->nid_list_lock);
2143 2144 2145

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
J
Jaegeuk Kim 已提交
2146 2147
}

2148
static int scan_nat_page(struct f2fs_sb_info *sbi,
J
Jaegeuk Kim 已提交
2149 2150
			struct page *nat_page, nid_t start_nid)
{
2151
	struct f2fs_nm_info *nm_i = NM_I(sbi);
J
Jaegeuk Kim 已提交
2152 2153
	struct f2fs_nat_block *nat_blk = page_address(nat_page);
	block_t blk_addr;
C
Chao Yu 已提交
2154
	unsigned int nat_ofs = NAT_BLOCK_OFFSET(start_nid);
J
Jaegeuk Kim 已提交
2155 2156
	int i;

J
Jaegeuk Kim 已提交
2157
	__set_bit_le(nat_ofs, nm_i->nat_block_bitmap);
C
Chao Yu 已提交
2158

J
Jaegeuk Kim 已提交
2159 2160 2161
	i = start_nid % NAT_ENTRY_PER_BLOCK;

	for (; i < NAT_ENTRY_PER_BLOCK; i++, start_nid++) {
2162
		if (unlikely(start_nid >= nm_i->max_nid))
J
Jaegeuk Kim 已提交
2163
			break;
H
Haicheng Li 已提交
2164 2165

		blk_addr = le32_to_cpu(nat_blk->entries[i].block_addr);
2166 2167 2168 2169

		if (blk_addr == NEW_ADDR)
			return -EINVAL;

2170 2171 2172 2173 2174 2175 2176
		if (blk_addr == NULL_ADDR) {
			add_free_nid(sbi, start_nid, true, true);
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
			update_free_nid_bitmap(sbi, start_nid, false, true);
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
C
Chao Yu 已提交
2177
	}
2178 2179

	return 0;
C
Chao Yu 已提交
2180 2181
}

2182
static void scan_curseg_cache(struct f2fs_sb_info *sbi)
C
Chao Yu 已提交
2183 2184 2185
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
	struct f2fs_journal *journal = curseg->journal;
2186 2187 2188 2189 2190 2191 2192 2193 2194 2195
	int i;

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < nats_in_cursum(journal); i++) {
		block_t addr;
		nid_t nid;

		addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
		nid = le32_to_cpu(nid_in_journal(journal, i));
		if (addr == NULL_ADDR)
2196
			add_free_nid(sbi, nid, true, false);
2197 2198 2199 2200 2201 2202 2203 2204 2205
		else
			remove_free_nid(sbi, nid);
	}
	up_read(&curseg->journal_rwsem);
}

static void scan_free_nid_bits(struct f2fs_sb_info *sbi)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
C
Chao Yu 已提交
2206
	unsigned int i, idx;
2207
	nid_t nid;
C
Chao Yu 已提交
2208 2209 2210 2211 2212 2213

	down_read(&nm_i->nat_tree_lock);

	for (i = 0; i < nm_i->nat_blocks; i++) {
		if (!test_bit_le(i, nm_i->nat_block_bitmap))
			continue;
2214 2215
		if (!nm_i->free_nid_count[i])
			continue;
C
Chao Yu 已提交
2216
		for (idx = 0; idx < NAT_ENTRY_PER_BLOCK; idx++) {
2217 2218 2219 2220
			idx = find_next_bit_le(nm_i->free_nid_bitmap[i],
						NAT_ENTRY_PER_BLOCK, idx);
			if (idx >= NAT_ENTRY_PER_BLOCK)
				break;
C
Chao Yu 已提交
2221 2222

			nid = i * NAT_ENTRY_PER_BLOCK + idx;
2223
			add_free_nid(sbi, nid, true, false);
C
Chao Yu 已提交
2224

C
Chao Yu 已提交
2225
			if (nm_i->nid_cnt[FREE_NID] >= MAX_FREE_NIDS)
C
Chao Yu 已提交
2226 2227 2228 2229
				goto out;
		}
	}
out:
2230
	scan_curseg_cache(sbi);
C
Chao Yu 已提交
2231 2232

	up_read(&nm_i->nat_tree_lock);
J
Jaegeuk Kim 已提交
2233 2234
}

2235
static int __f2fs_build_free_nids(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
2236
						bool sync, bool mount)
J
Jaegeuk Kim 已提交
2237 2238
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
2239
	int i = 0, ret;
2240
	nid_t nid = nm_i->next_scan_nid;
J
Jaegeuk Kim 已提交
2241

2242 2243 2244
	if (unlikely(nid >= nm_i->max_nid))
		nid = 0;

2245
	/* Enough entries */
C
Chao Yu 已提交
2246
	if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
2247
		return 0;
J
Jaegeuk Kim 已提交
2248

C
Chao Yu 已提交
2249
	if (!sync && !f2fs_available_free_memory(sbi, FREE_NIDS))
2250
		return 0;
J
Jaegeuk Kim 已提交
2251

C
Chao Yu 已提交
2252 2253 2254 2255
	if (!mount) {
		/* try to find free nids in free_nid_bitmap */
		scan_free_nid_bits(sbi);

2256
		if (nm_i->nid_cnt[FREE_NID] >= NAT_ENTRY_PER_BLOCK)
2257
			return 0;
2258 2259
	}

2260
	/* readahead nat pages to be scanned */
C
Chao Yu 已提交
2261
	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nid), FREE_NID_PAGES,
2262
							META_NAT, true);
J
Jaegeuk Kim 已提交
2263

2264
	down_read(&nm_i->nat_tree_lock);
2265

J
Jaegeuk Kim 已提交
2266
	while (1) {
2267 2268 2269
		if (!test_bit_le(NAT_BLOCK_OFFSET(nid),
						nm_i->nat_block_bitmap)) {
			struct page *page = get_current_nat_page(sbi, nid);
J
Jaegeuk Kim 已提交
2270

2271
			ret = scan_nat_page(sbi, page, nid);
2272
			f2fs_put_page(page, 1);
2273 2274 2275 2276 2277 2278 2279 2280

			if (ret) {
				up_read(&nm_i->nat_tree_lock);
				f2fs_bug_on(sbi, !mount);
				f2fs_msg(sbi->sb, KERN_ERR,
					"NAT is corrupt, run fsck to fix it");
				return -EINVAL;
			}
2281
		}
J
Jaegeuk Kim 已提交
2282 2283

		nid += (NAT_ENTRY_PER_BLOCK - (nid % NAT_ENTRY_PER_BLOCK));
2284
		if (unlikely(nid >= nm_i->max_nid))
J
Jaegeuk Kim 已提交
2285
			nid = 0;
2286

2287
		if (++i >= FREE_NID_PAGES)
J
Jaegeuk Kim 已提交
2288 2289 2290
			break;
	}

2291 2292
	/* go to the next free nat pages to find free nids abundantly */
	nm_i->next_scan_nid = nid;
J
Jaegeuk Kim 已提交
2293 2294

	/* find free nids from current sum_pages */
2295
	scan_curseg_cache(sbi);
2296

2297
	up_read(&nm_i->nat_tree_lock);
C
Chao Yu 已提交
2298

C
Chao Yu 已提交
2299
	f2fs_ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
C
Chao Yu 已提交
2300
					nm_i->ra_nid_pages, META_NAT, false);
2301 2302

	return 0;
J
Jaegeuk Kim 已提交
2303 2304
}

2305
int f2fs_build_free_nids(struct f2fs_sb_info *sbi, bool sync, bool mount)
2306
{
2307 2308
	int ret;

2309
	mutex_lock(&NM_I(sbi)->build_lock);
2310
	ret = __f2fs_build_free_nids(sbi, sync, mount);
2311
	mutex_unlock(&NM_I(sbi)->build_lock);
2312 2313

	return ret;
2314 2315
}

J
Jaegeuk Kim 已提交
2316 2317 2318 2319 2320
/*
 * If this function returns success, caller can obtain a new nid
 * from second parameter of this function.
 * The returned nid could be used ino as well as nid when inode is created.
 */
C
Chao Yu 已提交
2321
bool f2fs_alloc_nid(struct f2fs_sb_info *sbi, nid_t *nid)
J
Jaegeuk Kim 已提交
2322 2323 2324 2325
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i = NULL;
retry:
2326 2327
	if (time_to_inject(sbi, FAULT_ALLOC_NID)) {
		f2fs_show_injection_info(FAULT_ALLOC_NID);
J
Jaegeuk Kim 已提交
2328
		return false;
2329
	}
2330

C
Chao Yu 已提交
2331
	spin_lock(&nm_i->nid_list_lock);
J
Jaegeuk Kim 已提交
2332

2333 2334 2335 2336
	if (unlikely(nm_i->available_nids == 0)) {
		spin_unlock(&nm_i->nid_list_lock);
		return false;
	}
J
Jaegeuk Kim 已提交
2337

C
Chao Yu 已提交
2338 2339
	/* We should not use stale free nids created by f2fs_build_free_nids */
	if (nm_i->nid_cnt[FREE_NID] && !on_f2fs_build_free_nids(nm_i)) {
C
Chao Yu 已提交
2340 2341
		f2fs_bug_on(sbi, list_empty(&nm_i->free_nid_list));
		i = list_first_entry(&nm_i->free_nid_list,
C
Chao Yu 已提交
2342
					struct free_nid, list);
2343
		*nid = i->nid;
C
Chao Yu 已提交
2344

F
Fan Li 已提交
2345
		__move_free_nid(sbi, i, FREE_NID, PREALLOC_NID);
2346
		nm_i->available_nids--;
C
Chao Yu 已提交
2347

2348
		update_free_nid_bitmap(sbi, *nid, false, false);
C
Chao Yu 已提交
2349

C
Chao Yu 已提交
2350
		spin_unlock(&nm_i->nid_list_lock);
2351 2352
		return true;
	}
C
Chao Yu 已提交
2353
	spin_unlock(&nm_i->nid_list_lock);
2354 2355

	/* Let's scan nat pages and its caches to get free nids */
C
Chao Yu 已提交
2356
	f2fs_build_free_nids(sbi, true, false);
2357
	goto retry;
J
Jaegeuk Kim 已提交
2358 2359
}

J
Jaegeuk Kim 已提交
2360
/*
C
Chao Yu 已提交
2361
 * f2fs_alloc_nid() should be called prior to this function.
J
Jaegeuk Kim 已提交
2362
 */
C
Chao Yu 已提交
2363
void f2fs_alloc_nid_done(struct f2fs_sb_info *sbi, nid_t nid)
J
Jaegeuk Kim 已提交
2364 2365 2366 2367
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;

C
Chao Yu 已提交
2368
	spin_lock(&nm_i->nid_list_lock);
2369
	i = __lookup_free_nid_list(nm_i, nid);
C
Chao Yu 已提交
2370
	f2fs_bug_on(sbi, !i);
F
Fan Li 已提交
2371
	__remove_free_nid(sbi, i, PREALLOC_NID);
C
Chao Yu 已提交
2372
	spin_unlock(&nm_i->nid_list_lock);
2373 2374

	kmem_cache_free(free_nid_slab, i);
J
Jaegeuk Kim 已提交
2375 2376
}

J
Jaegeuk Kim 已提交
2377
/*
C
Chao Yu 已提交
2378
 * f2fs_alloc_nid() should be called prior to this function.
J
Jaegeuk Kim 已提交
2379
 */
C
Chao Yu 已提交
2380
void f2fs_alloc_nid_failed(struct f2fs_sb_info *sbi, nid_t nid)
J
Jaegeuk Kim 已提交
2381
{
2382 2383
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i;
2384
	bool need_free = false;
2385

J
Jaegeuk Kim 已提交
2386 2387 2388
	if (!nid)
		return;

C
Chao Yu 已提交
2389
	spin_lock(&nm_i->nid_list_lock);
2390
	i = __lookup_free_nid_list(nm_i, nid);
C
Chao Yu 已提交
2391 2392
	f2fs_bug_on(sbi, !i);

C
Chao Yu 已提交
2393
	if (!f2fs_available_free_memory(sbi, FREE_NIDS)) {
F
Fan Li 已提交
2394
		__remove_free_nid(sbi, i, PREALLOC_NID);
2395
		need_free = true;
2396
	} else {
F
Fan Li 已提交
2397
		__move_free_nid(sbi, i, PREALLOC_NID, FREE_NID);
2398
	}
2399 2400 2401

	nm_i->available_nids++;

2402
	update_free_nid_bitmap(sbi, nid, true, false);
C
Chao Yu 已提交
2403

C
Chao Yu 已提交
2404
	spin_unlock(&nm_i->nid_list_lock);
2405 2406 2407

	if (need_free)
		kmem_cache_free(free_nid_slab, i);
J
Jaegeuk Kim 已提交
2408 2409
}

C
Chao Yu 已提交
2410
int f2fs_try_to_free_nids(struct f2fs_sb_info *sbi, int nr_shrink)
C
Chao Yu 已提交
2411 2412 2413 2414 2415
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next;
	int nr = nr_shrink;

C
Chao Yu 已提交
2416
	if (nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
2417 2418
		return 0;

C
Chao Yu 已提交
2419 2420 2421
	if (!mutex_trylock(&nm_i->build_lock))
		return 0;

C
Chao Yu 已提交
2422
	spin_lock(&nm_i->nid_list_lock);
C
Chao Yu 已提交
2423
	list_for_each_entry_safe(i, next, &nm_i->free_nid_list, list) {
C
Chao Yu 已提交
2424
		if (nr_shrink <= 0 ||
C
Chao Yu 已提交
2425
				nm_i->nid_cnt[FREE_NID] <= MAX_FREE_NIDS)
C
Chao Yu 已提交
2426
			break;
C
Chao Yu 已提交
2427

F
Fan Li 已提交
2428
		__remove_free_nid(sbi, i, FREE_NID);
C
Chao Yu 已提交
2429 2430 2431
		kmem_cache_free(free_nid_slab, i);
		nr_shrink--;
	}
C
Chao Yu 已提交
2432
	spin_unlock(&nm_i->nid_list_lock);
C
Chao Yu 已提交
2433 2434 2435 2436 2437
	mutex_unlock(&nm_i->build_lock);

	return nr - nr_shrink;
}

C
Chao Yu 已提交
2438
void f2fs_recover_inline_xattr(struct inode *inode, struct page *page)
2439 2440 2441 2442 2443 2444
{
	void *src_addr, *dst_addr;
	size_t inline_size;
	struct page *ipage;
	struct f2fs_inode *ri;

C
Chao Yu 已提交
2445
	ipage = f2fs_get_node_page(F2FS_I_SB(inode), inode->i_ino);
2446
	f2fs_bug_on(F2FS_I_SB(inode), IS_ERR(ipage));
2447

2448
	ri = F2FS_INODE(page);
2449 2450 2451
	if (ri->i_inline & F2FS_INLINE_XATTR) {
		set_inode_flag(inode, FI_INLINE_XATTR);
	} else {
2452
		clear_inode_flag(inode, FI_INLINE_XATTR);
2453 2454 2455
		goto update_inode;
	}

C
Chao Yu 已提交
2456 2457
	dst_addr = inline_xattr_addr(inode, ipage);
	src_addr = inline_xattr_addr(inode, page);
2458 2459
	inline_size = inline_xattr_size(inode);

2460
	f2fs_wait_on_page_writeback(ipage, NODE, true);
2461
	memcpy(dst_addr, src_addr, inline_size);
2462
update_inode:
C
Chao Yu 已提交
2463
	f2fs_update_inode(inode, ipage);
2464 2465 2466
	f2fs_put_page(ipage, 1);
}

C
Chao Yu 已提交
2467
int f2fs_recover_xattr_data(struct inode *inode, struct page *page)
2468
{
2469
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
2470
	nid_t prev_xnid = F2FS_I(inode)->i_xattr_nid;
2471 2472
	nid_t new_xnid;
	struct dnode_of_data dn;
2473
	struct node_info ni;
2474
	struct page *xpage;
2475
	int err;
2476 2477 2478 2479

	if (!prev_xnid)
		goto recover_xnid;

2480
	/* 1: invalidate the previous xattr nid */
2481 2482 2483 2484
	err = f2fs_get_node_info(sbi, prev_xnid, &ni);
	if (err)
		return err;

C
Chao Yu 已提交
2485
	f2fs_invalidate_blocks(sbi, ni.blk_addr);
2486
	dec_valid_node_count(sbi, inode, false);
2487
	set_node_addr(sbi, &ni, NULL_ADDR, false);
2488 2489

recover_xnid:
2490
	/* 2: update xattr nid in inode */
C
Chao Yu 已提交
2491
	if (!f2fs_alloc_nid(sbi, &new_xnid))
2492 2493 2494
		return -ENOSPC;

	set_new_dnode(&dn, inode, NULL, NULL, new_xnid);
C
Chao Yu 已提交
2495
	xpage = f2fs_new_node_page(&dn, XATTR_NODE_OFFSET);
2496
	if (IS_ERR(xpage)) {
C
Chao Yu 已提交
2497
		f2fs_alloc_nid_failed(sbi, new_xnid);
2498 2499 2500
		return PTR_ERR(xpage);
	}

C
Chao Yu 已提交
2501 2502
	f2fs_alloc_nid_done(sbi, new_xnid);
	f2fs_update_inode_page(inode);
2503 2504

	/* 3: update and set xattr node page dirty */
2505
	memcpy(F2FS_NODE(xpage), F2FS_NODE(page), VALID_XATTR_BLOCK_SIZE);
2506

2507 2508
	set_page_dirty(xpage);
	f2fs_put_page(xpage, 1);
2509

2510
	return 0;
2511 2512
}

C
Chao Yu 已提交
2513
int f2fs_recover_inode_page(struct f2fs_sb_info *sbi, struct page *page)
J
Jaegeuk Kim 已提交
2514
{
2515
	struct f2fs_inode *src, *dst;
J
Jaegeuk Kim 已提交
2516 2517 2518
	nid_t ino = ino_of_node(page);
	struct node_info old_ni, new_ni;
	struct page *ipage;
2519
	int err;
J
Jaegeuk Kim 已提交
2520

2521 2522 2523
	err = f2fs_get_node_info(sbi, ino, &old_ni);
	if (err)
		return err;
2524 2525 2526

	if (unlikely(old_ni.blk_addr != NULL_ADDR))
		return -EINVAL;
2527
retry:
2528
	ipage = f2fs_grab_cache_page(NODE_MAPPING(sbi), ino, false);
2529 2530 2531 2532
	if (!ipage) {
		congestion_wait(BLK_RW_ASYNC, HZ/50);
		goto retry;
	}
J
Jaegeuk Kim 已提交
2533

A
arter97 已提交
2534
	/* Should not use this inode from free nid list */
C
Chao Yu 已提交
2535
	remove_free_nid(sbi, ino);
J
Jaegeuk Kim 已提交
2536

2537 2538
	if (!PageUptodate(ipage))
		SetPageUptodate(ipage);
J
Jaegeuk Kim 已提交
2539
	fill_node_footer(ipage, ino, ino, 0, true);
C
Chao Yu 已提交
2540
	set_cold_node(page, false);
J
Jaegeuk Kim 已提交
2541

2542 2543
	src = F2FS_INODE(page);
	dst = F2FS_INODE(ipage);
J
Jaegeuk Kim 已提交
2544

2545 2546 2547 2548 2549
	memcpy(dst, src, (unsigned long)&src->i_ext - (unsigned long)src);
	dst->i_size = 0;
	dst->i_blocks = cpu_to_le64(1);
	dst->i_links = cpu_to_le32(1);
	dst->i_xattr_nid = 0;
2550
	dst->i_inline = src->i_inline & (F2FS_INLINE_XATTR | F2FS_EXTRA_ATTR);
C
Chao Yu 已提交
2551
	if (dst->i_inline & F2FS_EXTRA_ATTR) {
2552
		dst->i_extra_isize = src->i_extra_isize;
C
Chao Yu 已提交
2553 2554 2555 2556 2557 2558

		if (f2fs_sb_has_flexible_inline_xattr(sbi->sb) &&
			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
							i_inline_xattr_size))
			dst->i_inline_xattr_size = src->i_inline_xattr_size;

C
Chao Yu 已提交
2559 2560 2561 2562 2563
		if (f2fs_sb_has_project_quota(sbi->sb) &&
			F2FS_FITS_IN_INODE(src, le16_to_cpu(src->i_extra_isize),
								i_projid))
			dst->i_projid = src->i_projid;
	}
J
Jaegeuk Kim 已提交
2564 2565 2566 2567

	new_ni = old_ni;
	new_ni.ino = ino;

C
Chao Yu 已提交
2568
	if (unlikely(inc_valid_node_count(sbi, NULL, true)))
2569
		WARN_ON(1);
2570
	set_node_addr(sbi, &new_ni, NEW_ADDR, false);
J
Jaegeuk Kim 已提交
2571
	inc_valid_inode_count(sbi);
2572
	set_page_dirty(ipage);
J
Jaegeuk Kim 已提交
2573 2574 2575 2576
	f2fs_put_page(ipage, 1);
	return 0;
}

2577
int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
J
Jaegeuk Kim 已提交
2578 2579 2580 2581 2582
			unsigned int segno, struct f2fs_summary_block *sum)
{
	struct f2fs_node *rn;
	struct f2fs_summary *sum_entry;
	block_t addr;
2583
	int i, idx, last_offset, nrpages;
J
Jaegeuk Kim 已提交
2584 2585 2586 2587 2588 2589

	/* scan the node segment */
	last_offset = sbi->blocks_per_seg;
	addr = START_BLOCK(sbi, segno);
	sum_entry = &sum->entries[0];

2590
	for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
2591
		nrpages = min(last_offset - i, BIO_MAX_PAGES);
2592

A
arter97 已提交
2593
		/* readahead node pages */
C
Chao Yu 已提交
2594
		f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
J
Jaegeuk Kim 已提交
2595

2596
		for (idx = addr; idx < addr + nrpages; idx++) {
C
Chao Yu 已提交
2597
			struct page *page = f2fs_get_tmp_page(sbi, idx);
2598

2599 2600 2601
			if (IS_ERR(page))
				return PTR_ERR(page);

2602 2603 2604 2605 2606 2607
			rn = F2FS_NODE(page);
			sum_entry->nid = rn->footer.nid;
			sum_entry->version = 0;
			sum_entry->ofs_in_node = 0;
			sum_entry++;
			f2fs_put_page(page, 1);
2608
		}
2609

2610
		invalidate_mapping_pages(META_MAPPING(sbi), addr,
2611
							addr + nrpages);
J
Jaegeuk Kim 已提交
2612
	}
2613
	return 0;
J
Jaegeuk Kim 已提交
2614 2615
}

2616
static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
2617 2618 2619
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2620
	struct f2fs_journal *journal = curseg->journal;
J
Jaegeuk Kim 已提交
2621 2622
	int i;

2623
	down_write(&curseg->journal_rwsem);
2624
	for (i = 0; i < nats_in_cursum(journal); i++) {
J
Jaegeuk Kim 已提交
2625 2626
		struct nat_entry *ne;
		struct f2fs_nat_entry raw_ne;
2627
		nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
J
Jaegeuk Kim 已提交
2628

2629
		raw_ne = nat_in_journal(journal, i);
2630

J
Jaegeuk Kim 已提交
2631 2632
		ne = __lookup_nat_cache(nm_i, nid);
		if (!ne) {
2633 2634
			ne = __alloc_nat_entry(nid, true);
			__init_nat_entry(nm_i, ne, &raw_ne, true);
J
Jaegeuk Kim 已提交
2635
		}
2636 2637 2638 2639 2640 2641 2642 2643 2644 2645 2646 2647 2648

		/*
		 * if a free nat in journal has not been used after last
		 * checkpoint, we should remove it from available nids,
		 * since later we will add it again.
		 */
		if (!get_nat_flag(ne, IS_DIRTY) &&
				le32_to_cpu(raw_ne.block_addr) == NULL_ADDR) {
			spin_lock(&nm_i->nid_list_lock);
			nm_i->available_nids--;
			spin_unlock(&nm_i->nid_list_lock);
		}

J
Jaegeuk Kim 已提交
2649 2650
		__set_nat_cache_dirty(nm_i, ne);
	}
2651
	update_nats_in_cursum(journal, -i);
2652
	up_write(&curseg->journal_rwsem);
J
Jaegeuk Kim 已提交
2653 2654
}

2655 2656
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
						struct list_head *head, int max)
J
Jaegeuk Kim 已提交
2657
{
2658
	struct nat_entry_set *cur;
J
Jaegeuk Kim 已提交
2659

2660 2661
	if (nes->entry_cnt >= max)
		goto add_out;
J
Jaegeuk Kim 已提交
2662

2663 2664 2665 2666 2667
	list_for_each_entry(cur, head, set_list) {
		if (cur->entry_cnt >= nes->entry_cnt) {
			list_add(&nes->set_list, cur->set_list.prev);
			return;
		}
2668
	}
2669 2670 2671
add_out:
	list_add_tail(&nes->set_list, head);
}
J
Jaegeuk Kim 已提交
2672

J
Jaegeuk Kim 已提交
2673
static void __update_nat_bits(struct f2fs_sb_info *sbi, nid_t start_nid,
2674 2675 2676 2677 2678 2679
						struct page *page)
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_index = start_nid / NAT_ENTRY_PER_BLOCK;
	struct f2fs_nat_block *nat_blk = page_address(page);
	int valid = 0;
F
Fan Li 已提交
2680
	int i = 0;
2681 2682 2683 2684

	if (!enabled_nat_bits(sbi, NULL))
		return;

F
Fan Li 已提交
2685 2686 2687 2688 2689 2690
	if (nat_index == 0) {
		valid = 1;
		i = 1;
	}
	for (; i < NAT_ENTRY_PER_BLOCK; i++) {
		if (nat_blk->entries[i].block_addr != NULL_ADDR)
2691 2692 2693
			valid++;
	}
	if (valid == 0) {
J
Jaegeuk Kim 已提交
2694 2695
		__set_bit_le(nat_index, nm_i->empty_nat_bits);
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
2696 2697 2698
		return;
	}

J
Jaegeuk Kim 已提交
2699
	__clear_bit_le(nat_index, nm_i->empty_nat_bits);
2700
	if (valid == NAT_ENTRY_PER_BLOCK)
J
Jaegeuk Kim 已提交
2701
		__set_bit_le(nat_index, nm_i->full_nat_bits);
2702
	else
J
Jaegeuk Kim 已提交
2703
		__clear_bit_le(nat_index, nm_i->full_nat_bits);
2704 2705
}

2706
static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
2707
		struct nat_entry_set *set, struct cp_control *cpc)
2708 2709
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2710
	struct f2fs_journal *journal = curseg->journal;
2711 2712 2713 2714 2715
	nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
	bool to_journal = true;
	struct f2fs_nat_block *nat_blk;
	struct nat_entry *ne, *cur;
	struct page *page = NULL;
J
Jaegeuk Kim 已提交
2716

2717 2718 2719 2720 2721
	/*
	 * there are two steps to flush nat entries:
	 * #1, flush nat entries to journal in current hot data summary block.
	 * #2, flush nat entries to nat page.
	 */
2722 2723
	if (enabled_nat_bits(sbi, cpc) ||
		!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
2724 2725 2726
		to_journal = false;

	if (to_journal) {
2727
		down_write(&curseg->journal_rwsem);
2728 2729 2730 2731 2732
	} else {
		page = get_next_nat_page(sbi, start_nid);
		nat_blk = page_address(page);
		f2fs_bug_on(sbi, !nat_blk);
	}
2733

2734 2735 2736 2737 2738 2739
	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(ne, cur, &set->entry_list, list) {
		struct f2fs_nat_entry *raw_ne;
		nid_t nid = nat_get_nid(ne);
		int offset;

2740
		f2fs_bug_on(sbi, nat_get_blkaddr(ne) == NEW_ADDR);
2741 2742

		if (to_journal) {
C
Chao Yu 已提交
2743
			offset = f2fs_lookup_journal_in_cursum(journal,
2744 2745
							NAT_JOURNAL, nid, 1);
			f2fs_bug_on(sbi, offset < 0);
2746 2747
			raw_ne = &nat_in_journal(journal, offset);
			nid_in_journal(journal, offset) = cpu_to_le32(nid);
2748
		} else {
2749
			raw_ne = &nat_blk->entries[nid - start_nid];
J
Jaegeuk Kim 已提交
2750
		}
2751 2752
		raw_nat_from_node_info(raw_ne, &ne->ni);
		nat_reset_flag(ne);
2753
		__clear_nat_cache_dirty(NM_I(sbi), set, ne);
2754
		if (nat_get_blkaddr(ne) == NULL_ADDR) {
2755
			add_free_nid(sbi, nid, false, true);
C
Chao Yu 已提交
2756 2757
		} else {
			spin_lock(&NM_I(sbi)->nid_list_lock);
2758
			update_free_nid_bitmap(sbi, nid, false, false);
2759 2760
			spin_unlock(&NM_I(sbi)->nid_list_lock);
		}
2761
	}
J
Jaegeuk Kim 已提交
2762

2763
	if (to_journal) {
2764
		up_write(&curseg->journal_rwsem);
2765 2766
	} else {
		__update_nat_bits(sbi, start_nid, page);
2767
		f2fs_put_page(page, 1);
2768
	}
2769

2770 2771 2772 2773 2774
	/* Allow dirty nats by node block allocation in write_begin */
	if (!set->entry_cnt) {
		radix_tree_delete(&NM_I(sbi)->nat_set_root, set->set);
		kmem_cache_free(nat_entry_set_slab, set);
	}
2775
}
2776

2777 2778 2779
/*
 * This function is called during the checkpointing process.
 */
C
Chao Yu 已提交
2780
void f2fs_flush_nat_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
2781 2782 2783
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
2784
	struct f2fs_journal *journal = curseg->journal;
2785
	struct nat_entry_set *setvec[SETVEC_SIZE];
2786 2787 2788 2789 2790
	struct nat_entry_set *set, *tmp;
	unsigned int found;
	nid_t set_idx = 0;
	LIST_HEAD(sets);

2791 2792 2793 2794 2795 2796 2797
	/* during unmount, let's flush nat_bits before checking dirty_nat_cnt */
	if (enabled_nat_bits(sbi, cpc)) {
		down_write(&nm_i->nat_tree_lock);
		remove_nats_in_journal(sbi);
		up_write(&nm_i->nat_tree_lock);
	}

2798 2799
	if (!nm_i->dirty_nat_cnt)
		return;
2800

2801
	down_write(&nm_i->nat_tree_lock);
2802

2803 2804 2805 2806 2807
	/*
	 * if there are no enough space in journal to store dirty nat
	 * entries, remove all entries from journal and merge them
	 * into nat entry set.
	 */
2808
	if (enabled_nat_bits(sbi, cpc) ||
2809
		!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
2810 2811 2812
		remove_nats_in_journal(sbi);

	while ((found = __gang_lookup_nat_set(nm_i,
2813
					set_idx, SETVEC_SIZE, setvec))) {
2814 2815 2816 2817
		unsigned idx;
		set_idx = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++)
			__adjust_nat_entry_set(setvec[idx], &sets,
2818
						MAX_NAT_JENTRIES(journal));
J
Jaegeuk Kim 已提交
2819
	}
2820

2821 2822
	/* flush dirty nats in nat entry set */
	list_for_each_entry_safe(set, tmp, &sets, set_list)
2823
		__flush_nat_entry_set(sbi, set, cpc);
2824

2825
	up_write(&nm_i->nat_tree_lock);
2826
	/* Allow dirty nats by node block allocation in write_begin */
J
Jaegeuk Kim 已提交
2827 2828
}

2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840
static int __get_nat_bitmaps(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int nat_bits_bytes = nm_i->nat_blocks / BITS_PER_BYTE;
	unsigned int i;
	__u64 cp_ver = cur_cp_version(ckpt);
	block_t nat_bits_addr;

	if (!enabled_nat_bits(sbi, NULL))
		return 0;

C
Chao Yu 已提交
2841
	nm_i->nat_bits_blocks = F2FS_BLK_ALIGN((nat_bits_bytes << 1) + 8);
C
Chao Yu 已提交
2842 2843
	nm_i->nat_bits = f2fs_kzalloc(sbi,
			nm_i->nat_bits_blocks << F2FS_BLKSIZE_BITS, GFP_KERNEL);
2844 2845 2846 2847 2848 2849
	if (!nm_i->nat_bits)
		return -ENOMEM;

	nat_bits_addr = __start_cp_addr(sbi) + sbi->blocks_per_seg -
						nm_i->nat_bits_blocks;
	for (i = 0; i < nm_i->nat_bits_blocks; i++) {
2850 2851 2852 2853 2854 2855 2856
		struct page *page;

		page = f2fs_get_meta_page(sbi, nat_bits_addr++);
		if (IS_ERR(page)) {
			disable_nat_bits(sbi, true);
			return PTR_ERR(page);
		}
2857 2858 2859 2860 2861 2862

		memcpy(nm_i->nat_bits + (i << F2FS_BLKSIZE_BITS),
					page_address(page), F2FS_BLKSIZE);
		f2fs_put_page(page, 1);
	}

2863
	cp_ver |= (cur_cp_crc(ckpt) << 32);
2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875
	if (cpu_to_le64(cp_ver) != *(__le64 *)nm_i->nat_bits) {
		disable_nat_bits(sbi, true);
		return 0;
	}

	nm_i->full_nat_bits = nm_i->nat_bits + 8;
	nm_i->empty_nat_bits = nm_i->full_nat_bits + nat_bits_bytes;

	f2fs_msg(sbi->sb, KERN_NOTICE, "Found nat_bits in checkpoint");
	return 0;
}

2876
static inline void load_free_nid_bitmap(struct f2fs_sb_info *sbi)
2877 2878 2879 2880 2881 2882 2883 2884 2885 2886 2887 2888 2889 2890 2891 2892
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned int i = 0;
	nid_t nid, last_nid;

	if (!enabled_nat_bits(sbi, NULL))
		return;

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->empty_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);

		nid = i * NAT_ENTRY_PER_BLOCK;
2893
		last_nid = nid + NAT_ENTRY_PER_BLOCK;
2894

2895
		spin_lock(&NM_I(sbi)->nid_list_lock);
2896
		for (; nid < last_nid; nid++)
2897 2898
			update_free_nid_bitmap(sbi, nid, true, true);
		spin_unlock(&NM_I(sbi)->nid_list_lock);
2899 2900 2901 2902 2903 2904 2905 2906 2907 2908 2909
	}

	for (i = 0; i < nm_i->nat_blocks; i++) {
		i = find_next_bit_le(nm_i->full_nat_bits, nm_i->nat_blocks, i);
		if (i >= nm_i->nat_blocks)
			break;

		__set_bit_le(i, nm_i->nat_block_bitmap);
	}
}

J
Jaegeuk Kim 已提交
2910 2911 2912 2913 2914
static int init_node_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *sb_raw = F2FS_RAW_SUPER(sbi);
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	unsigned char *version_bitmap;
2915 2916
	unsigned int nat_segs;
	int err;
J
Jaegeuk Kim 已提交
2917 2918 2919 2920 2921

	nm_i->nat_blkaddr = le32_to_cpu(sb_raw->nat_blkaddr);

	/* segment_count_nat includes pair segment so divide to 2. */
	nat_segs = le32_to_cpu(sb_raw->segment_count_nat) >> 1;
2922 2923
	nm_i->nat_blocks = nat_segs << le32_to_cpu(sb_raw->log_blocks_per_seg);
	nm_i->max_nid = NAT_ENTRY_PER_BLOCK * nm_i->nat_blocks;
2924

2925
	/* not used nids: 0, node, meta, (and root counted as valid node) */
2926
	nm_i->available_nids = nm_i->max_nid - sbi->total_valid_node_count -
2927
				sbi->nquota_files - F2FS_RESERVED_NODE_NUM;
C
Chao Yu 已提交
2928 2929
	nm_i->nid_cnt[FREE_NID] = 0;
	nm_i->nid_cnt[PREALLOC_NID] = 0;
J
Jaegeuk Kim 已提交
2930
	nm_i->nat_cnt = 0;
2931
	nm_i->ram_thresh = DEF_RAM_THRESHOLD;
C
Chao Yu 已提交
2932
	nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
C
Chao Yu 已提交
2933
	nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
J
Jaegeuk Kim 已提交
2934

2935
	INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
C
Chao Yu 已提交
2936
	INIT_LIST_HEAD(&nm_i->free_nid_list);
2937 2938
	INIT_RADIX_TREE(&nm_i->nat_root, GFP_NOIO);
	INIT_RADIX_TREE(&nm_i->nat_set_root, GFP_NOIO);
J
Jaegeuk Kim 已提交
2939
	INIT_LIST_HEAD(&nm_i->nat_entries);
2940
	spin_lock_init(&nm_i->nat_list_lock);
J
Jaegeuk Kim 已提交
2941 2942

	mutex_init(&nm_i->build_lock);
C
Chao Yu 已提交
2943
	spin_lock_init(&nm_i->nid_list_lock);
2944
	init_rwsem(&nm_i->nat_tree_lock);
J
Jaegeuk Kim 已提交
2945 2946

	nm_i->next_scan_nid = le32_to_cpu(sbi->ckpt->next_free_nid);
A
Alexandru Gheorghiu 已提交
2947
	nm_i->bitmap_size = __bitmap_size(sbi, NAT_BITMAP);
J
Jaegeuk Kim 已提交
2948 2949 2950 2951
	version_bitmap = __bitmap_ptr(sbi, NAT_BITMAP);
	if (!version_bitmap)
		return -EFAULT;

A
Alexandru Gheorghiu 已提交
2952 2953 2954 2955
	nm_i->nat_bitmap = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap)
		return -ENOMEM;
2956

2957 2958 2959 2960
	err = __get_nat_bitmaps(sbi);
	if (err)
		return err;

2961 2962 2963 2964 2965 2966 2967
#ifdef CONFIG_F2FS_CHECK_FS
	nm_i->nat_bitmap_mir = kmemdup(version_bitmap, nm_i->bitmap_size,
					GFP_KERNEL);
	if (!nm_i->nat_bitmap_mir)
		return -ENOMEM;
#endif

J
Jaegeuk Kim 已提交
2968 2969 2970
	return 0;
}

J
Jaegeuk Kim 已提交
2971
static int init_free_nid_cache(struct f2fs_sb_info *sbi)
C
Chao Yu 已提交
2972 2973
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
2974
	int i;
C
Chao Yu 已提交
2975

2976 2977 2978 2979
	nm_i->free_nid_bitmap =
		f2fs_kzalloc(sbi, array_size(sizeof(unsigned char *),
					     nm_i->nat_blocks),
			     GFP_KERNEL);
C
Chao Yu 已提交
2980 2981 2982
	if (!nm_i->free_nid_bitmap)
		return -ENOMEM;

2983 2984
	for (i = 0; i < nm_i->nat_blocks; i++) {
		nm_i->free_nid_bitmap[i] = f2fs_kvzalloc(sbi,
2985
			f2fs_bitmap_size(NAT_ENTRY_PER_BLOCK), GFP_KERNEL);
2986
		if (!nm_i->free_nid_bitmap[i])
2987 2988 2989
			return -ENOMEM;
	}

C
Chao Yu 已提交
2990
	nm_i->nat_block_bitmap = f2fs_kvzalloc(sbi, nm_i->nat_blocks / 8,
C
Chao Yu 已提交
2991 2992 2993
								GFP_KERNEL);
	if (!nm_i->nat_block_bitmap)
		return -ENOMEM;
2994

2995 2996 2997 2998
	nm_i->free_nid_count =
		f2fs_kvzalloc(sbi, array_size(sizeof(unsigned short),
					      nm_i->nat_blocks),
			      GFP_KERNEL);
2999 3000
	if (!nm_i->free_nid_count)
		return -ENOMEM;
C
Chao Yu 已提交
3001 3002 3003
	return 0;
}

C
Chao Yu 已提交
3004
int f2fs_build_node_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3005 3006 3007
{
	int err;

C
Chao Yu 已提交
3008 3009
	sbi->nm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_nm_info),
							GFP_KERNEL);
J
Jaegeuk Kim 已提交
3010 3011 3012 3013 3014 3015 3016
	if (!sbi->nm_info)
		return -ENOMEM;

	err = init_node_manager(sbi);
	if (err)
		return err;

C
Chao Yu 已提交
3017 3018 3019 3020
	err = init_free_nid_cache(sbi);
	if (err)
		return err;

3021 3022 3023
	/* load free nid status from nat_bits table */
	load_free_nid_bitmap(sbi);

3024
	return f2fs_build_free_nids(sbi, true, true);
J
Jaegeuk Kim 已提交
3025 3026
}

C
Chao Yu 已提交
3027
void f2fs_destroy_node_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3028 3029 3030 3031
{
	struct f2fs_nm_info *nm_i = NM_I(sbi);
	struct free_nid *i, *next_i;
	struct nat_entry *natvec[NATVEC_SIZE];
3032
	struct nat_entry_set *setvec[SETVEC_SIZE];
J
Jaegeuk Kim 已提交
3033 3034 3035 3036 3037 3038 3039
	nid_t nid = 0;
	unsigned int found;

	if (!nm_i)
		return;

	/* destroy free nid list */
C
Chao Yu 已提交
3040
	spin_lock(&nm_i->nid_list_lock);
C
Chao Yu 已提交
3041
	list_for_each_entry_safe(i, next_i, &nm_i->free_nid_list, list) {
F
Fan Li 已提交
3042
		__remove_free_nid(sbi, i, FREE_NID);
C
Chao Yu 已提交
3043
		spin_unlock(&nm_i->nid_list_lock);
3044
		kmem_cache_free(free_nid_slab, i);
C
Chao Yu 已提交
3045
		spin_lock(&nm_i->nid_list_lock);
J
Jaegeuk Kim 已提交
3046
	}
C
Chao Yu 已提交
3047 3048 3049
	f2fs_bug_on(sbi, nm_i->nid_cnt[FREE_NID]);
	f2fs_bug_on(sbi, nm_i->nid_cnt[PREALLOC_NID]);
	f2fs_bug_on(sbi, !list_empty(&nm_i->free_nid_list));
C
Chao Yu 已提交
3050
	spin_unlock(&nm_i->nid_list_lock);
J
Jaegeuk Kim 已提交
3051 3052

	/* destroy nat cache */
3053
	down_write(&nm_i->nat_tree_lock);
J
Jaegeuk Kim 已提交
3054 3055 3056
	while ((found = __gang_lookup_nat_cache(nm_i,
					nid, NATVEC_SIZE, natvec))) {
		unsigned idx;
3057

3058
		nid = nat_get_nid(natvec[found - 1]) + 1;
3059 3060 3061 3062 3063
		for (idx = 0; idx < found; idx++) {
			spin_lock(&nm_i->nat_list_lock);
			list_del(&natvec[idx]->list);
			spin_unlock(&nm_i->nat_list_lock);

3064
			__del_from_nat_cache(nm_i, natvec[idx]);
3065
		}
J
Jaegeuk Kim 已提交
3066
	}
3067
	f2fs_bug_on(sbi, nm_i->nat_cnt);
3068 3069 3070 3071 3072 3073 3074 3075 3076 3077 3078 3079 3080 3081 3082

	/* destroy nat set cache */
	nid = 0;
	while ((found = __gang_lookup_nat_set(nm_i,
					nid, SETVEC_SIZE, setvec))) {
		unsigned idx;

		nid = setvec[found - 1]->set + 1;
		for (idx = 0; idx < found; idx++) {
			/* entry_cnt is not zero, when cp_error was occurred */
			f2fs_bug_on(sbi, !list_empty(&setvec[idx]->entry_list));
			radix_tree_delete(&nm_i->nat_set_root, setvec[idx]->set);
			kmem_cache_free(nat_entry_set_slab, setvec[idx]);
		}
	}
3083
	up_write(&nm_i->nat_tree_lock);
J
Jaegeuk Kim 已提交
3084

C
Chao Yu 已提交
3085
	kvfree(nm_i->nat_block_bitmap);
3086 3087 3088 3089 3090 3091 3092
	if (nm_i->free_nid_bitmap) {
		int i;

		for (i = 0; i < nm_i->nat_blocks; i++)
			kvfree(nm_i->free_nid_bitmap[i]);
		kfree(nm_i->free_nid_bitmap);
	}
3093
	kvfree(nm_i->free_nid_count);
C
Chao Yu 已提交
3094

J
Jaegeuk Kim 已提交
3095
	kfree(nm_i->nat_bitmap);
3096
	kfree(nm_i->nat_bits);
3097 3098 3099
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(nm_i->nat_bitmap_mir);
#endif
J
Jaegeuk Kim 已提交
3100 3101 3102 3103
	sbi->nm_info = NULL;
	kfree(nm_i);
}

C
Chao Yu 已提交
3104
int __init f2fs_create_node_manager_caches(void)
J
Jaegeuk Kim 已提交
3105 3106
{
	nat_entry_slab = f2fs_kmem_cache_create("nat_entry",
3107
			sizeof(struct nat_entry));
J
Jaegeuk Kim 已提交
3108
	if (!nat_entry_slab)
3109
		goto fail;
J
Jaegeuk Kim 已提交
3110 3111

	free_nid_slab = f2fs_kmem_cache_create("free_nid",
3112
			sizeof(struct free_nid));
3113
	if (!free_nid_slab)
3114
		goto destroy_nat_entry;
3115 3116 3117 3118

	nat_entry_set_slab = f2fs_kmem_cache_create("nat_entry_set",
			sizeof(struct nat_entry_set));
	if (!nat_entry_set_slab)
3119
		goto destroy_free_nid;
3120 3121 3122 3123 3124

	fsync_node_entry_slab = f2fs_kmem_cache_create("fsync_node_entry",
			sizeof(struct fsync_node_entry));
	if (!fsync_node_entry_slab)
		goto destroy_nat_entry_set;
J
Jaegeuk Kim 已提交
3125
	return 0;
3126

3127 3128
destroy_nat_entry_set:
	kmem_cache_destroy(nat_entry_set_slab);
3129
destroy_free_nid:
3130
	kmem_cache_destroy(free_nid_slab);
3131
destroy_nat_entry:
3132 3133 3134
	kmem_cache_destroy(nat_entry_slab);
fail:
	return -ENOMEM;
J
Jaegeuk Kim 已提交
3135 3136
}

C
Chao Yu 已提交
3137
void f2fs_destroy_node_manager_caches(void)
J
Jaegeuk Kim 已提交
3138
{
3139
	kmem_cache_destroy(fsync_node_entry_slab);
3140
	kmem_cache_destroy(nat_entry_set_slab);
J
Jaegeuk Kim 已提交
3141 3142 3143
	kmem_cache_destroy(free_nid_slab);
	kmem_cache_destroy(nat_entry_slab);
}