segment.c 112.7 KB
Newer Older
C
Chao Yu 已提交
1
// SPDX-License-Identifier: GPL-2.0
J
Jaegeuk Kim 已提交
2
/*
J
Jaegeuk Kim 已提交
3 4 5 6 7 8 9 10 11
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
12
#include <linux/prefetch.h>
13
#include <linux/kthread.h>
14
#include <linux/swap.h>
15
#include <linux/timer.h>
16
#include <linux/freezer.h>
17
#include <linux/sched/signal.h>
J
Jaegeuk Kim 已提交
18 19 20 21

#include "f2fs.h"
#include "segment.h"
#include "node.h"
22
#include "gc.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

26 27
#define __reverse_ffz(x) __reverse_ffs(~(x))

28
static struct kmem_cache *discard_entry_slab;
29
static struct kmem_cache *discard_cmd_slab;
30
static struct kmem_cache *sit_entry_set_slab;
J
Jaegeuk Kim 已提交
31
static struct kmem_cache *inmem_entry_slab;
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

48 49 50 51 52 53 54 55 56
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 *
 * Binary search for the lowest set bit: each step tests the low half of
 * the remaining window; if it is all-zero the answer lies in the high
 * half (add the half-width to @num), otherwise shift the high half away.
 * Caller must not pass word == 0 (result would be BITS_PER_LONG - 1).
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be integral times of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	/* from here on, @size counts bits left starting at *p's word */
	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		/* all-zero word cannot contain a set bit: skip cheaply */
		if (*p == 0)
			goto pass;

		/* normalize the reversed on-disk bit order first */
		tmp = __reverse_ulong((unsigned char *)p);

		/* mask off bits before @offset ... */
		tmp &= ~0UL >> offset;
		/* ... and bits past the end of the search range */
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	/* not found: return @size, mirroring find_next_bit() semantics */
	return result;
found:
	return result - size + __reverse_ffs(tmp);
}

/*
 * Zero-bit counterpart of __find_rev_next_bit(): returns the index of the
 * first clear bit at or after @offset, or @size if none exists.
 */
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		/* an all-ones word has no zero bit: skip cheaply */
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		/* force bits before @offset to 1 so they are ignored */
		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		/* likewise for bits past the end of the range */
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

C
Chao Yu 已提交
169
/*
 * Decide whether allocation should fall back to SSR (slack space recycle)
 * instead of pure LFS-style allocation: never under the LFS mount option,
 * always under urgent GC or when checkpointing is disabled, otherwise
 * when free sections drop below the projected demand of dirty metadata.
 */
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (test_opt(sbi, LFS))
		return false;
	if (sbi->gc_mode == GC_URGENT)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	/* dentry sections are weighted double; see the 2 * dent_secs term */
	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

C
Chao Yu 已提交
186
/*
 * Track @page as an atomic-write page of @inode: tag the page private
 * data, queue it on the inode's in-memory page list, and put the inode
 * on the superblock's ATOMIC_FILE list if not already there.
 */
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	/* mark the page so writeback paths recognize it as atomic data */
	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(&fi->inmem_ilist))
		list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&fi->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

217 218
/*
 * Walk @head and release every queued atomic page.
 *
 * @drop:    the transaction is being aborted; just discard page state.
 * @recover: pages were already committed to new block addresses; restore
 *           each one's on-disk mapping to cur->old_addr.
 *
 * Returns 0, -EAGAIN when a revoke lookup failed (transaction integrity
 * is lost; see caller comment in __f2fs_commit_inmem_pages), or a node
 * lookup error.
 */
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		lock_page(page);

		f2fs_wait_on_page_writeback(page, DATA, true, true);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
			set_new_dnode(&dn, inode, NULL, NULL, 0);
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
				err = -EAGAIN;
				goto next;
			}

			/*
			 * NOTE(review): this early return leaves @page locked
			 * and the remaining list entries unreleased — confirm
			 * against upstream whether callers tolerate that.
			 */
			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				f2fs_put_dnode(&dn);
				return err;
			}

			if (cur->old_addr == NEW_ADDR) {
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* we don't need to invalidate this in the successful status */
		if (drop || recover) {
			ClearPageUptodate(page);
			clear_cold_data(page);
		}
		set_page_private(page, 0);
		ClearPagePrivate(page);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}

C
Chao Yu 已提交
284
/*
 * Drop in-memory atomic pages of every inode on the ATOMIC_FILE list.
 * When @gc_failure is set, only inodes that actually saw atomic GC
 * failures are dropped; others are skipped.
 *
 * Fix: the original jumped to a skip label *outside* the if (inode)
 * block, so the reference taken by igrab() was never released on the
 * skip path — leaking the inode and re-selecting the same list head
 * forever. The iput() now sits under the skip label inside the branch,
 * so every successful igrab() is paired with an iput().
 */
void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
	struct inode *inode;
	struct f2fs_inode_info *fi;
next:
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
		return;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
	inode = igrab(&fi->vfs_inode);
	/*
	 * Rotate the entry to the tail so a skipped inode does not pin
	 * the list head and starve the rest of the list.
	 */
	if (inode)
		list_move_tail(&fi->inmem_ilist, head);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

	if (inode) {
		if (gc_failure) {
			if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
				goto skip;
		}
		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
		f2fs_drop_inmem_pages(inode);
skip:
		/* always pair the igrab() above */
		iput(inode);
	}
	congestion_wait(BLK_RW_ASYNC, HZ/50);
	cond_resched();
	goto next;
}

C
Chao Yu 已提交
316
/*
 * Abort @inode's pending atomic write: revoke all queued pages, detach
 * the inode from the ATOMIC_FILE list, and clear its atomic state.
 */
void f2fs_drop_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);

	mutex_lock(&fi->inmem_lock);
	/* drop=true, recover=false: discard, never remap */
	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_FILE);
	fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
	stat_dec_atomic_write(inode);
}

C
Chao Yu 已提交
334
/*
 * Remove a single atomic-written @page from @inode's in-memory list and
 * release the page reference taken at registration time. @page must have
 * been registered via f2fs_register_inmem_page() (enforced by bug_on).
 */
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;

	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry(cur, head, list) {
		if (cur->page == page)
			break;
	}

	/* the page must be present on the list, or state is corrupted */
	f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	/* unlocked put: caller holds the page lock */
	f2fs_put_page(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

C
Chao Yu 已提交
364
/*
 * Write out every queued atomic page of @inode. Pages that are written
 * successfully migrate to a local revoke_list carrying their old block
 * address; on failure the already-written set is revoked (remapped back)
 * and the rest is dropped. Caller holds fi->inmem_lock and cp/gc locks
 * (see f2fs_commit_inmem_pages).
 */
static int __f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.io_type = FS_DATA_IO,
	};
	struct list_head revoke_list;
	bool submit_bio = false;
	int err = 0;

	INIT_LIST_HEAD(&revoke_list);

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		/* a truncated page no longer belongs to this mapping: skip */
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			f2fs_wait_on_page_writeback(page, DATA, true, true);

			set_page_dirty(page);
			if (clear_page_dirty_for_io(page)) {
				inode_dec_dirty_pages(inode);
				f2fs_remove_dirty_inode(inode);
			}
retry:
			fio.page = page;
			fio.old_blkaddr = NULL_ADDR;
			fio.encrypted_page = NULL;
			fio.need_lock = LOCK_DONE;
			err = f2fs_do_write_data_page(&fio);
			if (err) {
				/* transient allocation failure: back off and retry */
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
				unlock_page(page);
				break;
			}
			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
			submit_bio = true;
		}
		unlock_page(page);
		list_move_tail(&cur->list, &revoke_list);
	}

	if (submit_bio)
		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);

	if (err) {
		/*
		 * try to revoke all committed pages, but still we could fail
		 * due to no memory or other reason, if that happened, EAGAIN
		 * will be returned, which means in such case, transaction is
		 * already not integrity, caller should use journal to do the
		 * recovery or rewrite & commit last transaction. For other
		 * error number, revoking was done by filesystem itself.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list, false, true);

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	} else {
		__revoke_inmem_pages(inode, &revoke_list, false, false);
	}

	return err;
}

C
Chao Yu 已提交
443
/*
 * Public entry point for committing an atomic write: takes the gc rwsem,
 * cp lock and inmem_lock around __f2fs_commit_inmem_pages(), and detaches
 * the inode from the ATOMIC_FILE list afterwards.
 */
int f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	f2fs_balance_fs(sbi, true);

	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
	err = __f2fs_commit_inmem_pages(inode);

	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

J
Jaegeuk Kim 已提交
473
/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	/* fault-injection hook: simulate a dead checkpoint */
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(FAULT_CHECKPOINT);
		f2fs_stop_checkpoint(sbi, false);
	}

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi);

	if (f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with checkpoint, if there are so many dirty
	 * dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		mutex_lock(&sbi->gc_mutex);
		/* f2fs_gc() drops gc_mutex internally */
		f2fs_gc(sbi, false, false, NULL_SEGNO);
	}
}

501 502
/*
 * Background balancing: shrink caches under memory pressure and trigger
 * a sync checkpoint when metadata (NAT/INO/prefree/dirty-node) pressure
 * or the checkpoint timer says so. No-op during recovery (SBI_POR_DOING).
 */
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink extent cache when there is no enough memory */
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	/* only proceed when idle, unless dirty NAT/node pressure forces it */
	if (!is_idle(sbi, REQ_TIME) &&
		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
		return;

	/* checkpoint is the only way to shrink partial cached entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
			excess_dirty_nodes(sbi) ||
			f2fs_time_over(sbi, CP_TIME)) {
		if (test_opt(sbi, DATA_FLUSH)) {
			struct blk_plug plug;

			/* plug so the dirty-inode writeback merges bios */
			blk_start_plug(&plug);
			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
			blk_finish_plug(&plug);
		}
		f2fs_sync_fs(sbi->sb, true);
		stat_inc_bg_cp_count(sbi->stat_info);
	}
}

542 543
/*
 * Synchronously issue an empty PREFLUSH bio to @bdev and wait for its
 * completion. Returns the bio's completion status.
 */
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	/* npages = 0: flush-only bio, no data payload */
	struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
	int ret;

	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
	bio_set_dev(bio, bdev);
	ret = submit_bio_wait(bio);
	bio_put(bio);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	return ret;
}

C
Chao Yu 已提交
558
/*
 * Flush the device(s) backing @ino. On a single-device image the sole
 * bdev is flushed; on multi-device images only devices that are dirty
 * for @ino (FLUSH_INO tracking) are flushed, stopping at first error.
 */
static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!sbi->s_ndevs)
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

576
/*
 * Kernel thread servicing merged flush requests: drains fcc->issue_list,
 * issues ONE flush for the whole batch, and completes every waiter with
 * the shared result. Sleeps until new requests arrive or kthread_stop().
 */
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	sb_start_intwrite(sbi->sb);

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		/* take the whole pending batch; llist is LIFO, so reverse */
		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		/* one flush covers every command in the batch */
		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	sb_end_intwrite(sbi->sb);

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

C
Chao Yu 已提交
614
/*
 * Issue a cache flush for @ino's device(s). With FLUSH_MERGE enabled the
 * request is normally queued to the flush thread so concurrent flushes
 * collapse into one; the first/only requester (or multi-device case)
 * flushes directly. Falls back to self-service draining if the flush
 * thread has gone away. Returns the flush result.
 */
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		atomic_inc(&fcc->queued_flush);
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	/* no one else queued (we are first), or multi-device: flush directly */
	if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->queued_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/* update issue_list before we wake up issue_flush thread */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->queued_flush);
	} else {
		/* flush thread is gone: service the queue ourselves */
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			/* someone else already dispatched our command */
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->queued_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				/* our own entry: record result, don't complete */
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->queued_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

C
Chao Yu 已提交
680
/*
 * Allocate (if needed) the flush_cmd_control and start the flush-merge
 * kthread when the FLUSH_MERGE option is on. Safe to call again after
 * the thread was stopped: reuses the existing fcc and only restarts the
 * thread. Returns 0 or a negative errno.
 */
int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return err;
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->queued_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	/* without flush merging, the structure alone suffices */
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kvfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
		return err;
	}

	return err;
}

C
Chao Yu 已提交
717
/*
 * Stop the flush-merge thread; when @free is set, also release the
 * flush_cmd_control itself (remount keeps it with @free == false).
 */
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		/* clear the pointer first so no new waiters queue to it */
		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kvfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
/*
 * Flush the write cache of every extra device flagged in
 * sbi->dirty_device, clearing each flag once its flush succeeds.
 * Returns 0, or the first flush error encountered.
 */
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int err = 0;
	int idx;

	/* single-device image: no per-device dirty tracking to service */
	if (!sbi->s_ndevs)
		return 0;

	/* dirty flags are only tracked for indices >= 1 */
	for (idx = 1; idx < sbi->s_ndevs; idx++) {
		if (!f2fs_test_bit(idx, (char *)&sbi->dirty_device))
			continue;

		err = __submit_flush_wait(sbi, FDEV(idx).bdev);
		if (err)
			break;

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(idx, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return err;
}

J
Jaegeuk Kim 已提交
755 756 757 758 759 760 761 762 763 764 765 766 767 768
/*
 * Mark @segno dirty in dirty_segmap[@dirty_type]. For the aggregate
 * DIRTY type, the per-segment-type map is updated as well. Active
 * (current) segments are never marked. Caller holds seglist_lock.
 */
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		/* a seg_entry type must be a real segment type, below DIRTY */
		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}

/*
 * Clear @segno from dirty_segmap[@dirty_type]; for the aggregate DIRTY
 * type, also clear the per-segment-type bit and, once the segment holds
 * no valid blocks at all, drop its section from the GC victim map.
 * Caller holds seglist_lock.
 */
static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		if (get_valid_blocks(sbi, segno, true) == 0)
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
	}
}

J
Jaegeuk Kim 已提交
801
/*
 * Should not occur error such as -ENOMEM.
 * Adding dirty entry into seglist is not critical operation.
 * If a given segment is one of current working segments, it won't be added.
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);

	/*
	 * Fully invalid segment becomes PRE (free after checkpoint) —
	 * but with checkpointing disabled, only if it was fully valid at
	 * the last checkpoint (its ckpt blocks all reclaimable at once).
	 */
	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
				ckpt_valid_blocks == sbi->blocks_per_seg)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

D
Daniel Rosenberg 已提交
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
/*
 * Move every fully-invalidated, non-active dirty segment to the prefree
 * state so a subsequent checkpoint can release it.
 */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		/* only segments with no valid blocks, never the cursegs */
		if (get_valid_blocks(sbi, segno, false) ||
						IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

/*
 * Check whether checkpointing can stay disabled: sum the invalid-block
 * "holes" of all dirty segments, split by NODE vs DATA, and refuse
 * (-EAGAIN) when either exceeds the overprovision budget.
 */
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
	block_t holes[2] = {0, 0};	/* indexed by DATA / NODE */
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		struct seg_entry *se = get_seg_entry(sbi, segno);
		int idx = IS_NODESEG(se->type) ? NODE : DATA;

		holes[idx] += sbi->blocks_per_seg - se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	if (holes[DATA] > ovp || holes[NODE] > ovp)
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int found = NULL_SEGNO;
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		/* must hold no valid blocks, neither live nor checkpointed */
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno))
			continue;
		found = segno;
		break;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return found;
}

893
/*
 * Allocate and initialize a discard command covering @len blocks at
 * logical @lstart / physical @start on @bdev, and queue it on the
 * length-bucketed pending list. Also bumps the control counters.
 */
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	/* bucket by length so issuing can favor large discards */
	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->lstart = lstart;
	dc->start = start;
	dc->len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->queued = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

/*
 * Create a discard command and link it into the dcc rb-tree at the
 * position given by @parent/@p (@leftmost maintains the cached-leftmost
 * optimization of rb_root_cached).
 */
static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);

	return dc;
}

942 943
/*
 * Unlink @dc from its list and the rb-tree, undo its contribution to the
 * control counters, and free it.
 */
static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	/* queued accounting only applies once the command completed */
	if (dc->state == D_DONE)
		atomic_sub(dc->queued, &dcc->queued_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

/*
 * Tear down @dc unless bios still reference it (bio_ref != 0, in which
 * case removal is deferred to the endio path). Logs non-EOPNOTSUPP
 * errors rate-limited; EOPNOTSUPP (device lacks discard) is ignored.
 */
static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		printk_ratelimited(
			"%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

984 985 986
/*
 * Discard bio completion: record the status and, once the last bio of a
 * submitted command finishes, mark it D_DONE and wake all waiters.
 */
static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	dc->error = blk_status_to_errno(bio->bi_status);

	spin_lock_irqsave(&dc->lock, flags);
	dc->bio_ref--;
	/* last in-flight bio and the command was fully submitted */
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}

W
Wei Yongjun 已提交
1001
/*
 * Debug-only (CONFIG_F2FS_CHECK_FS): verify that no block in
 * [@start, @end) is still marked valid in the SIT bitmaps, i.e. we are
 * about to discard only invalid blocks. Compiles to a no-op otherwise.
 */
static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
	unsigned long *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

		/* clamp the scan window if @end falls inside this segment */
		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = max_blocks;
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		/* no set (valid) bit may exist inside the discard range */
		f2fs_bug_on(sbi, offset != size);
		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

1028 1029 1030 1031 1032 1033 1034
static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
C
Chao Yu 已提交
1035
	dpolicy->ordered = false;
1036 1037 1038 1039
	dpolicy->granularity = granularity;

	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
	dpolicy->io_aware_gran = MAX_PLIST_NUM;
1040
	dpolicy->timeout = 0;
1041 1042 1043

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1044
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1045 1046
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = true;
1047
		dpolicy->sync = false;
C
Chao Yu 已提交
1048
		dpolicy->ordered = true;
1049 1050 1051 1052 1053 1054
		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
			dpolicy->granularity = 1;
			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1055
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1056 1057 1058 1059 1060
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
1061
		dpolicy->max_requests = UINT_MAX;
1062 1063 1064 1065
		dpolicy->io_aware = false;
	}
}

1066 1067 1068
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
1069
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
1070
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1071
						struct discard_policy *dpolicy,
1072 1073
						struct discard_cmd *dc,
						unsigned int *issued)
1074
{
1075 1076 1077 1078
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1079
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1080 1081 1082
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
1083 1084
	block_t lstart, start, len, total_len;
	int err = 0;
1085 1086

	if (dc->state != D_PREP)
1087
		return 0;
1088

1089
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1090
		return 0;
1091

1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116
	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

1117 1118 1119 1120 1121
		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
1122 1123 1124 1125
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
1126
submit:
1127
		if (err) {
1128
			spin_lock_irqsave(&dc->lock, flags);
1129
			if (dc->state == D_PARTIAL)
1130 1131 1132
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

1133 1134
			break;
		}
1135

1136
		f2fs_bug_on(sbi, !bio);
1137

1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148
		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);
1149

1150 1151
		atomic_inc(&dcc->queued_discard);
		dc->queued++;
1152
		list_move_tail(&dc->list, wait_list);
C
Chao Yu 已提交
1153

1154
		/* sanity check on discard range */
1155
		__check_sit_bitmap(sbi, lstart, lstart + len);
1156

1157 1158 1159 1160 1161 1162 1163 1164
		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);
1165 1166 1167 1168 1169

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
1170
	}
1171

1172
	if (!err && len)
1173
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
1174
	return err;
1175 1176
}

1177 1178 1179 1180 1181
/*
 * Insert a new discard command into the rb-tree (keyed by logical start).
 * When the caller already looked up the insertion point, insert_p and
 * insert_parent are reused; otherwise a fresh lookup is done.
 * Returns the attached command, or NULL on allocation failure.
 * Caller must hold dcc->cmd_lock.
 */
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct rb_node **p;
	struct rb_node *parent = NULL;
	struct discard_cmd *dc = NULL;
	bool leftmost = true;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}

	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
							lstart, &leftmost);
do_insert:
	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
								p, leftmost);
	if (!dc)
		return NULL;

	return dc;
}

C
Chao Yu 已提交
1206 1207 1208 1209 1210 1211
/* Re-queue a command onto the pending list matching its (new) length. */
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

1212 1213 1214
/*
 * Punch a single block out of a pending discard command (the block is being
 * rewritten so it must not be discarded).  The command may be removed,
 * trimmed on either side, or split into two commands around blkaddr.
 * Caller must hold dcc->cmd_lock.
 */
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_info di = dc->di;
	bool modified = false;

	/* already completed, or nothing would remain after the punch */
	if (dc->state == D_DONE || dc->len == 1) {
		__remove_discard_cmd(sbi, dc);
		return;
	}

	dcc->undiscard_blks -= di.len;

	/* keep the head part [di.lstart, blkaddr) in this command */
	if (blkaddr > di.lstart) {
		dc->len = blkaddr - dc->lstart;
		dcc->undiscard_blks += dc->len;
		__relocate_discard_cmd(dcc, dc);
		modified = true;
	}

	/* a tail part remains after blkaddr */
	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			/* head kept above: the tail becomes a new command */
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			/* blkaddr is the first block: just shrink in place */
			dc->lstart++;
			dc->len--;
			dc->start++;
			dcc->undiscard_blks += dc->len;
			__relocate_discard_cmd(dcc, dc);
		}
	}
}

1249 1250 1251
/*
 * Add the range [lstart, lstart + len) to the discard rb-tree, merging with
 * adjacent pending (D_PREP) commands on the same device when the merged
 * length stays within the device discard limit, and inserting new commands
 * for the uncovered gaps.  Caller must hold dcc->cmd_lock.
 */
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
	block_t end = lstart + len;

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (dc)
		prev_dc = dc;

	/* no command before lstart: seed di with the leading gap */
	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
	}

	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			/* di covers the gap between prev_dc and next_dc,
			 * clamped to [lstart, end) */
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		/* try to extend prev_dc forward over di */
		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
			prev_dc->di.len += di.len;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, prev_dc);
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		/* try to extend next_dc backward; if both merged, next_dc
		 * absorbs everything and the back-merged tdc is dropped */
		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
			dcc->undiscard_blks += di.len;
			__relocate_discard_cmd(dcc, next_dc);
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
		}

		if (!merged) {
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
		}
 next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

/*
 * Queue a discard for [blkstart, blkstart + blklen) into the discard tree.
 * On multi-device setups the physical start is rebased to the target device
 * while the logical (f2fs-wide) start is preserved for tree ordering.
 * Always returns 0.
 */
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

	trace_f2fs_queue_discard(bdev, blkstart, blklen);

	if (sbi->s_ndevs) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
	return 0;
}

C
Chao Yu 已提交
1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374
static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
C
Chao Yu 已提交
1375
					&insert_p, &insert_parent, true, NULL);
C
Chao Yu 已提交
1376 1377 1378 1379 1380 1381 1382
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
1383
		int err = 0;
C
Chao Yu 已提交
1384 1385 1386 1387

		if (dc->state != D_PREP)
			goto next;

1388
		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
C
Chao Yu 已提交
1389 1390 1391 1392 1393
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
1394
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
C
Chao Yu 已提交
1395

1396
		if (issued >= dpolicy->max_requests)
C
Chao Yu 已提交
1397 1398 1399
			break;
next:
		node = rb_next(&dc->rb_node);
1400 1401
		if (err)
			__remove_discard_cmd(sbi, dc);
C
Chao Yu 已提交
1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416 1417
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

C
Chao Yu 已提交
1418 1419
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
C
Chao Yu 已提交
1420 1421 1422 1423 1424
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
1425
	int i, issued = 0;
1426
	bool io_interrupted = false;
C
Chao Yu 已提交
1427

1428 1429 1430
	if (dpolicy->timeout != 0)
		f2fs_update_time(sbi, dpolicy->timeout);

C
Chao Yu 已提交
1431
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
1432 1433 1434 1435
		if (dpolicy->timeout != 0 &&
				f2fs_time_over(sbi, dpolicy->timeout))
			break;

C
Chao Yu 已提交
1436 1437
		if (i + 1 < dpolicy->granularity)
			break;
C
Chao Yu 已提交
1438 1439 1440 1441

		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
			return __issue_discard_cmd_orderly(sbi, dpolicy);

C
Chao Yu 已提交
1442
		pend_list = &dcc->pend_list[i];
1443 1444

		mutex_lock(&dcc->cmd_lock);
1445 1446
		if (list_empty(pend_list))
			goto next;
1447 1448 1449
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));
1450
		blk_start_plug(&plug);
C
Chao Yu 已提交
1451 1452 1453
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

C
Chao Yu 已提交
1454
			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1455
						!is_idle(sbi, DISCARD_TIME)) {
1456
				io_interrupted = true;
1457
				break;
1458
			}
1459

1460
			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
1461

1462
			if (issued >= dpolicy->max_requests)
1463
				break;
C
Chao Yu 已提交
1464
		}
1465
		blk_finish_plug(&plug);
1466
next:
1467 1468
		mutex_unlock(&dcc->cmd_lock);

1469
		if (issued >= dpolicy->max_requests || io_interrupted)
1470
			break;
C
Chao Yu 已提交
1471
	}
1472

1473 1474 1475
	if (!issued && io_interrupted)
		issued = -1;

1476 1477 1478
	return issued;
}

1479
static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1480 1481 1482 1483 1484
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
1485
	bool dropped = false;
1486 1487 1488 1489 1490 1491 1492

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
1493
			dropped = true;
1494 1495 1496
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1497 1498

	return dropped;
C
Chao Yu 已提交
1499 1500
}

C
Chao Yu 已提交
1501
/* Public wrapper: drop all pending discard commands, ignoring the result. */
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
{
	__drop_discard_cmd(sbi);
}

1506
static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1507 1508 1509
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1510
	unsigned int len = 0;
C
Chao Yu 已提交
1511 1512 1513 1514 1515

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
1516 1517 1518
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
C
Chao Yu 已提交
1519
		__remove_discard_cmd(sbi, dc);
1520
	}
C
Chao Yu 已提交
1521
	mutex_unlock(&dcc->cmd_lock);
1522 1523

	return len;
C
Chao Yu 已提交
1524 1525
}

1526
/*
 * Wait for submitted discard commands overlapping [start, end) whose length
 * meets the policy granularity.  Completed commands are reaped inline; a
 * still-running command is pinned with a reference and waited for outside
 * the lock, then the scan restarts.  Returns total blocks trimmed.
 */
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
						struct discard_policy *dpolicy,
						block_t start, block_t end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	struct discard_cmd *dc, *tmp;
	bool need_wait;
	unsigned int trimmed = 0;

next:
	need_wait = false;

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(dc, tmp, wait_list, list) {
		/* skip commands entirely outside the requested range */
		if (dc->lstart + dc->len <= start || end <= dc->lstart)
			continue;
		if (dc->len < dpolicy->granularity)
			continue;
		if (dc->state == D_DONE && !dc->ref) {
			wait_for_completion_io(&dc->wait);
			if (!dc->error)
				trimmed += dc->len;
			__remove_discard_cmd(sbi, dc);
		} else {
			/* pin it; wait below without holding cmd_lock */
			dc->ref++;
			need_wait = true;
			break;
		}
	}
	mutex_unlock(&dcc->cmd_lock);

	if (need_wait) {
		trimmed += __wait_one_discard_bio(sbi, dc);
		goto next;
	}

	return trimmed;
}

1567
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1568
						struct discard_policy *dpolicy)
1569
{
1570
	struct discard_policy dp;
1571
	unsigned int discard_blks;
1572

1573 1574
	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1575 1576

	/* wait all */
1577
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1578
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1579
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1580 1581 1582
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
1583 1584
}

1585
/* This should be covered by global mutex, &sit_i->sentry_lock */
W
Wei Yongjun 已提交
1586
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1587 1588 1589
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
1590
	bool need_wait = false;
1591 1592

	mutex_lock(&dcc->cmd_lock);
C
Chao Yu 已提交
1593 1594
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
							NULL, blkaddr);
1595
	if (dc) {
1596 1597 1598 1599 1600 1601
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
C
Chao Yu 已提交
1602
	}
C
Chao Yu 已提交
1603
	mutex_unlock(&dcc->cmd_lock);
1604

C
Chao Yu 已提交
1605 1606
	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
C
Chao Yu 已提交
1607 1608
}

C
Chao Yu 已提交
1609
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1610 1611 1612 1613 1614 1615 1616 1617
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
1618
	}
C
Chao Yu 已提交
1619 1620
}

1621
/* This comes from f2fs_put_super */
1622
bool f2fs_issue_discard_timeout(struct f2fs_sb_info *sbi)
1623 1624
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1625
	struct discard_policy dpolicy;
1626
	bool dropped;
1627

1628 1629
	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
1630
	dpolicy.timeout = UMOUNT_DISCARD_TIMEOUT;
C
Chao Yu 已提交
1631
	__issue_discard_cmd(sbi, &dpolicy);
1632 1633
	dropped = __drop_discard_cmd(sbi);

1634 1635
	/* just to make sure there is no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);
1636 1637

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1638
	return dropped;
1639 1640
}

1641 1642 1643 1644 1645
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
C
Chao Yu 已提交
1646
	struct discard_policy dpolicy;
1647 1648
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
	int issued;
1649

1650
	set_freezable();
1651

1652
	do {
1653
		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
C
Chao Yu 已提交
1654 1655
					dcc->discard_granularity);

1656 1657 1658 1659
		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));
S
Sheng Yong 已提交
1660 1661 1662 1663

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

1664 1665 1666 1667
		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

1668 1669
		if (try_to_freeze())
			continue;
1670 1671
		if (f2fs_readonly(sbi->sb))
			continue;
1672 1673
		if (kthread_should_stop())
			return 0;
1674 1675 1676 1677
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}
1678

1679
		if (sbi->gc_mode == GC_URGENT)
1680
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1681

1682 1683
		sb_start_intwrite(sbi->sb);

C
Chao Yu 已提交
1684
		issued = __issue_discard_cmd(sbi, &dpolicy);
1685
		if (issued > 0) {
C
Chao Yu 已提交
1686 1687
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
1688
		} else if (issued == -1){
1689 1690
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
1691
				wait_ms = dpolicy.mid_interval;
1692
		} else {
C
Chao Yu 已提交
1693
			wait_ms = dpolicy.max_interval;
1694
		}
1695

1696
		sb_end_intwrite(sbi->sb);
1697 1698 1699

	} while (!kthread_should_stop());
	return 0;
1700 1701
}

1702
#ifdef CONFIG_BLK_DEV_ZONED
/*
 * Discard on a zoned device: conventional zones use a regular queued
 * discard (when supported); sequential zones get a zone reset, which
 * requires the range to be exactly one zone-aligned zone.
 * Returns 0 or a negative errno.
 */
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	sector_t sector, nr_sects;
	block_t lblkstart = blkstart;
	int devi = 0;

	if (sbi->s_ndevs) {
		devi = f2fs_target_device_index(sbi, blkstart);
		blkstart -= FDEV(devi).start_blk;
	}

	/*
	 * We need to know the type of the zone: for conventional zones,
	 * use regular discard if the drive supports it. For sequential
	 * zones, reset the zone write pointer.
	 */
	switch (get_blkz_type(sbi, bdev, blkstart)) {

	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!blk_queue_discard(bdev_get_queue(bdev)))
			return 0;
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_msg(sbi->sb, KERN_INFO,
				"(%d) %s: Unaligned discard attempted (block %x + %x)",
				devi, sbi->s_ndevs ? FDEV(devi).path : "",
				blkstart, blklen);
			return -EIO;
		}
		trace_f2fs_issue_reset_zone(bdev, blkstart);
		return blkdev_reset_zones(bdev, sector,
					  nr_sects, GFP_NOFS);
	default:
		/* Unknown zone type: broken device ? */
		return -EIO;
	}
}
#endif

J
Jaegeuk Kim 已提交
1749 1750 1751 1752
/*
 * Dispatch a discard to the right backend: zoned handling for zoned block
 * devices, otherwise the regular queued discard path.
 */
static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
	if (f2fs_sb_has_blkzoned(sbi) &&
				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
}

1760
/*
 * Issue a discard over a logical block range, splitting it at device
 * boundaries on multi-device setups and updating the per-segment discard
 * bitmaps / counter as blocks become discardable.  Returns 0 or errno from
 * the first failing sub-issue.
 */
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
				block_t blkstart, block_t blklen)
{
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			/* crossed onto another device: flush what we have */
			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}

	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
}

1800 1801
/*
 * Collect discardable block ranges of segment cpc->trim_start into small
 * discard entries.  In CP_DISCARD (fstrim) mode, candidates are blocks not
 * valid at the last checkpoint and not already discarded; otherwise they
 * are blocks freed since the checkpoint.  With check_only, returns true as
 * soon as one candidate range exists without recording anything.
 */
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
{
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *discard_map = (unsigned long *)se->discard_map;
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
	unsigned int start = 0, end = -1;
	bool force = (cpc->reason & CP_DISCARD);
	struct discard_entry *de = NULL;
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
	int i;

	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
		return false;

	if (!force) {
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
			return false;
	}

	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];

	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
		/* fstrim: skip interior ranges shorter than trim_minlen */
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

		if (check_only)
			return true;

		/* one entry per segment, allocated lazily */
		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
								GFP_F2FS_ZERO);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
	}
	return false;
}

1860 1861 1862 1863 1864 1865
/* Unlink a small-discard entry from its list and free it. */
static void release_discard_addr(struct discard_entry *entry)
{
	list_del(&entry->list);
	kmem_cache_free(discard_entry_slab, entry);
}

C
Chao Yu 已提交
1866
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1867
{
1868
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1869 1870 1871
	struct discard_entry *entry, *this;

	/* drop caches */
1872 1873
	list_for_each_entry_safe(entry, this, head, list)
		release_discard_addr(entry);
1874 1875
}

J
Jaegeuk Kim 已提交
1876
/*
C
Chao Yu 已提交
1877
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
J
Jaegeuk Kim 已提交
1878 1879 1880 1881
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1882
	unsigned int segno;
J
Jaegeuk Kim 已提交
1883 1884

	mutex_lock(&dirty_i->seglist_lock);
1885
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1886 1887 1888 1889
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

C
Chao Yu 已提交
1890 1891
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1892
{
1893 1894
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
1895
	struct discard_entry *entry, *this;
J
Jaegeuk Kim 已提交
1896
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1897 1898
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
1899
	unsigned int secno, start_segno;
1900
	bool force = (cpc->reason & CP_DISCARD);
1901
	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
J
Jaegeuk Kim 已提交
1902 1903

	mutex_lock(&dirty_i->seglist_lock);
1904

J
Jaegeuk Kim 已提交
1905
	while (1) {
1906
		int i;
1907 1908 1909

		if (need_align && end != -1)
			end--;
1910 1911
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1912
			break;
1913 1914
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);
1915

1916 1917 1918 1919
		if (need_align) {
			start = rounddown(start, sbi->segs_per_sec);
			end = roundup(end, sbi->segs_per_sec);
		}
1920

1921 1922 1923 1924
		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}
1925

1926
		if (!f2fs_realtime_discard_enable(sbi))
1927
			continue;
J
Jaegeuk Kim 已提交
1928

1929 1930 1931 1932
		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
				continue;

1933
		if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
1934
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1935
				(end - start) << sbi->log_blocks_per_seg);
1936 1937 1938
			continue;
		}
next:
1939 1940
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
1941
		if (!IS_CURSEC(sbi, secno) &&
1942
			!get_valid_blocks(sbi, start, true))
1943 1944 1945 1946 1947 1948
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
1949 1950
		else
			end = start - 1;
J
Jaegeuk Kim 已提交
1951 1952
	}
	mutex_unlock(&dirty_i->seglist_lock);
1953 1954

	/* send small discards */
1955
	list_for_each_entry_safe(entry, this, head, list) {
C
Chao Yu 已提交
1956 1957 1958 1959 1960 1961 1962 1963 1964
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

1965
			if (f2fs_sb_has_blkzoned(sbi) ||
1966
			    (force && len < cpc->trim_minlen))
C
Chao Yu 已提交
1967 1968 1969 1970 1971 1972 1973 1974 1975
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
		}
1976
skip:
C
Chao Yu 已提交
1977 1978 1979 1980 1981 1982
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

1983
		release_discard_addr(entry);
1984
		dcc->nr_discards -= total_len;
1985
	}
C
Chao Yu 已提交
1986

1987
	wake_up_discard_thread(sbi, false);
J
Jaegeuk Kim 已提交
1988 1989
}

1990
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1991
{
1992
	dev_t dev = sbi->sb->s_bdev->bd_dev;
1993
	struct discard_cmd_control *dcc;
C
Chao Yu 已提交
1994
	int err = 0, i;
1995 1996 1997 1998 1999 2000

	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

C
Chao Yu 已提交
2001
	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
2002 2003 2004
	if (!dcc)
		return -ENOMEM;

2005
	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
2006
	INIT_LIST_HEAD(&dcc->entry_list);
C
Chao Yu 已提交
2007
	for (i = 0; i < MAX_PLIST_NUM; i++)
C
Chao Yu 已提交
2008
		INIT_LIST_HEAD(&dcc->pend_list[i]);
2009
	INIT_LIST_HEAD(&dcc->wait_list);
2010
	INIT_LIST_HEAD(&dcc->fstrim_list);
2011
	mutex_init(&dcc->cmd_lock);
C
Chao Yu 已提交
2012
	atomic_set(&dcc->issued_discard, 0);
2013
	atomic_set(&dcc->queued_discard, 0);
C
Chao Yu 已提交
2014
	atomic_set(&dcc->discard_cmd_cnt, 0);
2015
	dcc->nr_discards = 0;
C
Chao Yu 已提交
2016
	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
C
Chao Yu 已提交
2017
	dcc->undiscard_blks = 0;
C
Chao Yu 已提交
2018
	dcc->next_pos = 0;
C
Chao Yu 已提交
2019
	dcc->root = RB_ROOT_CACHED;
2020
	dcc->rbtree_check = false;
2021

2022
	init_waitqueue_head(&dcc->discard_wait_queue);
2023 2024
	SM_I(sbi)->dcc_info = dcc;
init_thread:
2025 2026 2027 2028
	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard)) {
		err = PTR_ERR(dcc->f2fs_issue_discard);
2029
		kvfree(dcc);
2030 2031 2032 2033
		SM_I(sbi)->dcc_info = NULL;
		return err;
	}

2034 2035 2036
	return err;
}

2037
/*
 * Stop the discard thread and release the discard command control
 * structure.  Safe to call when it was never created (dcc == NULL).
 */
static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (!dcc)
		return;

	/* stop the issuing thread before freeing what it works on */
	f2fs_stop_discard_thread(sbi);

	kvfree(dcc);
	SM_I(sbi)->dcc_info = NULL;
}

2050
/*
 * Mark @segno's SIT entry dirty so it is flushed at the next checkpoint.
 * Returns true if the entry was already dirty, false if this call newly
 * dirtied it (and bumped the dirty_sentries counter).
 */
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

/*
 * Record the log type of @segno in its in-memory SIT entry; when
 * @modified is set, also mark the entry dirty for the next checkpoint.
 */
static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);

	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

/*
 * Apply a valid-block delta (@del is +1 on allocation, -1 on invalidation)
 * to the SIT entry covering @blkaddr: updates valid_blocks, the current /
 * checkpoint / discard bitmaps, the entry mtime, and marks the entry dirty.
 * Bitmap inconsistencies are reported and neutralized (del forced to 0)
 * instead of corrupting the counters further.
 *
 * NOTE(review): callers in this file hold sit_i->sentry_lock around this —
 * presumably a hard requirement; confirm against other call sites.
 */
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
	bool exist;
#ifdef CONFIG_F2FS_CHECK_FS
	bool mir_exist;
#endif

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	/* catch underflow (negative cast to huge) or overflow past seg size */
	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi, false);
	if (se->mtime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		/* the mirror bitmap must agree with the primary one */
		mir_exist = f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when setting bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(exist)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly set, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
			/* undo the count change; block was already valid */
			se->valid_blocks--;
			del = 0;
		}

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;

		/* don't overwrite by SSR to keep node chain */
		if (IS_NODESEG(se->type) &&
				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
	} else {
		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		mir_exist = f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when clearing bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(!exist)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly cleared, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
			/* undo the count change; block was already free */
			se->valid_blocks++;
			del = 0;
		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			/*
			 * If checkpoints are off, we must not reuse data that
			 * was used in the previous checkpoint. If it was used
			 * before, we must track that to know how much space we
			 * really have.
			 */
			if (f2fs_test_bit(offset, se->ckpt_valid_map))
				sbi->unusable_block_count++;
		}

		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	/* blocks not yet in the checkpoint image also shift ckpt count */
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (__is_large_section(sbi))
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

C
Chao Yu 已提交
2169
/*
 * Invalidate the single block at @addr: drop any cached meta page for it,
 * decrement its SIT valid-block count, and move its segment onto the dirty
 * seglist.  NEW_ADDR (never written) is a no-op; NULL_ADDR is a bug.
 */
void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);

	/* add it into sit main buffer */
	down_write(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	up_write(&sit_i->sentry_lock);
}

C
Chao Yu 已提交
2191
/*
 * Return true if @blkaddr was valid at the last checkpoint (i.e. its bit is
 * set in the segment's ckpt_valid_map).  Addresses outside the valid data
 * range are conservatively reported as checkpointed.
 */
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return true;

	down_read(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	up_read(&sit_i->sentry_lock);

	return is_cp;
}

J
Jaegeuk Kim 已提交
2215
/*
 * Copy @sum into the in-memory summary block of the @type current segment,
 * at the slot for the next block to be written (next_blkoff).
 * This function should be resided under the curseg_mutex lock.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

J
Jaegeuk Kim 已提交
2227
/*
 * Calculate the number of current summary pages for writing (1..3).
 * SSR segments need a full segment's worth of entries; LFS segments only
 * need entries up to the current block offset (taken from the checkpoint
 * when @for_ra is set, for readahead sizing).
 */
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	/* first page must also carry both journals and the footer */
	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

J
Jaegeuk Kim 已提交
2257
/*
 * Read the summary block page of @segno from the meta area.
 * Caller should put this summary page (f2fs_put_page).
 */
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}

C
Chao Yu 已提交
2265 2266
/*
 * Overwrite the meta page at @blk_addr with one page worth of data from
 * @src and mark it dirty so it is written back.
 */
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
					void *src, block_t blk_addr)
{
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);

	memcpy(page_address(page), src, PAGE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

C
Chao Yu 已提交
2275 2276 2277
/* Persist an in-memory summary block into the meta page at @blk_addr. */
static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

2281 2282 2283 2284
/*
 * Write the current segment's summary block for log @type to the meta page
 * at @blk_addr, assembling journal (under journal_rwsem), entries and
 * footer under curseg_mutex so the snapshot is consistent.
 */
static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);
	memset(dst, 0, PAGE_SIZE);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

J
Jaegeuk Kim 已提交
2307 2308 2309 2310 2311 2312 2313 2314 2315 2316 2317
static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

J
Jaegeuk Kim 已提交
2318
/*
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG.
 *
 * Search order: first try the next segment inside the current section
 * (unless @new_sec), then scan for a free section rightward from the hint,
 * or leftward when @dir == ALLOC_LEFT and the right side is exhausted.
 * Zones already in use by another current segment are skipped once.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;	/* allow one zone-avoidance retry */
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	/* fast path: a later free segment inside the same section */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around to the beginning */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* walk left from the hint to the first free section */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}

/*
 * Switch the current segment of log @type to its prepared next_segno and
 * reinitialize its in-memory state: block offset, summary footer type,
 * and the SIT entry's segment type (dirtied when @modified).
 */
static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

2428 2429
/*
 * Pick the search hint (starting segment number) for allocating the next
 * segment of log @type.  Returning 0 makes get_new_segment() search from
 * the beginning of the main area; returning the current segno keeps the
 * original forward-allocation policy.
 */
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	/* if segs_per_sec is large than 1, we need to keep original policy. */
	if (__is_large_section(sbi))
		return CURSEG_I(sbi, type)->segno;

	/* checkpoint disabled: reuse space from the front */
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return 0;

	if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		return 0;

	/* resume where the last allocation scan stopped */
	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];

	/* find segments from 0 to reuse freed segments */
	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
		return 0;

	return CURSEG_I(sbi, type)->segno;
}

J
Jaegeuk Kim 已提交
2451
/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner: the old
 * segment's summary block is flushed first, then a fresh free segment is
 * picked and the curseg state reset.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	/* warm/cold data grows from the right end of the main area */
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	segno = __get_next_segno(sbi, type);
	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

/*
 * For SSR allocation: find the first block slot at or after @start that is
 * free in BOTH the current and checkpoint valid maps of @seg's segment, and
 * store it in seg->next_blkoff.
 */
static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	/* a block is usable only if free in both bitmaps */
	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}

J
Jaegeuk Kim 已提交
2494
/*
 * If a segment is written by LFS manner, next block offset is just obtained
 * by increasing the current block offset. However, if a segment is written by
 * SSR manner, next block offset obtained by calling __next_free_blkoff
 * (skipping over blocks that are still valid).
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

J
Jaegeuk Kim 已提交
2508
/*
 * This function always allocates a used segment(from dirty seglist) by SSR
 * manner, so it should recover the existing segment information of valid
 * blocks: the old summary is flushed, the reused segment is taken off the
 * dirty lists, and its on-disk summary entries are loaded back in.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	/* restore the summaries of the segment being reused */
	sum_page = f2fs_get_sum_page(sbi, new_segno);
	f2fs_bug_on(sbi, IS_ERR(sum_page));
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
	f2fs_put_page(sum_page, 1);
}

2540 2541 2542 2543
/*
 * Choose a victim segment for SSR allocation of log @type and store it in
 * curseg->next_segno.  Tries the same temperature first, then the other
 * temperatures of the same data/node class (coldward or hotward depending
 * on @type), and finally — with checkpoints disabled — any free segment
 * from the dirty list.  Returns 1 on success, 0 if no victim was found.
 */
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
	unsigned segno = NULL_SEGNO;
	int i, cnt;
	bool reversed = false;

	/* f2fs_need_SSR() already forces to do this */
	if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
		curseg->next_segno = segno;
		return 1;
	}

	/* For node segments, let's do SSR more intensively */
	if (IS_NODESEG(type)) {
		if (type >= CURSEG_WARM_NODE) {
			reversed = true;
			i = CURSEG_COLD_NODE;
		} else {
			i = CURSEG_HOT_NODE;
		}
		cnt = NR_CURSEG_NODE_TYPE;
	} else {
		if (type >= CURSEG_WARM_DATA) {
			reversed = true;
			i = CURSEG_COLD_DATA;
		} else {
			i = CURSEG_HOT_DATA;
		}
		cnt = NR_CURSEG_DATA_TYPE;
	}

	/* scan the remaining temperatures of the same class */
	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == type)
			continue;
		if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
			curseg->next_segno = segno;
			return 1;
		}
	}

	/* find valid_blocks=0 in dirty list */
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		segno = get_free_segment(sbi);
		if (segno != NULL_SEGNO) {
			curseg->next_segno = segno;
			return 1;
		}
	}
	return 0;
}

J
Jaegeuk Kim 已提交
2593 2594 2595 2596 2597 2598 2599
/*
 * flush out current segment and replace it with new segment
 * This function should be returned with success, otherwise BUG.
 *
 * Policy cascade (first match wins): forced new segment; warm-node log
 * without CRC recovery support; next adjacent segment free in LFS mode
 * (and checkpoints enabled); SSR reuse when space pressure demands it;
 * otherwise a plain new segment.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		new_curseg(sbi, type, true);
	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
					type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		new_curseg(sbi, type, false);
	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

C
Chao Yu 已提交
2618
/*
 * Force every data log (hot/warm/cold) onto a brand-new segment and mark
 * the previously used segments dirty so they can be reclaimed.
 */
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg;
	unsigned int old_segno;
	int i;

	down_write(&SIT_I(sbi)->sentry_lock);

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		curseg = CURSEG_I(sbi, i);
		old_segno = curseg->segno;
		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
		locate_dirty_segment(sbi, old_segno);
	}

	up_write(&SIT_I(sbi)->sentry_lock);
}

/* Default segment allocation policy used via SIT_I(sbi)->s_ops. */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

C
Chao Yu 已提交
2640 2641
/*
 * Check whether any segment in [cpc->trim_start, cpc->trim_end] would yield
 * discard candidates (dry-run via add_discard_addrs).  cpc->trim_start is
 * restored before returning.
 */
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	__u64 trim_start = cpc->trim_start;
	bool has_candidate = false;

	down_write(&SIT_I(sbi)->sentry_lock);
	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
		/* check-only mode: nothing is queued */
		if (add_discard_addrs(sbi, cpc, true)) {
			has_candidate = true;
			break;
		}
	}
	up_write(&SIT_I(sbi)->sentry_lock);

	cpc->trim_start = trim_start;
	return has_candidate;
}

2659
/*
 * Issue all pending discard commands whose logical start falls in
 * [start, end], honoring @dpolicy's granularity and max_requests batching.
 * When a batch fills up, the lock is dropped, issued commands are waited
 * on, and the scan restarts after the last issued command.
 * Returns the number of blocks actually trimmed (from waiting).
 */
static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy,
					unsigned int start, unsigned int end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	int issued;
	unsigned int trimmed = 0;

next:
	issued = 0;

	mutex_lock(&dcc->cmd_lock);
	if (unlikely(dcc->rbtree_check))
		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, start,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc && dc->lstart <= end) {
		struct rb_node *node;
		int err = 0;

		if (dc->len < dpolicy->granularity)
			goto skip;

		/* already in flight: keep it on the fstrim wait list */
		if (dc->state != D_PREP) {
			list_move_tail(&dc->list, &dcc->fstrim_list);
			goto skip;
		}

		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

		if (issued >= dpolicy->max_requests) {
			/* resume the scan right after this command */
			start = dc->lstart + dc->len;

			if (err)
				__remove_discard_cmd(sbi, dc);

			blk_finish_plug(&plug);
			mutex_unlock(&dcc->cmd_lock);
			trimmed += __wait_all_discard_cmd(sbi, NULL);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto next;
		}
skip:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);

		if (fatal_signal_pending(current))
			break;
	}

	blk_finish_plug(&plug);
	mutex_unlock(&dcc->cmd_lock);

	return trimmed;
}

2731 2732
/*
 * FITRIM implementation: discard unused blocks in the byte range described
 * by @range.  A checkpoint is written first (under gc_mutex) to settle the
 * set of invalid blocks; discards are then issued and waited for, unless
 * runtime discard is enabled, in which case they are left to the discard
 * thread.  On success, range->len is updated to the bytes trimmed.
 * Returns 0 or a negative errno.
 */
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	block_t start_block, end_block;
	struct cp_control cpc;
	struct discard_policy dpolicy;
	unsigned long long trimmed = 0;
	int err = 0;
	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	if (end < MAIN_BLKADDR(sbi))
		goto out;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"Found FS corruption, run fsck to fix.");
		return -EIO;
	}

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	/* LFS with large sections must trim whole sections */
	if (need_align) {
		start_segno = rounddown(start_segno, sbi->segs_per_sec);
		end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
	}

	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
	cpc.trim_start = start_segno;
	cpc.trim_end = end_segno;

	if (sbi->discard_blks == 0)
		goto out;

	mutex_lock(&sbi->gc_mutex);
	err = f2fs_write_checkpoint(sbi, &cpc);
	mutex_unlock(&sbi->gc_mutex);
	if (err)
		goto out;

	/*
	 * We filed discard candidates, but actually we don't need to wait for
	 * all of them, since they'll be issued in idle time along with runtime
	 * discard option. User configuration looks like using runtime discard
	 * or periodic fstrim instead of it.
	 */
	if (f2fs_realtime_discard_enable(sbi))
		goto out;

	start_block = START_BLOCK(sbi, start_segno);
	end_block = START_BLOCK(sbi, end_segno + 1);

	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);

	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);
out:
	if (!err)
		range->len = F2FS_BLK_TO_BYTES(trimmed);
	return err;
}

J
Jaegeuk Kim 已提交
2802 2803 2804 2805 2806 2807 2808 2809
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

C
Chao Yu 已提交
2810
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821
{
	switch (hint) {
	case WRITE_LIFE_SHORT:
		return CURSEG_HOT_DATA;
	case WRITE_LIFE_EXTREME:
		return CURSEG_COLD_DATA;
	default:
		return CURSEG_WARM_DATA;
	}
}

2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854
/* This returns write hints for each segment type. This hints will be
 * passed down to block layer. There are mapping tables which depend on
 * the mount option 'whint_mode'.
 *
 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
 *
 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_NOT_SET
 *                       HOT_NODE                 "
 *                       WARM_NODE                "
 *                       COLD_NODE                "
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
 *
2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2878 2879 2880
 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_MEDIUM;
 *                       HOT_NODE                 WRITE_LIFE_NOT_SET
 *                       WARM_NODE                "
 *                       COLD_NODE                WRITE_LIFE_NONE
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2881 2882
 */

C
Chao Yu 已提交
2883
/*
 * Translate an f2fs page type and temperature into the write-life hint
 * passed down to the block layer, according to the mount-time whint_mode
 * (see the mapping tables in the comment above).  WHINT_MODE_OFF (and any
 * unmatched combination) yields WRITE_LIFE_NOT_SET.
 */
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_NOT_SET;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else {
			/* node and meta writes carry no user hint */
			return WRITE_LIFE_NOT_SET;
		}
	} else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_LONG;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else if (type == NODE) {
			if (temp == WARM || temp == HOT)
				return WRITE_LIFE_NOT_SET;
			else if (temp == COLD)
				return WRITE_LIFE_NONE;
		} else if (type == META) {
			return WRITE_LIFE_MEDIUM;
		}
	}
	return WRITE_LIFE_NOT_SET;
}

2917
static int __get_segment_type_2(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2918
{
2919
	if (fio->type == DATA)
J
Jaegeuk Kim 已提交
2920 2921 2922 2923 2924
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

2925
static int __get_segment_type_4(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2926
{
2927 2928
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
J
Jaegeuk Kim 已提交
2929 2930 2931 2932 2933 2934

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
2935
		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
2936
			return CURSEG_WARM_NODE;
J
Jaegeuk Kim 已提交
2937 2938 2939 2940 2941
		else
			return CURSEG_COLD_NODE;
	}
}

2942
static int __get_segment_type_6(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2943
{
2944 2945
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
J
Jaegeuk Kim 已提交
2946

2947
		if (is_cold_data(fio->page) || file_is_cold(inode))
J
Jaegeuk Kim 已提交
2948
			return CURSEG_COLD_DATA;
C
Chao Yu 已提交
2949
		if (file_is_hot(inode) ||
2950
				is_inode_flag_set(inode, FI_HOT_DATA) ||
2951 2952
				f2fs_is_atomic_file(inode) ||
				f2fs_is_volatile_file(inode))
2953
			return CURSEG_HOT_DATA;
C
Chao Yu 已提交
2954
		return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
J
Jaegeuk Kim 已提交
2955
	} else {
2956 2957
		if (IS_DNODE(fio->page))
			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
J
Jaegeuk Kim 已提交
2958
						CURSEG_HOT_NODE;
2959
		return CURSEG_COLD_NODE;
J
Jaegeuk Kim 已提交
2960 2961 2962
	}
}

2963
static int __get_segment_type(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2964
{
J
Jaegeuk Kim 已提交
2965 2966
	int type = 0;

2967
	switch (F2FS_OPTION(fio->sbi).active_logs) {
J
Jaegeuk Kim 已提交
2968
	case 2:
J
Jaegeuk Kim 已提交
2969 2970
		type = __get_segment_type_2(fio);
		break;
J
Jaegeuk Kim 已提交
2971
	case 4:
J
Jaegeuk Kim 已提交
2972 2973 2974 2975 2976 2977 2978
		type = __get_segment_type_4(fio);
		break;
	case 6:
		type = __get_segment_type_6(fio);
		break;
	default:
		f2fs_bug_on(fio->sbi, true);
J
Jaegeuk Kim 已提交
2979
	}
2980

J
Jaegeuk Kim 已提交
2981 2982 2983 2984 2985 2986 2987
	if (IS_HOT(type))
		fio->temp = HOT;
	else if (IS_WARM(type))
		fio->temp = WARM;
	else
		fio->temp = COLD;
	return type;
J
Jaegeuk Kim 已提交
2988 2989
}

C
Chao Yu 已提交
2990
/*
 * Allocate the next free block in the current segment of @type and return
 * its address in *@new_blkaddr, recording the summary entry @sum.  A valid
 * @old_blkaddr is invalidated in the SIT.  If @add_list is true, @fio is
 * queued on the matching per-type/temperature write_io list so callers can
 * submit it in allocation order.
 *
 * Locking: takes SM_I curseg_lock (read), curseg_mutex and sentry_lock
 * (write), in that order.
 */
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type,
		struct f2fs_io_info *fio, bool add_list)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	down_read(&SM_I(sbi)->curseg_lock);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/* make sure a pending discard on the target block has completed */
	f2fs_wait_discard_bio(sbi, *new_blkaddr);

	/*
	 * __add_sum_entry should be resided under the curseg_mutex
	 * because, this function updates a summary entry in the
	 * current summary block.
	 */
	__add_sum_entry(sbi, type, sum);

	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	update_sit_entry(sbi, *new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

	/* current segment is full: move on to a fresh/SSR segment */
	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);

	/*
	 * segment dirty status should be updated after segment allocation,
	 * so we just need to update status only one time after previous
	 * segment being closed.
	 */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));

	up_write(&sit_i->sentry_lock);

	if (page && IS_NODESEG(type)) {
		/* stamp the next free block address into the node footer */
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

		f2fs_inode_chksum_set(sbi, page);
	}

	if (add_list) {
		struct f2fs_bio_info *io;

		INIT_LIST_HEAD(&fio->list);
		fio->in_list = true;
		fio->retry = false;
		io = sbi->write_io[fio->type] + fio->temp;
		spin_lock(&io->io_lock);
		list_add_tail(&fio->list, &io->io_list);
		spin_unlock(&io->io_lock);
	}

	mutex_unlock(&curseg->curseg_mutex);

	up_read(&SM_I(sbi)->curseg_lock);
}

C
Chao Yu 已提交
3062 3063 3064 3065 3066 3067 3068 3069 3070 3071 3072
/*
 * Mark the device holding fio->new_blkaddr as dirty: per-inode via
 * f2fs_set_dirty_device() (for fsync flushing) and in the global
 * dirty_device bitmap (for checkpoint).  No-op when sbi->s_ndevs is 0 —
 * presumably the single-device case; verify against setup code.
 */
static void update_device_state(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int devidx;

	if (!sbi->s_ndevs)
		return;

	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);

	/* update device state for fsync */
	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);

	/* update device state for checkpoint */
	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
		spin_lock(&sbi->dev_lock);
		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}
}

3083
/*
 * Allocate a new block for @fio and submit the page write, retrying the
 * allocation if submission asks for it (fio->retry).  In LFS mode, cold
 * data writes are serialized under io_order_lock to keep allocation and
 * submission order consistent.
 */
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
	int type = __get_segment_type(fio);
	bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);

	if (keep_order)
		down_read(&fio->sbi->io_order_lock);
reallocate:
	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
			&fio->new_blkaddr, sum, type, fio, true);
	/* drop any stale meta-mapping page cached for the old address */
	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(fio->sbi),
					fio->old_blkaddr, fio->old_blkaddr);

	/* writeout dirty page into bdev */
	f2fs_submit_page_write(fio);
	if (fio->retry) {
		/* submission failed to merge; allocate again from new addr */
		fio->old_blkaddr = fio->new_blkaddr;
		goto reallocate;
	}

	update_device_state(fio);

	if (keep_order)
		up_read(&fio->sbi->io_order_lock);
}

C
Chao Yu 已提交
3110
/*
 * Write one meta page in place (old == new == page->index) with
 * synchronous, high-priority flags, and account the I/O under @io_type.
 * Pages beyond the main area lose the REQ_META flag.
 */
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
					enum iostat_type io_type)
{
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.type = META,
		.temp = HOT,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
		.old_blkaddr = page->index,
		.new_blkaddr = page->index,
		.page = page,
		.encrypted_page = NULL,
		.in_list = false,
	};

	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
		fio.op_flags &= ~REQ_META;

	set_page_writeback(page);
	ClearPageError(page);
	f2fs_submit_page_write(&fio);

	stat_inc_meta_count(sbi, page->index);
	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
}

C
Chao Yu 已提交
3137
/*
 * Write one node page: build a summary entry keyed by @nid and hand the
 * I/O to do_write_page(), then account the written block.
 */
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
{
	struct f2fs_summary sum;

	set_summary(&sum, nid, 0, 0);
	do_write_page(&sum, fio);

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
}

C
Chao Yu 已提交
3147 3148
/*
 * Out-of-place data write: allocate a new block for the page referenced
 * by @dn, write it, and point the dnode at the new block address.
 * The caller must hold a valid data_blkaddr (never NULL_ADDR).
 */
void f2fs_outplace_write_data(struct dnode_of_data *dn,
					struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	struct f2fs_summary sum;

	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
	do_write_page(&sum, fio);
	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);

	f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
}

C
Chao Yu 已提交
3161
/*
 * In-place data write: reuse the old block address and submit the page
 * directly via bio.  Returns 0 on success or the negative error from
 * f2fs_submit_page_bio().
 */
int f2fs_inplace_write_data(struct f2fs_io_info *fio)
{
	int err;
	struct f2fs_sb_info *sbi = fio->sbi;

	fio->new_blkaddr = fio->old_blkaddr;
	/* i/o temperature is needed for passing down write hints */
	__get_segment_type(fio);

	/* the target block must live in a data segment */
	f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
			GET_SEGNO(sbi, fio->new_blkaddr))->type));

	stat_inc_inplace_blocks(fio->sbi);

	err = f2fs_submit_page_bio(fio);
	if (!err)
		update_device_state(fio);

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);

	return err;
}

C
Chao Yu 已提交
3184 3185 3186 3187 3188 3189 3190 3191 3192 3193 3194 3195
/*
 * Return the curseg type whose current segment is @segno, or
 * NO_CHECK_TYPE when @segno is not any current segment.
 */
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	int type;

	for (type = CURSEG_HOT_DATA; type < NO_CHECK_TYPE; type++)
		if (CURSEG_I(sbi, type)->segno == segno)
			return type;

	return NO_CHECK_TYPE;
}

C
Chao Yu 已提交
3196
/*
 * Rewrite the summary for @new_blkaddr in place of @old_blkaddr (used by
 * recovery and block migration).  Temporarily retargets the matching
 * current segment to the segment of @new_blkaddr; when @recover_curseg is
 * true the previous curseg position is restored afterwards.  SIT entries
 * are updated for the new address (unless suppressed by
 * !recover_newaddr during curseg recovery) and the old one.
 */
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg, bool recover_newaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	down_write(&SM_I(sbi)->curseg_lock);

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (IS_CURSEG(sbi, segno)) {
			/* se->type is volatile as SSR allocation */
			type = __f2fs_get_curseg(sbi, segno);
			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
		} else {
			type = CURSEG_WARM_DATA;
		}
	}

	/* replacement only happens inside data segments */
	f2fs_bug_on(sbi, !IS_DATASEG(type));
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	/* remember where the curseg was, to restore it below if requested */
	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	if (!recover_curseg || recover_newaddr)
		update_sit_entry(sbi, new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		/* drop stale cached meta page for the old address */
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
		update_sit_entry(sbi, old_blkaddr, -1);
	}

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		/* put the curseg back where it was before we moved it */
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			change_curseg(sbi, type);
		}
		curseg->next_blkoff = old_blkoff;
	}

	up_write(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
	up_write(&SM_I(sbi)->curseg_lock);
}

3275 3276
/*
 * Convenience wrapper around f2fs_do_replace_block(): builds the summary
 * from @dn/@version, performs the replacement, then updates the dnode to
 * point at @new_addr.
 */
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
				unsigned char version, bool recover_curseg,
				bool recover_newaddr)
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
					recover_curseg, recover_newaddr);

	f2fs_update_data_blkaddr(dn, new_addr);
}

3290
/*
 * Wait for writeback of @page to finish.  Any merged write containing the
 * page is submitted first.  With @ordered, wait until writeback has fully
 * completed (and, if @locked, assert it really has); otherwise only wait
 * for the page to become stable.
 */
void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type, bool ordered, bool locked)
{
	struct f2fs_sb_info *sbi;

	if (!PageWriteback(page))
		return;

	sbi = F2FS_P_SB(page);

	/* flush any bio this page may be merged into */
	f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);

	if (!ordered) {
		wait_for_stable_page(page);
		return;
	}

	wait_on_page_writeback(page);
	f2fs_bug_on(sbi, locked && PageWriteback(page));
}

3306
/*
 * For inodes that require post-read processing, wait for the writeback of
 * the meta-mapping page caching @blkaddr, if one exists.  Invalid block
 * addresses are ignored.
 */
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *cpage;

	if (!f2fs_post_read_required(inode))
		return;

	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return;

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (!cpage)
		return;

	f2fs_wait_on_page_writeback(cpage, DATA, true, true);
	f2fs_put_page(cpage, 1);
}

3324 3325 3326 3327 3328 3329 3330 3331 3332
/* Wait for writeback of each block in [blkaddr, blkaddr + len). */
void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
								block_t len)
{
	block_t off;

	for (off = 0; off != len; off++)
		f2fs_wait_on_block_writeback(inode, blkaddr + off);
}

3333
/*
 * Restore the current segments' data summaries from the compacted summary
 * blocks written at checkpoint time: NAT journal, SIT journal, then the
 * per-segment summary entries, crossing meta pages as needed.
 * Returns 0 or a negative error from f2fs_get_meta_page().
 */
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

	page = f2fs_get_meta_page(sbi, start++);
	if (IS_ERR(page))
		return PTR_ERR(page);
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		/* SSR segments carry a full segment worth of entries */
		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
						SUM_FOOTER_SIZE)
				continue;

			/* current page exhausted: move to the next one */
			f2fs_put_page(page, 1);
			page = NULL;

			page = f2fs_get_meta_page(sbi, start++);
			if (IS_ERR(page))
				return PTR_ERR(page);
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
	return 0;
}

/*
 * Restore one current segment's summary block of @type from its
 * checkpoint location (or from the node segment itself when node
 * summaries were not written).  Fills the curseg journal, entries,
 * footer, and position.  Returns 0 or a negative error.
 */
static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;
	int err = 0;

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
		if (__exist_node_summaries(sbi))
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

	new = f2fs_get_meta_page(sbi, blk_addr);
	if (IS_ERR(new))
		return PTR_ERR(new);
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
		if (__exist_node_summaries(sbi)) {
			/* node summaries exist: only clear per-entry extras */
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
			/* rebuild summaries by scanning the node segment */
			err = f2fs_restore_node_summary(sbi, segno, sum);
			if (err)
				goto out;
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);

	/* update journal info */
	down_write(&curseg->journal_rwsem);
	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
	up_write(&curseg->journal_rwsem);

	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
out:
	f2fs_put_page(new, 1);
	return err;
}

/*
 * Restore all current-segment summaries at mount time: compacted data
 * summaries first (if the checkpoint used them), then normal summary
 * blocks, with readahead of the summary area.  Finally sanity-check the
 * restored NAT/SIT journal entry counts.  Returns 0, a negative read
 * error, or -EINVAL on a corrupt journal count.
 */
static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
	int type = CURSEG_HOT_DATA;
	int err;

	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
		int npages = f2fs_npages_for_summary_flush(sbi, true);

		if (npages >= 2)
			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
							META_CP, true);

		/* restore for compacted data summary */
		err = read_compacted_summaries(sbi);
		if (err)
			return err;
		/* data summaries done; continue with node summaries */
		type = CURSEG_HOT_NODE;
	}

	if (__exist_node_summaries(sbi))
		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
					NR_CURSEG_TYPE - type, META_CP, true);

	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

	/* sanity check for summary blocks */
	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
		return -EINVAL;

	return 0;
}

/*
 * Write the current data segments' summaries in compacted form starting
 * at @blkaddr: NAT journal, SIT journal, then the summary entries of the
 * hot/warm/cold data cursegs, spilling onto additional meta pages when a
 * page fills up.
 */
static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = f2fs_grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);
	memset(kaddr, 0, PAGE_SIZE);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;
		seg_i = CURSEG_I(sbi, i);
		/* SSR segments flush a full segment worth of entries */
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
				/* start a fresh meta page */
				page = f2fs_grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				memset(kaddr, 0, PAGE_SIZE);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			/* page full: dirty it and move on */
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

/*
 * Write the current summary pages of every curseg in the data or node
 * group starting at @type, one meta block each from @blkaddr onwards.
 */
static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int nr = IS_DATASEG(type) ? NR_CURSEG_DATA_TYPE : NR_CURSEG_NODE_TYPE;
	int off;

	for (off = 0; off < nr; off++)
		write_current_sum_page(sbi, type + off, blkaddr + off);
}

C
Chao Yu 已提交
3580
/*
 * Flush the data-segment summaries at checkpoint: compacted form when the
 * checkpoint flag says so, otherwise one normal summary block per curseg.
 */
void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

C
Chao Yu 已提交
3588
/* Flush the node-segment summaries (always in normal form). */
void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}

C
Chao Yu 已提交
3593
/*
 * Look up @val (a nid for NAT_JOURNAL, a segno for SIT_JOURNAL) in the
 * in-memory journal.  Returns the entry index on a hit; with @alloc set
 * and free space available, grows the journal and returns the new slot.
 * Returns -1 on miss with no allocation.
 */
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
		for (i = 0; i < nats_in_cursum(journal); i++) {
			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
				return i;
		}
		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
			return update_nats_in_cursum(journal, 1);
	} else if (type == SIT_JOURNAL) {
		for (i = 0; i < sits_in_cursum(journal); i++)
			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
				return i;
		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
			return update_sits_in_cursum(journal, 1);
	}
	return -1;
}

/* Read the SIT block currently holding @segno's entry (must not fail). */
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}

/*
 * Prepare the "other" copy of the SIT block covering @start for writeout:
 * grab the destination meta page, fill it from the in-memory seg entries,
 * dirty it, and flip the SIT bitmap so the new copy becomes current.
 */
static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct page *page;
	pgoff_t src_off, dst_off;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

	page = f2fs_grab_meta_page(sbi, dst_off);
	seg_info_to_sit_page(sbi, page, start);

	set_page_dirty(page);
	set_to_next_sit(sit_i, start);

	return page;
}

3640 3641 3642
/*
 * Allocate and initialize an empty sit_entry_set from its slab.
 * NOTE(review): the allocation result is used unchecked — presumably
 * f2fs_kmem_cache_alloc() cannot fail here; confirm its semantics.
 */
static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

/* Unlink @ses from its list and return it to the slab. */
static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

/*
 * Re-sort @ses within @head after its entry_cnt grew, keeping the list
 * ordered by ascending entry count.
 */
static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	/* find the first set with an entry count >= ours */
	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	list_move_tail(&ses->set_list, &next->set_list);
}

/*
 * Account one dirty SIT entry for @segno: bump the count of the existing
 * set covering its SIT block, or create a new set for that block.
 */
static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

/*
 * Walk the dirty-sentries bitmap and account every dirty segment into
 * the per-SIT-block entry sets.
 */
static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}

/*
 * Empty the in-memory SIT journal: mark each journaled segment dirty in
 * the SIT (accounting it into the entry sets if it was not already
 * dirty), then reset the journal entry count to zero.
 */
static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(journal, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		/* already-dirty segments were accounted by add_sits_in_set */
		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	/* drop all i journaled entries */
	update_sits_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}

J
Jaegeuk Kim 已提交
3723
/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
	struct seg_entry *se;

	down_write(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there are no enough space in journal to store dirty sit
	 * entries, remove all entries from journal and add and account
	 * them in sit entry set.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		/* once the journal cannot hold a set, fall back to SIT pages */
		if (to_journal &&
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);
#ifdef CONFIG_F2FS_CHECK_FS
			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
						SIT_VBLOCK_MAP_SIZE))
				f2fs_bug_on(sbi, 1);
#endif

			/* add discard candidates */
			if (!(cpc->reason & CP_DISCARD)) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc, false);
			}

			if (to_journal) {
				offset = f2fs_lookup_journal_in_cursum(journal,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(journal, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
					&sit_in_journal(journal, offset));
				check_block_count(sbi, segno,
					&sit_in_journal(journal, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
				check_block_count(sbi, segno,
						&raw_sit->entries[sit_offset]);
			}

			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
			f2fs_put_page(page, 1);

		/* the whole set must have been flushed */
		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason & CP_DISCARD) {
		__u64 trim_start = cpc->trim_start;

		/* collect discard candidates over the trim range */
		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc, false);

		cpc->trim_start = trim_start;
	}
	up_write(&sit_i->sentry_lock);

	set_prefree_as_free_segments(sbi);
}

/*
 * Allocate and initialize the in-memory SIT (Segment Information Table).
 *
 * Builds SM_I(sbi)->sit_info: the per-segment seg_entry array with its
 * validity/checkpoint/discard bitmaps, the dirty-sentry bitmap, the
 * per-section entries (large-section layout only), and an in-memory copy
 * of the on-disk SIT bitmap from the checkpoint pack.
 *
 * Returns 0 on success or -ENOMEM.  On failure, already-allocated pieces
 * are left attached to SM_I(sbi)->sit_info; the caller is expected to
 * release them via the segment-manager teardown path.
 */
static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
	char *src_bitmap;
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	/* one seg_entry per main-area segment; may be large, so kvzalloc */
	sit_i->sentries =
		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
					      MAIN_SEGS(sbi)),
			      GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

	/* per-segment block bitmaps: current, checkpointed, and discard */
	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].ckpt_valid_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map ||
				!sit_i->sentries[start].ckpt_valid_map)
			return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
		/* mirror copy used to detect bitmap corruption at runtime */
		sit_i->sentries[start].cur_valid_map_mir
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map_mir)
			return -ENOMEM;
#endif

		sit_i->sentries[start].discard_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
							GFP_KERNEL);
		if (!sit_i->sentries[start].discard_map)
			return -ENOMEM;
	}

	/* scratch bitmap shared by SSR/discard computations */
	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (__is_large_section(sbi)) {
		/* section-granularity valid-block counters */
		sit_i->sec_entries =
			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
						      MAIN_SECS(sbi)),
				      GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related with SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from checkpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;
#endif

	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
	sit_i->written_valid_blocks = 0;
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = ktime_get_real_seconds();
	init_rwsem(&sit_i->sentry_lock);
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
C
Chao Yu 已提交
3948
	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
3949 3950 3951 3952 3953
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

3954
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
C
Chao Yu 已提交
3955
	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3956 3957 3958
	if (!free_i->free_segmap)
		return -ENOMEM;

3959
	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
C
Chao Yu 已提交
3960
	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3961 3962 3963 3964 3965 3966 3967 3968
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
3969
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
J
Jaegeuk Kim 已提交
3970 3971
	free_i->free_segments = 0;
	free_i->free_sections = 0;
3972
	spin_lock_init(&free_i->segmap_lock);
J
Jaegeuk Kim 已提交
3973 3974 3975 3976 3977
	return 0;
}

static int build_curseg(struct f2fs_sb_info *sbi)
{
N
Namjae Jeon 已提交
3978
	struct curseg_info *array;
J
Jaegeuk Kim 已提交
3979 3980
	int i;

3981 3982
	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
			     GFP_KERNEL);
J
Jaegeuk Kim 已提交
3983 3984 3985 3986 3987 3988 3989
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
C
Chao Yu 已提交
3990
		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3991 3992
		if (!array[i].sum_blk)
			return -ENOMEM;
3993
		init_rwsem(&array[i].journal_rwsem);
C
Chao Yu 已提交
3994 3995
		array[i].journal = f2fs_kzalloc(sbi,
				sizeof(struct f2fs_journal), GFP_KERNEL);
3996 3997
		if (!array[i].journal)
			return -ENOMEM;
J
Jaegeuk Kim 已提交
3998 3999 4000 4001 4002 4003
		array[i].segno = NULL_SEGNO;
		array[i].next_blkoff = 0;
	}
	return restore_curseg_summaries(sbi);
}

/*
 * Populate the in-memory seg_entry array from the on-disk SIT blocks,
 * then overlay the newer entries cached in the cold-data curseg journal.
 *
 * Also derives each segment's discard map (everything discardable when the
 * CP_TRIMMED flag is set, otherwise the complement of the valid map),
 * accumulates sbi->discard_blks, per-section valid counts (large-section
 * layout), and cross-checks the node-block total against the checkpoint.
 *
 * Returns 0 on success, a negative errno from page/readahead/sanity
 * failures, or -EINVAL (with SBI_NEED_FSCK set) on corrupted SIT data.
 */
static int build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct seg_entry *se;
	struct f2fs_sit_entry sit;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int err = 0;
	/* running total of valid node blocks; verified at the end */
	block_t total_node_blocks = 0;

	do {
		/* read ahead a batch of SIT meta pages for the pass below */
		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
							META_SIT, true);

		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct f2fs_sit_block *sit_blk;
			struct page *page;

			se = &sit_i->sentries[start];
			page = get_current_sit_page(sbi, start);
			if (IS_ERR(page))
				return PTR_ERR(page);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);

			err = check_block_count(sbi, start, &sit);
			if (err)
				return err;
			seg_info_from_raw_sit(se, &sit);
			if (IS_NODESEG(se->type))
				total_node_blocks += se->valid_blocks;

			/* build discard map only one time */
			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
				/* fully trimmed: nothing left to discard */
				memset(se->discard_map, 0xff,
					SIT_VBLOCK_MAP_SIZE);
			} else {
				memcpy(se->discard_map,
					se->cur_valid_map,
					SIT_VBLOCK_MAP_SIZE);
				sbi->discard_blks +=
					sbi->blocks_per_seg -
					se->valid_blocks;
			}

			if (__is_large_section(sbi))
				get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);

	/* journal entries are newer than the SIT blocks; apply them on top */
	down_read(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));
		if (start >= MAIN_SEGS(sbi)) {
			f2fs_msg(sbi->sb, KERN_ERR,
					"Wrong journal entry on segno %u",
					start);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			err = -EINVAL;
			break;
		}

		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

		old_valid_blocks = se->valid_blocks;
		/* back out the stale contribution before re-adding below */
		if (IS_NODESEG(se->type))
			total_node_blocks -= old_valid_blocks;

		err = check_block_count(sbi, start, &sit);
		if (err)
			break;
		seg_info_from_raw_sit(se, &sit);
		if (IS_NODESEG(se->type))
			total_node_blocks += se->valid_blocks;

		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
		} else {
			memcpy(se->discard_map, se->cur_valid_map,
						SIT_VBLOCK_MAP_SIZE);
			/* adjust for the delta between old and new counts */
			sbi->discard_blks += old_valid_blocks;
			sbi->discard_blks -= se->valid_blocks;
		}

		if (__is_large_section(sbi)) {
			get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
			get_sec_entry(sbi, start)->valid_blocks -=
							old_valid_blocks;
		}
	}
	up_read(&curseg->journal_rwsem);

	/* cross-check against the checkpoint's node count */
	if (!err && total_node_blocks != valid_node_count(sbi)) {
		f2fs_msg(sbi->sb, KERN_ERR,
			"SIT is corrupted node# %u vs %u",
			total_node_blocks, valid_node_count(sbi));
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		err = -EINVAL;
	}

	return err;
}

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

4125
	for (start = 0; start < MAIN_SEGS(sbi); start++) {
J
Jaegeuk Kim 已提交
4126 4127 4128
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
4129 4130 4131
		else
			SIT_I(sbi)->written_valid_blocks +=
						sentry->valid_blocks;
J
Jaegeuk Kim 已提交
4132 4133 4134 4135 4136 4137 4138 4139 4140 4141 4142 4143 4144
	}

	/* set use the current segments */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}

static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
4145
	unsigned int segno = 0, offset = 0;
J
Jaegeuk Kim 已提交
4146 4147
	unsigned short valid_blocks;

4148
	while (1) {
J
Jaegeuk Kim 已提交
4149
		/* find dirty segment based on free segmap */
4150 4151
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
4152 4153
			break;
		offset = segno + 1;
4154
		valid_blocks = get_valid_blocks(sbi, segno, false);
4155
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
J
Jaegeuk Kim 已提交
4156
			continue;
4157 4158 4159 4160
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
J
Jaegeuk Kim 已提交
4161 4162 4163 4164 4165 4166
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

4167
static int init_victim_secmap(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4168 4169
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4170
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
J
Jaegeuk Kim 已提交
4171

C
Chao Yu 已提交
4172
	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4173
	if (!dirty_i->victim_secmap)
J
Jaegeuk Kim 已提交
4174 4175 4176 4177 4178 4179 4180 4181 4182 4183
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
C
Chao Yu 已提交
4184 4185
	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
4186 4187 4188 4189 4190 4191
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

4192
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
J
Jaegeuk Kim 已提交
4193 4194

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
C
Chao Yu 已提交
4195 4196
		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
4197 4198 4199 4200 4201
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
4202
	return init_victim_secmap(sbi);
J
Jaegeuk Kim 已提交
4203 4204
}

J
Jaegeuk Kim 已提交
4205
/*
J
Jaegeuk Kim 已提交
4206 4207 4208 4209 4210 4211 4212
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

4213
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
4214

4215
	sit_i->min_mtime = ULLONG_MAX;
J
Jaegeuk Kim 已提交
4216

4217
	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
J
Jaegeuk Kim 已提交
4218 4219 4220 4221 4222 4223 4224 4225 4226 4227 4228
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
C
Chao Yu 已提交
4229
	sit_i->max_mtime = get_mtime(sbi, false);
4230
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
4231 4232
}

C
Chao Yu 已提交
4233
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4234 4235 4236
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
N
Namjae Jeon 已提交
4237
	struct f2fs_sm_info *sm_info;
J
Jaegeuk Kim 已提交
4238 4239
	int err;

C
Chao Yu 已提交
4240
	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
4241 4242 4243 4244 4245 4246 4247 4248 4249 4250 4251 4252
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4253 4254
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
J
Jaegeuk Kim 已提交
4255 4256 4257
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

4258 4259
	if (!test_opt(sbi, LFS))
		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4260
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4261
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4262
	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4263
	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
C
Chao Yu 已提交
4264
	sm_info->min_ssr_sections = reserved_sections(sbi);
J
Jaegeuk Kim 已提交
4265

4266 4267
	INIT_LIST_HEAD(&sm_info->sit_entry_set);

C
Chao Yu 已提交
4268 4269
	init_rwsem(&sm_info->curseg_lock);

4270
	if (!f2fs_readonly(sbi->sb)) {
C
Chao Yu 已提交
4271
		err = f2fs_create_flush_cmd_control(sbi);
4272
		if (err)
4273
			return err;
4274 4275
	}

4276 4277 4278 4279
	err = create_discard_cmd_control(sbi);
	if (err)
		return err;

J
Jaegeuk Kim 已提交
4280 4281 4282 4283 4284 4285 4286 4287 4288 4289 4290
	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
4291 4292 4293
	err = build_sit_entries(sbi);
	if (err)
		return err;
J
Jaegeuk Kim 已提交
4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 4308 4309

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}

/*
 * Free one dirty-type bitmap and reset its dirty counter, serialized
 * against concurrent seglist users by the seglist lock.
 */
static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
	kvfree(dirty_i->dirty_segmap[dirty_type]);
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

4315
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4316 4317
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4318
	kvfree(dirty_i->victim_secmap);
J
Jaegeuk Kim 已提交
4319 4320 4321 4322 4323 4324 4325 4326 4327 4328 4329 4330 4331 4332
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

4333
	destroy_victim_secmap(sbi);
J
Jaegeuk Kim 已提交
4334
	SM_I(sbi)->dirty_info = NULL;
4335
	kvfree(dirty_i);
J
Jaegeuk Kim 已提交
4336 4337 4338 4339 4340 4341 4342 4343 4344 4345
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
4346
	for (i = 0; i < NR_CURSEG_TYPE; i++) {
4347 4348
		kvfree(array[i].sum_blk);
		kvfree(array[i].journal);
4349
	}
4350
	kvfree(array);
J
Jaegeuk Kim 已提交
4351 4352 4353 4354 4355 4356 4357 4358
}

/* Release the free segment/section bitmaps and their container. */
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;

	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
	kvfree(free_i->free_segmap);
	kvfree(free_i->free_secmap);
	kvfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int start;

	if (!sit_i)
		return;

	if (sit_i->sentries) {
4373
		for (start = 0; start < MAIN_SEGS(sbi); start++) {
4374
			kvfree(sit_i->sentries[start].cur_valid_map);
C
Chao Yu 已提交
4375
#ifdef CONFIG_F2FS_CHECK_FS
4376
			kvfree(sit_i->sentries[start].cur_valid_map_mir);
C
Chao Yu 已提交
4377
#endif
4378 4379
			kvfree(sit_i->sentries[start].ckpt_valid_map);
			kvfree(sit_i->sentries[start].discard_map);
J
Jaegeuk Kim 已提交
4380 4381
		}
	}
4382
	kvfree(sit_i->tmp_map);
J
Jaegeuk Kim 已提交
4383

4384 4385 4386
	kvfree(sit_i->sentries);
	kvfree(sit_i->sec_entries);
	kvfree(sit_i->dirty_sentries_bitmap);
J
Jaegeuk Kim 已提交
4387 4388

	SM_I(sbi)->sit_info = NULL;
4389
	kvfree(sit_i->sit_bitmap);
4390
#ifdef CONFIG_F2FS_CHECK_FS
4391
	kvfree(sit_i->sit_bitmap_mir);
4392
#endif
4393
	kvfree(sit_i);
J
Jaegeuk Kim 已提交
4394 4395
}

C
Chao Yu 已提交
4396
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4397 4398
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
4399

4400 4401
	if (!sm_info)
		return;
C
Chao Yu 已提交
4402
	f2fs_destroy_flush_cmd_control(sbi, true);
4403
	destroy_discard_cmd_control(sbi);
J
Jaegeuk Kim 已提交
4404 4405 4406 4407 4408
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
4409
	kvfree(sm_info);
J
Jaegeuk Kim 已提交
4410
}
4411

C
Chao Yu 已提交
4412
int __init f2fs_create_segment_manager_caches(void)
4413 4414
{
	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4415
			sizeof(struct discard_entry));
4416
	if (!discard_entry_slab)
4417 4418
		goto fail;

4419 4420 4421
	discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
			sizeof(struct discard_cmd));
	if (!discard_cmd_slab)
C
Chao Yu 已提交
4422
		goto destroy_discard_entry;
C
Chao Yu 已提交
4423

4424
	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4425
			sizeof(struct sit_entry_set));
4426
	if (!sit_entry_set_slab)
4427
		goto destroy_discard_cmd;
J
Jaegeuk Kim 已提交
4428 4429 4430 4431 4432

	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
			sizeof(struct inmem_pages));
	if (!inmem_entry_slab)
		goto destroy_sit_entry_set;
4433
	return 0;
4434

J
Jaegeuk Kim 已提交
4435 4436
destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
4437 4438
destroy_discard_cmd:
	kmem_cache_destroy(discard_cmd_slab);
C
Chao Yu 已提交
4439
destroy_discard_entry:
4440 4441 4442
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
4443 4444
}

C
Chao Yu 已提交
4445
void f2fs_destroy_segment_manager_caches(void)
4446
{
4447
	kmem_cache_destroy(sit_entry_set_slab);
4448
	kmem_cache_destroy(discard_cmd_slab);
4449
	kmem_cache_destroy(discard_entry_slab);
J
Jaegeuk Kim 已提交
4450
	kmem_cache_destroy(inmem_entry_slab);
4451
}