segment.c 112.4 KB
Newer Older
C
Chao Yu 已提交
1
// SPDX-License-Identifier: GPL-2.0
J
Jaegeuk Kim 已提交
2
/*
J
Jaegeuk Kim 已提交
3 4 5 6 7 8 9 10 11
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
12
#include <linux/prefetch.h>
13
#include <linux/kthread.h>
14
#include <linux/swap.h>
15
#include <linux/timer.h>
16
#include <linux/freezer.h>
17
#include <linux/sched/signal.h>
J
Jaegeuk Kim 已提交
18 19 20 21

#include "f2fs.h"
#include "segment.h"
#include "node.h"
22
#include "gc.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

26 27
#define __reverse_ffz(x) __reverse_ffs(~(x))

28
static struct kmem_cache *discard_entry_slab;
29
static struct kmem_cache *discard_cmd_slab;
30
static struct kmem_cache *sit_entry_set_slab;
J
Jaegeuk Kim 已提交
31
static struct kmem_cache *inmem_entry_slab;
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

48 49 50 51 52 53 54 55 56
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
57
	if ((word & 0xffffffff00000000UL) == 0)
58
		num += 32;
59
	else
60 61
		word >>= 32;
#endif
62
	if ((word & 0xffff0000) == 0)
63
		num += 16;
64
	else
65
		word >>= 16;
66 67

	if ((word & 0xff00) == 0)
68
		num += 8;
69
	else
70
		word >>= 8;
71

72 73 74 75
	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;
76

77 78 79 80
	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;
81

82 83 84 85 86 87
	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
A
arter97 已提交
88
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
89
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
F
Fan Li 已提交
90
 * @size must be integral times of unsigned long.
91
 * Example:
92 93 94
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
95 96 97 98 99
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
F
Fan Li 已提交
100
	unsigned long result = size;
101 102 103 104 105
	unsigned long tmp;

	if (offset >= size)
		return size;

F
Fan Li 已提交
106
	size -= (offset & ~(BITS_PER_LONG - 1));
107
	offset %= BITS_PER_LONG;
108

F
Fan Li 已提交
109 110 111
	while (1) {
		if (*p == 0)
			goto pass;
112

113
		tmp = __reverse_ulong((unsigned char *)p);
F
Fan Li 已提交
114 115 116 117

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
118
		if (tmp)
F
Fan Li 已提交
119 120 121 122
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
123
		size -= BITS_PER_LONG;
F
Fan Li 已提交
124
		offset = 0;
125
		p++;
126
	}
F
Fan Li 已提交
127 128 129
	return result;
found:
	return result - size + __reverse_ffs(tmp);
130 131 132 133 134 135
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
136
	unsigned long result = size;
137 138 139 140 141
	unsigned long tmp;

	if (offset >= size)
		return size;

142
	size -= (offset & ~(BITS_PER_LONG - 1));
143
	offset %= BITS_PER_LONG;
144 145 146 147 148

	while (1) {
		if (*p == ~0UL)
			goto pass;

149
		tmp = __reverse_ulong((unsigned char *)p);
150 151 152 153 154

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
155
		if (tmp != ~0UL)
156 157 158 159
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
160
		size -= BITS_PER_LONG;
161
		offset = 0;
162
		p++;
163
	}
164 165 166
	return result;
found:
	return result - size + __reverse_ffz(tmp);
167 168
}

C
Chao Yu 已提交
169
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170 171 172 173 174 175 176
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (test_opt(sbi, LFS))
		return false;
177
	if (sbi->gc_mode == GC_URGENT)
178
		return true;
D
Daniel Rosenberg 已提交
179 180
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;
181 182

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
C
Chao Yu 已提交
183
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
184 185
}

C
Chao Yu 已提交
186
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
J
Jaegeuk Kim 已提交
187
{
J
Jaegeuk Kim 已提交
188
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
J
Jaegeuk Kim 已提交
189 190
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;
191

J
Jaegeuk Kim 已提交
192
	f2fs_trace_pid(page);
193

C
Chao Yu 已提交
194 195 196
	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

J
Jaegeuk Kim 已提交
197 198 199 200 201
	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);
C
Chao Yu 已提交
202

J
Jaegeuk Kim 已提交
203 204 205 206
	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
J
Jaegeuk Kim 已提交
207 208 209 210
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(&fi->inmem_ilist))
		list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
211
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
J
Jaegeuk Kim 已提交
212
	mutex_unlock(&fi->inmem_lock);
213 214

	trace_f2fs_register_inmem_page(page, INMEM);
J
Jaegeuk Kim 已提交
215 216
}

217 218
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover)
219
{
220
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
221
	struct inmem_pages *cur, *tmp;
222
	int err = 0;
223 224

	list_for_each_entry_safe(cur, tmp, head, list) {
225 226 227 228 229 230
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		lock_page(page);
231

232 233
		f2fs_wait_on_page_writeback(page, DATA, true);

234 235 236 237 238
		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
239
retry:
240
			set_new_dnode(&dn, inode, NULL, NULL, 0);
C
Chao Yu 已提交
241 242
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
243 244 245 246 247 248
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
249 250 251
				err = -EAGAIN;
				goto next;
			}
252 253 254 255 256 257 258

			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				f2fs_put_dnode(&dn);
				return err;
			}

259
			if (cur->old_addr == NEW_ADDR) {
C
Chao Yu 已提交
260
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
261 262 263
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
264 265 266 267
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
268
		/* we don't need to invalidate this in the sccessful status */
C
Chao Yu 已提交
269
		if (drop || recover) {
270
			ClearPageUptodate(page);
C
Chao Yu 已提交
271 272
			clear_cold_data(page);
		}
273
		set_page_private(page, 0);
C
Chao Yu 已提交
274
		ClearPagePrivate(page);
275
		f2fs_put_page(page, 1);
276 277 278 279 280

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
281
	return err;
282 283
}

C
Chao Yu 已提交
284
void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
J
Jaegeuk Kim 已提交
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299
{
	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
	struct inode *inode;
	struct f2fs_inode_info *fi;
next:
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
		return;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

	if (inode) {
300 301 302 303 304 305 306
		if (gc_failure) {
			if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
				goto drop;
			goto skip;
		}
drop:
		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
C
Chao Yu 已提交
307
		f2fs_drop_inmem_pages(inode);
J
Jaegeuk Kim 已提交
308 309
		iput(inode);
	}
310
skip:
J
Jaegeuk Kim 已提交
311 312 313 314 315
	congestion_wait(BLK_RW_ASYNC, HZ/50);
	cond_resched();
	goto next;
}

C
Chao Yu 已提交
316
void f2fs_drop_inmem_pages(struct inode *inode)
317
{
J
Jaegeuk Kim 已提交
318
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
319 320 321
	struct f2fs_inode_info *fi = F2FS_I(inode);

	mutex_lock(&fi->inmem_lock);
322
	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
J
Jaegeuk Kim 已提交
323 324 325 326
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
327
	mutex_unlock(&fi->inmem_lock);
C
Chao Yu 已提交
328 329

	clear_inode_flag(inode, FI_ATOMIC_FILE);
330
	fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
C
Chao Yu 已提交
331
	stat_dec_atomic_write(inode);
332 333
}

C
Chao Yu 已提交
334
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
335 336 337 338 339 340 341 342 343 344 345 346 347 348
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;

	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry(cur, head, list) {
		if (cur->page == page)
			break;
	}

349
	f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
350 351 352 353 354 355 356 357 358 359 360 361 362 363
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	f2fs_put_page(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

C
Chao Yu 已提交
364
static int __f2fs_commit_inmem_pages(struct inode *inode)
J
Jaegeuk Kim 已提交
365 366 367 368 369
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
370
		.sbi = sbi,
C
Chao Yu 已提交
371
		.ino = inode->i_ino,
J
Jaegeuk Kim 已提交
372
		.type = DATA,
M
Mike Christie 已提交
373
		.op = REQ_OP_WRITE,
374
		.op_flags = REQ_SYNC | REQ_PRIO,
C
Chao Yu 已提交
375
		.io_type = FS_DATA_IO,
J
Jaegeuk Kim 已提交
376
	};
C
Chao Yu 已提交
377
	struct list_head revoke_list;
378
	bool submit_bio = false;
379
	int err = 0;
J
Jaegeuk Kim 已提交
380

C
Chao Yu 已提交
381 382
	INIT_LIST_HEAD(&revoke_list);

J
Jaegeuk Kim 已提交
383
	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
384 385 386 387 388 389 390
		struct page *page = cur->page;

		lock_page(page);
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			f2fs_wait_on_page_writeback(page, DATA, true);
391 392

			set_page_dirty(page);
393
			if (clear_page_dirty_for_io(page)) {
394
				inode_dec_dirty_pages(inode);
C
Chao Yu 已提交
395
				f2fs_remove_dirty_inode(inode);
396
			}
397
retry:
398
			fio.page = page;
399
			fio.old_blkaddr = NULL_ADDR;
400
			fio.encrypted_page = NULL;
401
			fio.need_lock = LOCK_DONE;
C
Chao Yu 已提交
402
			err = f2fs_do_write_data_page(&fio);
403
			if (err) {
404 405 406 407 408
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
409
				unlock_page(page);
410
				break;
411
			}
412 413
			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
414
			submit_bio = true;
415 416
		}
		unlock_page(page);
C
Chao Yu 已提交
417
		list_move_tail(&cur->list, &revoke_list);
J
Jaegeuk Kim 已提交
418
	}
419

420 421
	if (submit_bio)
		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);
422

C
Chao Yu 已提交
423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438
	if (err) {
		/*
		 * try to revoke all committed pages, but still we could fail
		 * due to no memory or other reason, if that happened, EAGAIN
		 * will be returned, which means in such case, transaction is
		 * already not integrity, caller should use journal to do the
		 * recovery or rewrite & commit last transaction. For other
		 * error number, revoking was done by filesystem itself.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list, false, true);

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	} else {
		__revoke_inmem_pages(inode, &revoke_list, false, false);
	}
439

440 441 442
	return err;
}

C
Chao Yu 已提交
443
int f2fs_commit_inmem_pages(struct inode *inode)
444 445 446
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
447
	int err;
448 449 450

	f2fs_balance_fs(sbi, true);

451 452 453
	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
C
Chao Yu 已提交
454 455
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

456
	mutex_lock(&fi->inmem_lock);
C
Chao Yu 已提交
457
	err = __f2fs_commit_inmem_pages(inode);
458

J
Jaegeuk Kim 已提交
459 460 461 462
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
J
Jaegeuk Kim 已提交
463 464
	mutex_unlock(&fi->inmem_lock);

C
Chao Yu 已提交
465 466
	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

467
	f2fs_unlock_op(sbi);
468 469
	up_write(&fi->i_gc_rwsem[WRITE]);

470
	return err;
J
Jaegeuk Kim 已提交
471 472
}

J
Jaegeuk Kim 已提交
473
/*
J
Jaegeuk Kim 已提交
474 475 476
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
J
Jaegeuk Kim 已提交
477
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
J
Jaegeuk Kim 已提交
478
{
479 480
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(FAULT_CHECKPOINT);
481
		f2fs_stop_checkpoint(sbi, false);
482
	}
483

484
	/* balance_fs_bg is able to be pending */
J
Jaegeuk Kim 已提交
485
	if (need && excess_cached_nats(sbi))
486 487
		f2fs_balance_fs_bg(sbi);

D
Daniel Rosenberg 已提交
488 489 490
	if (f2fs_is_checkpoint_ready(sbi))
		return;

J
Jaegeuk Kim 已提交
491
	/*
492 493
	 * We should do GC or end up with checkpoint, if there are so many dirty
	 * dir/node pages without enough free segments.
J
Jaegeuk Kim 已提交
494
	 */
495
	if (has_not_enough_free_secs(sbi, 0, 0)) {
J
Jaegeuk Kim 已提交
496
		mutex_lock(&sbi->gc_mutex);
497
		f2fs_gc(sbi, false, false, NULL_SEGNO);
J
Jaegeuk Kim 已提交
498 499 500
	}
}

501 502
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
503 504 505
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

C
Chao Yu 已提交
506
	/* try to shrink extent cache when there is no enough memory */
C
Chao Yu 已提交
507
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
J
Jaegeuk Kim 已提交
508
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
C
Chao Yu 已提交
509

J
Jaegeuk Kim 已提交
510
	/* check the # of cached NAT entries */
C
Chao Yu 已提交
511 512
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
J
Jaegeuk Kim 已提交
513

C
Chao Yu 已提交
514 515
	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
516
	else
C
Chao Yu 已提交
517
		f2fs_build_free_nids(sbi, false, false);
C
Chao Yu 已提交
518

519
	if (!is_idle(sbi, REQ_TIME) &&
520
		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
J
Jaegeuk Kim 已提交
521
		return;
C
Chao Yu 已提交
522

J
Jaegeuk Kim 已提交
523
	/* checkpoint is the only way to shrink partial cached entries */
C
Chao Yu 已提交
524 525
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
526 527
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
528
			excess_dirty_nodes(sbi) ||
J
Jaegeuk Kim 已提交
529
			f2fs_time_over(sbi, CP_TIME)) {
C
Chao Yu 已提交
530 531 532 533
		if (test_opt(sbi, DATA_FLUSH)) {
			struct blk_plug plug;

			blk_start_plug(&plug);
C
Chao Yu 已提交
534
			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
C
Chao Yu 已提交
535 536
			blk_finish_plug(&plug);
		}
537
		f2fs_sync_fs(sbi->sb, true);
538
		stat_inc_bg_cp_count(sbi->stat_info);
C
Chao Yu 已提交
539
	}
540 541
}

542 543
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
J
Jaegeuk Kim 已提交
544
{
545
	struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
J
Jaegeuk Kim 已提交
546 547
	int ret;

548
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
549
	bio_set_dev(bio, bdev);
J
Jaegeuk Kim 已提交
550 551
	ret = submit_bio_wait(bio);
	bio_put(bio);
552 553 554

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
J
Jaegeuk Kim 已提交
555 556 557
	return ret;
}

C
Chao Yu 已提交
558
static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
J
Jaegeuk Kim 已提交
559
{
C
Chao Yu 已提交
560
	int ret = 0;
J
Jaegeuk Kim 已提交
561 562
	int i;

C
Chao Yu 已提交
563 564
	if (!sbi->s_ndevs)
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);
565

C
Chao Yu 已提交
566
	for (i = 0; i < sbi->s_ndevs; i++) {
C
Chao Yu 已提交
567
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
C
Chao Yu 已提交
568
			continue;
569 570 571
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
J
Jaegeuk Kim 已提交
572 573 574 575
	}
	return ret;
}

576
static int issue_flush_thread(void *data)
577 578
{
	struct f2fs_sb_info *sbi = data;
579
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
580
	wait_queue_head_t *q = &fcc->flush_wait_queue;
581 582 583 584
repeat:
	if (kthread_should_stop())
		return 0;

585 586
	sb_start_intwrite(sbi->sb);

587
	if (!llist_empty(&fcc->issue_list)) {
588 589 590
		struct flush_cmd *cmd, *next;
		int ret;

591 592 593
		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

C
Chao Yu 已提交
594 595 596
		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
C
Chao Yu 已提交
597 598
		atomic_inc(&fcc->issued_flush);

599 600
		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
601 602 603
			cmd->ret = ret;
			complete(&cmd->wait);
		}
604
		fcc->dispatch_list = NULL;
605 606
	}

607 608
	sb_end_intwrite(sbi->sb);

609
	wait_event_interruptible(*q,
610
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
611 612 613
	goto repeat;
}

C
Chao Yu 已提交
614
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
615
{
616
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
617
	struct flush_cmd cmd;
C
Chao Yu 已提交
618
	int ret;
619

J
Jaegeuk Kim 已提交
620 621 622
	if (test_opt(sbi, NOBARRIER))
		return 0;

C
Chao Yu 已提交
623
	if (!test_opt(sbi, FLUSH_MERGE)) {
624
		atomic_inc(&fcc->queued_flush);
C
Chao Yu 已提交
625
		ret = submit_flush_wait(sbi, ino);
626
		atomic_dec(&fcc->queued_flush);
C
Chao Yu 已提交
627 628 629
		atomic_inc(&fcc->issued_flush);
		return ret;
	}
J
Jaegeuk Kim 已提交
630

631
	if (atomic_inc_return(&fcc->queued_flush) == 1 || sbi->s_ndevs > 1) {
C
Chao Yu 已提交
632
		ret = submit_flush_wait(sbi, ino);
633
		atomic_dec(&fcc->queued_flush);
C
Chao Yu 已提交
634 635

		atomic_inc(&fcc->issued_flush);
J
Jaegeuk Kim 已提交
636 637
		return ret;
	}
638

C
Chao Yu 已提交
639
	cmd.ino = ino;
640
	init_completion(&cmd.wait);
641

642
	llist_add(&cmd.llnode, &fcc->issue_list);
643

644 645 646 647
	/* update issue_list before we wake up issue_flush thread */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
648
		wake_up(&fcc->flush_wait_queue);
649

650 651
	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
652
		atomic_dec(&fcc->queued_flush);
653
	} else {
654 655 656 657 658
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
659
			atomic_dec(&fcc->queued_flush);
660 661 662
		} else {
			struct flush_cmd *tmp, *next;

C
Chao Yu 已提交
663
			ret = submit_flush_wait(sbi, ino);
664 665 666 667

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
668
					atomic_dec(&fcc->queued_flush);
669 670 671 672 673 674
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
675
	}
676 677

	return cmd.ret;
678 679
}

C
Chao Yu 已提交
680
int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
681 682 683 684 685
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

686 687
	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
688 689
		if (fcc->f2fs_issue_flush)
			return err;
690 691 692
		goto init_thread;
	}

C
Chao Yu 已提交
693
	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
694 695
	if (!fcc)
		return -ENOMEM;
C
Chao Yu 已提交
696
	atomic_set(&fcc->issued_flush, 0);
697
	atomic_set(&fcc->queued_flush, 0);
698
	init_waitqueue_head(&fcc->flush_wait_queue);
699
	init_llist_head(&fcc->issue_list);
700
	SM_I(sbi)->fcc_info = fcc;
701 702 703
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

704
init_thread:
705 706 707 708
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
709
		kvfree(fcc);
710
		SM_I(sbi)->fcc_info = NULL;
711 712 713 714 715 716
		return err;
	}

	return err;
}

C
Chao Yu 已提交
717
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
718
{
719
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
720

721 722 723 724 725 726 727
	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
728
		kvfree(fcc);
729
		SM_I(sbi)->fcc_info = NULL;
730
	}
731 732
}

733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751 752 753 754
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!sbi->s_ndevs)
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

J
Jaegeuk Kim 已提交
755 756 757 758 759 760 761 762 763 764 765 766 767 768
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
769
		enum dirty_type t = sentry->type;
770

771 772 773 774
		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
775 776
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
J
Jaegeuk Kim 已提交
777 778 779 780 781 782 783 784 785 786 787 788
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
789 790 791 792 793
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;
794

795
		if (get_valid_blocks(sbi, segno, true) == 0)
796
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
797
						dirty_i->victim_secmap);
J
Jaegeuk Kim 已提交
798 799 800
	}
}

J
Jaegeuk Kim 已提交
801
/*
J
Jaegeuk Kim 已提交
802 803 804 805
 * Should not occur error such as -ENOMEM.
 * Adding dirty entry into seglist is not critical operation.
 * If a given segment is one of current working segments, it won't be added.
 */
806
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
J
Jaegeuk Kim 已提交
807 808
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
D
Daniel Rosenberg 已提交
809
	unsigned short valid_blocks, ckpt_valid_blocks;
J
Jaegeuk Kim 已提交
810 811 812 813 814 815

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

816
	valid_blocks = get_valid_blocks(sbi, segno, false);
D
Daniel Rosenberg 已提交
817
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);
J
Jaegeuk Kim 已提交
818

D
Daniel Rosenberg 已提交
819 820
	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
				ckpt_valid_blocks == sbi->blocks_per_seg)) {
J
Jaegeuk Kim 已提交
821 822 823 824 825 826 827 828 829 830 831 832
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

D
Daniel Rosenberg 已提交
833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
/* This moves currently empty dirty blocks to prefree. Must hold seglist_lock */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (IS_CURSEG(sbi, segno))
			continue;
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	}
	mutex_unlock(&dirty_i->seglist_lock);
}

int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
		else
			holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	if (holes[DATA] > ovp || holes[NODE] > ovp)
		return -EAGAIN;
	return 0;
}

/* This is only used by SBI_CP_DISABLED */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int segno = 0;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, segno, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, segno))
			continue;
		mutex_unlock(&dirty_i->seglist_lock);
		return segno;
	}
	mutex_unlock(&dirty_i->seglist_lock);
	return NULL_SEGNO;
}

893
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
894 895
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
C
Chao Yu 已提交
896
{
897
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
898
	struct list_head *pend_list;
899
	struct discard_cmd *dc;
C
Chao Yu 已提交
900

C
Chao Yu 已提交
901 902 903 904
	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

905 906
	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
	INIT_LIST_HEAD(&dc->list);
907
	dc->bdev = bdev;
908
	dc->lstart = lstart;
909
	dc->start = start;
910
	dc->len = len;
911
	dc->ref = 0;
912
	dc->state = D_PREP;
913
	dc->queued = 0;
914
	dc->error = 0;
915
	init_completion(&dc->wait);
C
Chao Yu 已提交
916
	list_add_tail(&dc->list, pend_list);
917 918
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
C
Chao Yu 已提交
919
	atomic_inc(&dcc->discard_cmd_cnt);
C
Chao Yu 已提交
920
	dcc->undiscard_blks += len;
921 922 923 924 925 926 927

	return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
C
Chao Yu 已提交
928 929
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
930 931 932 933 934 935 936
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
C
Chao Yu 已提交
937
	rb_insert_color_cached(&dc->rb_node, &dcc->root, leftmost);
938 939

	return dc;
940 941
}

942 943
static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
944
{
945
	if (dc->state == D_DONE)
946
		atomic_sub(dc->queued, &dcc->queued_discard);
947 948

	list_del(&dc->list);
C
Chao Yu 已提交
949
	rb_erase_cached(&dc->rb_node, &dcc->root);
C
Chao Yu 已提交
950
	dcc->undiscard_blks -= dc->len;
951 952 953 954 955 956 957 958 959 960

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
961
	unsigned long flags;
962

C
Chao Yu 已提交
963 964
	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

965 966 967 968 969 970 971
	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

972 973
	f2fs_bug_on(sbi, dc->ref);

974 975
	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;
976

977
	if (dc->error)
978 979 980
		printk_ratelimited(
			"%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
981
	__detach_discard_cmd(dcc, dc);
C
Chao Yu 已提交
982 983
}

984 985 986
static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
987
	unsigned long flags;
988

989
	dc->error = blk_status_to_errno(bio->bi_status);
990 991 992 993 994 995 996 997

	spin_lock_irqsave(&dc->lock, flags);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
998 999 1000
	bio_put(bio);
}

W
Wei Yongjun 已提交
1001
static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1002 1003 1004 1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
	unsigned long *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

1016 1017 1018 1019
		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = max_blocks;
C
Chao Yu 已提交
1020 1021 1022
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
1023
		blk = START_BLOCK(sbi, segno + 1);
C
Chao Yu 已提交
1024 1025 1026 1027
	}
#endif
}

1028 1029 1030 1031 1032 1033 1034
static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
C
Chao Yu 已提交
1035
	dpolicy->ordered = false;
1036 1037 1038 1039 1040 1041 1042
	dpolicy->granularity = granularity;

	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
	dpolicy->io_aware_gran = MAX_PLIST_NUM;

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1043
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1044 1045
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = true;
1046
		dpolicy->sync = false;
C
Chao Yu 已提交
1047
		dpolicy->ordered = true;
1048 1049 1050 1051 1052 1053
		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
			dpolicy->granularity = 1;
			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1054
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1055 1056 1057 1058 1059
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
1060
		dpolicy->max_requests = UINT_MAX;
1061 1062 1063 1064
		dpolicy->io_aware = false;
	}
}

1065 1066 1067
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
1068
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
1069
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1070
						struct discard_policy *dpolicy,
1071 1072
						struct discard_cmd *dc,
						unsigned int *issued)
1073
{
1074 1075 1076 1077
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1078
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1079 1080 1081
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
1082 1083
	block_t lstart, start, len, total_len;
	int err = 0;
1084 1085

	if (dc->state != D_PREP)
1086
		return 0;
1087

1088
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1089
		return 0;
1090

1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115
	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

1116 1117 1118 1119 1120
		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
1121 1122 1123 1124
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
1125
submit:
1126
		if (err) {
1127
			spin_lock_irqsave(&dc->lock, flags);
1128
			if (dc->state == D_PARTIAL)
1129 1130 1131
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

1132 1133
			break;
		}
1134

1135
		f2fs_bug_on(sbi, !bio);
1136

1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147
		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);
1148

1149 1150
		atomic_inc(&dcc->queued_discard);
		dc->queued++;
1151
		list_move_tail(&dc->list, wait_list);
C
Chao Yu 已提交
1152

1153 1154
		/* sanity check on discard range */
		__check_sit_bitmap(sbi, start, start + len);
1155

1156 1157 1158 1159 1160 1161 1162 1163
		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);
1164 1165 1166 1167 1168

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
1169
	}
1170

1171
	if (!err && len)
1172
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
1173
	return err;
1174 1175
}

1176 1177 1178 1179 1180
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
1181
{
1182
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1183
	struct rb_node **p;
1184 1185
	struct rb_node *parent = NULL;
	struct discard_cmd *dc = NULL;
C
Chao Yu 已提交
1186
	bool leftmost = true;
1187 1188 1189 1190 1191 1192

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}
1193

C
Chao Yu 已提交
1194 1195
	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
							lstart, &leftmost);
1196
do_insert:
C
Chao Yu 已提交
1197 1198
	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
								p, leftmost);
1199 1200
	if (!dc)
		return NULL;
1201

1202
	return dc;
1203 1204
}

C
Chao Yu 已提交
1205 1206 1207 1208 1209 1210
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

1211 1212 1213
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
C
Chao Yu 已提交
1214
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1215 1216
	struct discard_info di = dc->di;
	bool modified = false;
1217

1218
	if (dc->state == D_DONE || dc->len == 1) {
1219 1220 1221 1222
		__remove_discard_cmd(sbi, dc);
		return;
	}

C
Chao Yu 已提交
1223 1224
	dcc->undiscard_blks -= di.len;

1225
	if (blkaddr > di.lstart) {
1226
		dc->len = blkaddr - dc->lstart;
C
Chao Yu 已提交
1227
		dcc->undiscard_blks += dc->len;
C
Chao Yu 已提交
1228
		__relocate_discard_cmd(dcc, dc);
1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
C
Chao Yu 已提交
1242
			dcc->undiscard_blks += dc->len;
C
Chao Yu 已提交
1243
			__relocate_discard_cmd(dcc, dc);
1244
		}
1245 1246 1247
	}
}

1248 1249 1250
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
C
Chao Yu 已提交
1251
{
1252
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1253 1254 1255 1256
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
1257 1258 1259
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1260
	block_t end = lstart + len;
C
Chao Yu 已提交
1261

C
Chao Yu 已提交
1262
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1263 1264 1265
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
C
Chao Yu 已提交
1266
					&insert_p, &insert_parent, true, NULL);
1267 1268 1269 1270 1271 1272 1273 1274
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
C
Chao Yu 已提交
1275
	}
1276

1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300
	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
1301 1302
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
1303
			prev_dc->di.len += di.len;
C
Chao Yu 已提交
1304
			dcc->undiscard_blks += di.len;
C
Chao Yu 已提交
1305
			__relocate_discard_cmd(dcc, prev_dc);
1306 1307 1308 1309 1310 1311 1312
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
1313 1314
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
1315 1316 1317
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
C
Chao Yu 已提交
1318
			dcc->undiscard_blks += di.len;
C
Chao Yu 已提交
1319
			__relocate_discard_cmd(dcc, next_dc);
1320 1321 1322
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
1323
		}
1324

1325
		if (!merged) {
1326 1327
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
1328
		}
1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343
 next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

C
Chao Yu 已提交
1344
	trace_f2fs_queue_discard(bdev, blkstart, blklen);
1345 1346 1347 1348 1349 1350

	if (sbi->s_ndevs) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
1351
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1352
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1353
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1354 1355 1356
	return 0;
}

C
Chao Yu 已提交
1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373
static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
C
Chao Yu 已提交
1374
					&insert_p, &insert_parent, true, NULL);
C
Chao Yu 已提交
1375 1376 1377 1378 1379 1380 1381
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
1382
		int err = 0;
C
Chao Yu 已提交
1383 1384 1385 1386

		if (dc->state != D_PREP)
			goto next;

1387
		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
C
Chao Yu 已提交
1388 1389 1390 1391 1392
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
1393
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
C
Chao Yu 已提交
1394

1395
		if (issued >= dpolicy->max_requests)
C
Chao Yu 已提交
1396 1397 1398
			break;
next:
		node = rb_next(&dc->rb_node);
1399 1400
		if (err)
			__remove_discard_cmd(sbi, dc);
C
Chao Yu 已提交
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413 1414 1415 1416
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

C
Chao Yu 已提交
1417 1418
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
C
Chao Yu 已提交
1419 1420 1421 1422 1423
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
1424
	int i, issued = 0;
1425
	bool io_interrupted = false;
C
Chao Yu 已提交
1426

C
Chao Yu 已提交
1427 1428 1429
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (i + 1 < dpolicy->granularity)
			break;
C
Chao Yu 已提交
1430 1431 1432 1433

		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
			return __issue_discard_cmd_orderly(sbi, dpolicy);

C
Chao Yu 已提交
1434
		pend_list = &dcc->pend_list[i];
1435 1436

		mutex_lock(&dcc->cmd_lock);
1437 1438
		if (list_empty(pend_list))
			goto next;
1439 1440 1441
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));
1442
		blk_start_plug(&plug);
C
Chao Yu 已提交
1443 1444 1445
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

C
Chao Yu 已提交
1446
			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1447
						!is_idle(sbi, DISCARD_TIME)) {
1448
				io_interrupted = true;
1449
				break;
1450
			}
1451

1452
			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
1453

1454
			if (issued >= dpolicy->max_requests)
1455
				break;
C
Chao Yu 已提交
1456
		}
1457
		blk_finish_plug(&plug);
1458
next:
1459 1460
		mutex_unlock(&dcc->cmd_lock);

1461
		if (issued >= dpolicy->max_requests || io_interrupted)
1462
			break;
C
Chao Yu 已提交
1463
	}
1464

1465 1466 1467
	if (!issued && io_interrupted)
		issued = -1;

1468 1469 1470
	return issued;
}

1471
static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1472 1473 1474 1475 1476
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
1477
	bool dropped = false;
1478 1479 1480 1481 1482 1483 1484

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
1485
			dropped = true;
1486 1487 1488
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1489 1490

	return dropped;
C
Chao Yu 已提交
1491 1492
}

C
Chao Yu 已提交
1493
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1494 1495 1496 1497
{
	__drop_discard_cmd(sbi);
}

1498
static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1499 1500 1501
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1502
	unsigned int len = 0;
C
Chao Yu 已提交
1503 1504 1505 1506 1507

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
1508 1509 1510
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
C
Chao Yu 已提交
1511
		__remove_discard_cmd(sbi, dc);
1512
	}
C
Chao Yu 已提交
1513
	mutex_unlock(&dcc->cmd_lock);
1514 1515

	return len;
C
Chao Yu 已提交
1516 1517
}

1518
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1519 1520
						struct discard_policy *dpolicy,
						block_t start, block_t end)
C
Chao Yu 已提交
1521 1522
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1523 1524
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
C
Chao Yu 已提交
1525
	struct discard_cmd *dc, *tmp;
1526
	bool need_wait;
1527
	unsigned int trimmed = 0;
1528 1529 1530

next:
	need_wait = false;
C
Chao Yu 已提交
1531 1532 1533

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(dc, tmp, wait_list, list) {
1534 1535
		if (dc->lstart + dc->len <= start || end <= dc->lstart)
			continue;
C
Chao Yu 已提交
1536
		if (dc->len < dpolicy->granularity)
1537
			continue;
C
Chao Yu 已提交
1538
		if (dc->state == D_DONE && !dc->ref) {
C
Chao Yu 已提交
1539
			wait_for_completion_io(&dc->wait);
1540 1541
			if (!dc->error)
				trimmed += dc->len;
C
Chao Yu 已提交
1542
			__remove_discard_cmd(sbi, dc);
1543 1544 1545 1546
		} else {
			dc->ref++;
			need_wait = true;
			break;
C
Chao Yu 已提交
1547 1548 1549
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1550 1551

	if (need_wait) {
1552
		trimmed += __wait_one_discard_bio(sbi, dc);
1553 1554
		goto next;
	}
1555 1556

	return trimmed;
C
Chao Yu 已提交
1557 1558
}

1559
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1560
						struct discard_policy *dpolicy)
1561
{
1562
	struct discard_policy dp;
1563
	unsigned int discard_blks;
1564

1565 1566
	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1567 1568

	/* wait all */
1569
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1570
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1571
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1572 1573 1574
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
1575 1576
}

1577
/* This should be covered by global mutex, &sit_i->sentry_lock */
W
Wei Yongjun 已提交
1578
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1579 1580 1581
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
1582
	bool need_wait = false;
1583 1584

	mutex_lock(&dcc->cmd_lock);
C
Chao Yu 已提交
1585 1586
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
							NULL, blkaddr);
1587
	if (dc) {
1588 1589 1590 1591 1592 1593
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
C
Chao Yu 已提交
1594
	}
C
Chao Yu 已提交
1595
	mutex_unlock(&dcc->cmd_lock);
1596

C
Chao Yu 已提交
1597 1598
	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
C
Chao Yu 已提交
1599 1600
}

C
Chao Yu 已提交
1601
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1602 1603 1604 1605 1606 1607 1608 1609
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
1610
	}
C
Chao Yu 已提交
1611 1612
}

1613
/* This comes from f2fs_put_super */
1614
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
1615 1616
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1617
	struct discard_policy dpolicy;
1618
	bool dropped;
1619

1620 1621
	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
C
Chao Yu 已提交
1622
	__issue_discard_cmd(sbi, &dpolicy);
1623 1624
	dropped = __drop_discard_cmd(sbi);

1625 1626
	/* just to make sure there is no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);
1627 1628

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1629
	return dropped;
1630 1631
}

1632 1633 1634 1635 1636
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
C
Chao Yu 已提交
1637
	struct discard_policy dpolicy;
1638 1639
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
	int issued;
1640

1641
	set_freezable();
1642

1643
	do {
1644
		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
C
Chao Yu 已提交
1645 1646
					dcc->discard_granularity);

1647 1648 1649 1650
		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));
S
Sheng Yong 已提交
1651 1652 1653 1654

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

1655 1656 1657 1658
		/* clean up pending candidates before going to sleep */
		if (atomic_read(&dcc->queued_discard))
			__wait_all_discard_cmd(sbi, NULL);

1659 1660
		if (try_to_freeze())
			continue;
1661 1662
		if (f2fs_readonly(sbi->sb))
			continue;
1663 1664
		if (kthread_should_stop())
			return 0;
1665 1666 1667 1668
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}
1669

1670
		if (sbi->gc_mode == GC_URGENT)
1671
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1672

1673 1674
		sb_start_intwrite(sbi->sb);

C
Chao Yu 已提交
1675
		issued = __issue_discard_cmd(sbi, &dpolicy);
1676
		if (issued > 0) {
C
Chao Yu 已提交
1677 1678
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
1679
		} else if (issued == -1){
1680 1681
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
1682
				wait_ms = dpolicy.mid_interval;
1683
		} else {
C
Chao Yu 已提交
1684
			wait_ms = dpolicy.max_interval;
1685
		}
1686

1687
		sb_end_intwrite(sbi->sb);
1688 1689 1690

	} while (!kthread_should_stop());
	return 0;
1691 1692
}

1693
#ifdef CONFIG_BLK_DEV_ZONED
J
Jaegeuk Kim 已提交
1694 1695
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
1696
{
1697
	sector_t sector, nr_sects;
1698
	block_t lblkstart = blkstart;
J
Jaegeuk Kim 已提交
1699 1700 1701 1702 1703 1704
	int devi = 0;

	if (sbi->s_ndevs) {
		devi = f2fs_target_device_index(sbi, blkstart);
		blkstart -= FDEV(devi).start_blk;
	}
1705 1706 1707 1708 1709 1710

	/*
	 * We need to know the type of the zone: for conventional zones,
	 * use regular discard if the drive supports it. For sequential
	 * zones, reset the zone write pointer.
	 */
J
Jaegeuk Kim 已提交
1711
	switch (get_blkz_type(sbi, bdev, blkstart)) {
1712 1713 1714 1715

	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!blk_queue_discard(bdev_get_queue(bdev)))
			return 0;
1716
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1717 1718
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_msg(sbi->sb, KERN_INFO,
				"(%d) %s: Unaligned discard attempted (block %x + %x)",
				devi, sbi->s_ndevs ? FDEV(devi).path: "",
				blkstart, blklen);
			return -EIO;
		}
1730
		trace_f2fs_issue_reset_zone(bdev, blkstart);
1731 1732 1733 1734 1735 1736 1737 1738 1739
		return blkdev_reset_zones(bdev, sector,
					  nr_sects, GFP_NOFS);
	default:
		/* Unknown zone type: broken device ? */
		return -EIO;
	}
}
#endif

J
Jaegeuk Kim 已提交
1740 1741 1742 1743
static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
1744
	if (f2fs_sb_has_blkzoned(sbi) &&
J
Jaegeuk Kim 已提交
1745 1746 1747
				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
1748
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
J
Jaegeuk Kim 已提交
1749 1750
}

1751
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1752 1753
				block_t blkstart, block_t blklen)
{
J
Jaegeuk Kim 已提交
1754 1755
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
1756 1757 1758
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
J
Jaegeuk Kim 已提交
1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}
1778 1779 1780 1781 1782 1783 1784

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}
1785

J
Jaegeuk Kim 已提交
1786 1787 1788
	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
1789 1790
}

1791 1792
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
1793
{
1794 1795
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
1796
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1797 1798
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1799
	unsigned long *discard_map = (unsigned long *)se->discard_map;
J
Jaegeuk Kim 已提交
1800
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
1801
	unsigned int start = 0, end = -1;
1802
	bool force = (cpc->reason & CP_DISCARD);
C
Chao Yu 已提交
1803
	struct discard_entry *de = NULL;
1804
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1805 1806
	int i;

1807
	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1808
		return false;
1809

1810
	if (!force) {
1811
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1812 1813
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
1814
			return false;
1815 1816
	}

1817 1818
	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
1819
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1820
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1821

1822 1823
	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
1824 1825 1826 1827 1828
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1829 1830 1831 1832
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

1833 1834 1835
		if (check_only)
			return true;

C
Chao Yu 已提交
1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846
		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
								GFP_F2FS_ZERO);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
1847
	}
1848
	return false;
1849 1850
}

1851 1852 1853 1854 1855 1856
static void release_discard_addr(struct discard_entry *entry)
{
	list_del(&entry->list);
	kmem_cache_free(discard_entry_slab, entry);
}

C
Chao Yu 已提交
1857
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1858
{
1859
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1860 1861 1862
	struct discard_entry *entry, *this;

	/* drop caches */
1863 1864
	list_for_each_entry_safe(entry, this, head, list)
		release_discard_addr(entry);
1865 1866
}

J
Jaegeuk Kim 已提交
1867
/*
C
Chao Yu 已提交
1868
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
J
Jaegeuk Kim 已提交
1869 1870 1871 1872
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1873
	unsigned int segno;
J
Jaegeuk Kim 已提交
1874 1875

	mutex_lock(&dirty_i->seglist_lock);
1876
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1877 1878 1879 1880
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

C
Chao Yu 已提交
1881 1882
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1883
{
1884 1885
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
1886
	struct discard_entry *entry, *this;
J
Jaegeuk Kim 已提交
1887
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1888 1889
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
1890
	unsigned int secno, start_segno;
1891
	bool force = (cpc->reason & CP_DISCARD);
1892
	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
J
Jaegeuk Kim 已提交
1893 1894

	mutex_lock(&dirty_i->seglist_lock);
1895

J
Jaegeuk Kim 已提交
1896
	while (1) {
1897
		int i;
1898 1899 1900

		if (need_align && end != -1)
			end--;
1901 1902
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1903
			break;
1904 1905
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);
1906

1907 1908 1909 1910
		if (need_align) {
			start = rounddown(start, sbi->segs_per_sec);
			end = roundup(end, sbi->segs_per_sec);
		}
1911

1912 1913 1914 1915
		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}
1916

1917
		if (!f2fs_realtime_discard_enable(sbi))
1918
			continue;
J
Jaegeuk Kim 已提交
1919

1920 1921 1922 1923
		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
				continue;

1924
		if (!test_opt(sbi, LFS) || !__is_large_section(sbi)) {
1925
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1926
				(end - start) << sbi->log_blocks_per_seg);
1927 1928 1929
			continue;
		}
next:
1930 1931
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
1932
		if (!IS_CURSEC(sbi, secno) &&
1933
			!get_valid_blocks(sbi, start, true))
1934 1935 1936 1937 1938 1939
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
1940 1941
		else
			end = start - 1;
J
Jaegeuk Kim 已提交
1942 1943
	}
	mutex_unlock(&dirty_i->seglist_lock);
1944 1945

	/* send small discards */
1946
	list_for_each_entry_safe(entry, this, head, list) {
C
Chao Yu 已提交
1947 1948 1949 1950 1951 1952 1953 1954 1955
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

1956
			if (f2fs_sb_has_blkzoned(sbi) ||
1957
			    (force && len < cpc->trim_minlen))
C
Chao Yu 已提交
1958 1959 1960 1961 1962 1963 1964 1965 1966
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
		}
1967
skip:
C
Chao Yu 已提交
1968 1969 1970 1971 1972 1973
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

1974
		release_discard_addr(entry);
1975
		dcc->nr_discards -= total_len;
1976
	}
C
Chao Yu 已提交
1977

1978
	wake_up_discard_thread(sbi, false);
J
Jaegeuk Kim 已提交
1979 1980
}

1981
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1982
{
1983
	dev_t dev = sbi->sb->s_bdev->bd_dev;
1984
	struct discard_cmd_control *dcc;
C
Chao Yu 已提交
1985
	int err = 0, i;
1986 1987 1988 1989 1990 1991

	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

C
Chao Yu 已提交
1992
	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
1993 1994 1995
	if (!dcc)
		return -ENOMEM;

1996
	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
1997
	INIT_LIST_HEAD(&dcc->entry_list);
C
Chao Yu 已提交
1998
	for (i = 0; i < MAX_PLIST_NUM; i++)
C
Chao Yu 已提交
1999
		INIT_LIST_HEAD(&dcc->pend_list[i]);
2000
	INIT_LIST_HEAD(&dcc->wait_list);
2001
	INIT_LIST_HEAD(&dcc->fstrim_list);
2002
	mutex_init(&dcc->cmd_lock);
C
Chao Yu 已提交
2003
	atomic_set(&dcc->issued_discard, 0);
2004
	atomic_set(&dcc->queued_discard, 0);
C
Chao Yu 已提交
2005
	atomic_set(&dcc->discard_cmd_cnt, 0);
2006
	dcc->nr_discards = 0;
C
Chao Yu 已提交
2007
	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
C
Chao Yu 已提交
2008
	dcc->undiscard_blks = 0;
C
Chao Yu 已提交
2009
	dcc->next_pos = 0;
C
Chao Yu 已提交
2010
	dcc->root = RB_ROOT_CACHED;
2011
	dcc->rbtree_check = false;
2012

2013
	init_waitqueue_head(&dcc->discard_wait_queue);
2014 2015
	SM_I(sbi)->dcc_info = dcc;
init_thread:
2016 2017 2018 2019
	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard)) {
		err = PTR_ERR(dcc->f2fs_issue_discard);
2020
		kvfree(dcc);
2021 2022 2023 2024
		SM_I(sbi)->dcc_info = NULL;
		return err;
	}

2025 2026 2027
	return err;
}

2028
static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
2029 2030 2031
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

2032 2033 2034
	if (!dcc)
		return;

C
Chao Yu 已提交
2035
	f2fs_stop_discard_thread(sbi);
2036

2037
	kvfree(dcc);
2038
	SM_I(sbi)->dcc_info = NULL;
2039 2040
}

2041
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
J
Jaegeuk Kim 已提交
2042 2043
{
	struct sit_info *sit_i = SIT_I(sbi);
2044 2045

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
J
Jaegeuk Kim 已提交
2046
		sit_i->dirty_sentries++;
2047 2048 2049 2050
		return false;
	}

	return true;
J
Jaegeuk Kim 已提交
2051 2052 2053 2054 2055 2056 2057 2058 2059 2060 2061 2062 2063 2064 2065 2066
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);
	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
2067 2068 2069 2070
	bool exist;
#ifdef CONFIG_F2FS_CHECK_FS
	bool mir_exist;
#endif
J
Jaegeuk Kim 已提交
2071 2072 2073 2074 2075

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
J
Jaegeuk Kim 已提交
2076
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);
J
Jaegeuk Kim 已提交
2077

2078
	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
J
Jaegeuk Kim 已提交
2079 2080 2081
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
C
Chao Yu 已提交
2082 2083 2084
	se->mtime = get_mtime(sbi, false);
	if (se->mtime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = se->mtime;
J
Jaegeuk Kim 已提交
2085 2086 2087

	/* Update valid block bitmap */
	if (del > 0) {
2088
		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
C
Chao Yu 已提交
2089
#ifdef CONFIG_F2FS_CHECK_FS
2090 2091 2092 2093 2094 2095
		mir_exist = f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when setting bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
2096
			f2fs_bug_on(sbi, 1);
2097
		}
C
Chao Yu 已提交
2098
#endif
2099 2100 2101 2102
		if (unlikely(exist)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly set, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
2103 2104
			se->valid_blocks--;
			del = 0;
C
Chao Yu 已提交
2105
		}
2106

2107
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
2108
			sbi->discard_blks--;
2109 2110

		/* don't overwrite by SSR to keep node chain */
D
Daniel Rosenberg 已提交
2111 2112
		if (IS_NODESEG(se->type) &&
				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
2113 2114 2115
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
J
Jaegeuk Kim 已提交
2116
	} else {
2117
		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
C
Chao Yu 已提交
2118
#ifdef CONFIG_F2FS_CHECK_FS
2119 2120 2121 2122 2123 2124
		mir_exist = f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when clearing bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
2125
			f2fs_bug_on(sbi, 1);
2126
		}
C
Chao Yu 已提交
2127
#endif
2128 2129 2130 2131
		if (unlikely(!exist)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly cleared, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
2132 2133
			se->valid_blocks++;
			del = 0;
D
Daniel Rosenberg 已提交
2134 2135 2136 2137 2138 2139 2140 2141 2142
		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			/*
			 * If checkpoints are off, we must not reuse data that
			 * was used in the previous checkpoint. If it was used
			 * before, we must track that to know how much space we
			 * really have.
			 */
			if (f2fs_test_bit(offset, se->ckpt_valid_map))
				sbi->unusable_block_count++;
C
Chao Yu 已提交
2143
		}
2144

2145
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
2146
			sbi->discard_blks++;
J
Jaegeuk Kim 已提交
2147 2148 2149 2150 2151 2152 2153 2154 2155
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

2156
	if (__is_large_section(sbi))
J
Jaegeuk Kim 已提交
2157 2158 2159
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

C
Chao Yu 已提交
2160
void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
J
Jaegeuk Kim 已提交
2161 2162 2163 2164
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

2165
	f2fs_bug_on(sbi, addr == NULL_ADDR);
J
Jaegeuk Kim 已提交
2166 2167 2168
	if (addr == NEW_ADDR)
		return;

2169 2170
	invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);

J
Jaegeuk Kim 已提交
2171
	/* add it into sit main buffer */
2172
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
2173 2174 2175 2176 2177 2178

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

2179
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
2180 2181
}

C
Chao Yu 已提交
2182
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
2183 2184 2185 2186 2187 2188
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

2189
	if (!is_valid_data_blkaddr(sbi, blkaddr))
2190 2191
		return true;

2192
	down_read(&sit_i->sentry_lock);
2193 2194 2195 2196 2197 2198 2199 2200

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

2201
	up_read(&sit_i->sentry_lock);
2202 2203 2204 2205

	return is_cp;
}

J
Jaegeuk Kim 已提交
2206
/*
J
Jaegeuk Kim 已提交
2207 2208 2209
 * This function should be resided under the curseg_mutex lock
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
2210
					struct f2fs_summary *sum)
J
Jaegeuk Kim 已提交
2211 2212 2213
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
2214
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
J
Jaegeuk Kim 已提交
2215 2216 2217
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

J
Jaegeuk Kim 已提交
2218
/*
J
Jaegeuk Kim 已提交
2219 2220
 * Calculate the number of current summary pages for writing
 */
C
Chao Yu 已提交
2221
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
J
Jaegeuk Kim 已提交
2222 2223
{
	int valid_sum_count = 0;
2224
	int i, sum_in_page;
J
Jaegeuk Kim 已提交
2225 2226 2227 2228

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
2229 2230 2231 2232 2233 2234 2235
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
J
Jaegeuk Kim 已提交
2236 2237
	}

2238
	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
2239 2240
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
J
Jaegeuk Kim 已提交
2241
		return 1;
2242
	else if ((valid_sum_count - sum_in_page) <=
2243
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
J
Jaegeuk Kim 已提交
2244 2245 2246 2247
		return 2;
	return 3;
}

J
Jaegeuk Kim 已提交
2248
/*
J
Jaegeuk Kim 已提交
2249 2250
 * Caller should put this summary page
 */
C
Chao Yu 已提交
2251
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
J
Jaegeuk Kim 已提交
2252
{
2253
	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
J
Jaegeuk Kim 已提交
2254 2255
}

C
Chao Yu 已提交
2256 2257
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
					void *src, block_t blk_addr)
J
Jaegeuk Kim 已提交
2258
{
C
Chao Yu 已提交
2259
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
C
Chao Yu 已提交
2260

2261
	memcpy(page_address(page), src, PAGE_SIZE);
J
Jaegeuk Kim 已提交
2262 2263 2264 2265
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

C
Chao Yu 已提交
2266 2267 2268
static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
C
Chao Yu 已提交
2269
	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
C
Chao Yu 已提交
2270 2271
}

2272 2273 2274 2275
static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
C
Chao Yu 已提交
2276
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
2277 2278 2279 2280
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);
2281
	memset(dst, 0, PAGE_SIZE);
2282 2283 2284 2285 2286 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

J
Jaegeuk Kim 已提交
2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308
static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

J
Jaegeuk Kim 已提交
2309
/*
J
Jaegeuk Kim 已提交
2310 2311 2312 2313 2314 2315 2316 2317
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
2318
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
2319 2320
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
J
Jaegeuk Kim 已提交
2321 2322 2323 2324 2325
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

2326
	spin_lock(&free_i->segmap_lock);
J
Jaegeuk Kim 已提交
2327 2328 2329

	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
2330 2331
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
J
Jaegeuk Kim 已提交
2332 2333 2334
			goto got_it;
	}
find_other_zone:
2335 2336
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
J
Jaegeuk Kim 已提交
2337 2338
		if (dir == ALLOC_RIGHT) {
			secno = find_next_zero_bit(free_i->free_secmap,
2339 2340
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
J
Jaegeuk Kim 已提交
2341 2342 2343 2344 2345 2346 2347 2348 2349 2350 2351 2352 2353 2354
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
2355 2356
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
J
Jaegeuk Kim 已提交
2357 2358 2359 2360
		break;
	}
	secno = left_start;
skip_left:
2361 2362
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);
J
Jaegeuk Kim 已提交
2363 2364 2365 2366 2367 2368 2369 2370 2371 2372 2373 2374 2375 2376 2377 2378 2379 2380 2381 2382 2383 2384 2385 2386 2387 2388 2389 2390 2391 2392 2393

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
2394
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
J
Jaegeuk Kim 已提交
2395 2396
	__set_inuse(sbi, segno);
	*newseg = segno;
2397
	spin_unlock(&free_i->segmap_lock);
J
Jaegeuk Kim 已提交
2398 2399 2400 2401 2402 2403 2404 2405
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
2406
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
J
Jaegeuk Kim 已提交
2407 2408 2409 2410 2411 2412 2413 2414 2415 2416 2417 2418
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

2419 2420
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
J
Jaegeuk Kim 已提交
2421
	/* if segs_per_sec is large than 1, we need to keep original policy. */
2422
	if (__is_large_section(sbi))
J
Jaegeuk Kim 已提交
2423 2424
		return CURSEG_I(sbi, type)->segno;

D
Daniel Rosenberg 已提交
2425 2426 2427
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return 0;

2428 2429
	if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
2430 2431
		return 0;

2432 2433
	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];
2434 2435

	/* find segments from 0 to reuse freed segments */
2436
	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
2437 2438
		return 0;

2439 2440 2441
	return CURSEG_I(sbi, type)->segno;
}

J
Jaegeuk Kim 已提交
2442
/*
J
Jaegeuk Kim 已提交
2443 2444 2445 2446 2447 2448 2449 2450 2451 2452
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
2453
				GET_SUM_BLOCK(sbi, segno));
J
Jaegeuk Kim 已提交
2454 2455 2456 2457 2458 2459
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

2460
	segno = __get_next_segno(sbi, type);
J
Jaegeuk Kim 已提交
2461 2462 2463 2464 2465 2466 2467 2468 2469 2470
	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
2471
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
J
Jaegeuk Kim 已提交
2472
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
2473 2474 2475 2476 2477 2478 2479 2480 2481 2482
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
J
Jaegeuk Kim 已提交
2483 2484
}

J
Jaegeuk Kim 已提交
2485
/*
J
Jaegeuk Kim 已提交
2486 2487 2488 2489 2490 2491 2492 2493 2494 2495 2496 2497 2498
 * If a segment is written by LFS manner, next block offset is just obtained
 * by increasing the current block offset. However, if a segment is written by
 * SSR manner, next block offset obtained by calling __next_free_blkoff
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

J
Jaegeuk Kim 已提交
2499
/*
A
arter97 已提交
2500
 * This function always allocates a used segment(from dirty seglist) by SSR
J
Jaegeuk Kim 已提交
2501 2502
 * manner, so it should recover the existing segment information of valid blocks
 */
2503
static void change_curseg(struct f2fs_sb_info *sbi, int type)
J
Jaegeuk Kim 已提交
2504 2505 2506 2507 2508 2509 2510 2511 2512 2513 2514 2515 2516 2517 2518 2519 2520 2521 2522 2523
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

C
Chao Yu 已提交
2524
	sum_page = f2fs_get_sum_page(sbi, new_segno);
2525
	f2fs_bug_on(sbi, IS_ERR(sum_page));
2526 2527 2528
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
	f2fs_put_page(sum_page, 1);
J
Jaegeuk Kim 已提交
2529 2530
}

2531 2532 2533 2534
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
2535
	unsigned segno = NULL_SEGNO;
2536 2537
	int i, cnt;
	bool reversed = false;
2538

C
Chao Yu 已提交
2539
	/* f2fs_need_SSR() already forces to do this */
2540 2541
	if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
		curseg->next_segno = segno;
2542
		return 1;
2543
	}
2544

2545 2546
	/* For node segments, let's do SSR more intensively */
	if (IS_NODESEG(type)) {
2547 2548 2549 2550 2551 2552 2553
		if (type >= CURSEG_WARM_NODE) {
			reversed = true;
			i = CURSEG_COLD_NODE;
		} else {
			i = CURSEG_HOT_NODE;
		}
		cnt = NR_CURSEG_NODE_TYPE;
2554
	} else {
2555 2556 2557 2558 2559 2560 2561
		if (type >= CURSEG_WARM_DATA) {
			reversed = true;
			i = CURSEG_COLD_DATA;
		} else {
			i = CURSEG_HOT_DATA;
		}
		cnt = NR_CURSEG_DATA_TYPE;
2562
	}
2563

2564
	for (; cnt-- > 0; reversed ? i-- : i++) {
2565 2566
		if (i == type)
			continue;
2567 2568
		if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
			curseg->next_segno = segno;
2569
			return 1;
2570
		}
2571
	}
D
Daniel Rosenberg 已提交
2572 2573 2574 2575 2576 2577 2578 2579 2580

	/* find valid_blocks=0 in dirty list */
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		segno = get_free_segment(sbi);
		if (segno != NULL_SEGNO) {
			curseg->next_segno = segno;
			return 1;
		}
	}
2581 2582 2583
	return 0;
}

J
Jaegeuk Kim 已提交
2584 2585 2586 2587 2588 2589 2590
/*
 * flush out current segment and replace it with new segment
 * This function should be returned with success, otherwise BUG
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
J
Jaegeuk Kim 已提交
2591 2592
	struct curseg_info *curseg = CURSEG_I(sbi, type);

2593
	if (force)
J
Jaegeuk Kim 已提交
2594
		new_curseg(sbi, type, true);
2595 2596
	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
					type == CURSEG_WARM_NODE)
J
Jaegeuk Kim 已提交
2597
		new_curseg(sbi, type, false);
D
Daniel Rosenberg 已提交
2598 2599
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
J
Jaegeuk Kim 已提交
2600
		new_curseg(sbi, type, false);
C
Chao Yu 已提交
2601
	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
2602
		change_curseg(sbi, type);
J
Jaegeuk Kim 已提交
2603 2604
	else
		new_curseg(sbi, type, false);
2605

J
Jaegeuk Kim 已提交
2606
	stat_inc_seg_type(sbi, curseg);
J
Jaegeuk Kim 已提交
2607 2608
}

C
Chao Yu 已提交
2609
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
2610
{
2611 2612
	struct curseg_info *curseg;
	unsigned int old_segno;
J
Jaegeuk Kim 已提交
2613 2614
	int i;

2615 2616
	down_write(&SIT_I(sbi)->sentry_lock);

2617 2618 2619 2620 2621 2622
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		curseg = CURSEG_I(sbi, i);
		old_segno = curseg->segno;
		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
		locate_dirty_segment(sbi, old_segno);
	}
2623 2624

	up_write(&SIT_I(sbi)->sentry_lock);
J
Jaegeuk Kim 已提交
2625 2626 2627 2628 2629 2630
}

static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

C
Chao Yu 已提交
2631 2632
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
2633 2634 2635 2636
{
	__u64 trim_start = cpc->trim_start;
	bool has_candidate = false;

2637
	down_write(&SIT_I(sbi)->sentry_lock);
2638 2639 2640 2641 2642 2643
	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
		if (add_discard_addrs(sbi, cpc, true)) {
			has_candidate = true;
			break;
		}
	}
2644
	up_write(&SIT_I(sbi)->sentry_lock);
2645 2646 2647 2648 2649

	cpc->trim_start = trim_start;
	return has_candidate;
}

2650
static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
2651 2652 2653 2654 2655 2656 2657 2658 2659
					struct discard_policy *dpolicy,
					unsigned int start, unsigned int end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	int issued;
2660
	unsigned int trimmed = 0;
2661 2662 2663 2664 2665

next:
	issued = 0;

	mutex_lock(&dcc->cmd_lock);
2666 2667 2668
	if (unlikely(dcc->rbtree_check))
		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));
2669

C
Chao Yu 已提交
2670
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
2671 2672 2673
					NULL, start,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
C
Chao Yu 已提交
2674
					&insert_p, &insert_parent, true, NULL);
2675 2676 2677 2678 2679 2680 2681
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc && dc->lstart <= end) {
		struct rb_node *node;
2682
		int err = 0;
2683 2684 2685 2686 2687 2688 2689 2690 2691

		if (dc->len < dpolicy->granularity)
			goto skip;

		if (dc->state != D_PREP) {
			list_move_tail(&dc->list, &dcc->fstrim_list);
			goto skip;
		}

2692
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
2693

2694
		if (issued >= dpolicy->max_requests) {
2695 2696
			start = dc->lstart + dc->len;

2697 2698 2699
			if (err)
				__remove_discard_cmd(sbi, dc);

2700 2701
			blk_finish_plug(&plug);
			mutex_unlock(&dcc->cmd_lock);
2702
			trimmed += __wait_all_discard_cmd(sbi, NULL);
2703 2704 2705 2706 2707
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto next;
		}
skip:
		node = rb_next(&dc->rb_node);
2708 2709
		if (err)
			__remove_discard_cmd(sbi, dc);
2710 2711 2712 2713 2714 2715 2716 2717
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);

		if (fatal_signal_pending(current))
			break;
	}

	blk_finish_plug(&plug);
	mutex_unlock(&dcc->cmd_lock);
2718 2719

	return trimmed;
2720 2721
}

2722 2723
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
2724 2725
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
C
Chao Yu 已提交
2726
	unsigned int start_segno, end_segno;
2727
	block_t start_block, end_block;
2728
	struct cp_control cpc;
C
Chao Yu 已提交
2729
	struct discard_policy dpolicy;
2730
	unsigned long long trimmed = 0;
C
Chao Yu 已提交
2731
	int err = 0;
2732
	bool need_align = test_opt(sbi, LFS) && __is_large_section(sbi);
2733

2734
	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
2735 2736
		return -EINVAL;

2737 2738
	if (end < MAIN_BLKADDR(sbi))
		goto out;
2739

2740 2741 2742
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"Found FS corruption, run fsck to fix.");
2743
		return -EIO;
2744 2745
	}

2746
	/* start/end segment number in main_area */
2747 2748 2749
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
2750 2751 2752 2753
	if (need_align) {
		start_segno = rounddown(start_segno, sbi->segs_per_sec);
		end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
	}
2754

2755
	cpc.reason = CP_DISCARD;
2756
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
C
Chao Yu 已提交
2757 2758
	cpc.trim_start = start_segno;
	cpc.trim_end = end_segno;
2759

C
Chao Yu 已提交
2760 2761
	if (sbi->discard_blks == 0)
		goto out;
2762

C
Chao Yu 已提交
2763
	mutex_lock(&sbi->gc_mutex);
C
Chao Yu 已提交
2764
	err = f2fs_write_checkpoint(sbi, &cpc);
C
Chao Yu 已提交
2765 2766 2767
	mutex_unlock(&sbi->gc_mutex);
	if (err)
		goto out;
2768

2769 2770 2771 2772 2773 2774
	/*
	 * We filed discard candidates, but actually we don't need to wait for
	 * all of them, since they'll be issued in idle time along with runtime
	 * discard option. User configuration looks like using runtime discard
	 * or periodic fstrim instead of it.
	 */
2775
	if (f2fs_realtime_discard_enable(sbi))
2776 2777 2778 2779 2780 2781
		goto out;

	start_block = START_BLOCK(sbi, start_segno);
	end_block = START_BLOCK(sbi, end_segno + 1);

	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
2782 2783
	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);
2784

2785
	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
2786
					start_block, end_block);
C
Chao Yu 已提交
2787
out:
2788 2789
	if (!err)
		range->len = F2FS_BLK_TO_BYTES(trimmed);
C
Chao Yu 已提交
2790
	return err;
2791 2792
}

J
Jaegeuk Kim 已提交
2793 2794 2795 2796 2797 2798 2799 2800
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

C
Chao Yu 已提交
2801
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2802 2803 2804 2805 2806 2807 2808 2809 2810 2811 2812
{
	switch (hint) {
	case WRITE_LIFE_SHORT:
		return CURSEG_HOT_DATA;
	case WRITE_LIFE_EXTREME:
		return CURSEG_COLD_DATA;
	default:
		return CURSEG_WARM_DATA;
	}
}

2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838 2839 2840 2841 2842 2843 2844 2845
/* This returns write hints for each segment type. This hints will be
 * passed down to block layer. There are mapping tables which depend on
 * the mount option 'whint_mode'.
 *
 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
 *
 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_NOT_SET
 *                       HOT_NODE                 "
 *                       WARM_NODE                "
 *                       COLD_NODE                "
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
 *
2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871
 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_MEDIUM;
 *                       HOT_NODE                 WRITE_LIFE_NOT_SET
 *                       WARM_NODE                "
 *                       COLD_NODE                WRITE_LIFE_NONE
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2872 2873
 */

C
Chao Yu 已提交
2874
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
2875 2876
				enum page_type type, enum temp_type temp)
{
2877
	if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
2878
		if (type == DATA) {
2879
			if (temp == WARM)
2880
				return WRITE_LIFE_NOT_SET;
2881 2882 2883 2884
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
2885 2886 2887
		} else {
			return WRITE_LIFE_NOT_SET;
		}
2888
	} else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
2889 2890 2891 2892 2893 2894 2895 2896 2897 2898 2899 2900 2901 2902 2903
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_LONG;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else if (type == NODE) {
			if (temp == WARM || temp == HOT)
				return WRITE_LIFE_NOT_SET;
			else if (temp == COLD)
				return WRITE_LIFE_NONE;
		} else if (type == META) {
			return WRITE_LIFE_MEDIUM;
		}
2904
	}
2905
	return WRITE_LIFE_NOT_SET;
2906 2907
}

2908
static int __get_segment_type_2(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2909
{
2910
	if (fio->type == DATA)
J
Jaegeuk Kim 已提交
2911 2912 2913 2914 2915
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

2916
static int __get_segment_type_4(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2917
{
2918 2919
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
J
Jaegeuk Kim 已提交
2920 2921 2922 2923 2924 2925

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
2926
		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
2927
			return CURSEG_WARM_NODE;
J
Jaegeuk Kim 已提交
2928 2929 2930 2931 2932
		else
			return CURSEG_COLD_NODE;
	}
}

2933
static int __get_segment_type_6(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2934
{
2935 2936
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
J
Jaegeuk Kim 已提交
2937

2938
		if (is_cold_data(fio->page) || file_is_cold(inode))
J
Jaegeuk Kim 已提交
2939
			return CURSEG_COLD_DATA;
C
Chao Yu 已提交
2940
		if (file_is_hot(inode) ||
2941
				is_inode_flag_set(inode, FI_HOT_DATA) ||
2942 2943
				f2fs_is_atomic_file(inode) ||
				f2fs_is_volatile_file(inode))
2944
			return CURSEG_HOT_DATA;
C
Chao Yu 已提交
2945
		return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
J
Jaegeuk Kim 已提交
2946
	} else {
2947 2948
		if (IS_DNODE(fio->page))
			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
J
Jaegeuk Kim 已提交
2949
						CURSEG_HOT_NODE;
2950
		return CURSEG_COLD_NODE;
J
Jaegeuk Kim 已提交
2951 2952 2953
	}
}

2954
static int __get_segment_type(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2955
{
J
Jaegeuk Kim 已提交
2956 2957
	int type = 0;

2958
	switch (F2FS_OPTION(fio->sbi).active_logs) {
J
Jaegeuk Kim 已提交
2959
	case 2:
J
Jaegeuk Kim 已提交
2960 2961
		type = __get_segment_type_2(fio);
		break;
J
Jaegeuk Kim 已提交
2962
	case 4:
J
Jaegeuk Kim 已提交
2963 2964 2965 2966 2967 2968 2969
		type = __get_segment_type_4(fio);
		break;
	case 6:
		type = __get_segment_type_6(fio);
		break;
	default:
		f2fs_bug_on(fio->sbi, true);
J
Jaegeuk Kim 已提交
2970
	}
2971

J
Jaegeuk Kim 已提交
2972 2973 2974 2975 2976 2977 2978
	if (IS_HOT(type))
		fio->temp = HOT;
	else if (IS_WARM(type))
		fio->temp = WARM;
	else
		fio->temp = COLD;
	return type;
J
Jaegeuk Kim 已提交
2979 2980
}

C
Chao Yu 已提交
2981
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
2982
		block_t old_blkaddr, block_t *new_blkaddr,
2983 2984
		struct f2fs_summary *sum, int type,
		struct f2fs_io_info *fio, bool add_list)
J
Jaegeuk Kim 已提交
2985 2986
{
	struct sit_info *sit_i = SIT_I(sbi);
2987
	struct curseg_info *curseg = CURSEG_I(sbi, type);
J
Jaegeuk Kim 已提交
2988

C
Chao Yu 已提交
2989 2990
	down_read(&SM_I(sbi)->curseg_lock);

J
Jaegeuk Kim 已提交
2991
	mutex_lock(&curseg->curseg_mutex);
2992
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
2993 2994 2995

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

2996 2997
	f2fs_wait_discard_bio(sbi, *new_blkaddr);

J
Jaegeuk Kim 已提交
2998 2999 3000 3001 3002
	/*
	 * __add_sum_entry should be resided under the curseg_mutex
	 * because, this function updates a summary entry in the
	 * current summary block.
	 */
3003
	__add_sum_entry(sbi, type, sum);
J
Jaegeuk Kim 已提交
3004 3005

	__refresh_next_blkoff(sbi, curseg);
3006 3007

	stat_inc_block_count(sbi, curseg);
J
Jaegeuk Kim 已提交
3008

3009 3010 3011 3012 3013 3014 3015 3016
	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	update_sit_entry(sbi, *new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

3017 3018
	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);
3019

J
Jaegeuk Kim 已提交
3020
	/*
3021 3022 3023
	 * segment dirty status should be updated after segment allocation,
	 * so we just need to update status only one time after previous
	 * segment being closed.
J
Jaegeuk Kim 已提交
3024
	 */
3025 3026
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));
3027

3028
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3029

C
Chao Yu 已提交
3030
	if (page && IS_NODESEG(type)) {
J
Jaegeuk Kim 已提交
3031 3032
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

C
Chao Yu 已提交
3033 3034 3035
		f2fs_inode_chksum_set(sbi, page);
	}

3036 3037 3038 3039 3040
	if (add_list) {
		struct f2fs_bio_info *io;

		INIT_LIST_HEAD(&fio->list);
		fio->in_list = true;
3041
		fio->retry = false;
3042 3043 3044 3045 3046 3047
		io = sbi->write_io[fio->type] + fio->temp;
		spin_lock(&io->io_lock);
		list_add_tail(&fio->list, &io->io_list);
		spin_unlock(&io->io_lock);
	}

3048
	mutex_unlock(&curseg->curseg_mutex);
C
Chao Yu 已提交
3049 3050

	up_read(&SM_I(sbi)->curseg_lock);
3051 3052
}

C
Chao Yu 已提交
3053 3054 3055 3056 3057 3058 3059 3060 3061 3062 3063
static void update_device_state(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int devidx;

	if (!sbi->s_ndevs)
		return;

	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);

	/* update device state for fsync */
C
Chao Yu 已提交
3064
	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3065 3066 3067 3068 3069 3070 3071

	/* update device state for checkpoint */
	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
		spin_lock(&sbi->dev_lock);
		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}
C
Chao Yu 已提交
3072 3073
}

3074
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3075
{
3076
	int type = __get_segment_type(fio);
3077
	bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3078

3079 3080
	if (keep_order)
		down_read(&fio->sbi->io_order_lock);
3081
reallocate:
C
Chao Yu 已提交
3082
	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3083
			&fio->new_blkaddr, sum, type, fio, true);
3084 3085 3086
	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(fio->sbi),
					fio->old_blkaddr, fio->old_blkaddr);
3087

J
Jaegeuk Kim 已提交
3088
	/* writeout dirty page into bdev */
3089 3090
	f2fs_submit_page_write(fio);
	if (fio->retry) {
3091 3092 3093
		fio->old_blkaddr = fio->new_blkaddr;
		goto reallocate;
	}
3094 3095 3096

	update_device_state(fio);

3097 3098
	if (keep_order)
		up_read(&fio->sbi->io_order_lock);
J
Jaegeuk Kim 已提交
3099 3100
}

C
Chao Yu 已提交
3101
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
C
Chao Yu 已提交
3102
					enum iostat_type io_type)
J
Jaegeuk Kim 已提交
3103
{
J
Jaegeuk Kim 已提交
3104
	struct f2fs_io_info fio = {
3105
		.sbi = sbi,
J
Jaegeuk Kim 已提交
3106
		.type = META,
3107
		.temp = HOT,
M
Mike Christie 已提交
3108
		.op = REQ_OP_WRITE,
3109
		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3110 3111
		.old_blkaddr = page->index,
		.new_blkaddr = page->index,
3112
		.page = page,
3113
		.encrypted_page = NULL,
3114
		.in_list = false,
J
Jaegeuk Kim 已提交
3115 3116
	};

3117
	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
M
Mike Christie 已提交
3118
		fio.op_flags &= ~REQ_META;
3119

J
Jaegeuk Kim 已提交
3120
	set_page_writeback(page);
J
Jaegeuk Kim 已提交
3121
	ClearPageError(page);
3122
	f2fs_submit_page_write(&fio);
C
Chao Yu 已提交
3123

C
Chao Yu 已提交
3124
	stat_inc_meta_count(sbi, page->index);
C
Chao Yu 已提交
3125
	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3126 3127
}

C
Chao Yu 已提交
3128
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3129 3130
{
	struct f2fs_summary sum;
3131

J
Jaegeuk Kim 已提交
3132
	set_summary(&sum, nid, 0, 0);
3133
	do_write_page(&sum, fio);
C
Chao Yu 已提交
3134 3135

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3136 3137
}

C
Chao Yu 已提交
3138 3139
void f2fs_outplace_write_data(struct dnode_of_data *dn,
					struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3140
{
3141
	struct f2fs_sb_info *sbi = fio->sbi;
J
Jaegeuk Kim 已提交
3142 3143
	struct f2fs_summary sum;

3144
	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3145
	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3146
	do_write_page(&sum, fio);
3147
	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
C
Chao Yu 已提交
3148 3149

	f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3150 3151
}

C
Chao Yu 已提交
3152
int f2fs_inplace_write_data(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3153
{
C
Chao Yu 已提交
3154
	int err;
3155
	struct f2fs_sb_info *sbi = fio->sbi;
C
Chao Yu 已提交
3156

3157
	fio->new_blkaddr = fio->old_blkaddr;
3158 3159
	/* i/o temperature is needed for passing down write hints */
	__get_segment_type(fio);
3160 3161 3162 3163

	f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
			GET_SEGNO(sbi, fio->new_blkaddr))->type));

3164
	stat_inc_inplace_blocks(fio->sbi);
C
Chao Yu 已提交
3165 3166

	err = f2fs_submit_page_bio(fio);
C
Chao Yu 已提交
3167 3168
	if (!err)
		update_device_state(fio);
C
Chao Yu 已提交
3169 3170 3171 3172

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);

	return err;
J
Jaegeuk Kim 已提交
3173 3174
}

C
Chao Yu 已提交
3175 3176 3177 3178 3179 3180 3181 3182 3183 3184 3185 3186
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	int i;

	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
		if (CURSEG_I(sbi, i)->segno == segno)
			break;
	}
	return i;
}

C
Chao Yu 已提交
3187
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3188
				block_t old_blkaddr, block_t new_blkaddr,
3189
				bool recover_curseg, bool recover_newaddr)
J
Jaegeuk Kim 已提交
3190 3191 3192 3193 3194 3195
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
3196
	unsigned short old_blkoff;
J
Jaegeuk Kim 已提交
3197 3198 3199 3200 3201

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

C
Chao Yu 已提交
3202 3203
	down_write(&SM_I(sbi)->curseg_lock);

3204 3205 3206 3207 3208 3209 3210 3211 3212
	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
C
Chao Yu 已提交
3213 3214 3215 3216 3217
		if (IS_CURSEG(sbi, segno)) {
			/* se->type is volatile as SSR allocation */
			type = __f2fs_get_curseg(sbi, segno);
			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
		} else {
J
Jaegeuk Kim 已提交
3218
			type = CURSEG_WARM_DATA;
C
Chao Yu 已提交
3219
		}
J
Jaegeuk Kim 已提交
3220
	}
3221

3222
	f2fs_bug_on(sbi, !IS_DATASEG(type));
J
Jaegeuk Kim 已提交
3223 3224 3225
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
3226
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3227 3228

	old_cursegno = curseg->segno;
3229
	old_blkoff = curseg->next_blkoff;
J
Jaegeuk Kim 已提交
3230 3231 3232 3233

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
3234
		change_curseg(sbi, type);
J
Jaegeuk Kim 已提交
3235 3236
	}

J
Jaegeuk Kim 已提交
3237
	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3238
	__add_sum_entry(sbi, type, sum);
J
Jaegeuk Kim 已提交
3239

3240
	if (!recover_curseg || recover_newaddr)
3241
		update_sit_entry(sbi, new_blkaddr, 1);
3242 3243 3244
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
3245
		update_sit_entry(sbi, old_blkaddr, -1);
3246
	}
3247 3248 3249 3250

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

J
Jaegeuk Kim 已提交
3251 3252
	locate_dirty_segment(sbi, old_cursegno);

3253 3254 3255
	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
3256
			change_curseg(sbi, type);
3257 3258 3259 3260
		}
		curseg->next_blkoff = old_blkoff;
	}

3261
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3262
	mutex_unlock(&curseg->curseg_mutex);
C
Chao Yu 已提交
3263
	up_write(&SM_I(sbi)->curseg_lock);
J
Jaegeuk Kim 已提交
3264 3265
}

3266 3267
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
3268 3269
				unsigned char version, bool recover_curseg,
				bool recover_newaddr)
3270 3271 3272 3273 3274
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

C
Chao Yu 已提交
3275
	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3276
					recover_curseg, recover_newaddr);
3277

3278
	f2fs_update_data_blkaddr(dn, new_addr);
3279 3280
}

3281
void f2fs_wait_on_page_writeback(struct page *page,
3282
				enum page_type type, bool ordered)
3283 3284
{
	if (PageWriteback(page)) {
3285 3286
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

3287
		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
3288 3289 3290 3291
		if (ordered)
			wait_on_page_writeback(page);
		else
			wait_for_stable_page(page);
3292 3293 3294
	}
}

3295
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3296
{
3297
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3298 3299
	struct page *cpage;

3300 3301 3302
	if (!f2fs_post_read_required(inode))
		return;

3303
	if (!is_valid_data_blkaddr(sbi, blkaddr))
3304 3305 3306 3307
		return;

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
3308
		f2fs_wait_on_page_writeback(cpage, DATA, true);
3309 3310 3311 3312
		f2fs_put_page(cpage, 1);
	}
}

3313 3314 3315 3316 3317 3318 3319 3320 3321
void f2fs_wait_on_block_writeback_range(struct inode *inode, block_t blkaddr,
								block_t len)
{
	block_t i;

	for (i = 0; i < len; i++)
		f2fs_wait_on_block_writeback(inode, blkaddr + i);
}

3322
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3323 3324 3325 3326 3327 3328 3329 3330 3331 3332
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

C
Chao Yu 已提交
3333
	page = f2fs_get_meta_page(sbi, start++);
3334 3335
	if (IS_ERR(page))
		return PTR_ERR(page);
J
Jaegeuk Kim 已提交
3336 3337 3338 3339
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3340
	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3341 3342 3343

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3344
	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3345 3346 3347 3348 3349 3350 3351 3352 3353 3354 3355 3356 3357 3358 3359 3360 3361 3362 3363 3364 3365 3366 3367
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
3368
			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
J
Jaegeuk Kim 已提交
3369 3370 3371 3372 3373 3374
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

C
Chao Yu 已提交
3375
			page = f2fs_get_meta_page(sbi, start++);
3376 3377
			if (IS_ERR(page))
				return PTR_ERR(page);
J
Jaegeuk Kim 已提交
3378 3379 3380 3381 3382
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
3383
	return 0;
J
Jaegeuk Kim 已提交
3384 3385 3386 3387 3388 3389 3390 3391 3392 3393 3394
}

static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;
3395
	int err = 0;
J
Jaegeuk Kim 已提交
3396 3397 3398 3399 3400 3401

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
3402
		if (__exist_node_summaries(sbi))
J
Jaegeuk Kim 已提交
3403 3404 3405 3406 3407 3408 3409 3410
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
3411
		if (__exist_node_summaries(sbi))
J
Jaegeuk Kim 已提交
3412 3413 3414 3415 3416 3417
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

C
Chao Yu 已提交
3418
	new = f2fs_get_meta_page(sbi, blk_addr);
3419 3420
	if (IS_ERR(new))
		return PTR_ERR(new);
J
Jaegeuk Kim 已提交
3421 3422 3423
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
3424
		if (__exist_node_summaries(sbi)) {
J
Jaegeuk Kim 已提交
3425 3426 3427 3428 3429 3430 3431
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
3432 3433 3434
			err = f2fs_restore_node_summary(sbi, segno, sum);
			if (err)
				goto out;
J
Jaegeuk Kim 已提交
3435 3436 3437 3438 3439 3440
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
3441 3442 3443 3444 3445 3446 3447 3448

	/* update journal info */
	down_write(&curseg->journal_rwsem);
	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
	up_write(&curseg->journal_rwsem);

	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
J
Jaegeuk Kim 已提交
3449 3450 3451 3452 3453
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
3454
out:
J
Jaegeuk Kim 已提交
3455
	f2fs_put_page(new, 1);
3456
	return err;
J
Jaegeuk Kim 已提交
3457 3458 3459 3460
}

static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
3461 3462
	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
J
Jaegeuk Kim 已提交
3463
	int type = CURSEG_HOT_DATA;
3464
	int err;
J
Jaegeuk Kim 已提交
3465

3466
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
C
Chao Yu 已提交
3467
		int npages = f2fs_npages_for_summary_flush(sbi, true);
3468 3469

		if (npages >= 2)
C
Chao Yu 已提交
3470
			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3471
							META_CP, true);
3472

J
Jaegeuk Kim 已提交
3473
		/* restore for compacted data summary */
3474 3475 3476
		err = read_compacted_summaries(sbi);
		if (err)
			return err;
J
Jaegeuk Kim 已提交
3477 3478 3479
		type = CURSEG_HOT_NODE;
	}

3480
	if (__exist_node_summaries(sbi))
C
Chao Yu 已提交
3481
		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3482
					NR_CURSEG_TYPE - type, META_CP, true);
3483

3484 3485 3486 3487 3488 3489
	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

3490 3491 3492 3493 3494
	/* sanity check for summary blocks */
	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
		return -EINVAL;

J
Jaegeuk Kim 已提交
3495 3496 3497 3498 3499 3500 3501 3502 3503 3504 3505 3506
	return 0;
}

static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

C
Chao Yu 已提交
3507
	page = f2fs_grab_meta_page(sbi, blkaddr++);
J
Jaegeuk Kim 已提交
3508
	kaddr = (unsigned char *)page_address(page);
3509
	memset(kaddr, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
3510 3511 3512

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3513
	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3514 3515 3516 3517
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3518
	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3519 3520 3521 3522 3523 3524 3525 3526 3527 3528 3529 3530 3531
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;
		seg_i = CURSEG_I(sbi, i);
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
C
Chao Yu 已提交
3532
				page = f2fs_grab_meta_page(sbi, blkaddr++);
J
Jaegeuk Kim 已提交
3533
				kaddr = (unsigned char *)page_address(page);
3534
				memset(kaddr, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
3535 3536 3537 3538 3539 3540
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

3541
			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
J
Jaegeuk Kim 已提交
3542 3543 3544
							SUM_FOOTER_SIZE)
				continue;

3545
			set_page_dirty(page);
J
Jaegeuk Kim 已提交
3546 3547 3548 3549
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
3550 3551
	if (page) {
		set_page_dirty(page);
J
Jaegeuk Kim 已提交
3552
		f2fs_put_page(page, 1);
3553
	}
J
Jaegeuk Kim 已提交
3554 3555 3556 3557 3558 3559 3560 3561 3562 3563 3564
}

static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int i, end;
	if (IS_DATASEG(type))
		end = type + NR_CURSEG_DATA_TYPE;
	else
		end = type + NR_CURSEG_NODE_TYPE;

3565 3566
	for (i = type; i < end; i++)
		write_current_sum_page(sbi, i, blkaddr + (i - type));
J
Jaegeuk Kim 已提交
3567 3568
}

C
Chao Yu 已提交
3569
void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
J
Jaegeuk Kim 已提交
3570
{
3571
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
J
Jaegeuk Kim 已提交
3572 3573 3574 3575 3576
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

C
Chao Yu 已提交
3577
void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
J
Jaegeuk Kim 已提交
3578
{
3579
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
J
Jaegeuk Kim 已提交
3580 3581
}

C
Chao Yu 已提交
3582
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
J
Jaegeuk Kim 已提交
3583 3584 3585 3586 3587
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
3588 3589
		for (i = 0; i < nats_in_cursum(journal); i++) {
			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
J
Jaegeuk Kim 已提交
3590 3591
				return i;
		}
3592 3593
		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
			return update_nats_in_cursum(journal, 1);
J
Jaegeuk Kim 已提交
3594
	} else if (type == SIT_JOURNAL) {
3595 3596
		for (i = 0; i < sits_in_cursum(journal); i++)
			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
J
Jaegeuk Kim 已提交
3597
				return i;
3598 3599
		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
			return update_sits_in_cursum(journal, 1);
J
Jaegeuk Kim 已提交
3600 3601 3602 3603 3604 3605 3606
	}
	return -1;
}

static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
3607
	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
J
Jaegeuk Kim 已提交
3608 3609 3610 3611 3612 3613
}

static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
3614
	struct page *page;
J
Jaegeuk Kim 已提交
3615 3616 3617 3618 3619
	pgoff_t src_off, dst_off;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

C
Chao Yu 已提交
3620
	page = f2fs_grab_meta_page(sbi, dst_off);
3621
	seg_info_to_sit_page(sbi, page, start);
J
Jaegeuk Kim 已提交
3622

3623
	set_page_dirty(page);
J
Jaegeuk Kim 已提交
3624 3625
	set_to_next_sit(sit_i, start);

3626
	return page;
J
Jaegeuk Kim 已提交
3627 3628
}

3629 3630 3631
static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
3632
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670 3671 3672 3673 3674 3675 3676 3677 3678 3679 3680 3681 3682 3683 3684 3685 3686

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	list_move_tail(&ses->set_list, &next->set_list);
}

static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

3687
	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3688 3689 3690 3691
		add_sit_entry(segno, set_list);
}

static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3692 3693
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3694
	struct f2fs_journal *journal = curseg->journal;
J
Jaegeuk Kim 已提交
3695 3696
	int i;

3697
	down_write(&curseg->journal_rwsem);
3698
	for (i = 0; i < sits_in_cursum(journal); i++) {
3699 3700 3701
		unsigned int segno;
		bool dirtied;

3702
		segno = le32_to_cpu(segno_in_journal(journal, i));
3703 3704 3705 3706
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
J
Jaegeuk Kim 已提交
3707
	}
3708
	update_sits_in_cursum(journal, -i);
3709
	up_write(&curseg->journal_rwsem);
J
Jaegeuk Kim 已提交
3710 3711
}

J
Jaegeuk Kim 已提交
3712
/*
J
Jaegeuk Kim 已提交
3713 3714 3715
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
C
Chao Yu 已提交
3716
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
3717 3718 3719 3720
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3721
	struct f2fs_journal *journal = curseg->journal;
3722 3723 3724
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
3725
	struct seg_entry *se;
J
Jaegeuk Kim 已提交
3726

3727
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3728

3729 3730 3731
	if (!sit_i->dirty_sentries)
		goto out;

J
Jaegeuk Kim 已提交
3732
	/*
3733 3734
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
J
Jaegeuk Kim 已提交
3735
	 */
3736
	add_sits_in_set(sbi);
J
Jaegeuk Kim 已提交
3737

3738 3739 3740 3741 3742
	/*
	 * if there are no enough space in journal to store dirty sit
	 * entries, remove all entries from journal and add and account
	 * them in sit entry set.
	 */
3743
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3744
		remove_sits_in_journal(sbi);
3745

3746 3747 3748 3749 3750 3751
	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
J
Jaegeuk Kim 已提交
3752
		struct page *page = NULL;
3753 3754 3755
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3756
						(unsigned long)MAIN_SEGS(sbi));
3757 3758 3759
		unsigned int segno = start_segno;

		if (to_journal &&
3760
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3761 3762
			to_journal = false;

3763 3764 3765
		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
3766 3767
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
J
Jaegeuk Kim 已提交
3768 3769
		}

3770 3771 3772
		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;
3773 3774

			se = get_seg_entry(sbi, segno);
3775 3776 3777 3778 3779
#ifdef CONFIG_F2FS_CHECK_FS
			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
						SIT_VBLOCK_MAP_SIZE))
				f2fs_bug_on(sbi, 1);
#endif
3780 3781

			/* add discard candidates */
3782
			if (!(cpc->reason & CP_DISCARD)) {
3783
				cpc->trim_start = segno;
3784
				add_discard_addrs(sbi, cpc, false);
3785
			}
3786 3787

			if (to_journal) {
C
Chao Yu 已提交
3788
				offset = f2fs_lookup_journal_in_cursum(journal,
3789 3790
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
3791
				segno_in_journal(journal, offset) =
3792 3793
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
3794
					&sit_in_journal(journal, offset));
3795 3796
				check_block_count(sbi, segno,
					&sit_in_journal(journal, offset));
3797 3798 3799 3800
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
3801 3802
				check_block_count(sbi, segno,
						&raw_sit->entries[sit_offset]);
3803
			}
J
Jaegeuk Kim 已提交
3804

3805 3806 3807
			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
J
Jaegeuk Kim 已提交
3808 3809
		}

3810 3811 3812
		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
3813 3814 3815 3816
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
J
Jaegeuk Kim 已提交
3817
	}
3818 3819 3820 3821

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
3822
	if (cpc->reason & CP_DISCARD) {
3823 3824
		__u64 trim_start = cpc->trim_start;

3825
		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3826
			add_discard_addrs(sbi, cpc, false);
3827 3828

		cpc->trim_start = trim_start;
3829
	}
3830
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3831 3832 3833 3834 3835 3836 3837 3838 3839

	set_prefree_as_free_segments(sbi);
}

static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
3840
	char *src_bitmap;
J
Jaegeuk Kim 已提交
3841 3842 3843
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
C
Chao Yu 已提交
3844
	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
3845 3846 3847 3848 3849
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

3850 3851 3852 3853
	sit_i->sentries =
		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
					      MAIN_SEGS(sbi)),
			      GFP_KERNEL);
J
Jaegeuk Kim 已提交
3854 3855 3856
	if (!sit_i->sentries)
		return -ENOMEM;

3857
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
C
Chao Yu 已提交
3858 3859
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
3860 3861 3862
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

3863
	for (start = 0; start < MAIN_SEGS(sbi); start++) {
J
Jaegeuk Kim 已提交
3864
		sit_i->sentries[start].cur_valid_map
C
Chao Yu 已提交
3865
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3866
		sit_i->sentries[start].ckpt_valid_map
C
Chao Yu 已提交
3867
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3868
		if (!sit_i->sentries[start].cur_valid_map ||
3869
				!sit_i->sentries[start].ckpt_valid_map)
J
Jaegeuk Kim 已提交
3870
			return -ENOMEM;
3871

C
Chao Yu 已提交
3872 3873
#ifdef CONFIG_F2FS_CHECK_FS
		sit_i->sentries[start].cur_valid_map_mir
C
Chao Yu 已提交
3874
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
C
Chao Yu 已提交
3875 3876 3877 3878
		if (!sit_i->sentries[start].cur_valid_map_mir)
			return -ENOMEM;
#endif

3879 3880 3881 3882 3883
		sit_i->sentries[start].discard_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
							GFP_KERNEL);
		if (!sit_i->sentries[start].discard_map)
			return -ENOMEM;
J
Jaegeuk Kim 已提交
3884 3885
	}

C
Chao Yu 已提交
3886
	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3887 3888 3889
	if (!sit_i->tmp_map)
		return -ENOMEM;

3890
	if (__is_large_section(sbi)) {
3891 3892 3893 3894
		sit_i->sec_entries =
			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
						      MAIN_SECS(sbi)),
				      GFP_KERNEL);
J
Jaegeuk Kim 已提交
3895 3896 3897 3898 3899 3900 3901 3902 3903 3904 3905
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related with SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from ckeckpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

3906 3907
	sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
J
Jaegeuk Kim 已提交
3908 3909
		return -ENOMEM;

3910 3911 3912 3913 3914 3915
#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;
#endif

J
Jaegeuk Kim 已提交
3916 3917 3918 3919 3920
	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
3921
	sit_i->written_valid_blocks = 0;
J
Jaegeuk Kim 已提交
3922 3923 3924 3925
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
3926
	sit_i->mounted_time = ktime_get_real_seconds();
3927
	init_rwsem(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3928 3929 3930 3931 3932 3933 3934 3935 3936
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
C
Chao Yu 已提交
3937
	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
3938 3939 3940 3941 3942
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

3943
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
C
Chao Yu 已提交
3944
	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3945 3946 3947
	if (!free_i->free_segmap)
		return -ENOMEM;

3948
	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
C
Chao Yu 已提交
3949
	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3950 3951 3952 3953 3954 3955 3956 3957
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
3958
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
J
Jaegeuk Kim 已提交
3959 3960
	free_i->free_segments = 0;
	free_i->free_sections = 0;
3961
	spin_lock_init(&free_i->segmap_lock);
J
Jaegeuk Kim 已提交
3962 3963 3964 3965 3966
	return 0;
}

static int build_curseg(struct f2fs_sb_info *sbi)
{
N
Namjae Jeon 已提交
3967
	struct curseg_info *array;
J
Jaegeuk Kim 已提交
3968 3969
	int i;

3970 3971
	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
			     GFP_KERNEL);
J
Jaegeuk Kim 已提交
3972 3973 3974 3975 3976 3977 3978
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
C
Chao Yu 已提交
3979
		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3980 3981
		if (!array[i].sum_blk)
			return -ENOMEM;
3982
		init_rwsem(&array[i].journal_rwsem);
C
Chao Yu 已提交
3983 3984
		array[i].journal = f2fs_kzalloc(sbi,
				sizeof(struct f2fs_journal), GFP_KERNEL);
3985 3986
		if (!array[i].journal)
			return -ENOMEM;
J
Jaegeuk Kim 已提交
3987 3988 3989 3990 3991 3992
		array[i].segno = NULL_SEGNO;
		array[i].next_blkoff = 0;
	}
	return restore_curseg_summaries(sbi);
}

3993
static int build_sit_entries(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3994 3995 3996
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3997
	struct f2fs_journal *journal = curseg->journal;
3998 3999
	struct seg_entry *se;
	struct f2fs_sit_entry sit;
4000 4001 4002
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
4003
	int err = 0;
4004
	block_t total_node_blocks = 0;
J
Jaegeuk Kim 已提交
4005

4006
	do {
C
Chao Yu 已提交
4007
		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
4008
							META_SIT, true);
4009 4010 4011 4012

		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

4013
		for (; start < end && start < MAIN_SEGS(sbi); start++) {
4014 4015 4016
			struct f2fs_sit_block *sit_blk;
			struct page *page;

4017
			se = &sit_i->sentries[start];
4018
			page = get_current_sit_page(sbi, start);
4019 4020
			if (IS_ERR(page))
				return PTR_ERR(page);
4021 4022 4023
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);
4024

4025 4026 4027
			err = check_block_count(sbi, start, &sit);
			if (err)
				return err;
4028
			seg_info_from_raw_sit(se, &sit);
4029 4030
			if (IS_NODESEG(se->type))
				total_node_blocks += se->valid_blocks;
4031 4032

			/* build discard map only one time */
4033 4034 4035 4036 4037 4038 4039 4040 4041 4042
			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
				memset(se->discard_map, 0xff,
					SIT_VBLOCK_MAP_SIZE);
			} else {
				memcpy(se->discard_map,
					se->cur_valid_map,
					SIT_VBLOCK_MAP_SIZE);
				sbi->discard_blks +=
					sbi->blocks_per_seg -
					se->valid_blocks;
4043
			}
4044

4045
			if (__is_large_section(sbi))
4046 4047
				get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
J
Jaegeuk Kim 已提交
4048
		}
4049 4050
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);
4051 4052 4053 4054 4055 4056

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));
J
Jaegeuk Kim 已提交
4057 4058 4059 4060 4061 4062 4063 4064 4065
		if (start >= MAIN_SEGS(sbi)) {
			f2fs_msg(sbi->sb, KERN_ERR,
					"Wrong journal entry on segno %u",
					start);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			err = -EINVAL;
			break;
		}

4066 4067 4068 4069
		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

		old_valid_blocks = se->valid_blocks;
4070 4071
		if (IS_NODESEG(se->type))
			total_node_blocks -= old_valid_blocks;
4072

4073 4074 4075
		err = check_block_count(sbi, start, &sit);
		if (err)
			break;
4076
		seg_info_from_raw_sit(se, &sit);
4077 4078
		if (IS_NODESEG(se->type))
			total_node_blocks += se->valid_blocks;
4079

4080 4081 4082 4083 4084 4085 4086
		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
		} else {
			memcpy(se->discard_map, se->cur_valid_map,
						SIT_VBLOCK_MAP_SIZE);
			sbi->discard_blks += old_valid_blocks;
			sbi->discard_blks -= se->valid_blocks;
4087 4088
		}

4089
		if (__is_large_section(sbi)) {
4090
			get_sec_entry(sbi, start)->valid_blocks +=
C
Chao Yu 已提交
4091 4092 4093 4094
							se->valid_blocks;
			get_sec_entry(sbi, start)->valid_blocks -=
							old_valid_blocks;
		}
4095 4096
	}
	up_read(&curseg->journal_rwsem);
4097 4098 4099 4100 4101 4102 4103 4104 4105

	if (!err && total_node_blocks != valid_node_count(sbi)) {
		f2fs_msg(sbi->sb, KERN_ERR,
			"SIT is corrupted node# %u vs %u",
			total_node_blocks, valid_node_count(sbi));
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		err = -EINVAL;
	}

4106
	return err;
J
Jaegeuk Kim 已提交
4107 4108 4109 4110 4111 4112 4113
}

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

4114
	for (start = 0; start < MAIN_SEGS(sbi); start++) {
J
Jaegeuk Kim 已提交
4115 4116 4117
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
4118 4119 4120
		else
			SIT_I(sbi)->written_valid_blocks +=
						sentry->valid_blocks;
J
Jaegeuk Kim 已提交
4121 4122 4123 4124 4125 4126 4127 4128 4129 4130 4131 4132 4133
	}

	/* set use the current segments */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}

static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
4134
	unsigned int segno = 0, offset = 0;
J
Jaegeuk Kim 已提交
4135 4136
	unsigned short valid_blocks;

4137
	while (1) {
J
Jaegeuk Kim 已提交
4138
		/* find dirty segment based on free segmap */
4139 4140
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
4141 4142
			break;
		offset = segno + 1;
4143
		valid_blocks = get_valid_blocks(sbi, segno, false);
4144
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
J
Jaegeuk Kim 已提交
4145
			continue;
4146 4147 4148 4149
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
J
Jaegeuk Kim 已提交
4150 4151 4152 4153 4154 4155
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

4156
static int init_victim_secmap(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4157 4158
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4159
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
J
Jaegeuk Kim 已提交
4160

C
Chao Yu 已提交
4161
	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4162
	if (!dirty_i->victim_secmap)
J
Jaegeuk Kim 已提交
4163 4164 4165 4166 4167 4168 4169 4170 4171 4172
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
C
Chao Yu 已提交
4173 4174
	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
4175 4176 4177 4178 4179 4180
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

4181
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
J
Jaegeuk Kim 已提交
4182 4183

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
C
Chao Yu 已提交
4184 4185
		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
4186 4187 4188 4189 4190
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
4191
	return init_victim_secmap(sbi);
J
Jaegeuk Kim 已提交
4192 4193
}

J
Jaegeuk Kim 已提交
4194
/*
J
Jaegeuk Kim 已提交
4195 4196 4197 4198 4199 4200 4201
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

4202
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
4203

4204
	sit_i->min_mtime = ULLONG_MAX;
J
Jaegeuk Kim 已提交
4205

4206
	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
J
Jaegeuk Kim 已提交
4207 4208 4209 4210 4211 4212 4213 4214 4215 4216 4217
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
C
Chao Yu 已提交
4218
	sit_i->max_mtime = get_mtime(sbi, false);
4219
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
4220 4221
}

C
Chao Yu 已提交
4222
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4223 4224 4225
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
N
Namjae Jeon 已提交
4226
	struct f2fs_sm_info *sm_info;
J
Jaegeuk Kim 已提交
4227 4228
	int err;

C
Chao Yu 已提交
4229
	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4242 4243
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
J
Jaegeuk Kim 已提交
4244 4245 4246
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

4247 4248
	if (!test_opt(sbi, LFS))
		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4249
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4250
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4251
	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4252
	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
C
Chao Yu 已提交
4253
	sm_info->min_ssr_sections = reserved_sections(sbi);
J
Jaegeuk Kim 已提交
4254

4255 4256
	INIT_LIST_HEAD(&sm_info->sit_entry_set);

C
Chao Yu 已提交
4257 4258
	init_rwsem(&sm_info->curseg_lock);

4259
	if (!f2fs_readonly(sbi->sb)) {
C
Chao Yu 已提交
4260
		err = f2fs_create_flush_cmd_control(sbi);
4261
		if (err)
4262
			return err;
4263 4264
	}

4265 4266 4267 4268
	err = create_discard_cmd_control(sbi);
	if (err)
		return err;

J
Jaegeuk Kim 已提交
4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279
	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
4280 4281 4282
	err = build_sit_entries(sbi);
	if (err)
		return err;
J
Jaegeuk Kim 已提交
4283 4284 4285 4286 4287 4288 4289 4290 4291 4292 4293 4294 4295 4296 4297 4298

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}

static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
4299
	kvfree(dirty_i->dirty_segmap[dirty_type]);
J
Jaegeuk Kim 已提交
4300 4301 4302 4303
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

4304
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4305 4306
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4307
	kvfree(dirty_i->victim_secmap);
J
Jaegeuk Kim 已提交
4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318 4319 4320 4321
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

4322
	destroy_victim_secmap(sbi);
J
Jaegeuk Kim 已提交
4323
	SM_I(sbi)->dirty_info = NULL;
4324
	kvfree(dirty_i);
J
Jaegeuk Kim 已提交
4325 4326 4327 4328 4329 4330 4331 4332 4333 4334
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
4335
	for (i = 0; i < NR_CURSEG_TYPE; i++) {
4336 4337
		kvfree(array[i].sum_blk);
		kvfree(array[i].journal);
4338
	}
4339
	kvfree(array);
J
Jaegeuk Kim 已提交
4340 4341 4342 4343 4344 4345 4346 4347
}

static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
4348 4349
	kvfree(free_i->free_segmap);
	kvfree(free_i->free_secmap);
4350
	kvfree(free_i);
J
Jaegeuk Kim 已提交
4351 4352 4353 4354 4355 4356 4357 4358 4359 4360 4361
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int start;

	if (!sit_i)
		return;

	if (sit_i->sentries) {
4362
		for (start = 0; start < MAIN_SEGS(sbi); start++) {
4363
			kvfree(sit_i->sentries[start].cur_valid_map);
C
Chao Yu 已提交
4364
#ifdef CONFIG_F2FS_CHECK_FS
4365
			kvfree(sit_i->sentries[start].cur_valid_map_mir);
C
Chao Yu 已提交
4366
#endif
4367 4368
			kvfree(sit_i->sentries[start].ckpt_valid_map);
			kvfree(sit_i->sentries[start].discard_map);
J
Jaegeuk Kim 已提交
4369 4370
		}
	}
4371
	kvfree(sit_i->tmp_map);
J
Jaegeuk Kim 已提交
4372

4373 4374 4375
	kvfree(sit_i->sentries);
	kvfree(sit_i->sec_entries);
	kvfree(sit_i->dirty_sentries_bitmap);
J
Jaegeuk Kim 已提交
4376 4377

	SM_I(sbi)->sit_info = NULL;
4378
	kvfree(sit_i->sit_bitmap);
4379
#ifdef CONFIG_F2FS_CHECK_FS
4380
	kvfree(sit_i->sit_bitmap_mir);
4381
#endif
4382
	kvfree(sit_i);
J
Jaegeuk Kim 已提交
4383 4384
}

C
Chao Yu 已提交
4385
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4386 4387
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
4388

4389 4390
	if (!sm_info)
		return;
C
Chao Yu 已提交
4391
	f2fs_destroy_flush_cmd_control(sbi, true);
4392
	destroy_discard_cmd_control(sbi);
J
Jaegeuk Kim 已提交
4393 4394 4395 4396 4397
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
4398
	kvfree(sm_info);
J
Jaegeuk Kim 已提交
4399
}
4400

C
Chao Yu 已提交
4401
int __init f2fs_create_segment_manager_caches(void)
4402 4403
{
	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4404
			sizeof(struct discard_entry));
4405
	if (!discard_entry_slab)
4406 4407
		goto fail;

4408 4409 4410
	discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
			sizeof(struct discard_cmd));
	if (!discard_cmd_slab)
C
Chao Yu 已提交
4411
		goto destroy_discard_entry;
C
Chao Yu 已提交
4412

4413
	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4414
			sizeof(struct sit_entry_set));
4415
	if (!sit_entry_set_slab)
4416
		goto destroy_discard_cmd;
J
Jaegeuk Kim 已提交
4417 4418 4419 4420 4421

	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
			sizeof(struct inmem_pages));
	if (!inmem_entry_slab)
		goto destroy_sit_entry_set;
4422
	return 0;
4423

J
Jaegeuk Kim 已提交
4424 4425
destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
4426 4427
destroy_discard_cmd:
	kmem_cache_destroy(discard_cmd_slab);
C
Chao Yu 已提交
4428
destroy_discard_entry:
4429 4430 4431
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
4432 4433
}

C
Chao Yu 已提交
4434
void f2fs_destroy_segment_manager_caches(void)
4435
{
4436
	kmem_cache_destroy(sit_entry_set_slab);
4437
	kmem_cache_destroy(discard_cmd_slab);
4438
	kmem_cache_destroy(discard_entry_slab);
J
Jaegeuk Kim 已提交
4439
	kmem_cache_destroy(inmem_entry_slab);
4440
}