segment.c 112.0 KB
Newer Older
C
Chao Yu 已提交
1
// SPDX-License-Identifier: GPL-2.0
J
Jaegeuk Kim 已提交
2
/*
J
Jaegeuk Kim 已提交
3 4 5 6 7 8 9 10 11
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
12
#include <linux/prefetch.h>
13
#include <linux/kthread.h>
14
#include <linux/swap.h>
15
#include <linux/timer.h>
16
#include <linux/freezer.h>
17
#include <linux/sched/signal.h>
J
Jaegeuk Kim 已提交
18 19 20 21

#include "f2fs.h"
#include "segment.h"
#include "node.h"
22
#include "gc.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

26 27
#define __reverse_ffz(x) __reverse_ffs(~(x))

28
static struct kmem_cache *discard_entry_slab;
29
static struct kmem_cache *discard_cmd_slab;
30
static struct kmem_cache *sit_entry_set_slab;
J
Jaegeuk Kim 已提交
31
static struct kmem_cache *inmem_entry_slab;
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

48 49 50 51 52 53 54 55 56
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 *
 * Binary search for the lowest set bit: each step either skips a zero
 * half (num += half-width) or shifts the word down to keep searching in
 * the non-zero half.  Caller must ensure @word != 0 for a meaningful
 * result (same contract as the generic __ffs).
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
	if ((word & 0xffffffff00000000UL) == 0)
		num += 32;
	else
		word >>= 32;
#endif
	if ((word & 0xffff0000) == 0)
		num += 16;
	else
		word >>= 16;

	if ((word & 0xff00) == 0)
		num += 8;
	else
		word >>= 8;

	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;

	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;

	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
 * @size must be integral times of unsigned long.
 * Example:
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
 *
 * Returns the position of the first set bit at or after @offset, or
 * @size if no such bit exists.
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	/* from here on, @size counts bits remaining from the current word */
	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		/* all-zero word: nothing to find, skip the byte-reversal */
		if (*p == 0)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		/* mask off bits before @offset in the first word */
		tmp &= ~0UL >> offset;
		/* mask off bits past the end of the bitmap in the last word */
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
		if (tmp)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	/* result - size = bit index of the current word's MSB */
	return result - size + __reverse_ffs(tmp);
}

/*
 * Counterpart of __find_rev_next_bit: returns the position of the first
 * CLEAR bit at or after @offset in the byte-reversed bitmap, or @size
 * if every bit in range is set.  Masking is done by forcing out-of-range
 * bits to 1 so they can never be reported as "zero".
 */
static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
	unsigned long result = size;
	unsigned long tmp;

	if (offset >= size)
		return size;

	/* from here on, @size counts bits remaining from the current word */
	size -= (offset & ~(BITS_PER_LONG - 1));
	offset %= BITS_PER_LONG;

	while (1) {
		/* all-ones word: no zero bit here, skip the byte-reversal */
		if (*p == ~0UL)
			goto pass;

		tmp = __reverse_ulong((unsigned char *)p);

		/* force bits before @offset to 1 in the first word */
		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		/* force bits past the end of the bitmap to 1 in the last word */
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
		if (tmp != ~0UL)
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
		size -= BITS_PER_LONG;
		offset = 0;
		p++;
	}
	return result;
found:
	return result - size + __reverse_ffz(tmp);
}

C
Chao Yu 已提交
169
/*
 * Decide whether block allocation should fall back to SSR (slack space
 * recycling).  Never in LFS mode; always under urgent GC or when
 * checkpointing is disabled; otherwise only when free sections would not
 * cover the projected dirty metadata plus the reserved/min-SSR cushion.
 */
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (test_opt(sbi, LFS))
		return false;
	if (sbi->gc_mode == GC_URGENT)
		return true;
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return true;

	/* dentry sections are weighted double — presumably because dirty
	 * dentries also dirty their node blocks; TODO confirm rationale */
	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
}

C
Chao Yu 已提交
186
/*
 * Track @page as an atomic-write (in-memory) page of @inode: tag its
 * page-private, queue it on the inode's inmem list, and make sure the
 * inode itself is on the sbi-wide ATOMIC_FILE list.  Takes an extra page
 * reference that is dropped when the page is committed or revoked.
 */
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;

	f2fs_trace_pid(page);

	/* mark the page so writeback paths recognize it as atomic data */
	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);

	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
	/* inode_lock nests inside inmem_lock here — keep this ordering */
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(&fi->inmem_ilist))
		list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	mutex_unlock(&fi->inmem_lock);

	trace_f2fs_register_inmem_page(page, INMEM);
}

217 218
/*
 * Dispose of the inmem pages on @head.
 * @drop:    the pages are being discarded (not committed) — trace and
 *           clear their uptodate/cold state.
 * @recover: the pages were already written out and must be rolled back
 *           to their recorded old block address (cur->old_addr).
 * Returns 0, -EAGAIN when a page could not be revoked (lookup failed),
 * or a hard error from f2fs_get_node_info.
 */
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct inmem_pages *cur, *tmp;
	int err = 0;

	list_for_each_entry_safe(cur, tmp, head, list) {
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		lock_page(page);

		f2fs_wait_on_page_writeback(page, DATA, true);

		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
retry:
			set_new_dnode(&dn, inode, NULL, NULL, 0);
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
			if (err) {
				if (err == -ENOMEM) {
					/* transient: back off and retry */
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
				/* tell caller the transaction lost integrity */
				err = -EAGAIN;
				goto next;
			}

			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				f2fs_put_dnode(&dn);
				return err;
			}

			if (cur->old_addr == NEW_ADDR) {
				/* page was newly allocated: just drop it */
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
		/* we don't need to invalidate this in the successful status */
		if (drop || recover) {
			ClearPageUptodate(page);
			clear_cold_data(page);
		}
		set_page_private(page, 0);
		ClearPagePrivate(page);
		f2fs_put_page(page, 1);

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
	return err;
}

C
Chao Yu 已提交
284
/*
 * Drop the in-memory (atomic-write) pages of every inode on the sbi-wide
 * ATOMIC_FILE list.  When @gc_failure is set, only inodes that actually
 * recorded an atomic-write GC failure are dropped; others are skipped.
 *
 * Fix: the original jumped to "skip" past iput(), leaking the reference
 * taken by igrab() and then re-selecting the same (still first) inode on
 * the next pass, spinning forever.  Release the reference on every path
 * and rotate the visited entry to the list tail so the scan progresses.
 */
void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
{
	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
	struct inode *inode;
	struct f2fs_inode_info *fi;
next:
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
		return;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
	inode = igrab(&fi->vfs_inode);
	/* rotate so a skipped inode is not re-picked immediately */
	if (inode)
		list_move_tail(&fi->inmem_ilist, head);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

	if (inode) {
		if (gc_failure) {
			if (!fi->i_gc_failures[GC_FAILURE_ATOMIC])
				goto skip;
		}
		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
		f2fs_drop_inmem_pages(inode);
skip:
		/* always balance the igrab() above */
		iput(inode);
	}
	/* NOTE(review): with @gc_failure, inodes without failures stay on
	 * the list, so termination relies on them leaving atomic state —
	 * mainline later bounded this loop with an atomic-file count. */
	congestion_wait(BLK_RW_ASYNC, HZ/50);
	cond_resched();
	goto next;
}

C
Chao Yu 已提交
316
/*
 * Abort @inode's atomic write: revoke (drop) all of its registered
 * in-memory pages, take it off the sbi-wide ATOMIC_FILE list, and clear
 * its atomic-file state and GC-failure counter.
 */
void f2fs_drop_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);

	mutex_lock(&fi->inmem_lock);
	/* drop=true, recover=false: discard without block rollback */
	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_FILE);
	fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
	stat_dec_atomic_write(inode);
}

C
Chao Yu 已提交
334
/*
 * Invalidate a single registered atomic page of @inode: unhook its
 * inmem_pages entry, drop the bookkeeping count, and strip the page's
 * atomic-write private state.  The page must have been registered
 * (enforced by the IS_ATOMIC_WRITTEN_PAGE bug_on below).
 */
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;

	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry(cur, head, list) {
		if (cur->page == page)
			break;
	}

	/* if the loop fell through, cur is the list head, not a real entry */
	f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	f2fs_put_page(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

C
Chao Yu 已提交
364
/*
 * Write out every registered atomic page of @inode.  Pages that were
 * written get their old block address recorded (for rollback) and move
 * to a local revoke_list; on failure the committed subset is rolled
 * back and the uncommitted remainder is dropped.  Caller holds
 * fi->inmem_lock, f2fs_lock_op and i_gc_rwsem[WRITE].
 * Returns 0 on success or a negative errno (possibly -EAGAIN, see below).
 */
static int __f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
		.sbi = sbi,
		.ino = inode->i_ino,
		.type = DATA,
		.op = REQ_OP_WRITE,
		.op_flags = REQ_SYNC | REQ_PRIO,
		.io_type = FS_DATA_IO,
	};
	struct list_head revoke_list;
	bool submit_bio = false;
	int err = 0;

	INIT_LIST_HEAD(&revoke_list);

	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
		struct page *page = cur->page;

		lock_page(page);
		/* skip pages that were truncated/invalidated meanwhile */
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			set_page_dirty(page);
			f2fs_wait_on_page_writeback(page, DATA, true);
			if (clear_page_dirty_for_io(page)) {
				inode_dec_dirty_pages(inode);
				f2fs_remove_dirty_inode(inode);
			}
retry:
			fio.page = page;
			fio.old_blkaddr = NULL_ADDR;
			fio.encrypted_page = NULL;
			fio.need_lock = LOCK_DONE;
			err = f2fs_do_write_data_page(&fio);
			if (err) {
				if (err == -ENOMEM) {
					/* transient: back off and retry */
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
				unlock_page(page);
				break;
			}
			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
			submit_bio = true;
		}
		unlock_page(page);
		list_move_tail(&cur->list, &revoke_list);
	}

	if (submit_bio)
		f2fs_submit_merged_write_cond(sbi, inode, NULL, 0, DATA);

	if (err) {
		/*
		 * try to revoke all committed pages, but still we could fail
		 * due to no memory or other reason, if that happened, EAGAIN
		 * will be returned, which means in such case, transaction is
		 * already not integrity, caller should use journal to do the
		 * recovery or rewrite & commit last transaction. For other
		 * error number, revoking was done by filesystem itself.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list, false, true);

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	} else {
		/* success: just release the bookkeeping entries */
		__revoke_inmem_pages(inode, &revoke_list, false, false);
	}

	return err;
}

C
Chao Yu 已提交
442
/*
 * Public entry to commit an atomic-write transaction on @inode.
 * Serializes against GC (i_gc_rwsem[WRITE]) and checkpoint (f2fs_lock_op),
 * marks the inode as committing, and delegates the actual page writes to
 * __f2fs_commit_inmem_pages().  Returns its error code.
 */
int f2fs_commit_inmem_pages(struct inode *inode)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	int err;

	f2fs_balance_fs(sbi, true);

	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

	mutex_lock(&fi->inmem_lock);
	err = __f2fs_commit_inmem_pages(inode);

	/* transaction finished either way: leave the ATOMIC_FILE list */
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
	mutex_unlock(&fi->inmem_lock);

	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

	f2fs_unlock_op(sbi);
	up_write(&fi->i_gc_rwsem[WRITE]);

	return err;
}

J
Jaegeuk Kim 已提交
472
/*
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 * @need: also consider background balancing when cached NATs are excessive.
 */
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
{
	/* fault injection hook: simulate a checkpoint failure */
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(FAULT_CHECKPOINT);
		f2fs_stop_checkpoint(sbi, false);
	}

	/* balance_fs_bg is able to be pending */
	if (need && excess_cached_nats(sbi))
		f2fs_balance_fs_bg(sbi);

	/* nonzero means checkpoint is not ready (CP disabled & low space) */
	if (f2fs_is_checkpoint_ready(sbi))
		return;

	/*
	 * We should do GC or end up with checkpoint, if there are so many dirty
	 * dir/node pages without enough free segments.
	 */
	if (has_not_enough_free_secs(sbi, 0, 0)) {
		mutex_lock(&sbi->gc_mutex);
		/* f2fs_gc() releases gc_mutex on return */
		f2fs_gc(sbi, false, false, NULL_SEGNO);
	}
}

500 501
/*
 * Background balancing: shrink caches under memory pressure and trigger
 * a checkpoint (sync_fs) when cached/dirty metadata grows too large or
 * the checkpoint interval has elapsed.  Safe to call opportunistically.
 */
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
	/* nothing to balance while power-on recovery is running */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

	/* try to shrink extent cache when there is no enough memory */
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);

	/* check the # of cached NAT entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);

	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
	else
		f2fs_build_free_nids(sbi, false, false);

	/* busy device and nothing urgent: defer the checkpoint work */
	if (!is_idle(sbi, REQ_TIME) &&
		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
		return;

	/* checkpoint is the only way to shrink partial cached entries */
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
			excess_dirty_nodes(sbi) ||
			f2fs_time_over(sbi, CP_TIME)) {
		if (test_opt(sbi, DATA_FLUSH)) {
			struct blk_plug plug;

			/* batch the dirty-data writeback under one plug */
			blk_start_plug(&plug);
			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
			blk_finish_plug(&plug);
		}
		f2fs_sync_fs(sbi->sb, true);
		stat_inc_bg_cp_count(sbi->stat_info);
	}
}

541 542
/*
 * Issue a synchronous cache-flush (empty PREFLUSH write) to @bdev and
 * wait for its completion.  Returns the bio's completion status.
 */
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
{
	/* f2fs_bio_alloc(..., true) is a no-fail allocation here */
	struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
	int ret;

	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
	bio_set_dev(bio, bdev);
	ret = submit_bio_wait(bio);
	bio_put(bio);

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
	return ret;
}

C
Chao Yu 已提交
557
/*
 * Flush the device(s) backing @ino's data.  On a single-device fs this
 * is one flush of the main bdev; on multi-device setups only devices
 * marked dirty for @ino (FLUSH_INO tracking) are flushed.  Stops at the
 * first failing device and returns its error.
 */
static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
{
	int ret = 0;
	int i;

	if (!sbi->s_ndevs)
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);

	for (i = 0; i < sbi->s_ndevs; i++) {
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
	}
	return ret;
}

575
/*
 * Flush-merge kthread: batches concurrent flush requests.  Waiters queue
 * flush_cmds on the lockless fcc->issue_list; this thread grabs the whole
 * list, issues ONE device flush on behalf of all of them, and completes
 * every queued command with that single result.
 */
static int issue_flush_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	wait_queue_head_t *q = &fcc->flush_wait_queue;
repeat:
	if (kthread_should_stop())
		return 0;

	sb_start_intwrite(sbi->sb);

	if (!llist_empty(&fcc->issue_list)) {
		struct flush_cmd *cmd, *next;
		int ret;

		/* take ownership of all pending commands, oldest first */
		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		/* one flush serves the whole batch */
		ret = submit_flush_wait(sbi, cmd->ino);
		atomic_inc(&fcc->issued_flush);

		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
			cmd->ret = ret;
			complete(&cmd->wait);
		}
		fcc->dispatch_list = NULL;
	}

	sb_end_intwrite(sbi->sb);

	wait_event_interruptible(*q,
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
	goto repeat;
}

C
Chao Yu 已提交
613
/*
 * Flush device cache(s) on behalf of @ino.  Fast paths: no-op under
 * NOBARRIER; direct synchronous flush when FLUSH_MERGE is off, when this
 * is the only in-flight flush, or on multi-device filesystems.  Otherwise
 * the request is queued for the flush-merge kthread and this caller
 * sleeps until the batched flush completes.
 */
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
	struct flush_cmd cmd;
	int ret;

	if (test_opt(sbi, NOBARRIER))
		return 0;

	if (!test_opt(sbi, FLUSH_MERGE)) {
		ret = submit_flush_wait(sbi, ino);
		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	/* no contention (we are the first issuer) or multi-device: go direct */
	if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
		ret = submit_flush_wait(sbi, ino);
		atomic_dec(&fcc->issing_flush);

		atomic_inc(&fcc->issued_flush);
		return ret;
	}

	cmd.ino = ino;
	init_completion(&cmd.wait);

	llist_add(&cmd.llnode, &fcc->issue_list);

	/* update issue_list before we wake up issue_flush thread */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
		wake_up(&fcc->flush_wait_queue);

	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
		atomic_dec(&fcc->issing_flush);
	} else {
		/* flush thread is gone: drain and service the queue ourselves */
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			/* someone else grabbed our cmd; wait for their result */
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->issing_flush);
		} else {
			struct flush_cmd *tmp, *next;

			ret = submit_flush_wait(sbi, ino);

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->issing_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
	}

	return cmd.ret;
}

C
Chao Yu 已提交
677
/*
 * Allocate (if needed) the flush_cmd_control for @sbi and, when
 * FLUSH_MERGE is enabled, start the flush-merge kthread.  Reuses an
 * existing fcc_info on remount.  Returns 0 or a negative errno.
 */
int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
		if (fcc->f2fs_issue_flush)
			return err;
		/* structure exists but thread was stopped: restart it */
		goto init_thread;
	}

	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
	if (!fcc)
		return -ENOMEM;
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->issing_flush, 0);
	init_waitqueue_head(&fcc->flush_wait_queue);
	init_llist_head(&fcc->issue_list);
	SM_I(sbi)->fcc_info = fcc;
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

init_thread:
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
		return err;
	}

	return err;
}

C
Chao Yu 已提交
714
/*
 * Stop the flush-merge kthread (if running) and, when @free is set,
 * release the flush_cmd_control itself.  @free=false keeps the structure
 * for later reuse by f2fs_create_flush_cmd_control().
 */
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
{
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;

	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		/* clear the pointer first so issuers stop queueing to it */
		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
		SM_I(sbi)->fcc_info = NULL;
	}
}

730 731 732 733 734 735 736 737 738 739 740 741 742 743 744 745 746 747 748 749 750 751
/*
 * Flush the write cache of every secondary device marked dirty in
 * sbi->dirty_device, clearing each bit once its flush succeeds.
 * Device 0 is skipped (handled by the regular flush paths).
 * Returns 0 or the first flush error.
 */
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!sbi->s_ndevs)
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

J
Jaegeuk Kim 已提交
752 753 754 755 756 757 758 759 760 761 762 763 764 765
/*
 * Add @segno to the dirty segmap of @dirty_type, and — for the generic
 * DIRTY type — also to the per-segment-type map, updating nr_dirty
 * counters.  Current (open) segments are never tracked as dirty.
 * Caller must hold dirty_i->seglist_lock.
 */
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		/* a seg_entry type >= DIRTY indicates corrupted metadata */
		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
	}
}

/*
 * Remove @segno from the dirty segmap of @dirty_type (and, for DIRTY,
 * from its per-segment-type map), updating nr_dirty counters.  When the
 * segment has no valid blocks left it is also cleared from the GC victim
 * section map.  Caller must hold dirty_i->seglist_lock.
 */
static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;

		/* section-granularity check: true == use section blocks */
		if (get_valid_blocks(sbi, segno, true) == 0)
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
						dirty_i->victim_secmap);
	}
}

J
Jaegeuk Kim 已提交
798
/*
 * Should not occur error such as -ENOMEM.
 * Adding dirty entry into seglist is not critical operation.
 * If a given segment is one of current working segments, it won't be added.
 *
 * Classifies @segno after a block-count change: fully empty segments go
 * to PRE (prefree), partially valid ones to DIRTY, and fully valid ones
 * are taken off the dirty list (recovery with SSR relies on the latter).
 */
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks, ckpt_valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

	valid_blocks = get_valid_blocks(sbi, segno, false);
	ckpt_valid_blocks = get_ckpt_valid_blocks(sbi, segno);

	/* with checkpointing disabled, only segments fully valid at the
	 * last checkpoint may become prefree (their blocks are stable) */
	if (valid_blocks == 0 && (!is_sbi_flag_set(sbi, SBI_CP_DISABLED) ||
				ckpt_valid_blocks == sbi->blocks_per_seg)) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

D
Daniel Rosenberg 已提交
830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889
/*
 * Walk the DIRTY segment map and reclassify every segment that no longer
 * holds valid blocks (and is not a currently-open segment) as PRE(free).
 * Takes dirty_i->seglist_lock internally.
 */
void f2fs_dirty_to_prefree(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty = DIRTY_I(sbi);
	unsigned int seg;

	mutex_lock(&dirty->seglist_lock);
	for_each_set_bit(seg, dirty->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, seg, false) ||
						IS_CURSEG(sbi, seg))
			continue;
		__locate_dirty_segment(sbi, seg, PRE);
		__remove_dirty_segment(sbi, seg, DIRTY);
	}
	mutex_unlock(&dirty->seglist_lock);
}

/*
 * Check whether checkpointing can stay disabled: sum the unwritten
 * "holes" (free blocks) inside dirty DATA and NODE segments and compare
 * each against the overprovisioned block budget.  Returns -EAGAIN when
 * either hole exceeds the budget (a checkpoint is required), else 0.
 */
int f2fs_disable_cp_again(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	block_t ovp = overprovision_segments(sbi) << sbi->log_blocks_per_seg;
	block_t holes[2] = {0, 0};	/* DATA and NODE */
	struct seg_entry *se;
	unsigned int segno;

	mutex_lock(&dirty_i->seglist_lock);
	for_each_set_bit(segno, dirty_i->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		se = get_seg_entry(sbi, segno);
		if (IS_NODESEG(se->type))
			holes[NODE] += sbi->blocks_per_seg - se->valid_blocks;
		else
			holes[DATA] += sbi->blocks_per_seg - se->valid_blocks;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	if (holes[DATA] > ovp || holes[NODE] > ovp)
		return -EAGAIN;
	return 0;
}

/*
 * Find a dirty segment that holds no valid blocks now AND held none at
 * the last checkpoint — i.e. one that is safe to reuse while checkpoints
 * are disabled (SBI_CP_DISABLED).  Returns its number, or NULL_SEGNO.
 */
static unsigned int get_free_segment(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty = DIRTY_I(sbi);
	unsigned int found = NULL_SEGNO;
	unsigned int seg = 0;

	mutex_lock(&dirty->seglist_lock);
	for_each_set_bit(seg, dirty->dirty_segmap[DIRTY], MAIN_SEGS(sbi)) {
		if (get_valid_blocks(sbi, seg, false))
			continue;
		if (get_ckpt_valid_blocks(sbi, seg))
			continue;
		found = seg;
		break;
	}
	mutex_unlock(&dirty->seglist_lock);
	return found;
}

890
/*
 * Allocate and initialize a discard command covering @len blocks at
 * logical block @lstart (physical @start on @bdev), and queue it on the
 * pending list whose index is derived from the command length.
 * Caller is expected to hold dcc->cmd_lock — TODO confirm against callers.
 */
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc;

	f2fs_bug_on(sbi, !len);

	/* pending lists are bucketed by command size */
	pend_list = &dcc->pend_list[plist_idx(len)];

	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
	INIT_LIST_HEAD(&dc->list);
	dc->bdev = bdev;
	dc->lstart = lstart;
	dc->start = start;
	dc->len = len;
	dc->ref = 0;
	dc->state = D_PREP;
	dc->issuing = 0;
	dc->error = 0;
	init_completion(&dc->wait);
	list_add_tail(&dc->list, pend_list);
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
	atomic_inc(&dcc->discard_cmd_cnt);
	dcc->undiscard_blks += len;

	return dc;
}

/*
 * Create a discard command (queued on the size-bucketed pending list)
 * and additionally link it into the discard rb-tree at the position
 * described by @parent/@p, keeping the cached-leftmost hint up to date.
 */
static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p,
				bool leftmost)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *cmd =
		__create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&cmd->rb_node, parent, p);
	rb_insert_color_cached(&cmd->rb_node, &dcc->root, leftmost);

	return cmd;
}

939 940
/*
 * Unlink @dc from every structure that tracks it (pending/wait list,
 * rb-tree, undiscard accounting) and free it.  For completed commands,
 * the in-flight issue counter is reduced by however many issues this
 * command accounted for.
 */
static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
{
	if (dc->state == D_DONE)
		atomic_sub(dc->issuing, &dcc->issing_discard);

	list_del(&dc->list);
	rb_erase_cached(&dc->rb_node, &dcc->root);
	dcc->undiscard_blks -= dc->len;

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

/*
 * Tear down @dc unless bios are still referencing it (bio_ref != 0, in
 * which case removal is deferred to the endio path).  -EOPNOTSUPP from
 * the device is treated as success (discard simply unsupported); any
 * other error is logged rate-limited before the command is detached.
 */
static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	unsigned long flags;

	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

	f2fs_bug_on(sbi, dc->ref);

	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;

	if (dc->error)
		printk_ratelimited(
			"%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
	__detach_discard_cmd(dcc, dc);
}

981 982 983
/*
 * Bio completion handler for discard commands.  Records the bio status
 * on the command and, once the last outstanding bio of a submitted
 * command finishes, marks it D_DONE and wakes all waiters.
 * Runs in bio completion context, hence the irqsave locking.
 */
static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
	unsigned long flags;

	dc->error = blk_status_to_errno(bio->bi_status);

	spin_lock_irqsave(&dc->lock, flags);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
	bio_put(bio);
}

W
Wei Yongjun 已提交
998
/*
 * Debug check (CONFIG_F2FS_CHECK_FS builds only): verify that no block
 * in [start, end) is still marked valid in its segment's SIT bitmap,
 * i.e. the range about to be discarded really is free.  Compiles to a
 * no-op in production builds.
 */
static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	block_t blk = start;

	while (blk < end) {
		unsigned int segno = GET_SEGNO(sbi, blk);
		struct seg_entry *se = get_seg_entry(sbi, segno);
		unsigned long *bitmap = (unsigned long *)(se->cur_valid_map);
		unsigned long pos = GET_BLKOFF_FROM_SEG0(sbi, blk);
		unsigned long limit;

		/* clamp the scan when @end falls inside this segment */
		if (end < START_BLOCK(sbi, segno + 1))
			limit = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			limit = sbi->blocks_per_seg;

		/* no valid bit may exist inside the discarded range */
		f2fs_bug_on(sbi,
			__find_rev_next_bit(bitmap, limit, pos) != limit);

		blk = START_BLOCK(sbi, segno + 1);
	}
#endif
}

1025 1026 1027 1028 1029 1030 1031
/*
 * Fill @dpolicy for the given @discard_type: request limit, issue
 * intervals, io-awareness and minimum discard granularity.
 */
static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
C
Chao Yu 已提交
1032
	dpolicy->ordered = false;
1033 1034 1035 1036 1037 1038 1039
	dpolicy->granularity = granularity;

	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
	dpolicy->io_aware_gran = MAX_PLIST_NUM;

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1040
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1041 1042
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = true;
1043
		dpolicy->sync = false;
C
Chao Yu 已提交
1044
		dpolicy->ordered = true;
1045 1046 1047 1048 1049 1050
		/* high space utilization: discard more aggressively */
		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
			dpolicy->granularity = 1;
			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
1051
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
1052 1053 1054 1055 1056
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
1057
		dpolicy->max_requests = UINT_MAX;
1058 1059 1060 1061
		dpolicy->io_aware = false;
	}
}

1062 1063 1064
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
1065
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
1066
/*
 * Issue the prepared discard command @dc, splitting it into bios of at
 * most max_discard_blocks each and moving it to the wait list.  Stops
 * early once *issued reaches dpolicy->max_requests; any unsubmitted
 * tail is re-inserted into the discard tree.  Returns 0 or a negative
 * errno from bio allocation / fault injection.
 */
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1067
						struct discard_policy *dpolicy,
1068 1069
						struct discard_cmd *dc,
						unsigned int *issued)
1070
{
1071 1072 1073 1074
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1075
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1076 1077 1078
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
1079 1080
	block_t lstart, start, len, total_len;
	int err = 0;
1081 1082

	if (dc->state != D_PREP)
1083
		return 0;
1084

1085
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1086
		return 0;
1087

1088 1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112
	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	/* dc->len becomes the number of blocks actually submitted */
	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

1113 1114 1115 1116 1117
		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
1118 1119 1120 1121
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
1122
submit:
1123
		if (err) {
1124
			spin_lock_irqsave(&dc->lock, flags);
1125
			if (dc->state == D_PARTIAL)
1126 1127 1128
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

1129 1130
			break;
		}
1131

1132
		f2fs_bug_on(sbi, !bio);
1133

1134 1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);
1145

1146 1147 1148
		atomic_inc(&dcc->issing_discard);
		dc->issuing++;
		list_move_tail(&dc->list, wait_list);
C
Chao Yu 已提交
1149

1150 1151
		/* sanity check on discard range */
		__check_sit_bitmap(sbi, start, start + len);
1152

1153 1154 1155 1156 1157 1158 1159 1160
		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);
1161 1162 1163 1164 1165

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
1166
	}
1167

1168
	/* re-queue the portion we did not submit */
	if (!err && len)
1169
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
1170
	return err;
1171 1172
}

1173 1174 1175 1176 1177
/*
 * Allocate a discard_cmd for [lstart, lstart + len) and insert it into
 * dcc->root; insert_p/insert_parent may supply a precomputed rb slot.
 * NOTE(review): presumably called with dcc->cmd_lock held - confirm
 * against callers.
 */
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
1178
{
1179
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1180
	struct rb_node **p;
1181 1182
	struct rb_node *parent = NULL;
	struct discard_cmd *dc = NULL;
C
Chao Yu 已提交
1183
	bool leftmost = true;
1184 1185 1186 1187 1188 1189

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}
1190

C
Chao Yu 已提交
1191 1192
	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent,
							lstart, &leftmost);
1193
do_insert:
C
Chao Yu 已提交
1194 1195
	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent,
								p, leftmost);
1196 1197
	if (!dc)
		return NULL;
1198

1199
	return dc;
1200 1201
}

C
Chao Yu 已提交
1202 1203 1204 1205 1206 1207
/* move @dc to the pending-list bucket matching its current length */
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

1208 1209 1210
/*
 * Remove block @blkaddr from pending discard command @dc, trimming or
 * splitting the command as needed so the block is no longer covered.
 */
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
C
Chao Yu 已提交
1211
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1212 1213
	struct discard_info di = dc->di;
	bool modified = false;
1214

1215
	/* done already, or single-block command: just drop it */
	if (dc->state == D_DONE || dc->len == 1) {
1216 1217 1218 1219
		__remove_discard_cmd(sbi, dc);
		return;
	}

C
Chao Yu 已提交
1220 1221
	dcc->undiscard_blks -= di.len;

1222
	/* keep the part in front of blkaddr */
	if (blkaddr > di.lstart) {
1223
		dc->len = blkaddr - dc->lstart;
C
Chao Yu 已提交
1224
		dcc->undiscard_blks += dc->len;
C
Chao Yu 已提交
1225
		__relocate_discard_cmd(dcc, dc);
1226 1227 1228 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238
		modified = true;
	}

	/* keep the part behind blkaddr */
	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			/* front part kept in dc; insert a new cmd for the tail */
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
C
Chao Yu 已提交
1239
			dcc->undiscard_blks += dc->len;
C
Chao Yu 已提交
1240
			__relocate_discard_cmd(dcc, dc);
1241
		}
1242 1243 1244
	}
}

1245 1246 1247
/*
 * Merge the discard range [lstart, lstart + len) into the discard rb
 * tree: extend mergeable prepared neighbours (bounded by
 * max_discard_blocks), otherwise insert new commands for the gaps not
 * already covered.
 */
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
C
Chao Yu 已提交
1248
{
1249
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1250 1251 1252 1253
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
1254 1255 1256
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1257
	block_t end = lstart + len;
C
Chao Yu 已提交
1258

C
Chao Yu 已提交
1259
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1260 1261 1262
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
C
Chao Yu 已提交
1263
					&insert_p, &insert_parent, true, NULL);
1264 1265 1266 1267 1268 1269 1270 1271
	if (dc)
		prev_dc = dc;

	/* no command at or before lstart: seed di with the leading gap */
	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
C
Chao Yu 已提交
1272
	}
1273

1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297
	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		/* advance di to the gap after prev_dc, clipped to [lstart, end) */
		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
1298 1299
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
1300
			prev_dc->di.len += di.len;
C
Chao Yu 已提交
1301
			dcc->undiscard_blks += di.len;
C
Chao Yu 已提交
1302
			__relocate_discard_cmd(dcc, prev_dc);
1303 1304 1305 1306 1307 1308 1309
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
1310 1311
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
1312 1313 1314
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
C
Chao Yu 已提交
1315
			dcc->undiscard_blks += di.len;
C
Chao Yu 已提交
1316
			__relocate_discard_cmd(dcc, next_dc);
1317 1318 1319
			/* merged on both sides: the back-merge target is redundant */
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
1320
		}
1321

1322
		if (!merged) {
1323 1324
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
1325
		}
1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340
 next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

/*
 * Translate the fs-wide address to a per-device address and record the
 * range in the discard tree under cmd_lock; actual issuing is deferred.
 */
static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

C
Chao Yu 已提交
1341
	trace_f2fs_queue_discard(bdev, blkstart, blklen);
1342 1343 1344 1345 1346 1347

	if (sbi->s_ndevs) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
1348
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1349
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1350
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1351 1352 1353
	return 0;
}

C
Chao Yu 已提交
1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370
/*
 * Issue prepared discards in LBA order starting from dcc->next_pos so
 * repeated passes round-robin across the address space.  Returns the
 * number issued, or -1 if nothing was issued because IO was not idle.
 */
static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
C
Chao Yu 已提交
1371
					&insert_p, &insert_parent, true, NULL);
C
Chao Yu 已提交
1372 1373 1374 1375 1376 1377 1378
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
1379
		int err = 0;
C
Chao Yu 已提交
1380 1381 1382 1383

		if (dc->state != D_PREP)
			goto next;

1384
		if (dpolicy->io_aware && !is_idle(sbi, DISCARD_TIME)) {
C
Chao Yu 已提交
1385 1386 1387 1388 1389
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
1390
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
C
Chao Yu 已提交
1391

1392
		if (issued >= dpolicy->max_requests)
C
Chao Yu 已提交
1393 1394 1395
			break;
next:
		node = rb_next(&dc->rb_node);
1396 1397
		if (err)
			__remove_discard_cmd(sbi, dc);
C
Chao Yu 已提交
1398 1399 1400 1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412 1413
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	/* reached the end of the tree: restart from the beginning next time */
	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

C
Chao Yu 已提交
1414 1415
/*
 * Walk the pending lists from the largest-granularity bucket down and
 * submit discard commands according to @dpolicy.  Returns the number
 * issued, or -1 if nothing was issued because IO was not idle.
 */
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
C
Chao Yu 已提交
1416 1417 1418 1419 1420
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
1421
	int i, issued = 0;
1422
	bool io_interrupted = false;
C
Chao Yu 已提交
1423

C
Chao Yu 已提交
1424 1425 1426
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (i + 1 < dpolicy->granularity)
			break;
C
Chao Yu 已提交
1427 1428 1429 1430

		/* ordered policies switch to LBA-ordered issuing for small cmds */
		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
			return __issue_discard_cmd_orderly(sbi, dpolicy);

C
Chao Yu 已提交
1431
		pend_list = &dcc->pend_list[i];
1432 1433

		mutex_lock(&dcc->cmd_lock);
1434 1435
		if (list_empty(pend_list))
			goto next;
1436 1437 1438
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));
1439
		blk_start_plug(&plug);
C
Chao Yu 已提交
1440 1441 1442
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

C
Chao Yu 已提交
1443
			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
1444
						!is_idle(sbi, DISCARD_TIME)) {
1445
				io_interrupted = true;
1446
				break;
1447
			}
1448

1449
			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
1450

1451
			if (issued >= dpolicy->max_requests)
1452
				break;
C
Chao Yu 已提交
1453
		}
1454
		blk_finish_plug(&plug);
1455
next:
1456 1457
		mutex_unlock(&dcc->cmd_lock);

1458
		if (issued >= dpolicy->max_requests || io_interrupted)
1459
			break;
C
Chao Yu 已提交
1460
	}
1461

1462 1463 1464
	if (!issued && io_interrupted)
		issued = -1;

1465 1466 1467
	return issued;
}

1468
/* drop every pending (not yet issued) discard command; true if any dropped */
static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1469 1470 1471 1472 1473
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
1474
	bool dropped = false;
1475 1476 1477 1478 1479 1480 1481

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
1482
			dropped = true;
1483 1484 1485
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1486 1487

	return dropped;
C
Chao Yu 已提交
1488 1489
}

C
Chao Yu 已提交
1490
/* public wrapper: drop all pending discard commands */
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1491 1492 1493 1494
{
	__drop_discard_cmd(sbi);
}

1495
/*
 * Wait for one in-flight discard command to complete and drop our
 * reference; the command is freed once the last reference is gone.
 * Returns the number of blocks discarded, or 0 on error.
 */
static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1496 1497 1498
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1499
	unsigned int len = 0;
C
Chao Yu 已提交
1500 1501 1502 1503 1504

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
1505 1506 1507
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
C
Chao Yu 已提交
1508
		__remove_discard_cmd(sbi, dc);
1509
	}
C
Chao Yu 已提交
1510
	mutex_unlock(&dcc->cmd_lock);
1511 1512

	return len;
C
Chao Yu 已提交
1513 1514
}

1515
/*
 * Wait for issued discard commands overlapping [start, end) whose
 * length is at least dpolicy->granularity; returns total blocks trimmed.
 */
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1516 1517
						struct discard_policy *dpolicy,
						block_t start, block_t end)
C
Chao Yu 已提交
1518 1519
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1520 1521
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
C
Chao Yu 已提交
1522
	struct discard_cmd *dc, *tmp;
1523
	bool need_wait;
1524
	unsigned int trimmed = 0;
1525 1526 1527

next:
	need_wait = false;
C
Chao Yu 已提交
1528 1529 1530

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(dc, tmp, wait_list, list) {
1531 1532
		if (dc->lstart + dc->len <= start || end <= dc->lstart)
			continue;
C
Chao Yu 已提交
1533
		if (dc->len < dpolicy->granularity)
1534
			continue;
C
Chao Yu 已提交
1535
		if (dc->state == D_DONE && !dc->ref) {
C
Chao Yu 已提交
1536
			wait_for_completion_io(&dc->wait);
1537 1538
			if (!dc->error)
				trimmed += dc->len;
C
Chao Yu 已提交
1539
			__remove_discard_cmd(sbi, dc);
1540 1541 1542 1543
		} else {
			/* still in flight: pin it and wait outside the lock */
			dc->ref++;
			need_wait = true;
			break;
C
Chao Yu 已提交
1544 1545 1546
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1547 1548

	if (need_wait) {
1549
		trimmed += __wait_one_discard_bio(sbi, dc);
1550 1551
		goto next;
	}
1552 1553

	return trimmed;
C
Chao Yu 已提交
1554 1555
}

1556
/* wait for discards; a NULL @dpolicy means wait for all (fstrim + umount) */
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1557
						struct discard_policy *dpolicy)
1558
{
1559
	struct discard_policy dp;
1560
	unsigned int discard_blks;
1561

1562 1563
	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1564 1565

	/* wait all */
1566
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1567
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1568
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1569 1570 1571
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
1572 1573
}

1574
/* This should be covered by global mutex, &sit_i->sentry_lock */
W
Wei Yongjun 已提交
1575
/* before reusing @blkaddr, punch it out of or wait on any covering discard */
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1576 1577 1578
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
1579
	bool need_wait = false;
1580 1581

	mutex_lock(&dcc->cmd_lock);
C
Chao Yu 已提交
1582 1583
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
							NULL, blkaddr);
1584
	if (dc) {
1585 1586 1587 1588 1589 1590
		if (dc->state == D_PREP) {
			/* not yet issued: just remove the block from the cmd */
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
C
Chao Yu 已提交
1591
	}
C
Chao Yu 已提交
1592
	mutex_unlock(&dcc->cmd_lock);
1593

C
Chao Yu 已提交
1594 1595
	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
C
Chao Yu 已提交
1596 1597
}

C
Chao Yu 已提交
1598
/* stop the background discard kthread, if it is running */
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1599 1600 1601 1602 1603 1604 1605 1606
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		/* clear the pointer first so nobody wakes a dying thread */
		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
1607
	}
C
Chao Yu 已提交
1608 1609
}

1610
/* This comes from f2fs_put_super */
1611
/*
 * Issue and drain all discards at umount time; returns true if any
 * unissued command had to be dropped.
 */
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
1612 1613
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1614
	struct discard_policy dpolicy;
1615
	bool dropped;
1616

1617 1618
	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
C
Chao Yu 已提交
1619
	__issue_discard_cmd(sbi, &dpolicy);
1620 1621
	dropped = __drop_discard_cmd(sbi);

1622 1623
	/* just to make sure there is no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);
1624 1625

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1626
	return dropped;
1627 1628
}

1629 1630 1631 1632 1633
/*
 * Background kthread: periodically issues pending discards with a
 * policy refreshed each round, sleeping adaptively between rounds.
 */
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
C
Chao Yu 已提交
1634
	struct discard_policy dpolicy;
1635 1636
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
	int issued;
1637

1638
	set_freezable();
1639

1640
	do {
1641
		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
C
Chao Yu 已提交
1642 1643
					dcc->discard_granularity);

1644 1645 1646 1647
		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));
S
Sheng Yong 已提交
1648 1649 1650 1651

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

1652 1653
		if (try_to_freeze())
			continue;
1654 1655
		if (f2fs_readonly(sbi->sb))
			continue;
1656 1657
		if (kthread_should_stop())
			return 0;
1658 1659 1660 1661
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}
1662

1663
		/* urgent GC mode: free space as fast as possible */
		if (sbi->gc_mode == GC_URGENT)
1664
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1665

1666 1667
		sb_start_intwrite(sbi->sb);

C
Chao Yu 已提交
1668
		issued = __issue_discard_cmd(sbi, &dpolicy);
1669
		if (issued > 0) {
C
Chao Yu 已提交
1670 1671
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
1672
		} else if (issued == -1){
1673 1674
			/* interrupted by foreground IO: back off moderately */
			wait_ms = f2fs_time_to_wait(sbi, DISCARD_TIME);
			if (!wait_ms)
1675
				wait_ms = dpolicy.mid_interval;
1676
		} else {
C
Chao Yu 已提交
1677
			wait_ms = dpolicy.max_interval;
1678
		}
1679

1680
		sb_end_intwrite(sbi->sb);
1681 1682 1683

	} while (!kthread_should_stop());
	return 0;
1684 1685
}

1686
#ifdef CONFIG_BLK_DEV_ZONED
J
Jaegeuk Kim 已提交
1687 1688
/*
 * Discard on a zoned device: queue a regular discard for conventional
 * zones, reset the write pointer (zone-size aligned) for sequential ones.
 */
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
1689
{
1690
	sector_t sector, nr_sects;
1691
	block_t lblkstart = blkstart;
J
Jaegeuk Kim 已提交
1692 1693 1694 1695 1696 1697
	int devi = 0;

	if (sbi->s_ndevs) {
		devi = f2fs_target_device_index(sbi, blkstart);
		blkstart -= FDEV(devi).start_blk;
	}
1698 1699 1700 1701 1702 1703

	/*
	 * We need to know the type of the zone: for conventional zones,
	 * use regular discard if the drive supports it. For sequential
	 * zones, reset the zone write pointer.
	 */
J
Jaegeuk Kim 已提交
1704
	switch (get_blkz_type(sbi, bdev, blkstart)) {
1705 1706 1707 1708

	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!blk_queue_discard(bdev_get_queue(bdev)))
			return 0;
1709
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1710 1711
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		/* zone reset must cover exactly one whole zone */
		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_msg(sbi->sb, KERN_INFO,
				"(%d) %s: Unaligned discard attempted (block %x + %x)",
				devi, sbi->s_ndevs ? FDEV(devi).path: "",
				blkstart, blklen);
			return -EIO;
		}
1723
		trace_f2fs_issue_reset_zone(bdev, blkstart);
1724 1725 1726 1727 1728 1729 1730 1731 1732
		return blkdev_reset_zones(bdev, sector,
					  nr_sects, GFP_NOFS);
	default:
		/* Unknown zone type: broken device ? */
		return -EIO;
	}
}
#endif

J
Jaegeuk Kim 已提交
1733 1734 1735 1736
/* dispatch a discard for @bdev to the zoned or regular path */
static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
1737
	if (f2fs_sb_has_blkzoned(sbi->sb) &&
J
Jaegeuk Kim 已提交
1738 1739 1740
				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
1741
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
J
Jaegeuk Kim 已提交
1742 1743
}

1744
/*
 * Queue discard(s) for [blkstart, blkstart + blklen), splitting the
 * range at device boundaries, and record the blocks in the per-segment
 * discard maps.  Returns 0 or the first error from the async path.
 */
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1745 1746
				block_t blkstart, block_t blklen)
{
J
Jaegeuk Kim 已提交
1747 1748
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
1749 1750 1751
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
J
Jaegeuk Kim 已提交
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				/* crossed a device boundary: flush current run */
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}
1771 1772 1773 1774 1775 1776 1777

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}
1778

J
Jaegeuk Kim 已提交
1779 1780 1781
	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
1782 1783
}

1784 1785
/*
 * Collect small-discard candidate ranges for the segment at
 * cpc->trim_start into the dcc entry list.  With @check_only, report
 * whether any candidate exists without recording anything.
 */
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
1786
{
1787 1788
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
1789
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1790 1791
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1792
	unsigned long *discard_map = (unsigned long *)se->discard_map;
J
Jaegeuk Kim 已提交
1793
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
1794
	unsigned int start = 0, end = -1;
1795
	bool force = (cpc->reason & CP_DISCARD);
C
Chao Yu 已提交
1796
	struct discard_entry *de = NULL;
1797
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1798 1799
	int i;

1800
	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1801
		return false;
1802

1803
	if (!force) {
1804
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1805 1806
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
1807
			return false;
1808 1809
	}

1810 1811
	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
1812
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1813
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1814

1815 1816
	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
1817 1818 1819 1820 1821
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1822 1823 1824 1825
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

1826 1827 1828
		if (check_only)
			return true;

C
Chao Yu 已提交
1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839
		/* one entry per segment, allocated lazily */
		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
								GFP_F2FS_ZERO);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
1840
	}
1841
	return false;
1842 1843
}

1844 1845 1846 1847 1848 1849
/* unlink @entry from its list and return it to the slab cache */
static void release_discard_addr(struct discard_entry *entry)
{
	list_del(&entry->list);
	kmem_cache_free(discard_entry_slab, entry);
}

C
Chao Yu 已提交
1850
/* free every queued small-discard entry without issuing it */
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1851
{
1852
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1853 1854 1855
	struct discard_entry *entry, *this;

	/* drop caches */
1856 1857
	list_for_each_entry_safe(entry, this, head, list)
		release_discard_addr(entry);
1858 1859
}

J
Jaegeuk Kim 已提交
1860
/*
C
Chao Yu 已提交
1861
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
J
Jaegeuk Kim 已提交
1862 1863 1864 1865
 */
/* move every PRE (prefree) segment to the free state under seglist_lock */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1866
	unsigned int segno;
J
Jaegeuk Kim 已提交
1867 1868

	mutex_lock(&dirty_i->seglist_lock);
1869
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1870 1871 1872 1873
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

C
Chao Yu 已提交
1874 1875
/*
 * After a checkpoint: clear prefree segments from the dirty segmap,
 * issue discards for the freed ranges, then issue the collected
 * small discards and wake the discard thread.
 */
void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
J
Jaegeuk Kim 已提交
1876
{
1877 1878
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
1879
	struct discard_entry *entry, *this;
J
Jaegeuk Kim 已提交
1880
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1881 1882
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
1883
	unsigned int secno, start_segno;
1884
	bool force = (cpc->reason & CP_DISCARD);
1885
	bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;
J
Jaegeuk Kim 已提交
1886 1887

	mutex_lock(&dirty_i->seglist_lock);
1888

J
Jaegeuk Kim 已提交
1889
	while (1) {
1890
		int i;
1891 1892 1893

		if (need_align && end != -1)
			end--;
1894 1895
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1896
			break;
1897 1898
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);
1899

1900 1901 1902 1903
		if (need_align) {
			start = rounddown(start, sbi->segs_per_sec);
			end = roundup(end, sbi->segs_per_sec);
		}
1904

1905 1906 1907 1908
		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}
1909

1910
		if (!f2fs_realtime_discard_enable(sbi))
1911
			continue;
J
Jaegeuk Kim 已提交
1912

1913 1914 1915 1916
		/* fstrim will handle this range itself */
		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
				continue;

1917 1918
		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
1919
				(end - start) << sbi->log_blocks_per_seg);
1920 1921 1922
			continue;
		}
next:
1923 1924
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
1925
		if (!IS_CURSEC(sbi, secno) &&
1926
			!get_valid_blocks(sbi, start, true))
1927 1928 1929 1930 1931 1932
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
1933 1934
		else
			end = start - 1;
J
Jaegeuk Kim 已提交
1935 1936
	}
	mutex_unlock(&dirty_i->seglist_lock);
1937 1938

	/* send small discards */
1939
	list_for_each_entry_safe(entry, this, head, list) {
C
Chao Yu 已提交
1940 1941 1942 1943 1944 1945 1946 1947 1948
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

1949
			if (f2fs_sb_has_blkzoned(sbi->sb) ||
1950
			    (force && len < cpc->trim_minlen))
C
Chao Yu 已提交
1951 1952 1953 1954 1955 1956 1957 1958 1959
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
		}
1960
skip:
C
Chao Yu 已提交
1961 1962 1963 1964 1965 1966
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

1967
		release_discard_addr(entry);
1968
		dcc->nr_discards -= total_len;
1969
	}
C
Chao Yu 已提交
1970

1971
	wake_up_discard_thread(sbi, false);
J
Jaegeuk Kim 已提交
1972 1973
}

1974
/*
 * Allocate and initialize the discard control structure (lazily, once)
 * and (re)start the discard kthread.  Returns 0 or a negative errno.
 */
static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
1975
{
1976
	dev_t dev = sbi->sb->s_bdev->bd_dev;
1977
	struct discard_cmd_control *dcc;
C
Chao Yu 已提交
1978
	int err = 0, i;
1979 1980 1981 1982 1983 1984

	/* already allocated (e.g. remount): only restart the thread */
	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

C
Chao Yu 已提交
1985
	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
1986 1987 1988
	if (!dcc)
		return -ENOMEM;

1989
	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
1990
	INIT_LIST_HEAD(&dcc->entry_list);
C
Chao Yu 已提交
1991
	for (i = 0; i < MAX_PLIST_NUM; i++)
C
Chao Yu 已提交
1992
		INIT_LIST_HEAD(&dcc->pend_list[i]);
1993
	INIT_LIST_HEAD(&dcc->wait_list);
1994
	INIT_LIST_HEAD(&dcc->fstrim_list);
1995
	mutex_init(&dcc->cmd_lock);
C
Chao Yu 已提交
1996 1997
	atomic_set(&dcc->issued_discard, 0);
	atomic_set(&dcc->issing_discard, 0);
C
Chao Yu 已提交
1998
	atomic_set(&dcc->discard_cmd_cnt, 0);
1999
	dcc->nr_discards = 0;
C
Chao Yu 已提交
2000
	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
C
Chao Yu 已提交
2001
	dcc->undiscard_blks = 0;
C
Chao Yu 已提交
2002
	dcc->next_pos = 0;
C
Chao Yu 已提交
2003
	dcc->root = RB_ROOT_CACHED;
2004
	dcc->rbtree_check = false;
2005

2006
	init_waitqueue_head(&dcc->discard_wait_queue);
2007 2008
	SM_I(sbi)->dcc_info = dcc;
init_thread:
2009 2010 2011 2012 2013 2014 2015 2016 2017
	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard)) {
		err = PTR_ERR(dcc->f2fs_issue_discard);
		kfree(dcc);
		SM_I(sbi)->dcc_info = NULL;
		return err;
	}

2018 2019 2020
	return err;
}

2021
/* Stop the discard kthread and free the discard command control, if any. */
static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (!dcc)
		return;

	f2fs_stop_discard_thread(sbi);

	kfree(dcc);
	SM_I(sbi)->dcc_info = NULL;
}

2034
/*
 * Mark a SIT entry dirty so it is flushed at the next checkpoint.
 * Returns true if the entry was already dirty, false if newly dirtied
 * (in which case the dirty counter is bumped).
 */
static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);
	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

/*
 * Apply a valid-block delta (del = +1 or -1) for @blkaddr to its SIT entry:
 * updates valid_blocks, mtime, the current/checkpoint/discard bitmaps and
 * global counters, and dirties the SIT entry.  Inconsistent bitmap state is
 * reported and neutralized (del forced to 0) rather than propagated.
 * NOTE(review): callers appear to hold sentry_lock — confirm at call sites.
 */
static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
	bool exist;
#ifdef CONFIG_F2FS_CHECK_FS
	bool mir_exist;
#endif

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	/* sanity: count must fit in unsigned short and not exceed seg size */
	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi, false);
	if (se->mtime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = se->mtime;

	/* Update valid block bitmap */
	if (del > 0) {
		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		/* mirror bitmap must agree with the primary one */
		mir_exist = f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when setting bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(exist)) {
			/* double allocation: undo the count change */
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly set, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
			se->valid_blocks--;
			del = 0;
		}

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;

		/* don't overwrite by SSR to keep node chain */
		if (IS_NODESEG(se->type) &&
				!is_sbi_flag_set(sbi, SBI_CP_DISABLED)) {
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
	} else {
		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
		mir_exist = f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when clearing bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
			f2fs_bug_on(sbi, 1);
		}
#endif
		if (unlikely(!exist)) {
			/* double free: undo the count change */
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly cleared, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
			se->valid_blocks++;
			del = 0;
		} else if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
			/*
			 * If checkpoints are off, we must not reuse data that
			 * was used in the previous checkpoint. If it was used
			 * before, we must track that to know how much space we
			 * really have.
			 */
			if (f2fs_test_bit(offset, se->ckpt_valid_map))
				sbi->unusable_block_count++;
		}

		if (f2fs_test_and_clear_bit(offset, se->discard_map))
			sbi->discard_blks++;
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

C
Chao Yu 已提交
2153
/*
 * Invalidate one block address: drop any cached meta page for it, decrement
 * its SIT valid-block count and re-classify its segment in the dirty seglist.
 * NEW_ADDR (not yet on disk) is a no-op; NULL_ADDR is a bug.
 */
void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);

	/* add it into sit main buffer */
	down_write(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	up_write(&sit_i->sentry_lock);
}

C
Chao Yu 已提交
2175
/*
 * Return true if @blkaddr was valid as of the last checkpoint (per the
 * segment's ckpt_valid_map).  Addresses outside the valid data range are
 * reported as checkpointed (true).
 */
bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return true;

	down_read(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	up_read(&sit_i->sentry_lock);

	return is_cp;
}

J
Jaegeuk Kim 已提交
2199
/*
 * This function should be resided under the curseg_mutex lock
 *
 * Copy @sum into the in-memory summary block of the current segment at the
 * slot for the next block to be written (curseg->next_blkoff).
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

J
Jaegeuk Kim 已提交
2211
/*
 * Calculate the number of current summary pages for writing
 *
 * Sums the summary entries of the three data cursegs (a full segment's worth
 * for SSR-allocated logs, otherwise the current block offset) and returns how
 * many meta pages (1-3) are needed to hold them.  @for_ra uses the checkpoint
 * block offsets instead of the live curseg offsets (for readahead sizing).
 */
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	/* first page also holds two journals and the footer */
	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

J
Jaegeuk Kim 已提交
2241
/*
 * Caller should put this summary page
 *
 * Read (or grab from cache) the on-disk summary block page for @segno.
 */
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}

C
Chao Yu 已提交
2249 2250
/*
 * Overwrite the meta page at @blk_addr with one page of data from @src
 * and mark it dirty for writeback.
 */
void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
					void *src, block_t blk_addr)
{
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);

	memcpy(page_address(page), src, PAGE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

C
Chao Yu 已提交
2259 2260 2261
/* Write a summary block to its meta page; it is just another meta update. */
static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	f2fs_update_meta_page(sbi, sum_blk, blk_addr);
}

2265 2266 2267 2268
/*
 * Serialize the in-memory curseg summary of @type into the meta page at
 * @blk_addr: journal is copied under journal_rwsem, entries/footer under
 * curseg_mutex, and the rest of the page is zeroed.
 */
static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);
	memset(dst, 0, PAGE_SIZE);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

J
Jaegeuk Kim 已提交
2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301
static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

J
Jaegeuk Kim 已提交
2302
/*
 * Find a new segment from the free segments bitmap to right order
 * This function should be returned with success, otherwise BUG
 *
 * Search strategy: first try the next free segment within the current
 * section; otherwise scan the free-section bitmap rightward (or leftward
 * for ALLOC_LEFT), and retry in another zone if the chosen zone is already
 * in use by some curseg.  The result is stored back through @newseg and
 * marked in-use under free_i->segmap_lock.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	/* fast path: a free segment within the same section */
	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			/* wrap around and scan from the beginning */
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	/* walk leftward until a free section is found */
	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in user, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}

/*
 * Switch the curseg to its pre-selected next_segno and reset its write
 * position and summary footer (tagged as data or node summary).
 */
static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

2412 2413
/*
 * Choose the hint segment number to start searching for a new segment:
 * 0 to reuse freed space from the front (checkpoint disabled, NOHEAP for
 * hot-data/node logs, or alloc_mode=reuse), the GC's last ALLOC_NEXT
 * victim if any, otherwise the current segment of this log type.
 */
static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	/* if segs_per_sec is large than 1, we need to keep original policy. */
	if (sbi->segs_per_sec != 1)
		return CURSEG_I(sbi, type)->segno;

	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		return 0;

	if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		return 0;

	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];

	/* find segments from 0 to reuse freed segments */
	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
		return 0;

	return CURSEG_I(sbi, type)->segno;
}

J
Jaegeuk Kim 已提交
2435
/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	/* flush the old segment's summaries before switching */
	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	segno = __get_next_segno(sbi, type);
	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

/*
 * Compute the next writable block offset in an SSR segment: the first
 * position at or after @start that is free in both the current and the
 * checkpoint valid-block bitmaps (their OR is built in SIT_I->tmp_map).
 */
static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}

J
Jaegeuk Kim 已提交
2478
/*
 * Advance the write position of a curseg.  LFS segments simply move to
 * the next block; SSR segments must search for the next hole via
 * __next_free_blkoff.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type != SSR) {
		seg->next_blkoff++;
		return;
	}
	__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
}

J
Jaegeuk Kim 已提交
2492
/*
 * This function always allocates a used segment(from dirty seglist) by SSR
 * manner, so it should recover the existing segment information of valid blocks
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	/* flush the old segment's summaries, then claim the new segment */
	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	/* restore the existing summaries of the reused segment */
	sum_page = f2fs_get_sum_page(sbi, new_segno);
	f2fs_bug_on(sbi, IS_ERR(sum_page));
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
	f2fs_put_page(sum_page, 1);
}

2524 2525 2526 2527
/*
 * Pick a victim segment for SSR allocation of log @type.  Tries the same
 * temperature first, then the other temperatures of the same class (nodes
 * scan hot->cold or cold->hot depending on @type), and finally — with
 * checkpoints disabled — any fully-invalid dirty segment.  On success the
 * victim is stored in curseg->next_segno and 1 is returned; otherwise 0.
 */
static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
	unsigned segno = NULL_SEGNO;
	int i, cnt;
	bool reversed = false;

	/* f2fs_need_SSR() already forces to do this */
	if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
		curseg->next_segno = segno;
		return 1;
	}

	/* For node segments, let's do SSR more intensively */
	if (IS_NODESEG(type)) {
		if (type >= CURSEG_WARM_NODE) {
			reversed = true;
			i = CURSEG_COLD_NODE;
		} else {
			i = CURSEG_HOT_NODE;
		}
		cnt = NR_CURSEG_NODE_TYPE;
	} else {
		if (type >= CURSEG_WARM_DATA) {
			reversed = true;
			i = CURSEG_COLD_DATA;
		} else {
			i = CURSEG_HOT_DATA;
		}
		cnt = NR_CURSEG_DATA_TYPE;
	}

	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == type)
			continue;
		if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
			curseg->next_segno = segno;
			return 1;
		}
	}

	/* find valid_blocks=0 in dirty list */
	if (unlikely(is_sbi_flag_set(sbi, SBI_CP_DISABLED))) {
		segno = get_free_segment(sbi);
		if (segno != NULL_SEGNO) {
			curseg->next_segno = segno;
			return 1;
		}
	}
	return 0;
}

J
Jaegeuk Kim 已提交
2577 2578 2579 2580 2581 2582 2583
/*
 * flush out current segment and replace it with new segment
 * This function should be returned with success, otherwise BUG
 *
 * Default allocation policy: forced callers and warm-node logs without
 * CRC recovery always open a fresh LFS segment; an adjacent free segment
 * is preferred for LFS cursegs (unless checkpoints are disabled); SSR is
 * used when space pressure demands it; otherwise fall back to a new
 * LFS segment.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	if (force)
		new_curseg(sbi, type, true);
	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
					type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type) &&
			likely(!is_sbi_flag_set(sbi, SBI_CP_DISABLED)))
		new_curseg(sbi, type, false);
	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

C
Chao Yu 已提交
2602
/*
 * Force all three data cursegs (hot/warm/cold) onto fresh segments and
 * mark the vacated segments dirty, under the SIT sentry lock.
 */
void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg;
	unsigned int old_segno;
	int i;

	down_write(&SIT_I(sbi)->sentry_lock);

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		curseg = CURSEG_I(sbi, i);
		old_segno = curseg->segno;
		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
		locate_dirty_segment(sbi, old_segno);
	}

	up_write(&SIT_I(sbi)->sentry_lock);
}

/* Default segment-allocation operations table. */
static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

C
Chao Yu 已提交
2624 2625
/*
 * Probe (without issuing) whether any segment in [cpc->trim_start,
 * cpc->trim_end] has discardable blocks; trim_start is restored before
 * returning.
 */
bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	__u64 trim_start = cpc->trim_start;
	bool has_candidate = false;

	down_write(&SIT_I(sbi)->sentry_lock);
	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
		if (add_discard_addrs(sbi, cpc, true)) {
			has_candidate = true;
			break;
		}
	}
	up_write(&SIT_I(sbi)->sentry_lock);

	cpc->trim_start = trim_start;
	return has_candidate;
}

2643
/*
 * Issue all prepared discard commands whose logical range overlaps
 * [start, end], honoring @dpolicy (granularity, max in-flight requests).
 * When max_requests is reached, the cmd_lock is dropped, outstanding
 * discards are awaited, and the scan restarts past the last issued
 * command.  Returns the number of blocks known trimmed via the waits.
 */
static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy,
					unsigned int start, unsigned int end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	int issued;
	unsigned int trimmed = 0;

next:
	issued = 0;

	mutex_lock(&dcc->cmd_lock);
	if (unlikely(dcc->rbtree_check))
		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));

	/* locate the first command at or after @start */
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, start,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true, NULL);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc && dc->lstart <= end) {
		struct rb_node *node;
		int err = 0;

		if (dc->len < dpolicy->granularity)
			goto skip;

		/* already issued/completing: just park it on the fstrim list */
		if (dc->state != D_PREP) {
			list_move_tail(&dc->list, &dcc->fstrim_list);
			goto skip;
		}

		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

		if (issued >= dpolicy->max_requests) {
			start = dc->lstart + dc->len;

			if (err)
				__remove_discard_cmd(sbi, dc);

			/* throttle: drop the lock, drain, and rescan */
			blk_finish_plug(&plug);
			mutex_unlock(&dcc->cmd_lock);
			trimmed += __wait_all_discard_cmd(sbi, NULL);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto next;
		}
skip:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);

		if (fatal_signal_pending(current))
			break;
	}

	blk_finish_plug(&plug);
	mutex_unlock(&dcc->cmd_lock);

	return trimmed;
}

2715 2716
/*
 * FITRIM implementation: validate and clamp the byte range to the main
 * area, write a CP_DISCARD checkpoint to collect discard candidates, and
 * — unless runtime discard already handles them — synchronously issue and
 * wait for the discards in range.  On success range->len is updated to the
 * number of bytes trimmed.  Returns 0 or a negative errno.
 */
int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	block_t start_block, end_block;
	struct cp_control cpc;
	struct discard_policy dpolicy;
	unsigned long long trimmed = 0;
	int err = 0;
	bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	if (end < MAIN_BLKADDR(sbi))
		goto out;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"Found FS corruption, run fsck to fix.");
		return -EIO;
	}

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	/* LFS with multi-segment sections must trim whole sections */
	if (need_align) {
		start_segno = rounddown(start_segno, sbi->segs_per_sec);
		end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
	}

	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
	cpc.trim_start = start_segno;
	cpc.trim_end = end_segno;

	if (sbi->discard_blks == 0)
		goto out;

	mutex_lock(&sbi->gc_mutex);
	err = f2fs_write_checkpoint(sbi, &cpc);
	mutex_unlock(&sbi->gc_mutex);
	if (err)
		goto out;

	/*
	 * We filed discard candidates, but actually we don't need to wait for
	 * all of them, since they'll be issued in idle time along with runtime
	 * discard option. User configuration looks like using runtime discard
	 * or periodic fstrim instead of it.
	 */
	if (f2fs_realtime_discard_enable(sbi))
		goto out;

	start_block = START_BLOCK(sbi, start_segno);
	end_block = START_BLOCK(sbi, end_segno + 1);

	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);

	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);
out:
	if (!err)
		range->len = F2FS_BLK_TO_BYTES(trimmed);
	return err;
}

J
Jaegeuk Kim 已提交
2786 2787 2788 2789 2790 2791 2792 2793
static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

C
Chao Yu 已提交
2794
int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
2795 2796 2797 2798 2799 2800 2801 2802 2803 2804 2805
{
	switch (hint) {
	case WRITE_LIFE_SHORT:
		return CURSEG_HOT_DATA;
	case WRITE_LIFE_EXTREME:
		return CURSEG_COLD_DATA;
	default:
		return CURSEG_WARM_DATA;
	}
}

2806 2807 2808 2809 2810 2811 2812 2813 2814 2815 2816 2817 2818 2819 2820 2821 2822 2823 2824 2825 2826 2827 2828 2829 2830 2831 2832 2833 2834 2835 2836 2837 2838
/* This returns write hints for each segment type. This hints will be
 * passed down to block layer. There are mapping tables which depend on
 * the mount option 'whint_mode'.
 *
 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
 *
 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_NOT_SET
 *                       HOT_NODE                 "
 *                       WARM_NODE                "
 *                       COLD_NODE                "
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
 *
2839 2840 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864
 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_MEDIUM;
 *                       HOT_NODE                 WRITE_LIFE_NOT_SET
 *                       WARM_NODE                "
 *                       COLD_NODE                WRITE_LIFE_NONE
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
2865 2866
 */

C
Chao Yu 已提交
2867
/*
 * Translate an f2fs I/O (page type + temperature) into the write-life
 * hint passed to the block layer, according to the mount-time whint_mode
 * policy (see the mapping tables in the comment above).
 */
enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_NOT_SET;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else {
			return WRITE_LIFE_NOT_SET;
		}
	} else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_LONG;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else if (type == NODE) {
			if (temp == WARM || temp == HOT)
				return WRITE_LIFE_NOT_SET;
			else if (temp == COLD)
				return WRITE_LIFE_NONE;
		} else if (type == META) {
			return WRITE_LIFE_MEDIUM;
		}
	}
	/* WHINT_MODE_OFF and any unmatched combination */
	return WRITE_LIFE_NOT_SET;
}

2901
static int __get_segment_type_2(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2902
{
2903
	if (fio->type == DATA)
J
Jaegeuk Kim 已提交
2904 2905 2906 2907 2908
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

2909
static int __get_segment_type_4(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2910
{
2911 2912
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
J
Jaegeuk Kim 已提交
2913 2914 2915 2916 2917 2918

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
2919
		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
2920
			return CURSEG_WARM_NODE;
J
Jaegeuk Kim 已提交
2921 2922 2923 2924 2925
		else
			return CURSEG_COLD_NODE;
	}
}

2926
static int __get_segment_type_6(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2927
{
2928 2929
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;
J
Jaegeuk Kim 已提交
2930

2931
		if (is_cold_data(fio->page) || file_is_cold(inode))
J
Jaegeuk Kim 已提交
2932
			return CURSEG_COLD_DATA;
C
Chao Yu 已提交
2933
		if (file_is_hot(inode) ||
2934
				is_inode_flag_set(inode, FI_HOT_DATA) ||
2935 2936
				f2fs_is_atomic_file(inode) ||
				f2fs_is_volatile_file(inode))
2937
			return CURSEG_HOT_DATA;
C
Chao Yu 已提交
2938
		return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
J
Jaegeuk Kim 已提交
2939
	} else {
2940 2941
		if (IS_DNODE(fio->page))
			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
J
Jaegeuk Kim 已提交
2942
						CURSEG_HOT_NODE;
2943
		return CURSEG_COLD_NODE;
J
Jaegeuk Kim 已提交
2944 2945 2946
	}
}

2947
static int __get_segment_type(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
2948
{
J
Jaegeuk Kim 已提交
2949 2950
	int type = 0;

2951
	switch (F2FS_OPTION(fio->sbi).active_logs) {
J
Jaegeuk Kim 已提交
2952
	case 2:
J
Jaegeuk Kim 已提交
2953 2954
		type = __get_segment_type_2(fio);
		break;
J
Jaegeuk Kim 已提交
2955
	case 4:
J
Jaegeuk Kim 已提交
2956 2957 2958 2959 2960 2961 2962
		type = __get_segment_type_4(fio);
		break;
	case 6:
		type = __get_segment_type_6(fio);
		break;
	default:
		f2fs_bug_on(fio->sbi, true);
J
Jaegeuk Kim 已提交
2963
	}
2964

J
Jaegeuk Kim 已提交
2965 2966 2967 2968 2969 2970 2971
	if (IS_HOT(type))
		fio->temp = HOT;
	else if (IS_WARM(type))
		fio->temp = WARM;
	else
		fio->temp = COLD;
	return type;
J
Jaegeuk Kim 已提交
2972 2973
}

C
Chao Yu 已提交
2974
/*
 * Allocate the next free block of the given curseg type for @page,
 * returning it in *new_blkaddr and updating SIT/summary state.
 * Lock order: curseg_lock (read) -> curseg_mutex -> sentry_lock (write).
 */
void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type,
		struct f2fs_io_info *fio, bool add_list)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);

	down_read(&SM_I(sbi)->curseg_lock);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	/* make sure a pending discard on this block has completed */
	f2fs_wait_discard_bio(sbi, *new_blkaddr);

	/*
	 * The summary entry must be added while curseg_mutex is held,
	 * since it modifies the current summary block in place.
	 */
	__add_sum_entry(sbi, type, sum);

	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	/*
	 * Update SIT before possibly allocating a new segment: SSR
	 * relies on up-to-date valid block counts.
	 */
	update_sit_entry(sbi, *new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);

	/*
	 * Dirty status is refreshed only after the (possible) segment
	 * switch above, once the previous segment has been closed.
	 */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));

	up_write(&sit_i->sentry_lock);

	if (page && IS_NODESEG(type)) {
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

		f2fs_inode_chksum_set(sbi, page);
	}

	if (add_list) {
		struct f2fs_bio_info *io;

		/* queue the fio on the per-log write list for merging */
		INIT_LIST_HEAD(&fio->list);
		fio->in_list = true;
		fio->retry = false;
		io = sbi->write_io[fio->type] + fio->temp;
		spin_lock(&io->io_lock);
		list_add_tail(&fio->list, &io->io_list);
		spin_unlock(&io->io_lock);
	}

	mutex_unlock(&curseg->curseg_mutex);

	up_read(&SM_I(sbi)->curseg_lock);
}

C
Chao Yu 已提交
3046 3047 3048 3049 3050 3051 3052 3053 3054 3055 3056
static void update_device_state(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int devidx;

	if (!sbi->s_ndevs)
		return;

	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);

	/* update device state for fsync */
C
Chao Yu 已提交
3057
	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);
3058 3059 3060 3061 3062 3063 3064

	/* update device state for checkpoint */
	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
		spin_lock(&sbi->dev_lock);
		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}
C
Chao Yu 已提交
3065 3066
}

3067
static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
3068
{
3069
	int type = __get_segment_type(fio);
3070
	bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);
3071

3072 3073
	if (keep_order)
		down_read(&fio->sbi->io_order_lock);
3074
reallocate:
C
Chao Yu 已提交
3075
	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
3076
			&fio->new_blkaddr, sum, type, fio, true);
3077 3078 3079
	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(fio->sbi),
					fio->old_blkaddr, fio->old_blkaddr);
3080

J
Jaegeuk Kim 已提交
3081
	/* writeout dirty page into bdev */
3082 3083
	f2fs_submit_page_write(fio);
	if (fio->retry) {
3084 3085 3086
		fio->old_blkaddr = fio->new_blkaddr;
		goto reallocate;
	}
3087 3088 3089

	update_device_state(fio);

3090 3091
	if (keep_order)
		up_read(&fio->sbi->io_order_lock);
J
Jaegeuk Kim 已提交
3092 3093
}

C
Chao Yu 已提交
3094
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
C
Chao Yu 已提交
3095
					enum iostat_type io_type)
J
Jaegeuk Kim 已提交
3096
{
J
Jaegeuk Kim 已提交
3097
	struct f2fs_io_info fio = {
3098
		.sbi = sbi,
J
Jaegeuk Kim 已提交
3099
		.type = META,
3100
		.temp = HOT,
M
Mike Christie 已提交
3101
		.op = REQ_OP_WRITE,
3102
		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3103 3104
		.old_blkaddr = page->index,
		.new_blkaddr = page->index,
3105
		.page = page,
3106
		.encrypted_page = NULL,
3107
		.in_list = false,
J
Jaegeuk Kim 已提交
3108 3109
	};

3110
	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
M
Mike Christie 已提交
3111
		fio.op_flags &= ~REQ_META;
3112

J
Jaegeuk Kim 已提交
3113
	set_page_writeback(page);
J
Jaegeuk Kim 已提交
3114
	ClearPageError(page);
3115
	f2fs_submit_page_write(&fio);
C
Chao Yu 已提交
3116

C
Chao Yu 已提交
3117
	stat_inc_meta_count(sbi, page->index);
C
Chao Yu 已提交
3118
	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3119 3120
}

C
Chao Yu 已提交
3121
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3122 3123
{
	struct f2fs_summary sum;
3124

J
Jaegeuk Kim 已提交
3125
	set_summary(&sum, nid, 0, 0);
3126
	do_write_page(&sum, fio);
C
Chao Yu 已提交
3127 3128

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3129 3130
}

C
Chao Yu 已提交
3131 3132
void f2fs_outplace_write_data(struct dnode_of_data *dn,
					struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3133
{
3134
	struct f2fs_sb_info *sbi = fio->sbi;
J
Jaegeuk Kim 已提交
3135 3136
	struct f2fs_summary sum;

3137
	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3138
	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3139
	do_write_page(&sum, fio);
3140
	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
C
Chao Yu 已提交
3141 3142

	f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3143 3144
}

C
Chao Yu 已提交
3145
int f2fs_inplace_write_data(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3146
{
C
Chao Yu 已提交
3147
	int err;
3148
	struct f2fs_sb_info *sbi = fio->sbi;
C
Chao Yu 已提交
3149

3150
	fio->new_blkaddr = fio->old_blkaddr;
3151 3152
	/* i/o temperature is needed for passing down write hints */
	__get_segment_type(fio);
3153 3154 3155 3156

	f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
			GET_SEGNO(sbi, fio->new_blkaddr))->type));

3157
	stat_inc_inplace_blocks(fio->sbi);
C
Chao Yu 已提交
3158 3159

	err = f2fs_submit_page_bio(fio);
C
Chao Yu 已提交
3160 3161
	if (!err)
		update_device_state(fio);
C
Chao Yu 已提交
3162 3163 3164 3165

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);

	return err;
J
Jaegeuk Kim 已提交
3166 3167
}

C
Chao Yu 已提交
3168 3169 3170 3171 3172 3173 3174 3175 3176 3177 3178 3179
/*
 * Return the curseg type whose current segment is @segno, or
 * NO_CHECK_TYPE when no curseg currently points at it.
 */
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	int i;

	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
		if (CURSEG_I(sbi, i)->segno == segno)
			break;
	}
	return i;
}

C
Chao Yu 已提交
3180
/*
 * Rewrite a data block at an explicit address (recovery / collapse
 * paths): temporarily steer the matching curseg to @new_blkaddr,
 * install the summary, fix up SIT, then optionally restore the curseg.
 */
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
				block_t old_blkaddr, block_t new_blkaddr,
				bool recover_curseg, bool recover_newaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
	unsigned short old_blkoff;

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

	down_write(&SM_I(sbi)->curseg_lock);

	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
		if (IS_CURSEG(sbi, segno)) {
			/* se->type is volatile as SSR allocation */
			type = __f2fs_get_curseg(sbi, segno);
			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
		} else {
			type = CURSEG_WARM_DATA;
		}
	}

	f2fs_bug_on(sbi, !IS_DATASEG(type));
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	/* remember where the curseg was so it can be put back */
	old_cursegno = curseg->segno;
	old_blkoff = curseg->next_blkoff;

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
		change_curseg(sbi, type);
	}

	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
	__add_sum_entry(sbi, type, sum);

	if (!recover_curseg || recover_newaddr)
		update_sit_entry(sbi, new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
		update_sit_entry(sbi, old_blkaddr, -1);
	}

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

	locate_dirty_segment(sbi, old_cursegno);

	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
			change_curseg(sbi, type);
		}
		curseg->next_blkoff = old_blkoff;
	}

	up_write(&sit_i->sentry_lock);
	mutex_unlock(&curseg->curseg_mutex);
	up_write(&SM_I(sbi)->curseg_lock);
}

3259 3260
/* Convenience wrapper: replace a block and repoint the dnode at it. */
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
				unsigned char version, bool recover_curseg,
				bool recover_newaddr)
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
					recover_curseg, recover_newaddr);

	f2fs_update_data_blkaddr(dn, new_addr);
}

3274
/* Wait for a page under writeback, kicking any bio it may be merged in. */
void f2fs_wait_on_page_writeback(struct page *page,
				enum page_type type, bool ordered)
{
	if (PageWriteback(page)) {
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

		f2fs_submit_merged_write_cond(sbi, NULL, page, 0, type);
		if (ordered)
			wait_on_page_writeback(page);
		else
			wait_for_stable_page(page);
	}
}

3288
/*
 * For inodes needing post-read processing, wait for any in-flight
 * writeback of the shadow page cached in the meta mapping for @blkaddr.
 */
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct page *cpage;

	if (!f2fs_post_read_required(inode))
		return;

	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return;

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
		f2fs_wait_on_page_writeback(cpage, DATA, true);
		f2fs_put_page(cpage, 1);
	}
}

3306
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3307 3308 3309 3310 3311 3312 3313 3314 3315 3316
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

C
Chao Yu 已提交
3317
	page = f2fs_get_meta_page(sbi, start++);
3318 3319
	if (IS_ERR(page))
		return PTR_ERR(page);
J
Jaegeuk Kim 已提交
3320 3321 3322 3323
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3324
	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3325 3326 3327

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3328
	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3329 3330 3331 3332 3333 3334 3335 3336 3337 3338 3339 3340 3341 3342 3343 3344 3345 3346 3347 3348 3349 3350 3351
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
3352
			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
J
Jaegeuk Kim 已提交
3353 3354 3355 3356 3357 3358
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

C
Chao Yu 已提交
3359
			page = f2fs_get_meta_page(sbi, start++);
3360 3361
			if (IS_ERR(page))
				return PTR_ERR(page);
J
Jaegeuk Kim 已提交
3362 3363 3364 3365 3366
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
3367
	return 0;
J
Jaegeuk Kim 已提交
3368 3369 3370 3371 3372 3373 3374 3375 3376 3377 3378
}

static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;
3379
	int err = 0;
J
Jaegeuk Kim 已提交
3380 3381 3382 3383 3384 3385

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
3386
		if (__exist_node_summaries(sbi))
J
Jaegeuk Kim 已提交
3387 3388 3389 3390 3391 3392 3393 3394
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
3395
		if (__exist_node_summaries(sbi))
J
Jaegeuk Kim 已提交
3396 3397 3398 3399 3400 3401
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

C
Chao Yu 已提交
3402
	new = f2fs_get_meta_page(sbi, blk_addr);
3403 3404
	if (IS_ERR(new))
		return PTR_ERR(new);
J
Jaegeuk Kim 已提交
3405 3406 3407
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
3408
		if (__exist_node_summaries(sbi)) {
J
Jaegeuk Kim 已提交
3409 3410 3411 3412 3413 3414 3415
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
3416 3417 3418
			err = f2fs_restore_node_summary(sbi, segno, sum);
			if (err)
				goto out;
J
Jaegeuk Kim 已提交
3419 3420 3421 3422 3423 3424
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
3425 3426 3427 3428 3429 3430 3431 3432

	/* update journal info */
	down_write(&curseg->journal_rwsem);
	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
	up_write(&curseg->journal_rwsem);

	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
J
Jaegeuk Kim 已提交
3433 3434 3435 3436 3437
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
3438
out:
J
Jaegeuk Kim 已提交
3439
	f2fs_put_page(new, 1);
3440
	return err;
J
Jaegeuk Kim 已提交
3441 3442 3443 3444
}

static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
3445 3446
	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
J
Jaegeuk Kim 已提交
3447
	int type = CURSEG_HOT_DATA;
3448
	int err;
J
Jaegeuk Kim 已提交
3449

3450
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
C
Chao Yu 已提交
3451
		int npages = f2fs_npages_for_summary_flush(sbi, true);
3452 3453

		if (npages >= 2)
C
Chao Yu 已提交
3454
			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3455
							META_CP, true);
3456

J
Jaegeuk Kim 已提交
3457
		/* restore for compacted data summary */
3458 3459 3460
		err = read_compacted_summaries(sbi);
		if (err)
			return err;
J
Jaegeuk Kim 已提交
3461 3462 3463
		type = CURSEG_HOT_NODE;
	}

3464
	if (__exist_node_summaries(sbi))
C
Chao Yu 已提交
3465
		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3466
					NR_CURSEG_TYPE - type, META_CP, true);
3467

3468 3469 3470 3471 3472 3473
	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

3474 3475 3476 3477 3478
	/* sanity check for summary blocks */
	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
		return -EINVAL;

J
Jaegeuk Kim 已提交
3479 3480 3481 3482 3483 3484 3485 3486 3487 3488 3489 3490
	return 0;
}

/*
 * Write the NAT/SIT journals and the data-log summary entries into the
 * compacted summary area starting at @blkaddr.
 */
static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

	page = f2fs_grab_meta_page(sbi, blkaddr++);
	kaddr = (unsigned char *)page_address(page);
	memset(kaddr, 0, PAGE_SIZE);

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;

		seg_i = CURSEG_I(sbi, i);
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
				page = f2fs_grab_meta_page(sbi, blkaddr++);
				kaddr = (unsigned char *)page_address(page);
				memset(kaddr, 0, PAGE_SIZE);
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
							SUM_FOOTER_SIZE)
				continue;

			/* page full: flush it and start a fresh one */
			set_page_dirty(page);
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
	if (page) {
		set_page_dirty(page);
		f2fs_put_page(page, 1);
	}
}

/* Write one full summary page per curseg of the given class. */
static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int i, end;

	if (IS_DATASEG(type))
		end = type + NR_CURSEG_DATA_TYPE;
	else
		end = type + NR_CURSEG_NODE_TYPE;

	for (i = type; i < end; i++)
		write_current_sum_page(sbi, i, blkaddr + (i - type));
}

C
Chao Yu 已提交
3553
/* Flush data-log summaries, compacted or full, per checkpoint flag. */
void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

C
Chao Yu 已提交
3561
/* Flush node-log summaries (always the full form). */
void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
{
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}

C
Chao Yu 已提交
3566
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
J
Jaegeuk Kim 已提交
3567 3568 3569 3570 3571
					unsigned int val, int alloc)
{
	int i;

	if (type == NAT_JOURNAL) {
3572 3573
		for (i = 0; i < nats_in_cursum(journal); i++) {
			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
J
Jaegeuk Kim 已提交
3574 3575
				return i;
		}
3576 3577
		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
			return update_nats_in_cursum(journal, 1);
J
Jaegeuk Kim 已提交
3578
	} else if (type == SIT_JOURNAL) {
3579 3580
		for (i = 0; i < sits_in_cursum(journal); i++)
			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
J
Jaegeuk Kim 已提交
3581
				return i;
3582 3583
		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
			return update_sits_in_cursum(journal, 1);
J
Jaegeuk Kim 已提交
3584 3585 3586 3587 3588 3589 3590
	}
	return -1;
}

/* Read the SIT block currently holding @segno's entry. */
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}

static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
3598
	struct page *page;
J
Jaegeuk Kim 已提交
3599 3600 3601 3602 3603
	pgoff_t src_off, dst_off;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

C
Chao Yu 已提交
3604
	page = f2fs_grab_meta_page(sbi, dst_off);
3605
	seg_info_to_sit_page(sbi, page, start);
J
Jaegeuk Kim 已提交
3606

3607
	set_page_dirty(page);
J
Jaegeuk Kim 已提交
3608 3609
	set_to_next_sit(sit_i, start);

3610
	return page;
J
Jaegeuk Kim 已提交
3611 3612
}

3613 3614 3615
static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
3616
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);
3617 3618 3619 3620 3621 3622 3623 3624 3625 3626 3627 3628 3629 3630 3631 3632 3633 3634 3635 3636 3637 3638 3639 3640 3641 3642 3643 3644 3645 3646 3647 3648 3649 3650 3651 3652 3653 3654 3655 3656 3657 3658 3659 3660 3661 3662 3663 3664 3665 3666 3667 3668 3669 3670

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	list_move_tail(&ses->set_list, &next->set_list);
}

static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

3671
	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
3672 3673 3674 3675
		add_sit_entry(segno, set_list);
}

static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3676 3677
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3678
	struct f2fs_journal *journal = curseg->journal;
J
Jaegeuk Kim 已提交
3679 3680
	int i;

3681
	down_write(&curseg->journal_rwsem);
3682
	for (i = 0; i < sits_in_cursum(journal); i++) {
3683 3684 3685
		unsigned int segno;
		bool dirtied;

3686
		segno = le32_to_cpu(segno_in_journal(journal, i));
3687 3688 3689 3690
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
J
Jaegeuk Kim 已提交
3691
	}
3692
	update_sits_in_cursum(journal, -i);
3693
	up_write(&curseg->journal_rwsem);
J
Jaegeuk Kim 已提交
3694 3695
}

J
Jaegeuk Kim 已提交
3696
/*
J
Jaegeuk Kim 已提交
3697 3698 3699
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
C
Chao Yu 已提交
3700
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
J
Jaegeuk Kim 已提交
3701 3702 3703 3704
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3705
	struct f2fs_journal *journal = curseg->journal;
3706 3707 3708
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
3709
	struct seg_entry *se;
J
Jaegeuk Kim 已提交
3710

3711
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3712

3713 3714 3715
	if (!sit_i->dirty_sentries)
		goto out;

J
Jaegeuk Kim 已提交
3716
	/*
3717 3718
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
J
Jaegeuk Kim 已提交
3719
	 */
3720
	add_sits_in_set(sbi);
J
Jaegeuk Kim 已提交
3721

3722 3723 3724 3725 3726
	/*
	 * if there are no enough space in journal to store dirty sit
	 * entries, remove all entries from journal and add and account
	 * them in sit entry set.
	 */
3727
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
3728
		remove_sits_in_journal(sbi);
3729

3730 3731 3732 3733 3734 3735
	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
J
Jaegeuk Kim 已提交
3736
		struct page *page = NULL;
3737 3738 3739
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
3740
						(unsigned long)MAIN_SEGS(sbi));
3741 3742 3743
		unsigned int segno = start_segno;

		if (to_journal &&
3744
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
3745 3746
			to_journal = false;

3747 3748 3749
		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
3750 3751
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
J
Jaegeuk Kim 已提交
3752 3753
		}

3754 3755 3756
		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;
3757 3758

			se = get_seg_entry(sbi, segno);
3759 3760 3761 3762 3763
#ifdef CONFIG_F2FS_CHECK_FS
			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
						SIT_VBLOCK_MAP_SIZE))
				f2fs_bug_on(sbi, 1);
#endif
3764 3765

			/* add discard candidates */
3766
			if (!(cpc->reason & CP_DISCARD)) {
3767
				cpc->trim_start = segno;
3768
				add_discard_addrs(sbi, cpc, false);
3769
			}
3770 3771

			if (to_journal) {
C
Chao Yu 已提交
3772
				offset = f2fs_lookup_journal_in_cursum(journal,
3773 3774
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
3775
				segno_in_journal(journal, offset) =
3776 3777
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
3778
					&sit_in_journal(journal, offset));
3779 3780
				check_block_count(sbi, segno,
					&sit_in_journal(journal, offset));
3781 3782 3783 3784
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
3785 3786
				check_block_count(sbi, segno,
						&raw_sit->entries[sit_offset]);
3787
			}
J
Jaegeuk Kim 已提交
3788

3789 3790 3791
			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
J
Jaegeuk Kim 已提交
3792 3793
		}

3794 3795 3796
		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
3797 3798 3799 3800
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
J
Jaegeuk Kim 已提交
3801
	}
3802 3803 3804 3805

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
3806
	if (cpc->reason & CP_DISCARD) {
3807 3808
		__u64 trim_start = cpc->trim_start;

3809
		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
3810
			add_discard_addrs(sbi, cpc, false);
3811 3812

		cpc->trim_start = trim_start;
3813
	}
3814
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3815 3816 3817 3818 3819 3820 3821 3822 3823

	set_prefree_as_free_segments(sbi);
}

static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
3824
	char *src_bitmap;
J
Jaegeuk Kim 已提交
3825 3826 3827
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
C
Chao Yu 已提交
3828
	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
3829 3830 3831 3832 3833
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

3834 3835 3836 3837
	sit_i->sentries =
		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
					      MAIN_SEGS(sbi)),
			      GFP_KERNEL);
J
Jaegeuk Kim 已提交
3838 3839 3840
	if (!sit_i->sentries)
		return -ENOMEM;

3841
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
C
Chao Yu 已提交
3842 3843
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
3844 3845 3846
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

3847
	for (start = 0; start < MAIN_SEGS(sbi); start++) {
J
Jaegeuk Kim 已提交
3848
		sit_i->sentries[start].cur_valid_map
C
Chao Yu 已提交
3849
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3850
		sit_i->sentries[start].ckpt_valid_map
C
Chao Yu 已提交
3851
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
3852
		if (!sit_i->sentries[start].cur_valid_map ||
3853
				!sit_i->sentries[start].ckpt_valid_map)
J
Jaegeuk Kim 已提交
3854
			return -ENOMEM;
3855

C
Chao Yu 已提交
3856 3857
#ifdef CONFIG_F2FS_CHECK_FS
		sit_i->sentries[start].cur_valid_map_mir
C
Chao Yu 已提交
3858
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
C
Chao Yu 已提交
3859 3860 3861 3862
		if (!sit_i->sentries[start].cur_valid_map_mir)
			return -ENOMEM;
#endif

3863 3864 3865 3866 3867
		sit_i->sentries[start].discard_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
							GFP_KERNEL);
		if (!sit_i->sentries[start].discard_map)
			return -ENOMEM;
J
Jaegeuk Kim 已提交
3868 3869
	}

C
Chao Yu 已提交
3870
	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3871 3872 3873
	if (!sit_i->tmp_map)
		return -ENOMEM;

J
Jaegeuk Kim 已提交
3874
	if (sbi->segs_per_sec > 1) {
3875 3876 3877 3878
		sit_i->sec_entries =
			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
						      MAIN_SECS(sbi)),
				      GFP_KERNEL);
J
Jaegeuk Kim 已提交
3879 3880 3881 3882 3883 3884 3885 3886 3887 3888 3889
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related with SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from ckeckpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

3890 3891
	sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
J
Jaegeuk Kim 已提交
3892 3893
		return -ENOMEM;

3894 3895 3896 3897 3898 3899
#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;
#endif

J
Jaegeuk Kim 已提交
3900 3901 3902 3903 3904
	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
3905
	sit_i->written_valid_blocks = 0;
J
Jaegeuk Kim 已提交
3906 3907 3908 3909
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
3910
	sit_i->mounted_time = ktime_get_real_seconds();
3911
	init_rwsem(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3912 3913 3914 3915 3916 3917 3918 3919 3920
	return 0;
}

static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
C
Chao Yu 已提交
3921
	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
3922 3923 3924 3925 3926
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

3927
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
C
Chao Yu 已提交
3928
	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3929 3930 3931
	if (!free_i->free_segmap)
		return -ENOMEM;

3932
	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
C
Chao Yu 已提交
3933
	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3934 3935 3936 3937 3938 3939 3940 3941
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
3942
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
J
Jaegeuk Kim 已提交
3943 3944
	free_i->free_segments = 0;
	free_i->free_sections = 0;
3945
	spin_lock_init(&free_i->segmap_lock);
J
Jaegeuk Kim 已提交
3946 3947 3948 3949 3950
	return 0;
}

static int build_curseg(struct f2fs_sb_info *sbi)
{
N
Namjae Jeon 已提交
3951
	struct curseg_info *array;
J
Jaegeuk Kim 已提交
3952 3953
	int i;

3954 3955
	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
			     GFP_KERNEL);
J
Jaegeuk Kim 已提交
3956 3957 3958 3959 3960 3961 3962
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
C
Chao Yu 已提交
3963
		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
J
Jaegeuk Kim 已提交
3964 3965
		if (!array[i].sum_blk)
			return -ENOMEM;
3966
		init_rwsem(&array[i].journal_rwsem);
C
Chao Yu 已提交
3967 3968
		array[i].journal = f2fs_kzalloc(sbi,
				sizeof(struct f2fs_journal), GFP_KERNEL);
3969 3970
		if (!array[i].journal)
			return -ENOMEM;
J
Jaegeuk Kim 已提交
3971 3972 3973 3974 3975 3976
		array[i].segno = NULL_SEGNO;
		array[i].next_blkoff = 0;
	}
	return restore_curseg_summaries(sbi);
}

3977
static int build_sit_entries(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3978 3979 3980
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
3981
	struct f2fs_journal *journal = curseg->journal;
3982 3983
	struct seg_entry *se;
	struct f2fs_sit_entry sit;
3984 3985 3986
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
3987
	int err = 0;
3988
	block_t total_node_blocks = 0;
J
Jaegeuk Kim 已提交
3989

3990
	do {
C
Chao Yu 已提交
3991
		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
3992
							META_SIT, true);
3993 3994 3995 3996

		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

3997
		for (; start < end && start < MAIN_SEGS(sbi); start++) {
3998 3999 4000
			struct f2fs_sit_block *sit_blk;
			struct page *page;

4001
			se = &sit_i->sentries[start];
4002
			page = get_current_sit_page(sbi, start);
4003 4004
			if (IS_ERR(page))
				return PTR_ERR(page);
4005 4006 4007
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);
4008

4009 4010 4011
			err = check_block_count(sbi, start, &sit);
			if (err)
				return err;
4012
			seg_info_from_raw_sit(se, &sit);
4013 4014
			if (IS_NODESEG(se->type))
				total_node_blocks += se->valid_blocks;
4015 4016

			/* build discard map only one time */
4017 4018 4019 4020 4021 4022 4023 4024 4025 4026
			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
				memset(se->discard_map, 0xff,
					SIT_VBLOCK_MAP_SIZE);
			} else {
				memcpy(se->discard_map,
					se->cur_valid_map,
					SIT_VBLOCK_MAP_SIZE);
				sbi->discard_blks +=
					sbi->blocks_per_seg -
					se->valid_blocks;
4027
			}
4028

4029 4030 4031
			if (sbi->segs_per_sec > 1)
				get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
J
Jaegeuk Kim 已提交
4032
		}
4033 4034
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);
4035 4036 4037 4038 4039 4040

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));
J
Jaegeuk Kim 已提交
4041 4042 4043 4044 4045 4046 4047 4048 4049
		if (start >= MAIN_SEGS(sbi)) {
			f2fs_msg(sbi->sb, KERN_ERR,
					"Wrong journal entry on segno %u",
					start);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			err = -EINVAL;
			break;
		}

4050 4051 4052 4053
		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

		old_valid_blocks = se->valid_blocks;
4054 4055
		if (IS_NODESEG(se->type))
			total_node_blocks -= old_valid_blocks;
4056

4057 4058 4059
		err = check_block_count(sbi, start, &sit);
		if (err)
			break;
4060
		seg_info_from_raw_sit(se, &sit);
4061 4062
		if (IS_NODESEG(se->type))
			total_node_blocks += se->valid_blocks;
4063

4064 4065 4066 4067 4068 4069 4070
		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
		} else {
			memcpy(se->discard_map, se->cur_valid_map,
						SIT_VBLOCK_MAP_SIZE);
			sbi->discard_blks += old_valid_blocks;
			sbi->discard_blks -= se->valid_blocks;
4071 4072
		}

C
Chao Yu 已提交
4073
		if (sbi->segs_per_sec > 1) {
4074
			get_sec_entry(sbi, start)->valid_blocks +=
C
Chao Yu 已提交
4075 4076 4077 4078
							se->valid_blocks;
			get_sec_entry(sbi, start)->valid_blocks -=
							old_valid_blocks;
		}
4079 4080
	}
	up_read(&curseg->journal_rwsem);
4081 4082 4083 4084 4085 4086 4087 4088 4089

	if (!err && total_node_blocks != valid_node_count(sbi)) {
		f2fs_msg(sbi->sb, KERN_ERR,
			"SIT is corrupted node# %u vs %u",
			total_node_blocks, valid_node_count(sbi));
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		err = -EINVAL;
	}

4090
	return err;
J
Jaegeuk Kim 已提交
4091 4092 4093 4094 4095 4096 4097
}

static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

4098
	for (start = 0; start < MAIN_SEGS(sbi); start++) {
J
Jaegeuk Kim 已提交
4099 4100 4101
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
4102 4103 4104
		else
			SIT_I(sbi)->written_valid_blocks +=
						sentry->valid_blocks;
J
Jaegeuk Kim 已提交
4105 4106 4107 4108 4109 4110 4111 4112 4113 4114 4115 4116 4117
	}

	/* set use the current segments */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}

static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
4118
	unsigned int segno = 0, offset = 0;
J
Jaegeuk Kim 已提交
4119 4120
	unsigned short valid_blocks;

4121
	while (1) {
J
Jaegeuk Kim 已提交
4122
		/* find dirty segment based on free segmap */
4123 4124
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
4125 4126
			break;
		offset = segno + 1;
4127
		valid_blocks = get_valid_blocks(sbi, segno, false);
4128
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
J
Jaegeuk Kim 已提交
4129
			continue;
4130 4131 4132 4133
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
J
Jaegeuk Kim 已提交
4134 4135 4136 4137 4138 4139
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

4140
static int init_victim_secmap(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4141 4142
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4143
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
J
Jaegeuk Kim 已提交
4144

C
Chao Yu 已提交
4145
	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
4146
	if (!dirty_i->victim_secmap)
J
Jaegeuk Kim 已提交
4147 4148 4149 4150 4151 4152 4153 4154 4155 4156
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
C
Chao Yu 已提交
4157 4158
	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
4159 4160 4161 4162 4163 4164
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

4165
	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
J
Jaegeuk Kim 已提交
4166 4167

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
C
Chao Yu 已提交
4168 4169
		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
J
Jaegeuk Kim 已提交
4170 4171 4172 4173 4174
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
4175
	return init_victim_secmap(sbi);
J
Jaegeuk Kim 已提交
4176 4177
}

J
Jaegeuk Kim 已提交
4178
/*
J
Jaegeuk Kim 已提交
4179 4180 4181 4182 4183 4184 4185
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

4186
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
4187

4188
	sit_i->min_mtime = ULLONG_MAX;
J
Jaegeuk Kim 已提交
4189

4190
	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
J
Jaegeuk Kim 已提交
4191 4192 4193 4194 4195 4196 4197 4198 4199 4200 4201
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
C
Chao Yu 已提交
4202
	sit_i->max_mtime = get_mtime(sbi, false);
4203
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
4204 4205
}

C
Chao Yu 已提交
4206
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4207 4208 4209
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
N
Namjae Jeon 已提交
4210
	struct f2fs_sm_info *sm_info;
J
Jaegeuk Kim 已提交
4211 4212
	int err;

C
Chao Yu 已提交
4213
	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
J
Jaegeuk Kim 已提交
4214 4215 4216 4217 4218 4219 4220 4221 4222 4223 4224 4225
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
4226 4227
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
J
Jaegeuk Kim 已提交
4228 4229 4230
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

4231 4232
	if (!test_opt(sbi, LFS))
		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
4233
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
4234
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
4235
	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
4236
	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
C
Chao Yu 已提交
4237
	sm_info->min_ssr_sections = reserved_sections(sbi);
J
Jaegeuk Kim 已提交
4238

4239 4240
	INIT_LIST_HEAD(&sm_info->sit_entry_set);

C
Chao Yu 已提交
4241 4242
	init_rwsem(&sm_info->curseg_lock);

4243
	if (!f2fs_readonly(sbi->sb)) {
C
Chao Yu 已提交
4244
		err = f2fs_create_flush_cmd_control(sbi);
4245
		if (err)
4246
			return err;
4247 4248
	}

4249 4250 4251 4252
	err = create_discard_cmd_control(sbi);
	if (err)
		return err;

J
Jaegeuk Kim 已提交
4253 4254 4255 4256 4257 4258 4259 4260 4261 4262 4263
	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
4264 4265 4266
	err = build_sit_entries(sbi);
	if (err)
		return err;
J
Jaegeuk Kim 已提交
4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 4279 4280 4281 4282

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}

/*
 * Free one dirty-type bitmap and reset its dirty count, under the
 * seglist lock.
 */
static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
	kvfree(dirty_i->dirty_segmap[dirty_type]);
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

4288
static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4289 4290
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
4291
	kvfree(dirty_i->victim_secmap);
J
Jaegeuk Kim 已提交
4292 4293 4294 4295 4296 4297 4298 4299 4300 4301 4302 4303 4304 4305
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

4306
	destroy_victim_secmap(sbi);
J
Jaegeuk Kim 已提交
4307 4308 4309 4310 4311 4312 4313 4314 4315 4316 4317 4318
	SM_I(sbi)->dirty_info = NULL;
	kfree(dirty_i);
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
4319
	for (i = 0; i < NR_CURSEG_TYPE; i++) {
J
Jaegeuk Kim 已提交
4320
		kfree(array[i].sum_blk);
4321 4322
		kfree(array[i].journal);
	}
J
Jaegeuk Kim 已提交
4323 4324 4325 4326 4327 4328 4329 4330 4331
	kfree(array);
}

/*
 * Free the free segment/section bitmaps and their container. Safe to
 * call when free_info was never allocated.
 */
static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
	kvfree(free_i->free_segmap);
	kvfree(free_i->free_secmap);
	kfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int start;

	if (!sit_i)
		return;

	if (sit_i->sentries) {
4346
		for (start = 0; start < MAIN_SEGS(sbi); start++) {
J
Jaegeuk Kim 已提交
4347
			kfree(sit_i->sentries[start].cur_valid_map);
C
Chao Yu 已提交
4348 4349 4350
#ifdef CONFIG_F2FS_CHECK_FS
			kfree(sit_i->sentries[start].cur_valid_map_mir);
#endif
J
Jaegeuk Kim 已提交
4351
			kfree(sit_i->sentries[start].ckpt_valid_map);
4352
			kfree(sit_i->sentries[start].discard_map);
J
Jaegeuk Kim 已提交
4353 4354
		}
	}
J
Jaegeuk Kim 已提交
4355 4356
	kfree(sit_i->tmp_map);

4357 4358 4359
	kvfree(sit_i->sentries);
	kvfree(sit_i->sec_entries);
	kvfree(sit_i->dirty_sentries_bitmap);
J
Jaegeuk Kim 已提交
4360 4361 4362

	SM_I(sbi)->sit_info = NULL;
	kfree(sit_i->sit_bitmap);
4363 4364 4365
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(sit_i->sit_bitmap_mir);
#endif
J
Jaegeuk Kim 已提交
4366 4367 4368
	kfree(sit_i);
}

C
Chao Yu 已提交
4369
void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
4370 4371
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
4372

4373 4374
	if (!sm_info)
		return;
C
Chao Yu 已提交
4375
	f2fs_destroy_flush_cmd_control(sbi, true);
4376
	destroy_discard_cmd_control(sbi);
J
Jaegeuk Kim 已提交
4377 4378 4379 4380 4381 4382 4383
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
	kfree(sm_info);
}
4384

C
Chao Yu 已提交
4385
int __init f2fs_create_segment_manager_caches(void)
4386 4387
{
	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
4388
			sizeof(struct discard_entry));
4389
	if (!discard_entry_slab)
4390 4391
		goto fail;

4392 4393 4394
	discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
			sizeof(struct discard_cmd));
	if (!discard_cmd_slab)
C
Chao Yu 已提交
4395
		goto destroy_discard_entry;
C
Chao Yu 已提交
4396

4397
	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
4398
			sizeof(struct sit_entry_set));
4399
	if (!sit_entry_set_slab)
4400
		goto destroy_discard_cmd;
J
Jaegeuk Kim 已提交
4401 4402 4403 4404 4405

	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
			sizeof(struct inmem_pages));
	if (!inmem_entry_slab)
		goto destroy_sit_entry_set;
4406
	return 0;
4407

J
Jaegeuk Kim 已提交
4408 4409
destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
4410 4411
destroy_discard_cmd:
	kmem_cache_destroy(discard_cmd_slab);
C
Chao Yu 已提交
4412
destroy_discard_entry:
4413 4414 4415
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
4416 4417
}

C
Chao Yu 已提交
4418
void f2fs_destroy_segment_manager_caches(void)
4419
{
4420
	kmem_cache_destroy(sit_entry_set_slab);
4421
	kmem_cache_destroy(discard_cmd_slab);
4422
	kmem_cache_destroy(discard_entry_slab);
J
Jaegeuk Kim 已提交
4423
	kmem_cache_destroy(inmem_entry_slab);
4424
}