segment.c 109.1 KB
Newer Older
C
Chao Yu 已提交
1
// SPDX-License-Identifier: GPL-2.0
J
Jaegeuk Kim 已提交
2
/*
J
Jaegeuk Kim 已提交
3 4 5 6 7 8 9 10 11
 * fs/f2fs/segment.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
12
#include <linux/prefetch.h>
13
#include <linux/kthread.h>
14
#include <linux/swap.h>
15
#include <linux/timer.h>
16
#include <linux/freezer.h>
17
#include <linux/sched/signal.h>
J
Jaegeuk Kim 已提交
18 19 20 21

#include "f2fs.h"
#include "segment.h"
#include "node.h"
22
#include "gc.h"
J
Jaegeuk Kim 已提交
23
#include "trace.h"
24
#include <trace/events/f2fs.h>
J
Jaegeuk Kim 已提交
25

26 27
#define __reverse_ffz(x) __reverse_ffs(~(x))

28
static struct kmem_cache *discard_entry_slab;
29
static struct kmem_cache *discard_cmd_slab;
30
static struct kmem_cache *sit_entry_set_slab;
J
Jaegeuk Kim 已提交
31
static struct kmem_cache *inmem_entry_slab;
32

33 34 35 36 37 38 39 40 41 42 43 44 45 46 47
static unsigned long __reverse_ulong(unsigned char *str)
{
	unsigned long tmp = 0;
	int shift = 24, idx = 0;

#if BITS_PER_LONG == 64
	shift = 56;
#endif
	while (shift >= 0) {
		tmp |= (unsigned long)str[idx++] << shift;
		shift -= BITS_PER_BYTE;
	}
	return tmp;
}

48 49 50 51 52 53 54 55 56
/*
 * __reverse_ffs is copied from include/asm-generic/bitops/__ffs.h since
 * MSB and LSB are reversed in a byte by f2fs_set_bit.
 */
static inline unsigned long __reverse_ffs(unsigned long word)
{
	int num = 0;

#if BITS_PER_LONG == 64
57
	if ((word & 0xffffffff00000000UL) == 0)
58
		num += 32;
59
	else
60 61
		word >>= 32;
#endif
62
	if ((word & 0xffff0000) == 0)
63
		num += 16;
64
	else
65
		word >>= 16;
66 67

	if ((word & 0xff00) == 0)
68
		num += 8;
69
	else
70
		word >>= 8;
71

72 73 74 75
	if ((word & 0xf0) == 0)
		num += 4;
	else
		word >>= 4;
76

77 78 79 80
	if ((word & 0xc) == 0)
		num += 2;
	else
		word >>= 2;
81

82 83 84 85 86 87
	if ((word & 0x2) == 0)
		num += 1;
	return num;
}

/*
A
arter97 已提交
88
 * __find_rev_next(_zero)_bit is copied from lib/find_next_bit.c because
89
 * f2fs_set_bit makes MSB and LSB reversed in a byte.
F
Fan Li 已提交
90
 * @size must be integral times of unsigned long.
91
 * Example:
92 93 94
 *                             MSB <--> LSB
 *   f2fs_set_bit(0, bitmap) => 1000 0000
 *   f2fs_set_bit(7, bitmap) => 0000 0001
95 96 97 98 99
 */
static unsigned long __find_rev_next_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
F
Fan Li 已提交
100
	unsigned long result = size;
101 102 103 104 105
	unsigned long tmp;

	if (offset >= size)
		return size;

F
Fan Li 已提交
106
	size -= (offset & ~(BITS_PER_LONG - 1));
107
	offset %= BITS_PER_LONG;
108

F
Fan Li 已提交
109 110 111
	while (1) {
		if (*p == 0)
			goto pass;
112

113
		tmp = __reverse_ulong((unsigned char *)p);
F
Fan Li 已提交
114 115 116 117

		tmp &= ~0UL >> offset;
		if (size < BITS_PER_LONG)
			tmp &= (~0UL << (BITS_PER_LONG - size));
118
		if (tmp)
F
Fan Li 已提交
119 120 121 122
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
123
		size -= BITS_PER_LONG;
F
Fan Li 已提交
124
		offset = 0;
125
		p++;
126
	}
F
Fan Li 已提交
127 128 129
	return result;
found:
	return result - size + __reverse_ffs(tmp);
130 131 132 133 134 135
}

static unsigned long __find_rev_next_zero_bit(const unsigned long *addr,
			unsigned long size, unsigned long offset)
{
	const unsigned long *p = addr + BIT_WORD(offset);
136
	unsigned long result = size;
137 138 139 140 141
	unsigned long tmp;

	if (offset >= size)
		return size;

142
	size -= (offset & ~(BITS_PER_LONG - 1));
143
	offset %= BITS_PER_LONG;
144 145 146 147 148

	while (1) {
		if (*p == ~0UL)
			goto pass;

149
		tmp = __reverse_ulong((unsigned char *)p);
150 151 152 153 154

		if (offset)
			tmp |= ~0UL << (BITS_PER_LONG - offset);
		if (size < BITS_PER_LONG)
			tmp |= ~0UL >> size;
155
		if (tmp != ~0UL)
156 157 158 159
			goto found;
pass:
		if (size <= BITS_PER_LONG)
			break;
160
		size -= BITS_PER_LONG;
161
		offset = 0;
162
		p++;
163
	}
164 165 166
	return result;
found:
	return result - size + __reverse_ffz(tmp);
167 168
}

C
Chao Yu 已提交
169
bool f2fs_need_SSR(struct f2fs_sb_info *sbi)
170 171 172 173 174 175 176
{
	int node_secs = get_blocktype_secs(sbi, F2FS_DIRTY_NODES);
	int dent_secs = get_blocktype_secs(sbi, F2FS_DIRTY_DENTS);
	int imeta_secs = get_blocktype_secs(sbi, F2FS_DIRTY_IMETA);

	if (test_opt(sbi, LFS))
		return false;
177
	if (sbi->gc_mode == GC_URGENT)
178 179 180
		return true;

	return free_sections(sbi) <= (node_secs + 2 * dent_secs + imeta_secs +
C
Chao Yu 已提交
181
			SM_I(sbi)->min_ssr_sections + reserved_sections(sbi));
182 183
}

C
Chao Yu 已提交
184
void f2fs_register_inmem_page(struct inode *inode, struct page *page)
J
Jaegeuk Kim 已提交
185
{
J
Jaegeuk Kim 已提交
186
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
J
Jaegeuk Kim 已提交
187 188
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *new;
189

J
Jaegeuk Kim 已提交
190
	f2fs_trace_pid(page);
191

C
Chao Yu 已提交
192 193 194
	set_page_private(page, (unsigned long)ATOMIC_WRITTEN_PAGE);
	SetPagePrivate(page);

J
Jaegeuk Kim 已提交
195 196 197 198 199
	new = f2fs_kmem_cache_alloc(inmem_entry_slab, GFP_NOFS);

	/* add atomic page indices to the list */
	new->page = page;
	INIT_LIST_HEAD(&new->list);
C
Chao Yu 已提交
200

J
Jaegeuk Kim 已提交
201 202 203 204
	/* increase reference count with clean state */
	mutex_lock(&fi->inmem_lock);
	get_page(page);
	list_add_tail(&new->list, &fi->inmem_pages);
J
Jaegeuk Kim 已提交
205 206 207 208
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(&fi->inmem_ilist))
		list_add_tail(&fi->inmem_ilist, &sbi->inode_list[ATOMIC_FILE]);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
209
	inc_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
J
Jaegeuk Kim 已提交
210
	mutex_unlock(&fi->inmem_lock);
211 212

	trace_f2fs_register_inmem_page(page, INMEM);
J
Jaegeuk Kim 已提交
213 214
}

215 216
static int __revoke_inmem_pages(struct inode *inode,
				struct list_head *head, bool drop, bool recover)
217
{
218
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
219
	struct inmem_pages *cur, *tmp;
220
	int err = 0;
221 222

	list_for_each_entry_safe(cur, tmp, head, list) {
223 224 225 226 227 228
		struct page *page = cur->page;

		if (drop)
			trace_f2fs_commit_inmem_page(page, INMEM_DROP);

		lock_page(page);
229

230 231
		f2fs_wait_on_page_writeback(page, DATA, true);

232 233 234 235 236
		if (recover) {
			struct dnode_of_data dn;
			struct node_info ni;

			trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
237
retry:
238
			set_new_dnode(&dn, inode, NULL, NULL, 0);
C
Chao Yu 已提交
239 240
			err = f2fs_get_dnode_of_data(&dn, page->index,
								LOOKUP_NODE);
241 242 243 244 245 246
			if (err) {
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
247 248 249
				err = -EAGAIN;
				goto next;
			}
250 251 252 253 254 255 256

			err = f2fs_get_node_info(sbi, dn.nid, &ni);
			if (err) {
				f2fs_put_dnode(&dn);
				return err;
			}

257
			if (cur->old_addr == NEW_ADDR) {
C
Chao Yu 已提交
258
				f2fs_invalidate_blocks(sbi, dn.data_blkaddr);
259 260 261
				f2fs_update_data_blkaddr(&dn, NEW_ADDR);
			} else
				f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
262 263 264 265
					cur->old_addr, ni.version, true, true);
			f2fs_put_dnode(&dn);
		}
next:
266 267 268
		/* we don't need to invalidate this in the sccessful status */
		if (drop || recover)
			ClearPageUptodate(page);
269
		set_page_private(page, 0);
C
Chao Yu 已提交
270
		ClearPagePrivate(page);
271
		f2fs_put_page(page, 1);
272 273 274 275 276

		list_del(&cur->list);
		kmem_cache_free(inmem_entry_slab, cur);
		dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
	}
277
	return err;
278 279
}

C
Chao Yu 已提交
280
void f2fs_drop_inmem_pages_all(struct f2fs_sb_info *sbi, bool gc_failure)
J
Jaegeuk Kim 已提交
281 282 283 284 285 286 287 288 289 290 291 292 293 294 295
{
	struct list_head *head = &sbi->inode_list[ATOMIC_FILE];
	struct inode *inode;
	struct f2fs_inode_info *fi;
next:
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (list_empty(head)) {
		spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
		return;
	}
	fi = list_first_entry(head, struct f2fs_inode_info, inmem_ilist);
	inode = igrab(&fi->vfs_inode);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);

	if (inode) {
296 297 298 299 300 301 302
		if (gc_failure) {
			if (fi->i_gc_failures[GC_FAILURE_ATOMIC])
				goto drop;
			goto skip;
		}
drop:
		set_inode_flag(inode, FI_ATOMIC_REVOKE_REQUEST);
C
Chao Yu 已提交
303
		f2fs_drop_inmem_pages(inode);
J
Jaegeuk Kim 已提交
304 305
		iput(inode);
	}
306
skip:
J
Jaegeuk Kim 已提交
307 308 309 310 311
	congestion_wait(BLK_RW_ASYNC, HZ/50);
	cond_resched();
	goto next;
}

C
Chao Yu 已提交
312
void f2fs_drop_inmem_pages(struct inode *inode)
313
{
J
Jaegeuk Kim 已提交
314
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
315 316 317
	struct f2fs_inode_info *fi = F2FS_I(inode);

	mutex_lock(&fi->inmem_lock);
318
	__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
J
Jaegeuk Kim 已提交
319 320 321 322
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
323
	mutex_unlock(&fi->inmem_lock);
C
Chao Yu 已提交
324 325

	clear_inode_flag(inode, FI_ATOMIC_FILE);
326
	fi->i_gc_failures[GC_FAILURE_ATOMIC] = 0;
C
Chao Yu 已提交
327
	stat_dec_atomic_write(inode);
328 329
}

C
Chao Yu 已提交
330
void f2fs_drop_inmem_page(struct inode *inode, struct page *page)
331 332 333 334 335 336 337 338 339 340 341 342 343 344
{
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct list_head *head = &fi->inmem_pages;
	struct inmem_pages *cur = NULL;

	f2fs_bug_on(sbi, !IS_ATOMIC_WRITTEN_PAGE(page));

	mutex_lock(&fi->inmem_lock);
	list_for_each_entry(cur, head, list) {
		if (cur->page == page)
			break;
	}

345
	f2fs_bug_on(sbi, list_empty(head) || cur->page != page);
346 347 348 349 350 351 352 353 354 355 356 357 358 359
	list_del(&cur->list);
	mutex_unlock(&fi->inmem_lock);

	dec_page_count(sbi, F2FS_INMEM_PAGES);
	kmem_cache_free(inmem_entry_slab, cur);

	ClearPageUptodate(page);
	set_page_private(page, 0);
	ClearPagePrivate(page);
	f2fs_put_page(page, 0);

	trace_f2fs_commit_inmem_page(page, INMEM_INVALIDATE);
}

C
Chao Yu 已提交
360
static int __f2fs_commit_inmem_pages(struct inode *inode)
J
Jaegeuk Kim 已提交
361 362 363 364 365
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
	struct inmem_pages *cur, *tmp;
	struct f2fs_io_info fio = {
366
		.sbi = sbi,
C
Chao Yu 已提交
367
		.ino = inode->i_ino,
J
Jaegeuk Kim 已提交
368
		.type = DATA,
M
Mike Christie 已提交
369
		.op = REQ_OP_WRITE,
370
		.op_flags = REQ_SYNC | REQ_PRIO,
C
Chao Yu 已提交
371
		.io_type = FS_DATA_IO,
J
Jaegeuk Kim 已提交
372
	};
C
Chao Yu 已提交
373
	struct list_head revoke_list;
374
	pgoff_t last_idx = ULONG_MAX;
375
	int err = 0;
J
Jaegeuk Kim 已提交
376

C
Chao Yu 已提交
377 378
	INIT_LIST_HEAD(&revoke_list);

J
Jaegeuk Kim 已提交
379
	list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
380 381 382 383 384 385 386 387
		struct page *page = cur->page;

		lock_page(page);
		if (page->mapping == inode->i_mapping) {
			trace_f2fs_commit_inmem_page(page, INMEM);

			set_page_dirty(page);
			f2fs_wait_on_page_writeback(page, DATA, true);
388
			if (clear_page_dirty_for_io(page)) {
389
				inode_dec_dirty_pages(inode);
C
Chao Yu 已提交
390
				f2fs_remove_dirty_inode(inode);
391
			}
392
retry:
393
			fio.page = page;
394
			fio.old_blkaddr = NULL_ADDR;
395
			fio.encrypted_page = NULL;
396
			fio.need_lock = LOCK_DONE;
C
Chao Yu 已提交
397
			err = f2fs_do_write_data_page(&fio);
398
			if (err) {
399 400 401 402 403
				if (err == -ENOMEM) {
					congestion_wait(BLK_RW_ASYNC, HZ/50);
					cond_resched();
					goto retry;
				}
404
				unlock_page(page);
405
				break;
406
			}
407 408
			/* record old blkaddr for revoking */
			cur->old_addr = fio.old_blkaddr;
409
			last_idx = page->index;
410 411
		}
		unlock_page(page);
C
Chao Yu 已提交
412
		list_move_tail(&cur->list, &revoke_list);
J
Jaegeuk Kim 已提交
413
	}
414

415
	if (last_idx != ULONG_MAX)
416
		f2fs_submit_merged_write_cond(sbi, inode, 0, last_idx, DATA);
417

C
Chao Yu 已提交
418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433
	if (err) {
		/*
		 * try to revoke all committed pages, but still we could fail
		 * due to no memory or other reason, if that happened, EAGAIN
		 * will be returned, which means in such case, transaction is
		 * already not integrity, caller should use journal to do the
		 * recovery or rewrite & commit last transaction. For other
		 * error number, revoking was done by filesystem itself.
		 */
		err = __revoke_inmem_pages(inode, &revoke_list, false, true);

		/* drop all uncommitted pages */
		__revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
	} else {
		__revoke_inmem_pages(inode, &revoke_list, false, false);
	}
434

435 436 437
	return err;
}

C
Chao Yu 已提交
438
int f2fs_commit_inmem_pages(struct inode *inode)
439 440 441
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct f2fs_inode_info *fi = F2FS_I(inode);
442
	int err;
443 444 445

	f2fs_balance_fs(sbi, true);

446 447 448
	down_write(&fi->i_gc_rwsem[WRITE]);

	f2fs_lock_op(sbi);
C
Chao Yu 已提交
449 450
	set_inode_flag(inode, FI_ATOMIC_COMMIT);

451
	mutex_lock(&fi->inmem_lock);
C
Chao Yu 已提交
452
	err = __f2fs_commit_inmem_pages(inode);
453

J
Jaegeuk Kim 已提交
454 455 456 457
	spin_lock(&sbi->inode_lock[ATOMIC_FILE]);
	if (!list_empty(&fi->inmem_ilist))
		list_del_init(&fi->inmem_ilist);
	spin_unlock(&sbi->inode_lock[ATOMIC_FILE]);
J
Jaegeuk Kim 已提交
458 459
	mutex_unlock(&fi->inmem_lock);

C
Chao Yu 已提交
460 461
	clear_inode_flag(inode, FI_ATOMIC_COMMIT);

462
	f2fs_unlock_op(sbi);
463 464
	up_write(&fi->i_gc_rwsem[WRITE]);

465
	return err;
J
Jaegeuk Kim 已提交
466 467
}

J
Jaegeuk Kim 已提交
468
/*
J
Jaegeuk Kim 已提交
469 470 471
 * This function balances dirty node and dentry pages.
 * In addition, it controls garbage collection.
 */
J
Jaegeuk Kim 已提交
472
void f2fs_balance_fs(struct f2fs_sb_info *sbi, bool need)
J
Jaegeuk Kim 已提交
473
{
474 475
	if (time_to_inject(sbi, FAULT_CHECKPOINT)) {
		f2fs_show_injection_info(FAULT_CHECKPOINT);
476
		f2fs_stop_checkpoint(sbi, false);
477
	}
478

479
	/* balance_fs_bg is able to be pending */
J
Jaegeuk Kim 已提交
480
	if (need && excess_cached_nats(sbi))
481 482
		f2fs_balance_fs_bg(sbi);

J
Jaegeuk Kim 已提交
483
	/*
484 485
	 * We should do GC or end up with checkpoint, if there are so many dirty
	 * dir/node pages without enough free segments.
J
Jaegeuk Kim 已提交
486
	 */
487
	if (has_not_enough_free_secs(sbi, 0, 0)) {
J
Jaegeuk Kim 已提交
488
		mutex_lock(&sbi->gc_mutex);
489
		f2fs_gc(sbi, false, false, NULL_SEGNO);
J
Jaegeuk Kim 已提交
490 491 492
	}
}

493 494
void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
{
495 496 497
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		return;

C
Chao Yu 已提交
498
	/* try to shrink extent cache when there is no enough memory */
C
Chao Yu 已提交
499
	if (!f2fs_available_free_memory(sbi, EXTENT_CACHE))
J
Jaegeuk Kim 已提交
500
		f2fs_shrink_extent_tree(sbi, EXTENT_CACHE_SHRINK_NUMBER);
C
Chao Yu 已提交
501

J
Jaegeuk Kim 已提交
502
	/* check the # of cached NAT entries */
C
Chao Yu 已提交
503 504
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES))
		f2fs_try_to_free_nats(sbi, NAT_ENTRY_PER_BLOCK);
J
Jaegeuk Kim 已提交
505

C
Chao Yu 已提交
506 507
	if (!f2fs_available_free_memory(sbi, FREE_NIDS))
		f2fs_try_to_free_nids(sbi, MAX_FREE_NIDS);
508
	else
C
Chao Yu 已提交
509
		f2fs_build_free_nids(sbi, false, false);
C
Chao Yu 已提交
510

511 512
	if (!is_idle(sbi) &&
		(!excess_dirty_nats(sbi) && !excess_dirty_nodes(sbi)))
J
Jaegeuk Kim 已提交
513
		return;
C
Chao Yu 已提交
514

J
Jaegeuk Kim 已提交
515
	/* checkpoint is the only way to shrink partial cached entries */
C
Chao Yu 已提交
516 517
	if (!f2fs_available_free_memory(sbi, NAT_ENTRIES) ||
			!f2fs_available_free_memory(sbi, INO_ENTRIES) ||
518 519
			excess_prefree_segs(sbi) ||
			excess_dirty_nats(sbi) ||
520
			excess_dirty_nodes(sbi) ||
J
Jaegeuk Kim 已提交
521
			f2fs_time_over(sbi, CP_TIME)) {
C
Chao Yu 已提交
522 523 524 525
		if (test_opt(sbi, DATA_FLUSH)) {
			struct blk_plug plug;

			blk_start_plug(&plug);
C
Chao Yu 已提交
526
			f2fs_sync_dirty_inodes(sbi, FILE_INODE);
C
Chao Yu 已提交
527 528
			blk_finish_plug(&plug);
		}
529
		f2fs_sync_fs(sbi->sb, true);
530
		stat_inc_bg_cp_count(sbi->stat_info);
C
Chao Yu 已提交
531
	}
532 533
}

534 535
static int __submit_flush_wait(struct f2fs_sb_info *sbi,
				struct block_device *bdev)
J
Jaegeuk Kim 已提交
536
{
537
	struct bio *bio = f2fs_bio_alloc(sbi, 0, true);
J
Jaegeuk Kim 已提交
538 539
	int ret;

540
	bio->bi_opf = REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH;
541
	bio_set_dev(bio, bdev);
J
Jaegeuk Kim 已提交
542 543
	ret = submit_bio_wait(bio);
	bio_put(bio);
544 545 546

	trace_f2fs_issue_flush(bdev, test_opt(sbi, NOBARRIER),
				test_opt(sbi, FLUSH_MERGE), ret);
J
Jaegeuk Kim 已提交
547 548 549
	return ret;
}

C
Chao Yu 已提交
550
static int submit_flush_wait(struct f2fs_sb_info *sbi, nid_t ino)
J
Jaegeuk Kim 已提交
551
{
C
Chao Yu 已提交
552
	int ret = 0;
J
Jaegeuk Kim 已提交
553 554
	int i;

C
Chao Yu 已提交
555 556
	if (!sbi->s_ndevs)
		return __submit_flush_wait(sbi, sbi->sb->s_bdev);
557

C
Chao Yu 已提交
558
	for (i = 0; i < sbi->s_ndevs; i++) {
C
Chao Yu 已提交
559
		if (!f2fs_is_dirty_device(sbi, ino, i, FLUSH_INO))
C
Chao Yu 已提交
560
			continue;
561 562 563
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;
J
Jaegeuk Kim 已提交
564 565 566 567
	}
	return ret;
}

568
static int issue_flush_thread(void *data)
569 570
{
	struct f2fs_sb_info *sbi = data;
571
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
572
	wait_queue_head_t *q = &fcc->flush_wait_queue;
573 574 575 576
repeat:
	if (kthread_should_stop())
		return 0;

577 578
	sb_start_intwrite(sbi->sb);

579
	if (!llist_empty(&fcc->issue_list)) {
580 581 582
		struct flush_cmd *cmd, *next;
		int ret;

583 584 585
		fcc->dispatch_list = llist_del_all(&fcc->issue_list);
		fcc->dispatch_list = llist_reverse_order(fcc->dispatch_list);

C
Chao Yu 已提交
586 587 588
		cmd = llist_entry(fcc->dispatch_list, struct flush_cmd, llnode);

		ret = submit_flush_wait(sbi, cmd->ino);
C
Chao Yu 已提交
589 590
		atomic_inc(&fcc->issued_flush);

591 592
		llist_for_each_entry_safe(cmd, next,
					  fcc->dispatch_list, llnode) {
593 594 595
			cmd->ret = ret;
			complete(&cmd->wait);
		}
596
		fcc->dispatch_list = NULL;
597 598
	}

599 600
	sb_end_intwrite(sbi->sb);

601
	wait_event_interruptible(*q,
602
		kthread_should_stop() || !llist_empty(&fcc->issue_list));
603 604 605
	goto repeat;
}

C
Chao Yu 已提交
606
int f2fs_issue_flush(struct f2fs_sb_info *sbi, nid_t ino)
607
{
608
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
609
	struct flush_cmd cmd;
C
Chao Yu 已提交
610
	int ret;
611

J
Jaegeuk Kim 已提交
612 613 614
	if (test_opt(sbi, NOBARRIER))
		return 0;

C
Chao Yu 已提交
615
	if (!test_opt(sbi, FLUSH_MERGE)) {
C
Chao Yu 已提交
616
		ret = submit_flush_wait(sbi, ino);
C
Chao Yu 已提交
617 618 619
		atomic_inc(&fcc->issued_flush);
		return ret;
	}
J
Jaegeuk Kim 已提交
620

C
Chao Yu 已提交
621 622
	if (atomic_inc_return(&fcc->issing_flush) == 1 || sbi->s_ndevs > 1) {
		ret = submit_flush_wait(sbi, ino);
C
Chao Yu 已提交
623 624 625
		atomic_dec(&fcc->issing_flush);

		atomic_inc(&fcc->issued_flush);
J
Jaegeuk Kim 已提交
626 627
		return ret;
	}
628

C
Chao Yu 已提交
629
	cmd.ino = ino;
630
	init_completion(&cmd.wait);
631

632
	llist_add(&cmd.llnode, &fcc->issue_list);
633

634 635 636 637
	/* update issue_list before we wake up issue_flush thread */
	smp_mb();

	if (waitqueue_active(&fcc->flush_wait_queue))
638
		wake_up(&fcc->flush_wait_queue);
639

640 641
	if (fcc->f2fs_issue_flush) {
		wait_for_completion(&cmd.wait);
C
Chao Yu 已提交
642
		atomic_dec(&fcc->issing_flush);
643
	} else {
644 645 646 647 648 649 650 651 652
		struct llist_node *list;

		list = llist_del_all(&fcc->issue_list);
		if (!list) {
			wait_for_completion(&cmd.wait);
			atomic_dec(&fcc->issing_flush);
		} else {
			struct flush_cmd *tmp, *next;

C
Chao Yu 已提交
653
			ret = submit_flush_wait(sbi, ino);
654 655 656 657 658 659 660 661 662 663 664

			llist_for_each_entry_safe(tmp, next, list, llnode) {
				if (tmp == &cmd) {
					cmd.ret = ret;
					atomic_dec(&fcc->issing_flush);
					continue;
				}
				tmp->ret = ret;
				complete(&tmp->wait);
			}
		}
665
	}
666 667

	return cmd.ret;
668 669
}

C
Chao Yu 已提交
670
int f2fs_create_flush_cmd_control(struct f2fs_sb_info *sbi)
671 672 673 674 675
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct flush_cmd_control *fcc;
	int err = 0;

676 677
	if (SM_I(sbi)->fcc_info) {
		fcc = SM_I(sbi)->fcc_info;
678 679
		if (fcc->f2fs_issue_flush)
			return err;
680 681 682
		goto init_thread;
	}

C
Chao Yu 已提交
683
	fcc = f2fs_kzalloc(sbi, sizeof(struct flush_cmd_control), GFP_KERNEL);
684 685
	if (!fcc)
		return -ENOMEM;
C
Chao Yu 已提交
686 687
	atomic_set(&fcc->issued_flush, 0);
	atomic_set(&fcc->issing_flush, 0);
688
	init_waitqueue_head(&fcc->flush_wait_queue);
689
	init_llist_head(&fcc->issue_list);
690
	SM_I(sbi)->fcc_info = fcc;
691 692 693
	if (!test_opt(sbi, FLUSH_MERGE))
		return err;

694
init_thread:
695 696 697 698 699
	fcc->f2fs_issue_flush = kthread_run(issue_flush_thread, sbi,
				"f2fs_flush-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(fcc->f2fs_issue_flush)) {
		err = PTR_ERR(fcc->f2fs_issue_flush);
		kfree(fcc);
700
		SM_I(sbi)->fcc_info = NULL;
701 702 703 704 705 706
		return err;
	}

	return err;
}

C
Chao Yu 已提交
707
void f2fs_destroy_flush_cmd_control(struct f2fs_sb_info *sbi, bool free)
708
{
709
	struct flush_cmd_control *fcc = SM_I(sbi)->fcc_info;
710

711 712 713 714 715 716 717 718
	if (fcc && fcc->f2fs_issue_flush) {
		struct task_struct *flush_thread = fcc->f2fs_issue_flush;

		fcc->f2fs_issue_flush = NULL;
		kthread_stop(flush_thread);
	}
	if (free) {
		kfree(fcc);
719
		SM_I(sbi)->fcc_info = NULL;
720
	}
721 722
}

723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744
int f2fs_flush_device_cache(struct f2fs_sb_info *sbi)
{
	int ret = 0, i;

	if (!sbi->s_ndevs)
		return 0;

	for (i = 1; i < sbi->s_ndevs; i++) {
		if (!f2fs_test_bit(i, (char *)&sbi->dirty_device))
			continue;
		ret = __submit_flush_wait(sbi, FDEV(i).bdev);
		if (ret)
			break;

		spin_lock(&sbi->dev_lock);
		f2fs_clear_bit(i, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}

	return ret;
}

J
Jaegeuk Kim 已提交
745 746 747 748 749 750 751 752 753 754 755 756 757 758
static void __locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	/* need not be added */
	if (IS_CURSEG(sbi, segno))
		return;

	if (!test_and_set_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]++;

	if (dirty_type == DIRTY) {
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
759
		enum dirty_type t = sentry->type;
760

761 762 763 764
		if (unlikely(t >= DIRTY)) {
			f2fs_bug_on(sbi, 1);
			return;
		}
765 766
		if (!test_and_set_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]++;
J
Jaegeuk Kim 已提交
767 768 769 770 771 772 773 774 775 776 777 778
	}
}

static void __remove_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	if (test_and_clear_bit(segno, dirty_i->dirty_segmap[dirty_type]))
		dirty_i->nr_dirty[dirty_type]--;

	if (dirty_type == DIRTY) {
779 780 781 782 783
		struct seg_entry *sentry = get_seg_entry(sbi, segno);
		enum dirty_type t = sentry->type;

		if (test_and_clear_bit(segno, dirty_i->dirty_segmap[t]))
			dirty_i->nr_dirty[t]--;
784

785
		if (get_valid_blocks(sbi, segno, true) == 0)
786
			clear_bit(GET_SEC_FROM_SEG(sbi, segno),
787
						dirty_i->victim_secmap);
J
Jaegeuk Kim 已提交
788 789 790
	}
}

J
Jaegeuk Kim 已提交
791
/*
J
Jaegeuk Kim 已提交
792 793 794 795
 * Should not occur error such as -ENOMEM.
 * Adding dirty entry into seglist is not critical operation.
 * If a given segment is one of current working segments, it won't be added.
 */
796
static void locate_dirty_segment(struct f2fs_sb_info *sbi, unsigned int segno)
J
Jaegeuk Kim 已提交
797 798 799 800 801 802 803 804 805
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned short valid_blocks;

	if (segno == NULL_SEGNO || IS_CURSEG(sbi, segno))
		return;

	mutex_lock(&dirty_i->seglist_lock);

806
	valid_blocks = get_valid_blocks(sbi, segno, false);
J
Jaegeuk Kim 已提交
807 808 809 810 811 812 813 814 815 816 817 818 819 820

	if (valid_blocks == 0) {
		__locate_dirty_segment(sbi, segno, PRE);
		__remove_dirty_segment(sbi, segno, DIRTY);
	} else if (valid_blocks < sbi->blocks_per_seg) {
		__locate_dirty_segment(sbi, segno, DIRTY);
	} else {
		/* Recovery routine with SSR needs this */
		__remove_dirty_segment(sbi, segno, DIRTY);
	}

	mutex_unlock(&dirty_i->seglist_lock);
}

821
static struct discard_cmd *__create_discard_cmd(struct f2fs_sb_info *sbi,
822 823
		struct block_device *bdev, block_t lstart,
		block_t start, block_t len)
C
Chao Yu 已提交
824
{
825
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
826
	struct list_head *pend_list;
827
	struct discard_cmd *dc;
C
Chao Yu 已提交
828

C
Chao Yu 已提交
829 830 831 832
	f2fs_bug_on(sbi, !len);

	pend_list = &dcc->pend_list[plist_idx(len)];

833 834
	dc = f2fs_kmem_cache_alloc(discard_cmd_slab, GFP_NOFS);
	INIT_LIST_HEAD(&dc->list);
835
	dc->bdev = bdev;
836
	dc->lstart = lstart;
837
	dc->start = start;
838
	dc->len = len;
839
	dc->ref = 0;
840
	dc->state = D_PREP;
841
	dc->issuing = 0;
842
	dc->error = 0;
843
	init_completion(&dc->wait);
C
Chao Yu 已提交
844
	list_add_tail(&dc->list, pend_list);
845 846
	spin_lock_init(&dc->lock);
	dc->bio_ref = 0;
C
Chao Yu 已提交
847
	atomic_inc(&dcc->discard_cmd_cnt);
C
Chao Yu 已提交
848
	dcc->undiscard_blks += len;
849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 865 866

	return dc;
}

static struct discard_cmd *__attach_discard_cmd(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node *parent, struct rb_node **p)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;

	dc = __create_discard_cmd(sbi, bdev, lstart, start, len);

	rb_link_node(&dc->rb_node, parent, p);
	rb_insert_color(&dc->rb_node, &dcc->root);

	return dc;
867 868
}

869 870
static void __detach_discard_cmd(struct discard_cmd_control *dcc,
							struct discard_cmd *dc)
871
{
872
	if (dc->state == D_DONE)
873
		atomic_sub(dc->issuing, &dcc->issing_discard);
874 875 876

	list_del(&dc->list);
	rb_erase(&dc->rb_node, &dcc->root);
C
Chao Yu 已提交
877
	dcc->undiscard_blks -= dc->len;
878 879 880 881 882 883 884 885 886 887

	kmem_cache_free(discard_cmd_slab, dc);

	atomic_dec(&dcc->discard_cmd_cnt);
}

static void __remove_discard_cmd(struct f2fs_sb_info *sbi,
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
888
	unsigned long flags;
889

C
Chao Yu 已提交
890 891
	trace_f2fs_remove_discard(dc->bdev, dc->start, dc->len);

892 893 894 895 896 897 898
	spin_lock_irqsave(&dc->lock, flags);
	if (dc->bio_ref) {
		spin_unlock_irqrestore(&dc->lock, flags);
		return;
	}
	spin_unlock_irqrestore(&dc->lock, flags);

899 900
	f2fs_bug_on(sbi, dc->ref);

901 902
	if (dc->error == -EOPNOTSUPP)
		dc->error = 0;
903

904
	if (dc->error)
905 906 907
		printk_ratelimited(
			"%sF2FS-fs: Issue discard(%u, %u, %u) failed, ret: %d",
			KERN_INFO, dc->lstart, dc->start, dc->len, dc->error);
908
	__detach_discard_cmd(dcc, dc);
C
Chao Yu 已提交
909 910
}

911 912 913
static void f2fs_submit_discard_endio(struct bio *bio)
{
	struct discard_cmd *dc = (struct discard_cmd *)bio->bi_private;
914
	unsigned long flags;
915

916
	dc->error = blk_status_to_errno(bio->bi_status);
917 918 919 920 921 922 923 924

	spin_lock_irqsave(&dc->lock, flags);
	dc->bio_ref--;
	if (!dc->bio_ref && dc->state == D_SUBMIT) {
		dc->state = D_DONE;
		complete_all(&dc->wait);
	}
	spin_unlock_irqrestore(&dc->lock, flags);
925 926 927
	bio_put(bio);
}

W
Wei Yongjun 已提交
928
static void __check_sit_bitmap(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
929 930 931 932 933 934 935 936 937 938 939 940 941 942
				block_t start, block_t end)
{
#ifdef CONFIG_F2FS_CHECK_FS
	struct seg_entry *sentry;
	unsigned int segno;
	block_t blk = start;
	unsigned long offset, size, max_blocks = sbi->blocks_per_seg;
	unsigned long *map;

	while (blk < end) {
		segno = GET_SEGNO(sbi, blk);
		sentry = get_seg_entry(sbi, segno);
		offset = GET_BLKOFF_FROM_SEG0(sbi, blk);

943 944 945 946
		if (end < START_BLOCK(sbi, segno + 1))
			size = GET_BLKOFF_FROM_SEG0(sbi, end);
		else
			size = max_blocks;
C
Chao Yu 已提交
947 948 949
		map = (unsigned long *)(sentry->cur_valid_map);
		offset = __find_rev_next_bit(map, size, offset);
		f2fs_bug_on(sbi, offset != size);
950
		blk = START_BLOCK(sbi, segno + 1);
C
Chao Yu 已提交
951 952 953 954
	}
#endif
}

955 956 957 958 959 960 961
static void __init_discard_policy(struct f2fs_sb_info *sbi,
				struct discard_policy *dpolicy,
				int discard_type, unsigned int granularity)
{
	/* common policy */
	dpolicy->type = discard_type;
	dpolicy->sync = true;
C
Chao Yu 已提交
962
	dpolicy->ordered = false;
963 964 965 966 967 968 969
	dpolicy->granularity = granularity;

	dpolicy->max_requests = DEF_MAX_DISCARD_REQUEST;
	dpolicy->io_aware_gran = MAX_PLIST_NUM;

	if (discard_type == DPOLICY_BG) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
970
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
971 972
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = true;
973
		dpolicy->sync = false;
C
Chao Yu 已提交
974
		dpolicy->ordered = true;
975 976 977 978 979 980
		if (utilization(sbi) > DEF_DISCARD_URGENT_UTIL) {
			dpolicy->granularity = 1;
			dpolicy->max_interval = DEF_MIN_DISCARD_ISSUE_TIME;
		}
	} else if (discard_type == DPOLICY_FORCE) {
		dpolicy->min_interval = DEF_MIN_DISCARD_ISSUE_TIME;
981
		dpolicy->mid_interval = DEF_MID_DISCARD_ISSUE_TIME;
982 983 984 985 986
		dpolicy->max_interval = DEF_MAX_DISCARD_ISSUE_TIME;
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_FSTRIM) {
		dpolicy->io_aware = false;
	} else if (discard_type == DPOLICY_UMOUNT) {
987
		dpolicy->max_requests = UINT_MAX;
988 989 990 991
		dpolicy->io_aware = false;
	}
}

992 993 994
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len);
995
/* this function is copied from blkdev_issue_discard from block/blk-lib.c */
996
static int __submit_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
997
						struct discard_policy *dpolicy,
998 999
						struct discard_cmd *dc,
						unsigned int *issued)
1000
{
1001 1002 1003 1004
	struct block_device *bdev = dc->bdev;
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1005
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1006 1007 1008
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
	int flag = dpolicy->sync ? REQ_SYNC : 0;
1009 1010
	block_t lstart, start, len, total_len;
	int err = 0;
1011 1012

	if (dc->state != D_PREP)
1013
		return 0;
1014

1015
	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
1016
		return 0;
1017

1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037 1038 1039 1040 1041 1042
	trace_f2fs_issue_discard(bdev, dc->start, dc->len);

	lstart = dc->lstart;
	start = dc->start;
	len = dc->len;
	total_len = len;

	dc->len = 0;

	while (total_len && *issued < dpolicy->max_requests && !err) {
		struct bio *bio = NULL;
		unsigned long flags;
		bool last = true;

		if (len > max_discard_blocks) {
			len = max_discard_blocks;
			last = false;
		}

		(*issued)++;
		if (*issued == dpolicy->max_requests)
			last = true;

		dc->len += len;

1043 1044 1045 1046 1047
		if (time_to_inject(sbi, FAULT_DISCARD)) {
			f2fs_show_injection_info(FAULT_DISCARD);
			err = -EIO;
			goto submit;
		}
1048 1049 1050 1051
		err = __blkdev_issue_discard(bdev,
					SECTOR_FROM_BLOCK(start),
					SECTOR_FROM_BLOCK(len),
					GFP_NOFS, 0, &bio);
1052
submit:
1053
		if (err) {
1054
			spin_lock_irqsave(&dc->lock, flags);
1055
			if (dc->state == D_PARTIAL)
1056 1057 1058
				dc->state = D_SUBMIT;
			spin_unlock_irqrestore(&dc->lock, flags);

1059 1060
			break;
		}
1061

1062
		f2fs_bug_on(sbi, !bio);
1063

1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074
		/*
		 * should keep before submission to avoid D_DONE
		 * right away
		 */
		spin_lock_irqsave(&dc->lock, flags);
		if (last)
			dc->state = D_SUBMIT;
		else
			dc->state = D_PARTIAL;
		dc->bio_ref++;
		spin_unlock_irqrestore(&dc->lock, flags);
1075

1076 1077 1078
		atomic_inc(&dcc->issing_discard);
		dc->issuing++;
		list_move_tail(&dc->list, wait_list);
C
Chao Yu 已提交
1079

1080 1081
		/* sanity check on discard range */
		__check_sit_bitmap(sbi, start, start + len);
1082

1083 1084 1085 1086 1087 1088 1089 1090
		bio->bi_private = dc;
		bio->bi_end_io = f2fs_submit_discard_endio;
		bio->bi_opf |= flag;
		submit_bio(bio);

		atomic_inc(&dcc->issued_discard);

		f2fs_update_iostat(sbi, FS_DISCARD, 1);
1091 1092 1093 1094 1095

		lstart += len;
		start += len;
		total_len -= len;
		len = total_len;
1096
	}
1097

1098
	if (!err && len)
1099
		__update_discard_tree_range(sbi, bdev, lstart, start, len);
1100
	return err;
1101 1102
}

1103 1104 1105 1106 1107
static struct discard_cmd *__insert_discard_tree(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len,
				struct rb_node **insert_p,
				struct rb_node *insert_parent)
1108
{
1109
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1110
	struct rb_node **p;
1111 1112 1113 1114 1115 1116 1117 1118
	struct rb_node *parent = NULL;
	struct discard_cmd *dc = NULL;

	if (insert_p && insert_parent) {
		parent = insert_parent;
		p = insert_p;
		goto do_insert;
	}
1119

C
Chao Yu 已提交
1120
	p = f2fs_lookup_rb_tree_for_insert(sbi, &dcc->root, &parent, lstart);
1121 1122 1123 1124
do_insert:
	dc = __attach_discard_cmd(sbi, bdev, lstart, start, len, parent, p);
	if (!dc)
		return NULL;
1125

1126
	return dc;
1127 1128
}

C
Chao Yu 已提交
1129 1130 1131 1132 1133 1134
static void __relocate_discard_cmd(struct discard_cmd_control *dcc,
						struct discard_cmd *dc)
{
	list_move_tail(&dc->list, &dcc->pend_list[plist_idx(dc->len)]);
}

1135 1136 1137
static void __punch_discard_cmd(struct f2fs_sb_info *sbi,
				struct discard_cmd *dc, block_t blkaddr)
{
C
Chao Yu 已提交
1138
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1139 1140
	struct discard_info di = dc->di;
	bool modified = false;
1141

1142
	if (dc->state == D_DONE || dc->len == 1) {
1143 1144 1145 1146
		__remove_discard_cmd(sbi, dc);
		return;
	}

C
Chao Yu 已提交
1147 1148
	dcc->undiscard_blks -= di.len;

1149
	if (blkaddr > di.lstart) {
1150
		dc->len = blkaddr - dc->lstart;
C
Chao Yu 已提交
1151
		dcc->undiscard_blks += dc->len;
C
Chao Yu 已提交
1152
		__relocate_discard_cmd(dcc, dc);
1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165
		modified = true;
	}

	if (blkaddr < di.lstart + di.len - 1) {
		if (modified) {
			__insert_discard_tree(sbi, dc->bdev, blkaddr + 1,
					di.start + blkaddr + 1 - di.lstart,
					di.lstart + di.len - 1 - blkaddr,
					NULL, NULL);
		} else {
			dc->lstart++;
			dc->len--;
			dc->start++;
C
Chao Yu 已提交
1166
			dcc->undiscard_blks += dc->len;
C
Chao Yu 已提交
1167
			__relocate_discard_cmd(dcc, dc);
1168
		}
1169 1170 1171
	}
}

1172 1173 1174
static void __update_discard_tree_range(struct f2fs_sb_info *sbi,
				struct block_device *bdev, block_t lstart,
				block_t start, block_t len)
C
Chao Yu 已提交
1175
{
1176
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1177 1178 1179 1180
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct discard_cmd *dc;
	struct discard_info di = {0};
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
1181 1182 1183
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_discard_blocks =
			SECTOR_TO_BLOCK(q->limits.max_discard_sectors);
1184
	block_t end = lstart + len;
C
Chao Yu 已提交
1185

C
Chao Yu 已提交
1186
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
1187 1188 1189 1190 1191 1192 1193 1194 1195 1196 1197 1198
					NULL, lstart,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true);
	if (dc)
		prev_dc = dc;

	if (!prev_dc) {
		di.lstart = lstart;
		di.len = next_dc ? next_dc->lstart - lstart : len;
		di.len = min(di.len, len);
		di.start = start;
C
Chao Yu 已提交
1199
	}
1200

1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213 1214 1215 1216 1217 1218 1219 1220 1221 1222 1223 1224
	while (1) {
		struct rb_node *node;
		bool merged = false;
		struct discard_cmd *tdc = NULL;

		if (prev_dc) {
			di.lstart = prev_dc->lstart + prev_dc->len;
			if (di.lstart < lstart)
				di.lstart = lstart;
			if (di.lstart >= end)
				break;

			if (!next_dc || next_dc->lstart > end)
				di.len = end - di.lstart;
			else
				di.len = next_dc->lstart - di.lstart;
			di.start = start + di.lstart - lstart;
		}

		if (!di.len)
			goto next;

		if (prev_dc && prev_dc->state == D_PREP &&
			prev_dc->bdev == bdev &&
1225 1226
			__is_discard_back_mergeable(&di, &prev_dc->di,
							max_discard_blocks)) {
1227
			prev_dc->di.len += di.len;
C
Chao Yu 已提交
1228
			dcc->undiscard_blks += di.len;
C
Chao Yu 已提交
1229
			__relocate_discard_cmd(dcc, prev_dc);
1230 1231 1232 1233 1234 1235 1236
			di = prev_dc->di;
			tdc = prev_dc;
			merged = true;
		}

		if (next_dc && next_dc->state == D_PREP &&
			next_dc->bdev == bdev &&
1237 1238
			__is_discard_front_mergeable(&di, &next_dc->di,
							max_discard_blocks)) {
1239 1240 1241
			next_dc->di.lstart = di.lstart;
			next_dc->di.len += di.len;
			next_dc->di.start = di.start;
C
Chao Yu 已提交
1242
			dcc->undiscard_blks += di.len;
C
Chao Yu 已提交
1243
			__relocate_discard_cmd(dcc, next_dc);
1244 1245 1246
			if (tdc)
				__remove_discard_cmd(sbi, tdc);
			merged = true;
1247
		}
1248

1249
		if (!merged) {
1250 1251
			__insert_discard_tree(sbi, bdev, di.lstart, di.start,
							di.len, NULL, NULL);
1252
		}
1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267
 next:
		prev_dc = next_dc;
		if (!prev_dc)
			break;

		node = rb_next(&prev_dc->rb_node);
		next_dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}
}

static int __queue_discard_cmd(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
	block_t lblkstart = blkstart;

C
Chao Yu 已提交
1268
	trace_f2fs_queue_discard(bdev, blkstart, blklen);
1269 1270 1271 1272 1273 1274

	if (sbi->s_ndevs) {
		int devi = f2fs_target_device_index(sbi, blkstart);

		blkstart -= FDEV(devi).start_blk;
	}
1275
	mutex_lock(&SM_I(sbi)->dcc_info->cmd_lock);
1276
	__update_discard_tree_range(sbi, bdev, lblkstart, blkstart, blklen);
1277
	mutex_unlock(&SM_I(sbi)->dcc_info->cmd_lock);
1278 1279 1280
	return 0;
}

C
Chao Yu 已提交
1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305
static unsigned int __issue_discard_cmd_orderly(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	unsigned int pos = dcc->next_pos;
	unsigned int issued = 0;
	bool io_interrupted = false;

	mutex_lock(&dcc->cmd_lock);
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, pos,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc) {
		struct rb_node *node;
1306
		int err = 0;
C
Chao Yu 已提交
1307 1308 1309 1310 1311 1312 1313 1314 1315 1316

		if (dc->state != D_PREP)
			goto next;

		if (dpolicy->io_aware && !is_idle(sbi)) {
			io_interrupted = true;
			break;
		}

		dcc->next_pos = dc->lstart + dc->len;
1317
		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);
C
Chao Yu 已提交
1318

1319
		if (issued >= dpolicy->max_requests)
C
Chao Yu 已提交
1320 1321 1322
			break;
next:
		node = rb_next(&dc->rb_node);
1323 1324
		if (err)
			__remove_discard_cmd(sbi, dc);
C
Chao Yu 已提交
1325 1326 1327 1328 1329 1330 1331 1332 1333 1334 1335 1336 1337 1338 1339 1340
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);
	}

	blk_finish_plug(&plug);

	if (!dc)
		dcc->next_pos = 0;

	mutex_unlock(&dcc->cmd_lock);

	if (!issued && io_interrupted)
		issued = -1;

	return issued;
}

C
Chao Yu 已提交
1341 1342
static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy)
C
Chao Yu 已提交
1343 1344 1345 1346 1347
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	struct blk_plug plug;
1348
	int i, issued = 0;
1349
	bool io_interrupted = false;
C
Chao Yu 已提交
1350

C
Chao Yu 已提交
1351 1352 1353
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		if (i + 1 < dpolicy->granularity)
			break;
C
Chao Yu 已提交
1354 1355 1356 1357

		if (i < DEFAULT_DISCARD_GRANULARITY && dpolicy->ordered)
			return __issue_discard_cmd_orderly(sbi, dpolicy);

C
Chao Yu 已提交
1358
		pend_list = &dcc->pend_list[i];
1359 1360

		mutex_lock(&dcc->cmd_lock);
1361 1362
		if (list_empty(pend_list))
			goto next;
1363 1364 1365
		if (unlikely(dcc->rbtree_check))
			f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));
1366
		blk_start_plug(&plug);
C
Chao Yu 已提交
1367 1368 1369
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);

C
Chao Yu 已提交
1370 1371
			if (dpolicy->io_aware && i < dpolicy->io_aware_gran &&
								!is_idle(sbi)) {
1372
				io_interrupted = true;
1373
				break;
1374
			}
1375

1376
			__submit_discard_cmd(sbi, dpolicy, dc, &issued);
1377

1378
			if (issued >= dpolicy->max_requests)
1379
				break;
C
Chao Yu 已提交
1380
		}
1381
		blk_finish_plug(&plug);
1382
next:
1383 1384
		mutex_unlock(&dcc->cmd_lock);

1385
		if (issued >= dpolicy->max_requests || io_interrupted)
1386
			break;
C
Chao Yu 已提交
1387
	}
1388

1389 1390 1391
	if (!issued && io_interrupted)
		issued = -1;

1392 1393 1394
	return issued;
}

1395
static bool __drop_discard_cmd(struct f2fs_sb_info *sbi)
1396 1397 1398 1399 1400
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *pend_list;
	struct discard_cmd *dc, *tmp;
	int i;
1401
	bool dropped = false;
1402 1403 1404 1405 1406 1407 1408

	mutex_lock(&dcc->cmd_lock);
	for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
		pend_list = &dcc->pend_list[i];
		list_for_each_entry_safe(dc, tmp, pend_list, list) {
			f2fs_bug_on(sbi, dc->state != D_PREP);
			__remove_discard_cmd(sbi, dc);
1409
			dropped = true;
1410 1411 1412
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1413 1414

	return dropped;
C
Chao Yu 已提交
1415 1416
}

C
Chao Yu 已提交
1417
void f2fs_drop_discard_cmd(struct f2fs_sb_info *sbi)
1418 1419 1420 1421
{
	__drop_discard_cmd(sbi);
}

1422
static unsigned int __wait_one_discard_bio(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1423 1424 1425
							struct discard_cmd *dc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
1426
	unsigned int len = 0;
C
Chao Yu 已提交
1427 1428 1429 1430 1431

	wait_for_completion_io(&dc->wait);
	mutex_lock(&dcc->cmd_lock);
	f2fs_bug_on(sbi, dc->state != D_DONE);
	dc->ref--;
1432 1433 1434
	if (!dc->ref) {
		if (!dc->error)
			len = dc->len;
C
Chao Yu 已提交
1435
		__remove_discard_cmd(sbi, dc);
1436
	}
C
Chao Yu 已提交
1437
	mutex_unlock(&dcc->cmd_lock);
1438 1439

	return len;
C
Chao Yu 已提交
1440 1441
}

1442
static unsigned int __wait_discard_cmd_range(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1443 1444
						struct discard_policy *dpolicy,
						block_t start, block_t end)
C
Chao Yu 已提交
1445 1446
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1447 1448
	struct list_head *wait_list = (dpolicy->type == DPOLICY_FSTRIM) ?
					&(dcc->fstrim_list) : &(dcc->wait_list);
C
Chao Yu 已提交
1449
	struct discard_cmd *dc, *tmp;
1450
	bool need_wait;
1451
	unsigned int trimmed = 0;
1452 1453 1454

next:
	need_wait = false;
C
Chao Yu 已提交
1455 1456 1457

	mutex_lock(&dcc->cmd_lock);
	list_for_each_entry_safe(dc, tmp, wait_list, list) {
1458 1459
		if (dc->lstart + dc->len <= start || end <= dc->lstart)
			continue;
C
Chao Yu 已提交
1460
		if (dc->len < dpolicy->granularity)
1461
			continue;
C
Chao Yu 已提交
1462
		if (dc->state == D_DONE && !dc->ref) {
C
Chao Yu 已提交
1463
			wait_for_completion_io(&dc->wait);
1464 1465
			if (!dc->error)
				trimmed += dc->len;
C
Chao Yu 已提交
1466
			__remove_discard_cmd(sbi, dc);
1467 1468 1469 1470
		} else {
			dc->ref++;
			need_wait = true;
			break;
C
Chao Yu 已提交
1471 1472 1473
		}
	}
	mutex_unlock(&dcc->cmd_lock);
1474 1475

	if (need_wait) {
1476
		trimmed += __wait_one_discard_bio(sbi, dc);
1477 1478
		goto next;
	}
1479 1480

	return trimmed;
C
Chao Yu 已提交
1481 1482
}

1483
static unsigned int __wait_all_discard_cmd(struct f2fs_sb_info *sbi,
C
Chao Yu 已提交
1484
						struct discard_policy *dpolicy)
1485
{
1486
	struct discard_policy dp;
1487
	unsigned int discard_blks;
1488

1489 1490
	if (dpolicy)
		return __wait_discard_cmd_range(sbi, dpolicy, 0, UINT_MAX);
1491 1492

	/* wait all */
1493
	__init_discard_policy(sbi, &dp, DPOLICY_FSTRIM, 1);
1494
	discard_blks = __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);
1495
	__init_discard_policy(sbi, &dp, DPOLICY_UMOUNT, 1);
1496 1497 1498
	discard_blks += __wait_discard_cmd_range(sbi, &dp, 0, UINT_MAX);

	return discard_blks;
1499 1500
}

1501
/* This should be covered by global mutex, &sit_i->sentry_lock */
W
Wei Yongjun 已提交
1502
static void f2fs_wait_discard_bio(struct f2fs_sb_info *sbi, block_t blkaddr)
1503 1504 1505
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *dc;
1506
	bool need_wait = false;
1507 1508

	mutex_lock(&dcc->cmd_lock);
C
Chao Yu 已提交
1509 1510
	dc = (struct discard_cmd *)f2fs_lookup_rb_tree(&dcc->root,
							NULL, blkaddr);
1511
	if (dc) {
1512 1513 1514 1515 1516 1517
		if (dc->state == D_PREP) {
			__punch_discard_cmd(sbi, dc, blkaddr);
		} else {
			dc->ref++;
			need_wait = true;
		}
C
Chao Yu 已提交
1518
	}
C
Chao Yu 已提交
1519
	mutex_unlock(&dcc->cmd_lock);
1520

C
Chao Yu 已提交
1521 1522
	if (need_wait)
		__wait_one_discard_bio(sbi, dc);
C
Chao Yu 已提交
1523 1524
}

C
Chao Yu 已提交
1525
void f2fs_stop_discard_thread(struct f2fs_sb_info *sbi)
1526 1527 1528 1529 1530 1531 1532 1533
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (dcc && dcc->f2fs_issue_discard) {
		struct task_struct *discard_thread = dcc->f2fs_issue_discard;

		dcc->f2fs_issue_discard = NULL;
		kthread_stop(discard_thread);
1534
	}
C
Chao Yu 已提交
1535 1536
}

1537
/* This comes from f2fs_put_super */
1538
bool f2fs_wait_discard_bios(struct f2fs_sb_info *sbi)
1539 1540
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
C
Chao Yu 已提交
1541
	struct discard_policy dpolicy;
1542
	bool dropped;
1543

1544 1545
	__init_discard_policy(sbi, &dpolicy, DPOLICY_UMOUNT,
					dcc->discard_granularity);
C
Chao Yu 已提交
1546
	__issue_discard_cmd(sbi, &dpolicy);
1547 1548
	dropped = __drop_discard_cmd(sbi);

1549 1550
	/* just to make sure there is no pending discard commands */
	__wait_all_discard_cmd(sbi, NULL);
1551 1552

	f2fs_bug_on(sbi, atomic_read(&dcc->discard_cmd_cnt));
1553
	return dropped;
1554 1555
}

1556 1557 1558 1559 1560
static int issue_discard_thread(void *data)
{
	struct f2fs_sb_info *sbi = data;
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	wait_queue_head_t *q = &dcc->discard_wait_queue;
C
Chao Yu 已提交
1561
	struct discard_policy dpolicy;
1562 1563
	unsigned int wait_ms = DEF_MIN_DISCARD_ISSUE_TIME;
	int issued;
1564 1565
	unsigned long interval = sbi->interval_time[REQ_TIME] * HZ;
	long delta;
1566

1567
	set_freezable();
1568

1569
	do {
1570
		__init_discard_policy(sbi, &dpolicy, DPOLICY_BG,
C
Chao Yu 已提交
1571 1572
					dcc->discard_granularity);

1573 1574 1575 1576
		wait_event_interruptible_timeout(*q,
				kthread_should_stop() || freezing(current) ||
				dcc->discard_wake,
				msecs_to_jiffies(wait_ms));
S
Sheng Yong 已提交
1577 1578 1579 1580

		if (dcc->discard_wake)
			dcc->discard_wake = 0;

1581 1582
		if (try_to_freeze())
			continue;
1583 1584
		if (f2fs_readonly(sbi->sb))
			continue;
1585 1586
		if (kthread_should_stop())
			return 0;
1587 1588 1589 1590
		if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
			wait_ms = dpolicy.max_interval;
			continue;
		}
1591

1592
		if (sbi->gc_mode == GC_URGENT)
1593
			__init_discard_policy(sbi, &dpolicy, DPOLICY_FORCE, 1);
1594

1595 1596
		sb_start_intwrite(sbi->sb);

C
Chao Yu 已提交
1597
		issued = __issue_discard_cmd(sbi, &dpolicy);
1598
		if (issued > 0) {
C
Chao Yu 已提交
1599 1600
			__wait_all_discard_cmd(sbi, &dpolicy);
			wait_ms = dpolicy.min_interval;
1601
		} else if (issued == -1){
1602 1603 1604 1605 1606
			delta = (sbi->last_time[REQ_TIME] + interval) - jiffies;
			if (delta > 0)
				wait_ms = jiffies_to_msecs(delta);
			else
				wait_ms = dpolicy.mid_interval;
1607
		} else {
C
Chao Yu 已提交
1608
			wait_ms = dpolicy.max_interval;
1609
		}
1610

1611
		sb_end_intwrite(sbi->sb);
1612 1613 1614

	} while (!kthread_should_stop());
	return 0;
1615 1616
}

1617
#ifdef CONFIG_BLK_DEV_ZONED
J
Jaegeuk Kim 已提交
1618 1619
static int __f2fs_issue_discard_zone(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
1620
{
1621
	sector_t sector, nr_sects;
1622
	block_t lblkstart = blkstart;
J
Jaegeuk Kim 已提交
1623 1624 1625 1626 1627 1628
	int devi = 0;

	if (sbi->s_ndevs) {
		devi = f2fs_target_device_index(sbi, blkstart);
		blkstart -= FDEV(devi).start_blk;
	}
1629 1630 1631 1632 1633 1634

	/*
	 * We need to know the type of the zone: for conventional zones,
	 * use regular discard if the drive supports it. For sequential
	 * zones, reset the zone write pointer.
	 */
J
Jaegeuk Kim 已提交
1635
	switch (get_blkz_type(sbi, bdev, blkstart)) {
1636 1637 1638 1639

	case BLK_ZONE_TYPE_CONVENTIONAL:
		if (!blk_queue_discard(bdev_get_queue(bdev)))
			return 0;
1640
		return __queue_discard_cmd(sbi, bdev, lblkstart, blklen);
1641 1642
	case BLK_ZONE_TYPE_SEQWRITE_REQ:
	case BLK_ZONE_TYPE_SEQWRITE_PREF:
1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653
		sector = SECTOR_FROM_BLOCK(blkstart);
		nr_sects = SECTOR_FROM_BLOCK(blklen);

		if (sector & (bdev_zone_sectors(bdev) - 1) ||
				nr_sects != bdev_zone_sectors(bdev)) {
			f2fs_msg(sbi->sb, KERN_INFO,
				"(%d) %s: Unaligned discard attempted (block %x + %x)",
				devi, sbi->s_ndevs ? FDEV(devi).path: "",
				blkstart, blklen);
			return -EIO;
		}
1654
		trace_f2fs_issue_reset_zone(bdev, blkstart);
1655 1656 1657 1658 1659 1660 1661 1662 1663
		return blkdev_reset_zones(bdev, sector,
					  nr_sects, GFP_NOFS);
	default:
		/* Unknown zone type: broken device ? */
		return -EIO;
	}
}
#endif

J
Jaegeuk Kim 已提交
1664 1665 1666 1667
static int __issue_discard_async(struct f2fs_sb_info *sbi,
		struct block_device *bdev, block_t blkstart, block_t blklen)
{
#ifdef CONFIG_BLK_DEV_ZONED
1668
	if (f2fs_sb_has_blkzoned(sbi->sb) &&
J
Jaegeuk Kim 已提交
1669 1670 1671
				bdev_zoned_model(bdev) != BLK_ZONED_NONE)
		return __f2fs_issue_discard_zone(sbi, bdev, blkstart, blklen);
#endif
1672
	return __queue_discard_cmd(sbi, bdev, blkstart, blklen);
J
Jaegeuk Kim 已提交
1673 1674
}

1675
static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
1676 1677
				block_t blkstart, block_t blklen)
{
J
Jaegeuk Kim 已提交
1678 1679
	sector_t start = blkstart, len = 0;
	struct block_device *bdev;
1680 1681 1682
	struct seg_entry *se;
	unsigned int offset;
	block_t i;
J
Jaegeuk Kim 已提交
1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701
	int err = 0;

	bdev = f2fs_target_device(sbi, blkstart, NULL);

	for (i = blkstart; i < blkstart + blklen; i++, len++) {
		if (i != start) {
			struct block_device *bdev2 =
				f2fs_target_device(sbi, i, NULL);

			if (bdev2 != bdev) {
				err = __issue_discard_async(sbi, bdev,
						start, len);
				if (err)
					return err;
				bdev = bdev2;
				start = i;
				len = 0;
			}
		}
1702 1703 1704 1705 1706 1707 1708

		se = get_seg_entry(sbi, GET_SEGNO(sbi, i));
		offset = GET_BLKOFF_FROM_SEG0(sbi, i);

		if (!f2fs_test_and_set_bit(offset, se->discard_map))
			sbi->discard_blks--;
	}
1709

J
Jaegeuk Kim 已提交
1710 1711 1712
	if (len)
		err = __issue_discard_async(sbi, bdev, start, len);
	return err;
1713 1714
}

1715 1716
static bool add_discard_addrs(struct f2fs_sb_info *sbi, struct cp_control *cpc,
							bool check_only)
1717
{
1718 1719
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	int max_blocks = sbi->blocks_per_seg;
1720
	struct seg_entry *se = get_seg_entry(sbi, cpc->trim_start);
1721 1722
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
1723
	unsigned long *discard_map = (unsigned long *)se->discard_map;
J
Jaegeuk Kim 已提交
1724
	unsigned long *dmap = SIT_I(sbi)->tmp_map;
1725
	unsigned int start = 0, end = -1;
1726
	bool force = (cpc->reason & CP_DISCARD);
C
Chao Yu 已提交
1727
	struct discard_entry *de = NULL;
1728
	struct list_head *head = &SM_I(sbi)->dcc_info->entry_list;
1729 1730
	int i;

1731
	if (se->valid_blocks == max_blocks || !f2fs_hw_support_discard(sbi))
1732
		return false;
1733

1734
	if (!force) {
1735
		if (!f2fs_realtime_discard_enable(sbi) || !se->valid_blocks ||
1736 1737
			SM_I(sbi)->dcc_info->nr_discards >=
				SM_I(sbi)->dcc_info->max_discards)
1738
			return false;
1739 1740
	}

1741 1742
	/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
	for (i = 0; i < entries; i++)
1743
		dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
1744
				(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
1745

1746 1747
	while (force || SM_I(sbi)->dcc_info->nr_discards <=
				SM_I(sbi)->dcc_info->max_discards) {
1748 1749 1750 1751 1752
		start = __find_rev_next_bit(dmap, max_blocks, end + 1);
		if (start >= max_blocks)
			break;

		end = __find_rev_next_zero_bit(dmap, max_blocks, start + 1);
1753 1754 1755 1756
		if (force && start && end != max_blocks
					&& (end - start) < cpc->trim_minlen)
			continue;

1757 1758 1759
		if (check_only)
			return true;

C
Chao Yu 已提交
1760 1761 1762 1763 1764 1765 1766 1767 1768 1769 1770
		if (!de) {
			de = f2fs_kmem_cache_alloc(discard_entry_slab,
								GFP_F2FS_ZERO);
			de->start_blkaddr = START_BLOCK(sbi, cpc->trim_start);
			list_add_tail(&de->list, head);
		}

		for (i = start; i < end; i++)
			__set_bit_le(i, (void *)de->discard_map);

		SM_I(sbi)->dcc_info->nr_discards += end - start;
1771
	}
1772
	return false;
1773 1774
}

1775 1776 1777 1778 1779 1780
static void release_discard_addr(struct discard_entry *entry)
{
	list_del(&entry->list);
	kmem_cache_free(discard_entry_slab, entry);
}

C
Chao Yu 已提交
1781
void f2fs_release_discard_addrs(struct f2fs_sb_info *sbi)
1782
{
1783
	struct list_head *head = &(SM_I(sbi)->dcc_info->entry_list);
1784 1785 1786
	struct discard_entry *entry, *this;

	/* drop caches */
1787 1788
	list_for_each_entry_safe(entry, this, head, list)
		release_discard_addr(entry);
1789 1790
}

J
Jaegeuk Kim 已提交
1791
/*
C
Chao Yu 已提交
1792
 * Should call f2fs_clear_prefree_segments after checkpoint is done.
J
Jaegeuk Kim 已提交
1793 1794 1795 1796
 */
static void set_prefree_as_free_segments(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
1797
	unsigned int segno;
J
Jaegeuk Kim 已提交
1798 1799

	mutex_lock(&dirty_i->seglist_lock);
1800
	for_each_set_bit(segno, dirty_i->dirty_segmap[PRE], MAIN_SEGS(sbi))
J
Jaegeuk Kim 已提交
1801 1802 1803 1804
		__set_test_and_free(sbi, segno);
	mutex_unlock(&dirty_i->seglist_lock);
}

void f2fs_clear_prefree_segments(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct list_head *head = &dcc->entry_list;
	struct discard_entry *entry, *this;
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned long *prefree_map = dirty_i->dirty_segmap[PRE];
	unsigned int start = 0, end = -1;
	unsigned int secno, start_segno;
	bool force = (cpc->reason & CP_DISCARD);
	bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;

	mutex_lock(&dirty_i->seglist_lock);

	while (1) {
		int i;

		if (need_align && end != -1)
			end--;
		start = find_next_bit(prefree_map, MAIN_SEGS(sbi), end + 1);
		if (start >= MAIN_SEGS(sbi))
			break;
		end = find_next_zero_bit(prefree_map, MAIN_SEGS(sbi),
								start + 1);

		if (need_align) {
			start = rounddown(start, sbi->segs_per_sec);
			end = roundup(end, sbi->segs_per_sec);
		}

		for (i = start; i < end; i++) {
			if (test_and_clear_bit(i, prefree_map))
				dirty_i->nr_dirty[PRE]--;
		}

		if (!f2fs_realtime_discard_enable(sbi))
			continue;

		if (force && start >= cpc->trim_start &&
					(end - 1) <= cpc->trim_end)
				continue;

		if (!test_opt(sbi, LFS) || sbi->segs_per_sec == 1) {
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start),
				(end - start) << sbi->log_blocks_per_seg);
			continue;
		}
next:
		secno = GET_SEC_FROM_SEG(sbi, start);
		start_segno = GET_SEG_FROM_SEC(sbi, secno);
		if (!IS_CURSEC(sbi, secno) &&
			!get_valid_blocks(sbi, start, true))
			f2fs_issue_discard(sbi, START_BLOCK(sbi, start_segno),
				sbi->segs_per_sec << sbi->log_blocks_per_seg);

		start = start_segno + sbi->segs_per_sec;
		if (start < end)
			goto next;
		else
			end = start - 1;
	}
	mutex_unlock(&dirty_i->seglist_lock);

	/* send small discards */
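	/*
	 * Each entry->discard_map covers one segment.  Walk it as alternating
	 * runs of set and clear bits: every run of set bits is issued as one
	 * discard extent, unless the device is zoned or the run is shorter
	 * than the requested trim_minlen in FITRIM mode.
	 */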
	list_for_each_entry_safe(entry, this, head, list) {
		unsigned int cur_pos = 0, next_pos, len, total_len = 0;
		bool is_valid = test_bit_le(0, entry->discard_map);

find_next:
		if (is_valid) {
			next_pos = find_next_zero_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
			len = next_pos - cur_pos;

			if (f2fs_sb_has_blkzoned(sbi->sb) ||
			    (force && len < cpc->trim_minlen))
				goto skip;

			f2fs_issue_discard(sbi, entry->start_blkaddr + cur_pos,
									len);
			total_len += len;
		} else {
			next_pos = find_next_bit_le(entry->discard_map,
					sbi->blocks_per_seg, cur_pos);
		}
skip:
		cur_pos = next_pos;
		is_valid = !is_valid;

		if (cur_pos < sbi->blocks_per_seg)
			goto find_next;

		release_discard_addr(entry);
		dcc->nr_discards -= total_len;
	}

	wake_up_discard_thread(sbi, false);
}

static int create_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	dev_t dev = sbi->sb->s_bdev->bd_dev;
	struct discard_cmd_control *dcc;
	int err = 0, i;

	if (SM_I(sbi)->dcc_info) {
		dcc = SM_I(sbi)->dcc_info;
		goto init_thread;
	}

	dcc = f2fs_kzalloc(sbi, sizeof(struct discard_cmd_control), GFP_KERNEL);
	if (!dcc)
		return -ENOMEM;

	dcc->discard_granularity = DEFAULT_DISCARD_GRANULARITY;
	INIT_LIST_HEAD(&dcc->entry_list);
	for (i = 0; i < MAX_PLIST_NUM; i++)
		INIT_LIST_HEAD(&dcc->pend_list[i]);
	INIT_LIST_HEAD(&dcc->wait_list);
	INIT_LIST_HEAD(&dcc->fstrim_list);
	mutex_init(&dcc->cmd_lock);
	atomic_set(&dcc->issued_discard, 0);
	atomic_set(&dcc->issing_discard, 0);
	atomic_set(&dcc->discard_cmd_cnt, 0);
	dcc->nr_discards = 0;
	dcc->max_discards = MAIN_SEGS(sbi) << sbi->log_blocks_per_seg;
	dcc->undiscard_blks = 0;
	dcc->next_pos = 0;
	dcc->root = RB_ROOT;
	dcc->rbtree_check = false;

	init_waitqueue_head(&dcc->discard_wait_queue);
	SM_I(sbi)->dcc_info = dcc;
init_thread:
	dcc->f2fs_issue_discard = kthread_run(issue_discard_thread, sbi,
				"f2fs_discard-%u:%u", MAJOR(dev), MINOR(dev));
	if (IS_ERR(dcc->f2fs_issue_discard)) {
		err = PTR_ERR(dcc->f2fs_issue_discard);
		kfree(dcc);
		SM_I(sbi)->dcc_info = NULL;
		return err;
	}

	return err;
}

static void destroy_discard_cmd_control(struct f2fs_sb_info *sbi)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;

	if (!dcc)
		return;

	f2fs_stop_discard_thread(sbi);

	kfree(dcc);
	SM_I(sbi)->dcc_info = NULL;
}

static bool __mark_sit_entry_dirty(struct f2fs_sb_info *sbi, unsigned int segno)
{
	struct sit_info *sit_i = SIT_I(sbi);

	if (!__test_and_set_bit(segno, sit_i->dirty_sentries_bitmap)) {
		sit_i->dirty_sentries++;
		return false;
	}

	return true;
}

static void __set_sit_entry_type(struct f2fs_sb_info *sbi, int type,
					unsigned int segno, int modified)
{
	struct seg_entry *se = get_seg_entry(sbi, segno);
	se->type = type;
	if (modified)
		__mark_sit_entry_dirty(sbi, segno);
}

static void update_sit_entry(struct f2fs_sb_info *sbi, block_t blkaddr, int del)
{
	struct seg_entry *se;
	unsigned int segno, offset;
	long int new_vblocks;
	bool exist;
#ifdef CONFIG_F2FS_CHECK_FS
	bool mir_exist;
#endif

	segno = GET_SEGNO(sbi, blkaddr);

	se = get_seg_entry(sbi, segno);
	new_vblocks = se->valid_blocks + del;
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	f2fs_bug_on(sbi, (new_vblocks >> (sizeof(unsigned short) << 3) ||
				(new_vblocks > sbi->blocks_per_seg)));

	se->valid_blocks = new_vblocks;
	se->mtime = get_mtime(sbi, false);
	if (se->mtime > SIT_I(sbi)->max_mtime)
		SIT_I(sbi)->max_mtime = se->mtime;
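
	/*
	 * del is +1 when the block at blkaddr becomes valid and -1 when it is
	 * invalidated.  Under CONFIG_F2FS_CHECK_FS the mirror bitmap is kept
	 * in lockstep with cur_valid_map so that any divergence is caught
	 * immediately.
	 */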
J
Jaegeuk Kim 已提交
2009 2010 2011

	/* Update valid block bitmap */
	if (del > 0) {
2012
		exist = f2fs_test_and_set_bit(offset, se->cur_valid_map);
C
Chao Yu 已提交
2013
#ifdef CONFIG_F2FS_CHECK_FS
2014 2015 2016 2017 2018 2019
		mir_exist = f2fs_test_and_set_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when setting bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
2020
			f2fs_bug_on(sbi, 1);
2021
		}
C
Chao Yu 已提交
2022
#endif
2023 2024 2025 2026
		if (unlikely(exist)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly set, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
2027 2028
			se->valid_blocks--;
			del = 0;
C
Chao Yu 已提交
2029
		}
2030

2031
		if (!f2fs_test_and_set_bit(offset, se->discard_map))
2032
			sbi->discard_blks--;
2033 2034

		/* don't overwrite by SSR to keep node chain */
2035
		if (IS_NODESEG(se->type)) {
2036 2037 2038
			if (!f2fs_test_and_set_bit(offset, se->ckpt_valid_map))
				se->ckpt_valid_blocks++;
		}
J
Jaegeuk Kim 已提交
2039
	} else {
2040
		exist = f2fs_test_and_clear_bit(offset, se->cur_valid_map);
C
Chao Yu 已提交
2041
#ifdef CONFIG_F2FS_CHECK_FS
2042 2043 2044 2045 2046 2047
		mir_exist = f2fs_test_and_clear_bit(offset,
						se->cur_valid_map_mir);
		if (unlikely(exist != mir_exist)) {
			f2fs_msg(sbi->sb, KERN_ERR, "Inconsistent error "
				"when clearing bitmap, blk:%u, old bit:%d",
				blkaddr, exist);
2048
			f2fs_bug_on(sbi, 1);
2049
		}
C
Chao Yu 已提交
2050
#endif
2051 2052 2053 2054
		if (unlikely(!exist)) {
			f2fs_msg(sbi->sb, KERN_ERR,
				"Bitmap was wrongly cleared, blk:%u", blkaddr);
			f2fs_bug_on(sbi, 1);
2055 2056
			se->valid_blocks++;
			del = 0;
C
Chao Yu 已提交
2057
		}
2058

2059
		if (f2fs_test_and_clear_bit(offset, se->discard_map))
2060
			sbi->discard_blks++;
J
Jaegeuk Kim 已提交
2061 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073
	}
	if (!f2fs_test_bit(offset, se->ckpt_valid_map))
		se->ckpt_valid_blocks += del;

	__mark_sit_entry_dirty(sbi, segno);

	/* update total number of valid blocks to be written in ckpt area */
	SIT_I(sbi)->written_valid_blocks += del;

	if (sbi->segs_per_sec > 1)
		get_sec_entry(sbi, segno)->valid_blocks += del;
}

void f2fs_invalidate_blocks(struct f2fs_sb_info *sbi, block_t addr)
{
	unsigned int segno = GET_SEGNO(sbi, addr);
	struct sit_info *sit_i = SIT_I(sbi);

	f2fs_bug_on(sbi, addr == NULL_ADDR);
	if (addr == NEW_ADDR)
		return;

	invalidate_mapping_pages(META_MAPPING(sbi), addr, addr);

	/* add it into sit main buffer */
	down_write(&sit_i->sentry_lock);

	update_sit_entry(sbi, addr, -1);

	/* add it into dirty seglist */
	locate_dirty_segment(sbi, segno);

	up_write(&sit_i->sentry_lock);
}

bool f2fs_is_checkpointed_data(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno, offset;
	struct seg_entry *se;
	bool is_cp = false;

	if (!is_valid_data_blkaddr(sbi, blkaddr))
		return true;

	down_read(&sit_i->sentry_lock);

	segno = GET_SEGNO(sbi, blkaddr);
	se = get_seg_entry(sbi, segno);
	offset = GET_BLKOFF_FROM_SEG0(sbi, blkaddr);

	if (f2fs_test_bit(offset, se->ckpt_valid_map))
		is_cp = true;

	up_read(&sit_i->sentry_lock);

	return is_cp;
}

/*
 * This function must be called with curseg_mutex held.
 */
static void __add_sum_entry(struct f2fs_sb_info *sbi, int type,
					struct f2fs_summary *sum)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	void *addr = curseg->sum_blk;
	addr += curseg->next_blkoff * sizeof(struct f2fs_summary);
	memcpy(addr, sum, sizeof(struct f2fs_summary));
}

/*
 * Calculate the number of current summary pages for writing
 */
int f2fs_npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
{
	int valid_sum_count = 0;
	int i, sum_in_page;

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		if (sbi->ckpt->alloc_type[i] == SSR)
			valid_sum_count += sbi->blocks_per_seg;
		else {
			if (for_ra)
				valid_sum_count += le16_to_cpu(
					F2FS_CKPT(sbi)->cur_data_blkoff[i]);
			else
				valid_sum_count += curseg_blkoff(sbi, i);
		}
	}

	sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
			SUM_FOOTER_SIZE) / SUMMARY_SIZE;
	if (valid_sum_count <= sum_in_page)
		return 1;
	else if ((valid_sum_count - sum_in_page) <=
		(PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
		return 2;
	return 3;
}

/*
 * Caller should put this summary page
 */
struct page *f2fs_get_sum_page(struct f2fs_sb_info *sbi, unsigned int segno)
{
	return f2fs_get_meta_page_nofail(sbi, GET_SUM_BLOCK(sbi, segno));
}

void f2fs_update_meta_page(struct f2fs_sb_info *sbi,
					void *src, block_t blk_addr)
{
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);

	memcpy(page_address(page), src, PAGE_SIZE);
	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static void write_sum_page(struct f2fs_sb_info *sbi,
			struct f2fs_summary_block *sum_blk, block_t blk_addr)
{
	f2fs_update_meta_page(sbi, (void *)sum_blk, blk_addr);
}

static void write_current_sum_page(struct f2fs_sb_info *sbi,
						int type, block_t blk_addr)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct page *page = f2fs_grab_meta_page(sbi, blk_addr);
	struct f2fs_summary_block *src = curseg->sum_blk;
	struct f2fs_summary_block *dst;

	dst = (struct f2fs_summary_block *)page_address(page);
	memset(dst, 0, PAGE_SIZE);

	mutex_lock(&curseg->curseg_mutex);

	down_read(&curseg->journal_rwsem);
	memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
	up_read(&curseg->journal_rwsem);

	memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
	memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);

	mutex_unlock(&curseg->curseg_mutex);

	set_page_dirty(page);
	f2fs_put_page(page, 1);
}

static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno + 1;
	struct free_segmap_info *free_i = FREE_I(sbi);

	if (segno < MAIN_SEGS(sbi) && segno % sbi->segs_per_sec)
		return !test_bit(segno, free_i->free_segmap);
	return 0;
}

/*
 * Find a new segment from the free segment bitmap in the right order.
 * This function must succeed; otherwise BUG.
 */
static void get_new_segment(struct f2fs_sb_info *sbi,
			unsigned int *newseg, bool new_sec, int dir)
{
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno, secno, zoneno;
	unsigned int total_zones = MAIN_SECS(sbi) / sbi->secs_per_zone;
	unsigned int hint = GET_SEC_FROM_SEG(sbi, *newseg);
	unsigned int old_zoneno = GET_ZONE_FROM_SEG(sbi, *newseg);
	unsigned int left_start = hint;
	bool init = true;
	int go_left = 0;
	int i;

	spin_lock(&free_i->segmap_lock);

	if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
		segno = find_next_zero_bit(free_i->free_segmap,
			GET_SEG_FROM_SEC(sbi, hint + 1), *newseg + 1);
		if (segno < GET_SEG_FROM_SEC(sbi, hint + 1))
			goto got_it;
	}
find_other_zone:
	secno = find_next_zero_bit(free_i->free_secmap, MAIN_SECS(sbi), hint);
	if (secno >= MAIN_SECS(sbi)) {
		if (dir == ALLOC_RIGHT) {
			secno = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
			f2fs_bug_on(sbi, secno >= MAIN_SECS(sbi));
		} else {
			go_left = 1;
			left_start = hint - 1;
		}
	}
	if (go_left == 0)
		goto skip_left;

	while (test_bit(left_start, free_i->free_secmap)) {
		if (left_start > 0) {
			left_start--;
			continue;
		}
		left_start = find_next_zero_bit(free_i->free_secmap,
							MAIN_SECS(sbi), 0);
		f2fs_bug_on(sbi, left_start >= MAIN_SECS(sbi));
		break;
	}
	secno = left_start;
skip_left:
	segno = GET_SEG_FROM_SEC(sbi, secno);
	zoneno = GET_ZONE_FROM_SEC(sbi, secno);

	/* give up on finding another zone */
	if (!init)
		goto got_it;
	if (sbi->secs_per_zone == 1)
		goto got_it;
	if (zoneno == old_zoneno)
		goto got_it;
	if (dir == ALLOC_LEFT) {
		if (!go_left && zoneno + 1 >= total_zones)
			goto got_it;
		if (go_left && zoneno == 0)
			goto got_it;
	}
	for (i = 0; i < NR_CURSEG_TYPE; i++)
		if (CURSEG_I(sbi, i)->zone == zoneno)
			break;

	if (i < NR_CURSEG_TYPE) {
		/* zone is in use, try another */
		if (go_left)
			hint = zoneno * sbi->secs_per_zone - 1;
		else if (zoneno + 1 >= total_zones)
			hint = 0;
		else
			hint = (zoneno + 1) * sbi->secs_per_zone;
		init = false;
		goto find_other_zone;
	}
got_it:
	/* set it as dirty segment in free segmap */
	f2fs_bug_on(sbi, test_bit(segno, free_i->free_segmap));
	__set_inuse(sbi, segno);
	*newseg = segno;
	spin_unlock(&free_i->segmap_lock);
}

static void reset_curseg(struct f2fs_sb_info *sbi, int type, int modified)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	struct summary_footer *sum_footer;

	curseg->segno = curseg->next_segno;
	curseg->zone = GET_ZONE_FROM_SEG(sbi, curseg->segno);
	curseg->next_blkoff = 0;
	curseg->next_segno = NULL_SEGNO;

	sum_footer = &(curseg->sum_blk->footer);
	memset(sum_footer, 0, sizeof(struct summary_footer));
	if (IS_DATASEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_DATA);
	if (IS_NODESEG(type))
		SET_SUM_TYPE(sum_footer, SUM_TYPE_NODE);
	__set_sit_entry_type(sbi, type, curseg->segno, modified);
}

static unsigned int __get_next_segno(struct f2fs_sb_info *sbi, int type)
{
	/* if segs_per_sec is larger than 1, we need to keep the original policy. */
	if (sbi->segs_per_sec != 1)
		return CURSEG_I(sbi, type)->segno;

	if (test_opt(sbi, NOHEAP) &&
		(type == CURSEG_HOT_DATA || IS_NODESEG(type)))
		return 0;

	if (SIT_I(sbi)->last_victim[ALLOC_NEXT])
		return SIT_I(sbi)->last_victim[ALLOC_NEXT];

	/* find segments from 0 to reuse freed segments */
	if (F2FS_OPTION(sbi).alloc_mode == ALLOC_MODE_REUSE)
		return 0;

	return CURSEG_I(sbi, type)->segno;
}

/*
 * Allocate a current working segment.
 * This function always allocates a free segment in LFS manner.
 */
static void new_curseg(struct f2fs_sb_info *sbi, int type, bool new_sec)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int segno = curseg->segno;
	int dir = ALLOC_LEFT;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, segno));
	if (type == CURSEG_WARM_DATA || type == CURSEG_COLD_DATA)
		dir = ALLOC_RIGHT;

	if (test_opt(sbi, NOHEAP))
		dir = ALLOC_RIGHT;

	segno = __get_next_segno(sbi, type);
	get_new_segment(sbi, &segno, new_sec, dir);
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 1);
	curseg->alloc_type = LFS;
}

static void __next_free_blkoff(struct f2fs_sb_info *sbi,
			struct curseg_info *seg, block_t start)
{
	struct seg_entry *se = get_seg_entry(sbi, seg->segno);
	int entries = SIT_VBLOCK_MAP_SIZE / sizeof(unsigned long);
	unsigned long *target_map = SIT_I(sbi)->tmp_map;
	unsigned long *ckpt_map = (unsigned long *)se->ckpt_valid_map;
	unsigned long *cur_map = (unsigned long *)se->cur_valid_map;
	int i, pos;
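	/*
	 * A block can be reused by SSR only if it is free in both the current
	 * bitmap and the last checkpointed bitmap, so scan the OR of the two
	 * maps for the next zero bit.
	 */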

	for (i = 0; i < entries; i++)
		target_map[i] = ckpt_map[i] | cur_map[i];

	pos = __find_rev_next_zero_bit(target_map, sbi->blocks_per_seg, start);

	seg->next_blkoff = pos;
}

/*
 * If a segment is written in LFS manner, the next block offset is simply
 * obtained by increasing the current block offset. However, if a segment is
 * written in SSR manner, the next block offset is obtained by calling
 * __next_free_blkoff.
 */
static void __refresh_next_blkoff(struct f2fs_sb_info *sbi,
				struct curseg_info *seg)
{
	if (seg->alloc_type == SSR)
		__next_free_blkoff(sbi, seg, seg->next_blkoff + 1);
	else
		seg->next_blkoff++;
}

/*
 * This function always allocates a used segment (from the dirty seglist) in
 * SSR manner, so it should recover the existing segment information of
 * valid blocks.
 */
static void change_curseg(struct f2fs_sb_info *sbi, int type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	unsigned int new_segno = curseg->next_segno;
	struct f2fs_summary_block *sum_node;
	struct page *sum_page;

	write_sum_page(sbi, curseg->sum_blk,
				GET_SUM_BLOCK(sbi, curseg->segno));
	__set_test_and_inuse(sbi, new_segno);

	mutex_lock(&dirty_i->seglist_lock);
	__remove_dirty_segment(sbi, new_segno, PRE);
	__remove_dirty_segment(sbi, new_segno, DIRTY);
	mutex_unlock(&dirty_i->seglist_lock);

	reset_curseg(sbi, type, 1);
	curseg->alloc_type = SSR;
	__next_free_blkoff(sbi, curseg, 0);

	sum_page = f2fs_get_sum_page(sbi, new_segno);
	sum_node = (struct f2fs_summary_block *)page_address(sum_page);
	memcpy(curseg->sum_blk, sum_node, SUM_ENTRY_SIZE);
	f2fs_put_page(sum_page, 1);
}

static int get_ssr_segment(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	const struct victim_selection *v_ops = DIRTY_I(sbi)->v_ops;
	unsigned segno = NULL_SEGNO;
	int i, cnt;
	bool reversed = false;

	/* f2fs_need_SSR() already forces to do this */
	if (v_ops->get_victim(sbi, &segno, BG_GC, type, SSR)) {
		curseg->next_segno = segno;
		return 1;
	}
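
	/*
	 * No SSR victim for this log: fall back to the other logs of the same
	 * kind (node or data), scanning from cold to hot for warm/cold
	 * requests and from hot to cold otherwise, skipping the type that
	 * already failed above.
	 */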

	/* For node segments, let's do SSR more intensively */
	if (IS_NODESEG(type)) {
		if (type >= CURSEG_WARM_NODE) {
			reversed = true;
			i = CURSEG_COLD_NODE;
		} else {
			i = CURSEG_HOT_NODE;
		}
		cnt = NR_CURSEG_NODE_TYPE;
	} else {
		if (type >= CURSEG_WARM_DATA) {
			reversed = true;
			i = CURSEG_COLD_DATA;
		} else {
			i = CURSEG_HOT_DATA;
		}
		cnt = NR_CURSEG_DATA_TYPE;
	}

	for (; cnt-- > 0; reversed ? i-- : i++) {
		if (i == type)
			continue;
		if (v_ops->get_victim(sbi, &segno, BG_GC, i, SSR)) {
			curseg->next_segno = segno;
			return 1;
		}
	}
	return 0;
}

/*
 * flush out the current segment and replace it with a new segment
 * This function must succeed; otherwise BUG.
 */
static void allocate_segment_by_default(struct f2fs_sb_info *sbi,
						int type, bool force)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
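
	/*
	 * Allocation policy as implemented below: take a brand-new segment
	 * (LFS) when forced, when the warm node log must not be reused
	 * (no CP_CRC_RECOVERY_FLAG), or when the next segment of an LFS log
	 * is already free; fall back to SSR (reusing a dirty segment) only
	 * when f2fs_need_SSR() reports space pressure and a victim exists.
	 */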

	if (force)
		new_curseg(sbi, type, true);
	else if (!is_set_ckpt_flags(sbi, CP_CRC_RECOVERY_FLAG) &&
					type == CURSEG_WARM_NODE)
		new_curseg(sbi, type, false);
	else if (curseg->alloc_type == LFS && is_next_segment_free(sbi, type))
		new_curseg(sbi, type, false);
	else if (f2fs_need_SSR(sbi) && get_ssr_segment(sbi, type))
		change_curseg(sbi, type);
	else
		new_curseg(sbi, type, false);

	stat_inc_seg_type(sbi, curseg);
}

void f2fs_allocate_new_segments(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg;
	unsigned int old_segno;
	int i;

	down_write(&SIT_I(sbi)->sentry_lock);

	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		curseg = CURSEG_I(sbi, i);
		old_segno = curseg->segno;
		SIT_I(sbi)->s_ops->allocate_segment(sbi, i, true);
		locate_dirty_segment(sbi, old_segno);
	}

	up_write(&SIT_I(sbi)->sentry_lock);
}

static const struct segment_allocation default_salloc_ops = {
	.allocate_segment = allocate_segment_by_default,
};

bool f2fs_exist_trim_candidates(struct f2fs_sb_info *sbi,
						struct cp_control *cpc)
{
	__u64 trim_start = cpc->trim_start;
	bool has_candidate = false;

	down_write(&SIT_I(sbi)->sentry_lock);
	for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++) {
		if (add_discard_addrs(sbi, cpc, true)) {
			has_candidate = true;
			break;
		}
	}
	up_write(&SIT_I(sbi)->sentry_lock);

	cpc->trim_start = trim_start;
	return has_candidate;
}

static unsigned int __issue_discard_cmd_range(struct f2fs_sb_info *sbi,
					struct discard_policy *dpolicy,
					unsigned int start, unsigned int end)
{
	struct discard_cmd_control *dcc = SM_I(sbi)->dcc_info;
	struct discard_cmd *prev_dc = NULL, *next_dc = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct discard_cmd *dc;
	struct blk_plug plug;
	int issued;
	unsigned int trimmed = 0;

next:
	issued = 0;

	mutex_lock(&dcc->cmd_lock);
	if (unlikely(dcc->rbtree_check))
		f2fs_bug_on(sbi, !f2fs_check_rb_tree_consistence(sbi,
								&dcc->root));

	dc = (struct discard_cmd *)f2fs_lookup_rb_tree_ret(&dcc->root,
					NULL, start,
					(struct rb_entry **)&prev_dc,
					(struct rb_entry **)&next_dc,
					&insert_p, &insert_parent, true);
	if (!dc)
		dc = next_dc;

	blk_start_plug(&plug);

	while (dc && dc->lstart <= end) {
		struct rb_node *node;
		int err = 0;

		if (dc->len < dpolicy->granularity)
			goto skip;

		if (dc->state != D_PREP) {
			list_move_tail(&dc->list, &dcc->fstrim_list);
			goto skip;
		}

		err = __submit_discard_cmd(sbi, dpolicy, dc, &issued);

		if (issued >= dpolicy->max_requests) {
			start = dc->lstart + dc->len;

			if (err)
				__remove_discard_cmd(sbi, dc);

			blk_finish_plug(&plug);
			mutex_unlock(&dcc->cmd_lock);
			trimmed += __wait_all_discard_cmd(sbi, NULL);
			congestion_wait(BLK_RW_ASYNC, HZ/50);
			goto next;
		}
skip:
		node = rb_next(&dc->rb_node);
		if (err)
			__remove_discard_cmd(sbi, dc);
		dc = rb_entry_safe(node, struct discard_cmd, rb_node);

		if (fatal_signal_pending(current))
			break;
	}

	blk_finish_plug(&plug);
	mutex_unlock(&dcc->cmd_lock);

	return trimmed;
}

int f2fs_trim_fs(struct f2fs_sb_info *sbi, struct fstrim_range *range)
{
	__u64 start = F2FS_BYTES_TO_BLK(range->start);
	__u64 end = start + F2FS_BYTES_TO_BLK(range->len) - 1;
	unsigned int start_segno, end_segno;
	block_t start_block, end_block;
	struct cp_control cpc;
	struct discard_policy dpolicy;
	unsigned long long trimmed = 0;
	int err = 0;
	bool need_align = test_opt(sbi, LFS) && sbi->segs_per_sec > 1;

	if (start >= MAX_BLKADDR(sbi) || range->len < sbi->blocksize)
		return -EINVAL;

	if (end < MAIN_BLKADDR(sbi))
		goto out;

	if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
		f2fs_msg(sbi->sb, KERN_WARNING,
			"Found FS corruption, run fsck to fix.");
		return -EIO;
	}

	/* start/end segment number in main_area */
	start_segno = (start <= MAIN_BLKADDR(sbi)) ? 0 : GET_SEGNO(sbi, start);
	end_segno = (end >= MAX_BLKADDR(sbi)) ? MAIN_SEGS(sbi) - 1 :
						GET_SEGNO(sbi, end);
	if (need_align) {
		start_segno = rounddown(start_segno, sbi->segs_per_sec);
		end_segno = roundup(end_segno + 1, sbi->segs_per_sec) - 1;
	}

	cpc.reason = CP_DISCARD;
	cpc.trim_minlen = max_t(__u64, 1, F2FS_BYTES_TO_BLK(range->minlen));
	cpc.trim_start = start_segno;
	cpc.trim_end = end_segno;
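
	/*
	 * Writing a checkpoint with CP_DISCARD turns the prefree segments in
	 * the trim range into discard candidates (see add_discard_addrs),
	 * which are then issued below unless runtime discard already
	 * handles them.
	 */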

	if (sbi->discard_blks == 0)
		goto out;

	mutex_lock(&sbi->gc_mutex);
	err = f2fs_write_checkpoint(sbi, &cpc);
	mutex_unlock(&sbi->gc_mutex);
	if (err)
		goto out;

	/*
	 * We have filed discard candidates, but we don't actually need to
	 * wait for all of them, since they will be issued at idle time along
	 * with the runtime discard option.  In that configuration the user
	 * relies on runtime discard or periodic fstrim instead of waiting.
	 */
	if (f2fs_realtime_discard_enable(sbi))
		goto out;

	start_block = START_BLOCK(sbi, start_segno);
	end_block = START_BLOCK(sbi, end_segno + 1);

	__init_discard_policy(sbi, &dpolicy, DPOLICY_FSTRIM, cpc.trim_minlen);
	trimmed = __issue_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);

	trimmed += __wait_discard_cmd_range(sbi, &dpolicy,
					start_block, end_block);
out:
	if (!err)
		range->len = F2FS_BLK_TO_BYTES(trimmed);
	return err;
}

static bool __has_curseg_space(struct f2fs_sb_info *sbi, int type)
{
	struct curseg_info *curseg = CURSEG_I(sbi, type);
	if (curseg->next_blkoff < sbi->blocks_per_seg)
		return true;
	return false;
}

int f2fs_rw_hint_to_seg_type(enum rw_hint hint)
{
	switch (hint) {
	case WRITE_LIFE_SHORT:
		return CURSEG_HOT_DATA;
	case WRITE_LIFE_EXTREME:
		return CURSEG_COLD_DATA;
	default:
		return CURSEG_WARM_DATA;
	}
}


/* This returns write hints for each segment type. These hints will be
 * passed down to the block layer. There are mapping tables which depend on
 * the mount option 'whint_mode'.
 *
 * 1) whint_mode=off. F2FS only passes down WRITE_LIFE_NOT_SET.
 *
 * 2) whint_mode=user-based. F2FS tries to pass down hints given by users.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_NOT_SET
 *                       HOT_NODE                 "
 *                       WARM_NODE                "
 *                       COLD_NODE                "
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
 *
 * 3) whint_mode=fs-based. F2FS passes down hints with its policy.
 *
 * User                  F2FS                     Block
 * ----                  ----                     -----
 *                       META                     WRITE_LIFE_MEDIUM;
 *                       HOT_NODE                 WRITE_LIFE_NOT_SET
 *                       WARM_NODE                "
 *                       COLD_NODE                WRITE_LIFE_NONE
 * ioctl(COLD)           COLD_DATA                WRITE_LIFE_EXTREME
 * extension list        "                        "
 *
 * -- buffered io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_LONG
 * WRITE_LIFE_NONE       "                        "
 * WRITE_LIFE_MEDIUM     "                        "
 * WRITE_LIFE_LONG       "                        "
 *
 * -- direct io
 * WRITE_LIFE_EXTREME    COLD_DATA                WRITE_LIFE_EXTREME
 * WRITE_LIFE_SHORT      HOT_DATA                 WRITE_LIFE_SHORT
 * WRITE_LIFE_NOT_SET    WARM_DATA                WRITE_LIFE_NOT_SET
 * WRITE_LIFE_NONE       "                        WRITE_LIFE_NONE
 * WRITE_LIFE_MEDIUM     "                        WRITE_LIFE_MEDIUM
 * WRITE_LIFE_LONG       "                        WRITE_LIFE_LONG
 */
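
/*
 * Illustrative sketch (not part of this file): with whint_mode=user-based,
 * an application can steer a file's data to the hot log by setting the
 * standard per-file write hint, e.g.
 *
 *	uint64_t hint = RWH_WRITE_LIFE_SHORT;
 *	fcntl(fd, F_SET_RW_HINT, &hint);
 *
 * Buffered writes to that file are then allocated from HOT_DATA and passed
 * down to the block layer as WRITE_LIFE_SHORT, as in the tables above.
 */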

enum rw_hint f2fs_io_type_to_rw_hint(struct f2fs_sb_info *sbi,
				enum page_type type, enum temp_type temp)
{
	if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_USER) {
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_NOT_SET;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else {
			return WRITE_LIFE_NOT_SET;
		}
	} else if (F2FS_OPTION(sbi).whint_mode == WHINT_MODE_FS) {
		if (type == DATA) {
			if (temp == WARM)
				return WRITE_LIFE_LONG;
			else if (temp == HOT)
				return WRITE_LIFE_SHORT;
			else if (temp == COLD)
				return WRITE_LIFE_EXTREME;
		} else if (type == NODE) {
			if (temp == WARM || temp == HOT)
				return WRITE_LIFE_NOT_SET;
			else if (temp == COLD)
				return WRITE_LIFE_NONE;
		} else if (type == META) {
			return WRITE_LIFE_MEDIUM;
		}
	}
	return WRITE_LIFE_NOT_SET;
}

static int __get_segment_type_2(struct f2fs_io_info *fio)
{
	if (fio->type == DATA)
		return CURSEG_HOT_DATA;
	else
		return CURSEG_HOT_NODE;
}

static int __get_segment_type_4(struct f2fs_io_info *fio)
{
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;

		if (S_ISDIR(inode->i_mode))
			return CURSEG_HOT_DATA;
		else
			return CURSEG_COLD_DATA;
	} else {
		if (IS_DNODE(fio->page) && is_cold_node(fio->page))
			return CURSEG_WARM_NODE;
		else
			return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type_6(struct f2fs_io_info *fio)
{
	if (fio->type == DATA) {
		struct inode *inode = fio->page->mapping->host;

		if (is_cold_data(fio->page) || file_is_cold(inode))
			return CURSEG_COLD_DATA;
		if (file_is_hot(inode) ||
				is_inode_flag_set(inode, FI_HOT_DATA) ||
				f2fs_is_atomic_file(inode) ||
				f2fs_is_volatile_file(inode))
			return CURSEG_HOT_DATA;
		return f2fs_rw_hint_to_seg_type(inode->i_write_hint);
	} else {
		if (IS_DNODE(fio->page))
			return is_cold_node(fio->page) ? CURSEG_WARM_NODE :
						CURSEG_HOT_NODE;
		return CURSEG_COLD_NODE;
	}
}

static int __get_segment_type(struct f2fs_io_info *fio)
{
	int type = 0;

	switch (F2FS_OPTION(fio->sbi).active_logs) {
	case 2:
		type = __get_segment_type_2(fio);
		break;
	case 4:
		type = __get_segment_type_4(fio);
		break;
	case 6:
		type = __get_segment_type_6(fio);
		break;
	default:
		f2fs_bug_on(fio->sbi, true);
	}

	if (IS_HOT(type))
		fio->temp = HOT;
	else if (IS_WARM(type))
		fio->temp = WARM;
	else
		fio->temp = COLD;
	return type;
}

void f2fs_allocate_data_block(struct f2fs_sb_info *sbi, struct page *page,
		block_t old_blkaddr, block_t *new_blkaddr,
		struct f2fs_summary *sum, int type,
		struct f2fs_io_info *fio, bool add_list)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, type);
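
	/*
	 * Lock order used below: SM_I(sbi)->curseg_lock (read), then
	 * curseg->curseg_mutex, then sit_i->sentry_lock; the summary entry,
	 * next_blkoff and SIT entries are all updated under these locks.
	 */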

	down_read(&SM_I(sbi)->curseg_lock);

	mutex_lock(&curseg->curseg_mutex);
	down_write(&sit_i->sentry_lock);

	*new_blkaddr = NEXT_FREE_BLKADDR(sbi, curseg);

	f2fs_wait_discard_bio(sbi, *new_blkaddr);

	/*
	 * __add_sum_entry must be called with curseg_mutex held because
	 * this function updates a summary entry in the current summary
	 * block.
	 */
	__add_sum_entry(sbi, type, sum);

	__refresh_next_blkoff(sbi, curseg);

	stat_inc_block_count(sbi, curseg);

	/*
	 * SIT information should be updated before segment allocation,
	 * since SSR needs latest valid block information.
	 */
	update_sit_entry(sbi, *new_blkaddr, 1);
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
		update_sit_entry(sbi, old_blkaddr, -1);

	if (!__has_curseg_space(sbi, type))
		sit_i->s_ops->allocate_segment(sbi, type, false);

	/*
	 * Segment dirty status should be updated after segment allocation,
	 * so we just need to update the status once, after the previous
	 * segment has been closed.
	 */
	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, *new_blkaddr));

	up_write(&sit_i->sentry_lock);

	if (page && IS_NODESEG(type)) {
		fill_node_footer_blkaddr(page, NEXT_FREE_BLKADDR(sbi, curseg));

		f2fs_inode_chksum_set(sbi, page);
	}

	if (add_list) {
		struct f2fs_bio_info *io;

		INIT_LIST_HEAD(&fio->list);
		fio->in_list = true;
		fio->retry = false;
		io = sbi->write_io[fio->type] + fio->temp;
		spin_lock(&io->io_lock);
		list_add_tail(&fio->list, &io->io_list);
		spin_unlock(&io->io_lock);
	}

	mutex_unlock(&curseg->curseg_mutex);

	up_read(&SM_I(sbi)->curseg_lock);
}

static void update_device_state(struct f2fs_io_info *fio)
{
	struct f2fs_sb_info *sbi = fio->sbi;
	unsigned int devidx;

	if (!sbi->s_ndevs)
		return;

	devidx = f2fs_target_device_index(sbi, fio->new_blkaddr);

	/* update device state for fsync */
	f2fs_set_dirty_device(sbi, fio->ino, devidx, FLUSH_INO);

	/* update device state for checkpoint */
	if (!f2fs_test_bit(devidx, (char *)&sbi->dirty_device)) {
		spin_lock(&sbi->dev_lock);
		f2fs_set_bit(devidx, (char *)&sbi->dirty_device);
		spin_unlock(&sbi->dev_lock);
	}
}

static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
	int type = __get_segment_type(fio);
	bool keep_order = (test_opt(fio->sbi, LFS) && type == CURSEG_COLD_DATA);

	if (keep_order)
		down_read(&fio->sbi->io_order_lock);
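
	/*
	 * In LFS mode cold data writes take io_order_lock (keep_order) so
	 * that block allocation and bio submission stay in the same order.
	 * If the write could not be submitted at the allocated address
	 * (fio->retry), a new block is allocated and the write is retried.
	 */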
reallocate:
	f2fs_allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
			&fio->new_blkaddr, sum, type, fio, true);
	if (GET_SEGNO(fio->sbi, fio->old_blkaddr) != NULL_SEGNO)
		invalidate_mapping_pages(META_MAPPING(fio->sbi),
					fio->old_blkaddr, fio->old_blkaddr);

	/* writeout dirty page into bdev */
	f2fs_submit_page_write(fio);
	if (fio->retry) {
		fio->old_blkaddr = fio->new_blkaddr;
		goto reallocate;
	}

	update_device_state(fio);

	if (keep_order)
		up_read(&fio->sbi->io_order_lock);
}

C
Chao Yu 已提交
3001
void f2fs_do_write_meta_page(struct f2fs_sb_info *sbi, struct page *page,
C
Chao Yu 已提交
3002
					enum iostat_type io_type)
J
Jaegeuk Kim 已提交
3003
{
J
Jaegeuk Kim 已提交
3004
	struct f2fs_io_info fio = {
3005
		.sbi = sbi,
J
Jaegeuk Kim 已提交
3006
		.type = META,
3007
		.temp = HOT,
M
Mike Christie 已提交
3008
		.op = REQ_OP_WRITE,
3009
		.op_flags = REQ_SYNC | REQ_META | REQ_PRIO,
3010 3011
		.old_blkaddr = page->index,
		.new_blkaddr = page->index,
3012
		.page = page,
3013
		.encrypted_page = NULL,
3014
		.in_list = false,
J
Jaegeuk Kim 已提交
3015 3016
	};

3017
	if (unlikely(page->index >= MAIN_BLKADDR(sbi)))
M
Mike Christie 已提交
3018
		fio.op_flags &= ~REQ_META;
3019

J
Jaegeuk Kim 已提交
3020
	set_page_writeback(page);
J
Jaegeuk Kim 已提交
3021
	ClearPageError(page);
3022
	f2fs_submit_page_write(&fio);
C
Chao Yu 已提交
3023 3024

	f2fs_update_iostat(sbi, io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3025 3026
}

C
Chao Yu 已提交
3027
void f2fs_do_write_node_page(unsigned int nid, struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3028 3029
{
	struct f2fs_summary sum;
3030

J
Jaegeuk Kim 已提交
3031
	set_summary(&sum, nid, 0, 0);
3032
	do_write_page(&sum, fio);
C
Chao Yu 已提交
3033 3034

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3035 3036
}

C
Chao Yu 已提交
3037 3038
void f2fs_outplace_write_data(struct dnode_of_data *dn,
					struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3039
{
3040
	struct f2fs_sb_info *sbi = fio->sbi;
J
Jaegeuk Kim 已提交
3041 3042
	struct f2fs_summary sum;

3043
	f2fs_bug_on(sbi, dn->data_blkaddr == NULL_ADDR);
3044
	set_summary(&sum, dn->nid, dn->ofs_in_node, fio->version);
3045
	do_write_page(&sum, fio);
3046
	f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
C
Chao Yu 已提交
3047 3048

	f2fs_update_iostat(sbi, fio->io_type, F2FS_BLKSIZE);
J
Jaegeuk Kim 已提交
3049 3050
}

C
Chao Yu 已提交
3051
int f2fs_inplace_write_data(struct f2fs_io_info *fio)
J
Jaegeuk Kim 已提交
3052
{
C
Chao Yu 已提交
3053
	int err;
3054
	struct f2fs_sb_info *sbi = fio->sbi;
C
Chao Yu 已提交
3055

3056
	fio->new_blkaddr = fio->old_blkaddr;
3057 3058
	/* i/o temperature is needed for passing down write hints */
	__get_segment_type(fio);
3059 3060 3061 3062

	f2fs_bug_on(sbi, !IS_DATASEG(get_seg_entry(sbi,
			GET_SEGNO(sbi, fio->new_blkaddr))->type));

3063
	stat_inc_inplace_blocks(fio->sbi);
C
Chao Yu 已提交
3064 3065

	err = f2fs_submit_page_bio(fio);
C
Chao Yu 已提交
3066 3067
	if (!err)
		update_device_state(fio);
C
Chao Yu 已提交
3068 3069 3070 3071

	f2fs_update_iostat(fio->sbi, fio->io_type, F2FS_BLKSIZE);

	return err;
J
Jaegeuk Kim 已提交
3072 3073
}

C
Chao Yu 已提交
static inline int __f2fs_get_curseg(struct f2fs_sb_info *sbi,
						unsigned int segno)
{
	int i;

	for (i = CURSEG_HOT_DATA; i < NO_CHECK_TYPE; i++) {
		if (CURSEG_I(sbi, i)->segno == segno)
			break;
	}
	return i;
}

C
Chao Yu 已提交
3086
void f2fs_do_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
3087
				block_t old_blkaddr, block_t new_blkaddr,
3088
				bool recover_curseg, bool recover_newaddr)
J
Jaegeuk Kim 已提交
3089 3090 3091 3092 3093 3094
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg;
	unsigned int segno, old_cursegno;
	struct seg_entry *se;
	int type;
3095
	unsigned short old_blkoff;
J
Jaegeuk Kim 已提交
3096 3097 3098 3099 3100

	segno = GET_SEGNO(sbi, new_blkaddr);
	se = get_seg_entry(sbi, segno);
	type = se->type;

C
Chao Yu 已提交
3101 3102
	down_write(&SM_I(sbi)->curseg_lock);

3103 3104 3105 3106 3107 3108 3109 3110 3111
	if (!recover_curseg) {
		/* for recovery flow */
		if (se->valid_blocks == 0 && !IS_CURSEG(sbi, segno)) {
			if (old_blkaddr == NULL_ADDR)
				type = CURSEG_COLD_DATA;
			else
				type = CURSEG_WARM_DATA;
		}
	} else {
C
Chao Yu 已提交
3112 3113 3114 3115 3116
		if (IS_CURSEG(sbi, segno)) {
			/* se->type is volatile as SSR allocation */
			type = __f2fs_get_curseg(sbi, segno);
			f2fs_bug_on(sbi, type == NO_CHECK_TYPE);
		} else {
J
Jaegeuk Kim 已提交
3117
			type = CURSEG_WARM_DATA;
C
Chao Yu 已提交
3118
		}
J
Jaegeuk Kim 已提交
3119
	}
3120

3121
	f2fs_bug_on(sbi, !IS_DATASEG(type));
J
Jaegeuk Kim 已提交
3122 3123 3124
	curseg = CURSEG_I(sbi, type);

	mutex_lock(&curseg->curseg_mutex);
3125
	down_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3126 3127

	old_cursegno = curseg->segno;
3128
	old_blkoff = curseg->next_blkoff;
J
Jaegeuk Kim 已提交
3129 3130 3131 3132

	/* change the current segment */
	if (segno != curseg->segno) {
		curseg->next_segno = segno;
3133
		change_curseg(sbi, type);
J
Jaegeuk Kim 已提交
3134 3135
	}

J
Jaegeuk Kim 已提交
3136
	curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
3137
	__add_sum_entry(sbi, type, sum);
J
Jaegeuk Kim 已提交
3138

3139
	if (!recover_curseg || recover_newaddr)
3140
		update_sit_entry(sbi, new_blkaddr, 1);
3141 3142 3143
	if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO) {
		invalidate_mapping_pages(META_MAPPING(sbi),
					old_blkaddr, old_blkaddr);
3144
		update_sit_entry(sbi, old_blkaddr, -1);
3145
	}
3146 3147 3148 3149

	locate_dirty_segment(sbi, GET_SEGNO(sbi, old_blkaddr));
	locate_dirty_segment(sbi, GET_SEGNO(sbi, new_blkaddr));

J
Jaegeuk Kim 已提交
3150 3151
	locate_dirty_segment(sbi, old_cursegno);

3152 3153 3154
	if (recover_curseg) {
		if (old_cursegno != curseg->segno) {
			curseg->next_segno = old_cursegno;
3155
			change_curseg(sbi, type);
3156 3157 3158 3159
		}
		curseg->next_blkoff = old_blkoff;
	}

3160
	up_write(&sit_i->sentry_lock);
J
Jaegeuk Kim 已提交
3161
	mutex_unlock(&curseg->curseg_mutex);
C
Chao Yu 已提交
3162
	up_write(&SM_I(sbi)->curseg_lock);
J
Jaegeuk Kim 已提交
3163 3164
}

3165 3166
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
				block_t old_addr, block_t new_addr,
3167 3168
				unsigned char version, bool recover_curseg,
				bool recover_newaddr)
3169 3170 3171 3172 3173
{
	struct f2fs_summary sum;

	set_summary(&sum, dn->nid, dn->ofs_in_node, version);

C
Chao Yu 已提交
3174
	f2fs_do_replace_block(sbi, &sum, old_addr, new_addr,
3175
					recover_curseg, recover_newaddr);
3176

3177
	f2fs_update_data_blkaddr(dn, new_addr);
3178 3179
}

3180
void f2fs_wait_on_page_writeback(struct page *page,
3181
				enum page_type type, bool ordered)
3182 3183
{
	if (PageWriteback(page)) {
3184 3185
		struct f2fs_sb_info *sbi = F2FS_P_SB(page);

3186 3187
		f2fs_submit_merged_write_cond(sbi, page->mapping->host,
						0, page->index, type);
3188 3189 3190 3191
		if (ordered)
			wait_on_page_writeback(page);
		else
			wait_for_stable_page(page);
3192 3193 3194
	}
}

3195
void f2fs_wait_on_block_writeback(struct inode *inode, block_t blkaddr)
3196
{
3197
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
3198 3199
	struct page *cpage;

3200 3201 3202
	if (!f2fs_post_read_required(inode))
		return;

3203
	if (!is_valid_data_blkaddr(sbi, blkaddr))
3204 3205 3206 3207
		return;

	cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
	if (cpage) {
3208
		f2fs_wait_on_page_writeback(cpage, DATA, true);
3209 3210 3211 3212
		f2fs_put_page(cpage, 1);
	}
}

3213
static int read_compacted_summaries(struct f2fs_sb_info *sbi)
J
Jaegeuk Kim 已提交
3214 3215 3216 3217 3218 3219 3220 3221 3222 3223
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct curseg_info *seg_i;
	unsigned char *kaddr;
	struct page *page;
	block_t start;
	int i, j, offset;

	start = start_sum_block(sbi);

C
Chao Yu 已提交
3224
	page = f2fs_get_meta_page(sbi, start++);
3225 3226
	if (IS_ERR(page))
		return PTR_ERR(page);
J
Jaegeuk Kim 已提交
3227 3228 3229 3230
	kaddr = (unsigned char *)page_address(page);

	/* Step 1: restore nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3231
	memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3232 3233 3234

	/* Step 2: restore sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3235
	memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3236 3237 3238 3239 3240 3241 3242 3243 3244 3245 3246 3247 3248 3249 3250 3251 3252 3253 3254 3255 3256 3257 3258
	offset = 2 * SUM_JOURNAL_SIZE;

	/* Step 3: restore summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blk_off;
		unsigned int segno;

		seg_i = CURSEG_I(sbi, i);
		segno = le32_to_cpu(ckpt->cur_data_segno[i]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[i]);
		seg_i->next_segno = segno;
		reset_curseg(sbi, i, 0);
		seg_i->alloc_type = ckpt->alloc_type[i];
		seg_i->next_blkoff = blk_off;

		if (seg_i->alloc_type == SSR)
			blk_off = sbi->blocks_per_seg;

		for (j = 0; j < blk_off; j++) {
			struct f2fs_summary *s;
			s = (struct f2fs_summary *)(kaddr + offset);
			seg_i->sum_blk->entries[j] = *s;
			offset += SUMMARY_SIZE;
3259
			if (offset + SUMMARY_SIZE <= PAGE_SIZE -
J
Jaegeuk Kim 已提交
3260 3261 3262 3263 3264 3265
						SUM_FOOTER_SIZE)
				continue;

			f2fs_put_page(page, 1);
			page = NULL;

C
Chao Yu 已提交
3266
			page = f2fs_get_meta_page(sbi, start++);
3267 3268
			if (IS_ERR(page))
				return PTR_ERR(page);
J
Jaegeuk Kim 已提交
3269 3270 3271 3272 3273
			kaddr = (unsigned char *)page_address(page);
			offset = 0;
		}
	}
	f2fs_put_page(page, 1);
3274
	return 0;
J
Jaegeuk Kim 已提交
3275 3276 3277 3278 3279 3280 3281 3282 3283 3284 3285
}

static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
{
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_summary_block *sum;
	struct curseg_info *curseg;
	struct page *new;
	unsigned short blk_off;
	unsigned int segno = 0;
	block_t blk_addr = 0;
3286
	int err = 0;
J
Jaegeuk Kim 已提交
3287 3288 3289 3290 3291 3292

	/* get segment number and block addr */
	if (IS_DATASEG(type)) {
		segno = le32_to_cpu(ckpt->cur_data_segno[type]);
		blk_off = le16_to_cpu(ckpt->cur_data_blkoff[type -
							CURSEG_HOT_DATA]);
3293
		if (__exist_node_summaries(sbi))
J
Jaegeuk Kim 已提交
3294 3295 3296 3297 3298 3299 3300 3301
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_TYPE, type);
		else
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_DATA_TYPE, type);
	} else {
		segno = le32_to_cpu(ckpt->cur_node_segno[type -
							CURSEG_HOT_NODE]);
		blk_off = le16_to_cpu(ckpt->cur_node_blkoff[type -
							CURSEG_HOT_NODE]);
3302
		if (__exist_node_summaries(sbi))
J
Jaegeuk Kim 已提交
3303 3304 3305 3306 3307 3308
			blk_addr = sum_blk_addr(sbi, NR_CURSEG_NODE_TYPE,
							type - CURSEG_HOT_NODE);
		else
			blk_addr = GET_SUM_BLOCK(sbi, segno);
	}

C
Chao Yu 已提交
3309
	new = f2fs_get_meta_page(sbi, blk_addr);
3310 3311
	if (IS_ERR(new))
		return PTR_ERR(new);
J
Jaegeuk Kim 已提交
3312 3313 3314
	sum = (struct f2fs_summary_block *)page_address(new);

	if (IS_NODESEG(type)) {
3315
		if (__exist_node_summaries(sbi)) {
J
Jaegeuk Kim 已提交
3316 3317 3318 3319 3320 3321 3322
			struct f2fs_summary *ns = &sum->entries[0];
			int i;
			for (i = 0; i < sbi->blocks_per_seg; i++, ns++) {
				ns->version = 0;
				ns->ofs_in_node = 0;
			}
		} else {
3323 3324 3325
			err = f2fs_restore_node_summary(sbi, segno, sum);
			if (err)
				goto out;
J
Jaegeuk Kim 已提交
3326 3327 3328 3329 3330 3331
		}
	}

	/* set uncompleted segment to curseg */
	curseg = CURSEG_I(sbi, type);
	mutex_lock(&curseg->curseg_mutex);
3332 3333 3334 3335 3336 3337 3338 3339

	/* update journal info */
	down_write(&curseg->journal_rwsem);
	memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
	up_write(&curseg->journal_rwsem);

	memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
	memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
J
Jaegeuk Kim 已提交
3340 3341 3342 3343 3344
	curseg->next_segno = segno;
	reset_curseg(sbi, type, 0);
	curseg->alloc_type = ckpt->alloc_type[type];
	curseg->next_blkoff = blk_off;
	mutex_unlock(&curseg->curseg_mutex);
3345
out:
J
Jaegeuk Kim 已提交
3346
	f2fs_put_page(new, 1);
3347
	return err;
J
Jaegeuk Kim 已提交
3348 3349 3350 3351
}

static int restore_curseg_summaries(struct f2fs_sb_info *sbi)
{
3352 3353
	struct f2fs_journal *sit_j = CURSEG_I(sbi, CURSEG_COLD_DATA)->journal;
	struct f2fs_journal *nat_j = CURSEG_I(sbi, CURSEG_HOT_DATA)->journal;
J
Jaegeuk Kim 已提交
3354
	int type = CURSEG_HOT_DATA;
3355
	int err;
J
Jaegeuk Kim 已提交
3356

3357
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG)) {
C
Chao Yu 已提交
3358
		int npages = f2fs_npages_for_summary_flush(sbi, true);
3359 3360

		if (npages >= 2)
C
Chao Yu 已提交
3361
			f2fs_ra_meta_pages(sbi, start_sum_block(sbi), npages,
3362
							META_CP, true);
3363

J
Jaegeuk Kim 已提交
3364
		/* restore for compacted data summary */
3365 3366 3367
		err = read_compacted_summaries(sbi);
		if (err)
			return err;
J
Jaegeuk Kim 已提交
3368 3369 3370
		type = CURSEG_HOT_NODE;
	}

3371
	if (__exist_node_summaries(sbi))
C
Chao Yu 已提交
3372
		f2fs_ra_meta_pages(sbi, sum_blk_addr(sbi, NR_CURSEG_TYPE, type),
3373
					NR_CURSEG_TYPE - type, META_CP, true);
3374

3375 3376 3377 3378 3379 3380
	for (; type <= CURSEG_COLD_NODE; type++) {
		err = read_normal_summaries(sbi, type);
		if (err)
			return err;
	}

3381 3382 3383 3384 3385
	/* sanity check for summary blocks */
	if (nats_in_cursum(nat_j) > NAT_JOURNAL_ENTRIES ||
			sits_in_cursum(sit_j) > SIT_JOURNAL_ENTRIES)
		return -EINVAL;

J
Jaegeuk Kim 已提交
3386 3387 3388 3389 3390 3391 3392 3393 3394 3395 3396 3397
	return 0;
}

static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
{
	struct page *page;
	unsigned char *kaddr;
	struct f2fs_summary *summary;
	struct curseg_info *seg_i;
	int written_size = 0;
	int i, j;

C
Chao Yu 已提交
3398
	page = f2fs_grab_meta_page(sbi, blkaddr++);
J
Jaegeuk Kim 已提交
3399
	kaddr = (unsigned char *)page_address(page);
3400
	memset(kaddr, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
3401 3402 3403

	/* Step 1: write nat cache */
	seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
3404
	memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3405 3406 3407 3408
	written_size += SUM_JOURNAL_SIZE;

	/* Step 2: write sit cache */
	seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
3409
	memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
J
Jaegeuk Kim 已提交
3410 3411 3412 3413 3414 3415 3416 3417 3418 3419 3420 3421 3422
	written_size += SUM_JOURNAL_SIZE;

	/* Step 3: write summary entries */
	for (i = CURSEG_HOT_DATA; i <= CURSEG_COLD_DATA; i++) {
		unsigned short blkoff;
		seg_i = CURSEG_I(sbi, i);
		if (sbi->ckpt->alloc_type[i] == SSR)
			blkoff = sbi->blocks_per_seg;
		else
			blkoff = curseg_blkoff(sbi, i);

		for (j = 0; j < blkoff; j++) {
			if (!page) {
C
Chao Yu 已提交
3423
				page = f2fs_grab_meta_page(sbi, blkaddr++);
J
Jaegeuk Kim 已提交
3424
				kaddr = (unsigned char *)page_address(page);
3425
				memset(kaddr, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
3426 3427 3428 3429 3430 3431
				written_size = 0;
			}
			summary = (struct f2fs_summary *)(kaddr + written_size);
			*summary = seg_i->sum_blk->entries[j];
			written_size += SUMMARY_SIZE;

3432
			if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
J
Jaegeuk Kim 已提交
3433 3434 3435
							SUM_FOOTER_SIZE)
				continue;

3436
			set_page_dirty(page);
J
Jaegeuk Kim 已提交
3437 3438 3439 3440
			f2fs_put_page(page, 1);
			page = NULL;
		}
	}
3441 3442
	if (page) {
		set_page_dirty(page);
J
Jaegeuk Kim 已提交
3443
		f2fs_put_page(page, 1);
3444
	}
J
Jaegeuk Kim 已提交
3445 3446 3447 3448 3449 3450 3451 3452 3453 3454 3455
}

static void write_normal_summaries(struct f2fs_sb_info *sbi,
					block_t blkaddr, int type)
{
	int i, end;
	if (IS_DATASEG(type))
		end = type + NR_CURSEG_DATA_TYPE;
	else
		end = type + NR_CURSEG_NODE_TYPE;

3456 3457
	for (i = type; i < end; i++)
		write_current_sum_page(sbi, i, blkaddr + (i - type));
J
Jaegeuk Kim 已提交
3458 3459
}

C
Chao Yu 已提交
3460
void f2fs_write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
J
Jaegeuk Kim 已提交
3461
{
3462
	if (is_set_ckpt_flags(sbi, CP_COMPACT_SUM_FLAG))
J
Jaegeuk Kim 已提交
3463 3464 3465 3466 3467
		write_compacted_summaries(sbi, start_blk);
	else
		write_normal_summaries(sbi, start_blk, CURSEG_HOT_DATA);
}

C
Chao Yu 已提交
3468
void f2fs_write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
J
Jaegeuk Kim 已提交
3469
{
3470
	write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
J
Jaegeuk Kim 已提交
3471 3472
}

C
Chao Yu 已提交
3473
int f2fs_lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
J
Jaegeuk Kim 已提交
3474 3475 3476 3477 3478
					unsigned int val, int alloc)
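/*
 * Returns the journal slot index that holds @val for the given journal
 * type.  When no slot matches and @alloc is set, a new slot is reserved
 * if the journal still has room; -1 means no slot is available.
 */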
{
	int i;

	if (type == NAT_JOURNAL) {
3479 3480
		for (i = 0; i < nats_in_cursum(journal); i++) {
			if (le32_to_cpu(nid_in_journal(journal, i)) == val)
J
Jaegeuk Kim 已提交
3481 3482
				return i;
		}
3483 3484
		if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
			return update_nats_in_cursum(journal, 1);
J
Jaegeuk Kim 已提交
3485
	} else if (type == SIT_JOURNAL) {
3486 3487
		for (i = 0; i < sits_in_cursum(journal); i++)
			if (le32_to_cpu(segno_in_journal(journal, i)) == val)
J
Jaegeuk Kim 已提交
3488
				return i;
3489 3490
		if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
			return update_sits_in_cursum(journal, 1);
J
Jaegeuk Kim 已提交
3491 3492 3493 3494 3495 3496 3497
	}
	return -1;
}

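/*
 * SIT blocks are double-buffered on disk: current_sit_addr() points at the
 * copy that is valid for this checkpoint, next_sit_addr() at its
 * counterpart.  get_next_sit_page() fills the counterpart from the
 * in-memory entries and flips the SIT bitmap via set_to_next_sit().
 */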
static struct page *get_current_sit_page(struct f2fs_sb_info *sbi,
					unsigned int segno)
{
	return f2fs_get_meta_page_nofail(sbi, current_sit_addr(sbi, segno));
}

static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
					unsigned int start)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct page *page;
	pgoff_t src_off, dst_off;

	src_off = current_sit_addr(sbi, start);
	dst_off = next_sit_addr(sbi, src_off);

	page = f2fs_grab_meta_page(sbi, dst_off);
	seg_info_to_sit_page(sbi, page, start);

	set_page_dirty(page);
	set_to_next_sit(sit_i, start);

	return page;
}

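/*
 * Dirty SIT entries are grouped into sit_entry_sets, one set per on-disk
 * SIT block (identified by START_SEGNO).  adjust_sit_entry_set() keeps the
 * list ordered by entry_cnt, which f2fs_flush_sit_entries() uses when
 * deciding whether a set still fits into the journal or has to be written
 * back through a SIT page.
 */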
static struct sit_entry_set *grab_sit_entry_set(void)
{
	struct sit_entry_set *ses =
			f2fs_kmem_cache_alloc(sit_entry_set_slab, GFP_NOFS);

	ses->entry_cnt = 0;
	INIT_LIST_HEAD(&ses->set_list);
	return ses;
}

static void release_sit_entry_set(struct sit_entry_set *ses)
{
	list_del(&ses->set_list);
	kmem_cache_free(sit_entry_set_slab, ses);
}

static void adjust_sit_entry_set(struct sit_entry_set *ses,
						struct list_head *head)
{
	struct sit_entry_set *next = ses;

	if (list_is_last(&ses->set_list, head))
		return;

	list_for_each_entry_continue(next, head, set_list)
		if (ses->entry_cnt <= next->entry_cnt)
			break;

	list_move_tail(&ses->set_list, &next->set_list);
}

static void add_sit_entry(unsigned int segno, struct list_head *head)
{
	struct sit_entry_set *ses;
	unsigned int start_segno = START_SEGNO(segno);

	list_for_each_entry(ses, head, set_list) {
		if (ses->start_segno == start_segno) {
			ses->entry_cnt++;
			adjust_sit_entry_set(ses, head);
			return;
		}
	}

	ses = grab_sit_entry_set();

	ses->start_segno = start_segno;
	ses->entry_cnt++;
	list_add(&ses->set_list, head);
}

static void add_sits_in_set(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);
	struct list_head *set_list = &sm_info->sit_entry_set;
	unsigned long *bitmap = SIT_I(sbi)->dirty_sentries_bitmap;
	unsigned int segno;

	for_each_set_bit(segno, bitmap, MAIN_SEGS(sbi))
		add_sit_entry(segno, set_list);
}

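/*
 * Mark every segment that still has a SIT entry cached in the journal as
 * dirty (accounting it in a sit_entry_set where needed) and then empty the
 * journal, so the following flush can redistribute those entries.
 */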
static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	int i;

	down_write(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int segno;
		bool dirtied;

		segno = le32_to_cpu(segno_in_journal(journal, i));
		dirtied = __mark_sit_entry_dirty(sbi, segno);

		if (!dirtied)
			add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
	}
	update_sits_in_cursum(journal, -i);
	up_write(&curseg->journal_rwsem);
}

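/*
 * Flushing prefers the journal: as long as a sit_entry_set still fits into
 * the SIT journal of the cold data curseg it is recorded there; otherwise
 * the corresponding SIT block is rewritten via get_next_sit_page().
 */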
/*
 * CP calls this function, which flushes SIT entries including sit_journal,
 * and moves prefree segs to free segs.
 */
void f2fs_flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct sit_entry_set *ses, *tmp;
	struct list_head *head = &SM_I(sbi)->sit_entry_set;
	bool to_journal = true;
	struct seg_entry *se;

	down_write(&sit_i->sentry_lock);

	if (!sit_i->dirty_sentries)
		goto out;

	/*
	 * add and account sit entries of dirty bitmap in sit entry
	 * set temporarily
	 */
	add_sits_in_set(sbi);

	/*
	 * if there is not enough space in the journal to store dirty sit
	 * entries, remove all entries from the journal and add and account
	 * them in the sit entry set.
	 */
	if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
		remove_sits_in_journal(sbi);

	/*
	 * there are two steps to flush sit entries:
	 * #1, flush sit entries to journal in current cold data summary block.
	 * #2, flush sit entries to sit page.
	 */
	list_for_each_entry_safe(ses, tmp, head, set_list) {
		struct page *page = NULL;
		struct f2fs_sit_block *raw_sit = NULL;
		unsigned int start_segno = ses->start_segno;
		unsigned int end = min(start_segno + SIT_ENTRY_PER_BLOCK,
						(unsigned long)MAIN_SEGS(sbi));
		unsigned int segno = start_segno;

		if (to_journal &&
			!__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
			to_journal = false;

		if (to_journal) {
			down_write(&curseg->journal_rwsem);
		} else {
			page = get_next_sit_page(sbi, start_segno);
			raw_sit = page_address(page);
		}

		/* flush dirty sit entries in region of current sit set */
		for_each_set_bit_from(segno, bitmap, end) {
			int offset, sit_offset;

			se = get_seg_entry(sbi, segno);
#ifdef CONFIG_F2FS_CHECK_FS
			if (memcmp(se->cur_valid_map, se->cur_valid_map_mir,
						SIT_VBLOCK_MAP_SIZE))
				f2fs_bug_on(sbi, 1);
#endif

			/* add discard candidates */
			if (!(cpc->reason & CP_DISCARD)) {
				cpc->trim_start = segno;
				add_discard_addrs(sbi, cpc, false);
			}

			if (to_journal) {
				offset = f2fs_lookup_journal_in_cursum(journal,
							SIT_JOURNAL, segno, 1);
				f2fs_bug_on(sbi, offset < 0);
				segno_in_journal(journal, offset) =
							cpu_to_le32(segno);
				seg_info_to_raw_sit(se,
					&sit_in_journal(journal, offset));
				check_block_count(sbi, segno,
					&sit_in_journal(journal, offset));
			} else {
				sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
				seg_info_to_raw_sit(se,
						&raw_sit->entries[sit_offset]);
				check_block_count(sbi, segno,
						&raw_sit->entries[sit_offset]);
			}

			__clear_bit(segno, bitmap);
			sit_i->dirty_sentries--;
			ses->entry_cnt--;
		}

		if (to_journal)
			up_write(&curseg->journal_rwsem);
		else
			f2fs_put_page(page, 1);

		f2fs_bug_on(sbi, ses->entry_cnt);
		release_sit_entry_set(ses);
	}

	f2fs_bug_on(sbi, !list_empty(head));
	f2fs_bug_on(sbi, sit_i->dirty_sentries);
out:
	if (cpc->reason & CP_DISCARD) {
		__u64 trim_start = cpc->trim_start;

		for (; cpc->trim_start <= cpc->trim_end; cpc->trim_start++)
			add_discard_addrs(sbi, cpc, false);

		cpc->trim_start = trim_start;
	}
	up_write(&sit_i->sentry_lock);

	set_prefree_as_free_segments(sbi);
}

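/*
 * Allocate and initialize the in-memory SIT information: per-segment
 * seg_entry bitmaps (current, checkpointed and discard maps), the
 * dirty-sentries bitmap, optional per-section entries, and a copy of the
 * checkpoint's SIT bitmap.
 */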
static int build_sit_info(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct sit_info *sit_i;
	unsigned int sit_segs, start;
	char *src_bitmap;
	unsigned int bitmap_size;

	/* allocate memory for SIT information */
	sit_i = f2fs_kzalloc(sbi, sizeof(struct sit_info), GFP_KERNEL);
	if (!sit_i)
		return -ENOMEM;

	SM_I(sbi)->sit_info = sit_i;

	sit_i->sentries =
		f2fs_kvzalloc(sbi, array_size(sizeof(struct seg_entry),
					      MAIN_SEGS(sbi)),
			      GFP_KERNEL);
	if (!sit_i->sentries)
		return -ENOMEM;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	sit_i->dirty_sentries_bitmap = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
	if (!sit_i->dirty_sentries_bitmap)
		return -ENOMEM;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		sit_i->sentries[start].cur_valid_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		sit_i->sentries[start].ckpt_valid_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map ||
				!sit_i->sentries[start].ckpt_valid_map)
			return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
		sit_i->sentries[start].cur_valid_map_mir
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
		if (!sit_i->sentries[start].cur_valid_map_mir)
			return -ENOMEM;
#endif

		sit_i->sentries[start].discard_map
			= f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE,
							GFP_KERNEL);
		if (!sit_i->sentries[start].discard_map)
			return -ENOMEM;
	}

	sit_i->tmp_map = f2fs_kzalloc(sbi, SIT_VBLOCK_MAP_SIZE, GFP_KERNEL);
	if (!sit_i->tmp_map)
		return -ENOMEM;

	if (sbi->segs_per_sec > 1) {
		sit_i->sec_entries =
			f2fs_kvzalloc(sbi, array_size(sizeof(struct sec_entry),
						      MAIN_SECS(sbi)),
				      GFP_KERNEL);
		if (!sit_i->sec_entries)
			return -ENOMEM;
	}

	/* get information related to SIT */
	sit_segs = le32_to_cpu(raw_super->segment_count_sit) >> 1;

	/* setup SIT bitmap from checkpoint pack */
	bitmap_size = __bitmap_size(sbi, SIT_BITMAP);
	src_bitmap = __bitmap_ptr(sbi, SIT_BITMAP);

	sit_i->sit_bitmap = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap)
		return -ENOMEM;

#ifdef CONFIG_F2FS_CHECK_FS
	sit_i->sit_bitmap_mir = kmemdup(src_bitmap, bitmap_size, GFP_KERNEL);
	if (!sit_i->sit_bitmap_mir)
		return -ENOMEM;
#endif

	/* init SIT information */
	sit_i->s_ops = &default_salloc_ops;

	sit_i->sit_base_addr = le32_to_cpu(raw_super->sit_blkaddr);
	sit_i->sit_blocks = sit_segs << sbi->log_blocks_per_seg;
	sit_i->written_valid_blocks = 0;
	sit_i->bitmap_size = bitmap_size;
	sit_i->dirty_sentries = 0;
	sit_i->sents_per_block = SIT_ENTRY_PER_BLOCK;
	sit_i->elapsed_time = le64_to_cpu(sbi->ckpt->elapsed_time);
	sit_i->mounted_time = ktime_get_real_seconds();
	init_rwsem(&sit_i->sentry_lock);
	return 0;
}

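/*
 * Allocate the free segment/section bitmaps.  Every bit starts out as
 * "in use"; init_free_segmap() clears them later from the SIT information.
 */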
static int build_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i;
	unsigned int bitmap_size, sec_bitmap_size;

	/* allocate memory for free segmap information */
	free_i = f2fs_kzalloc(sbi, sizeof(struct free_segmap_info), GFP_KERNEL);
	if (!free_i)
		return -ENOMEM;

	SM_I(sbi)->free_info = free_i;

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));
	free_i->free_segmap = f2fs_kvmalloc(sbi, bitmap_size, GFP_KERNEL);
	if (!free_i->free_segmap)
		return -ENOMEM;

	sec_bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));
	free_i->free_secmap = f2fs_kvmalloc(sbi, sec_bitmap_size, GFP_KERNEL);
	if (!free_i->free_secmap)
		return -ENOMEM;

	/* set all segments as dirty temporarily */
	memset(free_i->free_segmap, 0xff, bitmap_size);
	memset(free_i->free_secmap, 0xff, sec_bitmap_size);

	/* init free segmap information */
	free_i->start_segno = GET_SEGNO_FROM_SEG0(sbi, MAIN_BLKADDR(sbi));
	free_i->free_segments = 0;
	free_i->free_sections = 0;
	spin_lock_init(&free_i->segmap_lock);
	return 0;
}

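/*
 * Allocate the curseg array: one in-memory summary block and one journal
 * per log type, then restore their contents from the checkpoint through
 * restore_curseg_summaries().
 */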
static int build_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array;
	int i;

	array = f2fs_kzalloc(sbi, array_size(NR_CURSEG_TYPE, sizeof(*array)),
			     GFP_KERNEL);
	if (!array)
		return -ENOMEM;

	SM_I(sbi)->curseg_array = array;

	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		mutex_init(&array[i].curseg_mutex);
		array[i].sum_blk = f2fs_kzalloc(sbi, PAGE_SIZE, GFP_KERNEL);
		if (!array[i].sum_blk)
			return -ENOMEM;
		init_rwsem(&array[i].journal_rwsem);
		array[i].journal = f2fs_kzalloc(sbi,
				sizeof(struct f2fs_journal), GFP_KERNEL);
		if (!array[i].journal)
			return -ENOMEM;
		array[i].segno = NULL_SEGNO;
		array[i].next_blkoff = 0;
	}
	return restore_curseg_summaries(sbi);
}

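/*
 * Load the in-memory seg_entry array from two sources: first the on-disk
 * SIT blocks, then the SIT journal of the cold data curseg, whose entries
 * are newer and override the block contents.  Each entry is sanity-checked
 * and the node block total is compared against valid_node_count().
 */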
static int build_sit_entries(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
	struct f2fs_journal *journal = curseg->journal;
	struct seg_entry *se;
	struct f2fs_sit_entry sit;
	int sit_blk_cnt = SIT_BLK_CNT(sbi);
	unsigned int i, start, end;
	unsigned int readed, start_blk = 0;
	int err = 0;
	block_t total_node_blocks = 0;

	do {
		readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
							META_SIT, true);

		start = start_blk * sit_i->sents_per_block;
		end = (start_blk + readed) * sit_i->sents_per_block;

		for (; start < end && start < MAIN_SEGS(sbi); start++) {
			struct f2fs_sit_block *sit_blk;
			struct page *page;

			se = &sit_i->sentries[start];
			page = get_current_sit_page(sbi, start);
			sit_blk = (struct f2fs_sit_block *)page_address(page);
			sit = sit_blk->entries[SIT_ENTRY_OFFSET(sit_i, start)];
			f2fs_put_page(page, 1);

			err = check_block_count(sbi, start, &sit);
			if (err)
				return err;
			seg_info_from_raw_sit(se, &sit);
			if (IS_NODESEG(se->type))
				total_node_blocks += se->valid_blocks;

			/* build discard map only one time */
			if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
				memset(se->discard_map, 0xff,
					SIT_VBLOCK_MAP_SIZE);
			} else {
				memcpy(se->discard_map,
					se->cur_valid_map,
					SIT_VBLOCK_MAP_SIZE);
				sbi->discard_blks +=
					sbi->blocks_per_seg -
					se->valid_blocks;
			}

			if (sbi->segs_per_sec > 1)
				get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
		}
		start_blk += readed;
	} while (start_blk < sit_blk_cnt);

	down_read(&curseg->journal_rwsem);
	for (i = 0; i < sits_in_cursum(journal); i++) {
		unsigned int old_valid_blocks;

		start = le32_to_cpu(segno_in_journal(journal, i));
		if (start >= MAIN_SEGS(sbi)) {
			f2fs_msg(sbi->sb, KERN_ERR,
					"Wrong journal entry on segno %u",
					start);
			set_sbi_flag(sbi, SBI_NEED_FSCK);
			err = -EINVAL;
			break;
		}

		se = &sit_i->sentries[start];
		sit = sit_in_journal(journal, i);

		old_valid_blocks = se->valid_blocks;
		if (IS_NODESEG(se->type))
			total_node_blocks -= old_valid_blocks;

		err = check_block_count(sbi, start, &sit);
		if (err)
			break;
		seg_info_from_raw_sit(se, &sit);
		if (IS_NODESEG(se->type))
			total_node_blocks += se->valid_blocks;

		if (is_set_ckpt_flags(sbi, CP_TRIMMED_FLAG)) {
			memset(se->discard_map, 0xff, SIT_VBLOCK_MAP_SIZE);
		} else {
			memcpy(se->discard_map, se->cur_valid_map,
						SIT_VBLOCK_MAP_SIZE);
			sbi->discard_blks += old_valid_blocks;
			sbi->discard_blks -= se->valid_blocks;
		}

		if (sbi->segs_per_sec > 1) {
			get_sec_entry(sbi, start)->valid_blocks +=
							se->valid_blocks;
			get_sec_entry(sbi, start)->valid_blocks -=
							old_valid_blocks;
		}
	}
	up_read(&curseg->journal_rwsem);

	if (!err && total_node_blocks != valid_node_count(sbi)) {
		f2fs_msg(sbi->sb, KERN_ERR,
			"SIT is corrupted node# %u vs %u",
			total_node_blocks, valid_node_count(sbi));
		set_sbi_flag(sbi, SBI_NEED_FSCK);
		err = -EINVAL;
	}

	return err;
}

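/*
 * With the SIT entries loaded, mark every segment without valid blocks as
 * free, accumulate written_valid_blocks for the rest, and flag the
 * segments used by the active logs as in use.
 */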
static void init_free_segmap(struct f2fs_sb_info *sbi)
{
	unsigned int start;
	int type;

	for (start = 0; start < MAIN_SEGS(sbi); start++) {
		struct seg_entry *sentry = get_seg_entry(sbi, start);
		if (!sentry->valid_blocks)
			__set_free(sbi, start);
		else
			SIT_I(sbi)->written_valid_blocks +=
						sentry->valid_blocks;
	}

	/* mark the current segments as in use */
	for (type = CURSEG_HOT_DATA; type <= CURSEG_COLD_NODE; type++) {
		struct curseg_info *curseg_t = CURSEG_I(sbi, type);
		__set_test_and_inuse(sbi, curseg_t->segno);
	}
}

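/*
 * Walk the in-use segments and mark every partially written one (neither
 * empty nor completely full) as DIRTY so it becomes a cleaning candidate.
 */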
static void init_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	struct free_segmap_info *free_i = FREE_I(sbi);
	unsigned int segno = 0, offset = 0;
	unsigned short valid_blocks;

	while (1) {
		/* find dirty segment based on free segmap */
		segno = find_next_inuse(free_i, MAIN_SEGS(sbi), offset);
		if (segno >= MAIN_SEGS(sbi))
			break;
		offset = segno + 1;
		valid_blocks = get_valid_blocks(sbi, segno, false);
		if (valid_blocks == sbi->blocks_per_seg || !valid_blocks)
			continue;
		if (valid_blocks > sbi->blocks_per_seg) {
			f2fs_bug_on(sbi, 1);
			continue;
		}
		mutex_lock(&dirty_i->seglist_lock);
		__locate_dirty_segment(sbi, segno, DIRTY);
		mutex_unlock(&dirty_i->seglist_lock);
	}
}

static int init_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	unsigned int bitmap_size = f2fs_bitmap_size(MAIN_SECS(sbi));

	dirty_i->victim_secmap = f2fs_kvzalloc(sbi, bitmap_size, GFP_KERNEL);
	if (!dirty_i->victim_secmap)
		return -ENOMEM;
	return 0;
}

static int build_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i;
	unsigned int bitmap_size, i;

	/* allocate memory for dirty segments list information */
	dirty_i = f2fs_kzalloc(sbi, sizeof(struct dirty_seglist_info),
								GFP_KERNEL);
	if (!dirty_i)
		return -ENOMEM;

	SM_I(sbi)->dirty_info = dirty_i;
	mutex_init(&dirty_i->seglist_lock);

	bitmap_size = f2fs_bitmap_size(MAIN_SEGS(sbi));

	for (i = 0; i < NR_DIRTY_TYPE; i++) {
		dirty_i->dirty_segmap[i] = f2fs_kvzalloc(sbi, bitmap_size,
								GFP_KERNEL);
		if (!dirty_i->dirty_segmap[i])
			return -ENOMEM;
	}

	init_dirty_segmap(sbi);
	return init_victim_secmap(sbi);
}

/*
 * Update min, max modified time for cost-benefit GC algorithm
 */
static void init_min_max_mtime(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int segno;

	down_write(&sit_i->sentry_lock);

	sit_i->min_mtime = ULLONG_MAX;

	for (segno = 0; segno < MAIN_SEGS(sbi); segno += sbi->segs_per_sec) {
		unsigned int i;
		unsigned long long mtime = 0;

		for (i = 0; i < sbi->segs_per_sec; i++)
			mtime += get_seg_entry(sbi, segno + i)->mtime;

		mtime = div_u64(mtime, sbi->segs_per_sec);

		if (sit_i->min_mtime > mtime)
			sit_i->min_mtime = mtime;
	}
	sit_i->max_mtime = get_mtime(sbi, false);
	up_write(&sit_i->sentry_lock);
}

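/*
 * Build the segment manager at mount time: read the tunables from the
 * superblock and checkpoint, start the flush and discard controls, then
 * build SIT info, free/dirty segmaps and cursegs before initializing the
 * mtime range used by GC.
 */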
int f2fs_build_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
	struct f2fs_checkpoint *ckpt = F2FS_CKPT(sbi);
	struct f2fs_sm_info *sm_info;
	int err;

	sm_info = f2fs_kzalloc(sbi, sizeof(struct f2fs_sm_info), GFP_KERNEL);
	if (!sm_info)
		return -ENOMEM;

	/* init sm info */
	sbi->sm_info = sm_info;
	sm_info->seg0_blkaddr = le32_to_cpu(raw_super->segment0_blkaddr);
	sm_info->main_blkaddr = le32_to_cpu(raw_super->main_blkaddr);
	sm_info->segment_count = le32_to_cpu(raw_super->segment_count);
	sm_info->reserved_segments = le32_to_cpu(ckpt->rsvd_segment_count);
	sm_info->ovp_segments = le32_to_cpu(ckpt->overprov_segment_count);
	sm_info->main_segments = le32_to_cpu(raw_super->segment_count_main);
	sm_info->ssa_blkaddr = le32_to_cpu(raw_super->ssa_blkaddr);
	sm_info->rec_prefree_segments = sm_info->main_segments *
					DEF_RECLAIM_PREFREE_SEGMENTS / 100;
	if (sm_info->rec_prefree_segments > DEF_MAX_RECLAIM_PREFREE_SEGMENTS)
		sm_info->rec_prefree_segments = DEF_MAX_RECLAIM_PREFREE_SEGMENTS;

	if (!test_opt(sbi, LFS))
		sm_info->ipu_policy = 1 << F2FS_IPU_FSYNC;
	sm_info->min_ipu_util = DEF_MIN_IPU_UTIL;
	sm_info->min_fsync_blocks = DEF_MIN_FSYNC_BLOCKS;
	sm_info->min_seq_blocks = sbi->blocks_per_seg * sbi->segs_per_sec;
	sm_info->min_hot_blocks = DEF_MIN_HOT_BLOCKS;
	sm_info->min_ssr_sections = reserved_sections(sbi);

	INIT_LIST_HEAD(&sm_info->sit_entry_set);

	init_rwsem(&sm_info->curseg_lock);

	if (!f2fs_readonly(sbi->sb)) {
		err = f2fs_create_flush_cmd_control(sbi);
		if (err)
			return err;
	}

	err = create_discard_cmd_control(sbi);
	if (err)
		return err;

	err = build_sit_info(sbi);
	if (err)
		return err;
	err = build_free_segmap(sbi);
	if (err)
		return err;
	err = build_curseg(sbi);
	if (err)
		return err;

	/* reinit free segmap based on SIT */
	err = build_sit_entries(sbi);
	if (err)
		return err;

	init_free_segmap(sbi);
	err = build_dirty_segmap(sbi);
	if (err)
		return err;

	init_min_max_mtime(sbi);
	return 0;
}

static void discard_dirty_segmap(struct f2fs_sb_info *sbi,
		enum dirty_type dirty_type)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);

	mutex_lock(&dirty_i->seglist_lock);
	kvfree(dirty_i->dirty_segmap[dirty_type]);
	dirty_i->nr_dirty[dirty_type] = 0;
	mutex_unlock(&dirty_i->seglist_lock);
}

static void destroy_victim_secmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	kvfree(dirty_i->victim_secmap);
}

static void destroy_dirty_segmap(struct f2fs_sb_info *sbi)
{
	struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
	int i;

	if (!dirty_i)
		return;

	/* discard pre-free/dirty segments list */
	for (i = 0; i < NR_DIRTY_TYPE; i++)
		discard_dirty_segmap(sbi, i);

	destroy_victim_secmap(sbi);
	SM_I(sbi)->dirty_info = NULL;
	kfree(dirty_i);
}

static void destroy_curseg(struct f2fs_sb_info *sbi)
{
	struct curseg_info *array = SM_I(sbi)->curseg_array;
	int i;

	if (!array)
		return;
	SM_I(sbi)->curseg_array = NULL;
	for (i = 0; i < NR_CURSEG_TYPE; i++) {
		kfree(array[i].sum_blk);
		kfree(array[i].journal);
	}
	kfree(array);
}

static void destroy_free_segmap(struct f2fs_sb_info *sbi)
{
	struct free_segmap_info *free_i = SM_I(sbi)->free_info;
	if (!free_i)
		return;
	SM_I(sbi)->free_info = NULL;
	kvfree(free_i->free_segmap);
	kvfree(free_i->free_secmap);
	kfree(free_i);
}

static void destroy_sit_info(struct f2fs_sb_info *sbi)
{
	struct sit_info *sit_i = SIT_I(sbi);
	unsigned int start;

	if (!sit_i)
		return;

	if (sit_i->sentries) {
		for (start = 0; start < MAIN_SEGS(sbi); start++) {
			kfree(sit_i->sentries[start].cur_valid_map);
#ifdef CONFIG_F2FS_CHECK_FS
			kfree(sit_i->sentries[start].cur_valid_map_mir);
#endif
			kfree(sit_i->sentries[start].ckpt_valid_map);
			kfree(sit_i->sentries[start].discard_map);
		}
	}
	kfree(sit_i->tmp_map);

	kvfree(sit_i->sentries);
	kvfree(sit_i->sec_entries);
	kvfree(sit_i->dirty_sentries_bitmap);

	SM_I(sbi)->sit_info = NULL;
	kfree(sit_i->sit_bitmap);
#ifdef CONFIG_F2FS_CHECK_FS
	kfree(sit_i->sit_bitmap_mir);
#endif
	kfree(sit_i);
}

void f2fs_destroy_segment_manager(struct f2fs_sb_info *sbi)
{
	struct f2fs_sm_info *sm_info = SM_I(sbi);

	if (!sm_info)
		return;
	f2fs_destroy_flush_cmd_control(sbi, true);
	destroy_discard_cmd_control(sbi);
	destroy_dirty_segmap(sbi);
	destroy_curseg(sbi);
	destroy_free_segmap(sbi);
	destroy_sit_info(sbi);
	sbi->sm_info = NULL;
	kfree(sm_info);
}

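/*
 * Slab caches used by the segment manager: discard entries and commands,
 * sit_entry_sets and in-memory page entries.  They are created once at
 * module init and destroyed in reverse order on failure or at module exit.
 */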
int __init f2fs_create_segment_manager_caches(void)
{
	discard_entry_slab = f2fs_kmem_cache_create("discard_entry",
			sizeof(struct discard_entry));
	if (!discard_entry_slab)
		goto fail;

	discard_cmd_slab = f2fs_kmem_cache_create("discard_cmd",
			sizeof(struct discard_cmd));
	if (!discard_cmd_slab)
		goto destroy_discard_entry;

	sit_entry_set_slab = f2fs_kmem_cache_create("sit_entry_set",
			sizeof(struct sit_entry_set));
	if (!sit_entry_set_slab)
		goto destroy_discard_cmd;

	inmem_entry_slab = f2fs_kmem_cache_create("inmem_page_entry",
			sizeof(struct inmem_pages));
	if (!inmem_entry_slab)
		goto destroy_sit_entry_set;
	return 0;

destroy_sit_entry_set:
	kmem_cache_destroy(sit_entry_set_slab);
destroy_discard_cmd:
	kmem_cache_destroy(discard_cmd_slab);
destroy_discard_entry:
	kmem_cache_destroy(discard_entry_slab);
fail:
	return -ENOMEM;
}

void f2fs_destroy_segment_manager_caches(void)
{
	kmem_cache_destroy(sit_entry_set_slab);
	kmem_cache_destroy(discard_cmd_slab);
	kmem_cache_destroy(discard_entry_slab);
	kmem_cache_destroy(inmem_entry_slab);
}