/*
 * fs/f2fs/data.c
 *
 * Copyright (c) 2012 Samsung Electronics Co., Ltd.
 *             http://www.samsung.com/
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */
#include <linux/fs.h>
#include <linux/f2fs_fs.h>
#include <linux/buffer_head.h>
#include <linux/mpage.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
#include <linux/blkdev.h>
#include <linux/bio.h>
#include <linux/prefetch.h>
#include <linux/uio.h>
#include <linux/cleancache.h>

#include "f2fs.h"
#include "node.h"
#include "segment.h"
#include "trace.h"
#include <trace/events/f2fs.h>

30
static void f2fs_read_end_io(struct bio *bio)
31
{
32 33
	struct bio_vec *bvec;
	int i;
34

35
	if (f2fs_bio_encrypted(bio)) {
36
		if (bio->bi_error) {
37
			fscrypt_release_ctx(bio->bi_private);
38
		} else {
39
			fscrypt_decrypt_bio_pages(bio->bi_private, bio);
40 41 42 43
			return;
		}
	}

44 45
	bio_for_each_segment_all(bvec, bio, i) {
		struct page *page = bvec->bv_page;
J
Jaegeuk Kim 已提交
46

47
		if (!bio->bi_error) {
J
Jaegeuk Kim 已提交
48 49 50 51 52 53 54 55 56 57
			SetPageUptodate(page);
		} else {
			ClearPageUptodate(page);
			SetPageError(page);
		}
		unlock_page(page);
	}
	bio_put(bio);
}

58
static void f2fs_write_end_io(struct bio *bio)
59
{
60
	struct f2fs_sb_info *sbi = bio->bi_private;
61 62
	struct bio_vec *bvec;
	int i;
63

64
	bio_for_each_segment_all(bvec, bio, i) {
65 66
		struct page *page = bvec->bv_page;

67
		fscrypt_pullback_bio_page(&page, true);
68

69
		if (unlikely(bio->bi_error)) {
70
			set_bit(AS_EIO, &page->mapping->flags);
71
			f2fs_stop_checkpoint(sbi);
72 73 74
		}
		end_page_writeback(page);
		dec_page_count(sbi, F2FS_WRITEBACK);
75
	}
76

77
	if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait))
78 79 80 81 82
		wake_up(&sbi->cp_wait);

	bio_put(bio);
}

83 84 85 86 87 88 89 90
/*
 * Low-level block read/write IO operations.
 */
static struct bio *__bio_alloc(struct f2fs_sb_info *sbi, block_t blk_addr,
				int npages, bool is_read)
{
	struct bio *bio;

J
Jaegeuk Kim 已提交
91
	bio = f2fs_bio_alloc(npages);
92 93

	bio->bi_bdev = sbi->sb->s_bdev;
C
Chao Yu 已提交
94
	bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(blk_addr);
95
	bio->bi_end_io = is_read ? f2fs_read_end_io : f2fs_write_end_io;
96
	bio->bi_private = is_read ? NULL : sbi;
97 98 99 100

	return bio;
}

J
Jaegeuk Kim 已提交
101
static void __submit_merged_bio(struct f2fs_bio_info *io)
102
{
J
Jaegeuk Kim 已提交
103
	struct f2fs_io_info *fio = &io->fio;
104 105 106 107

	if (!io->bio)
		return;

108
	if (is_read_io(fio->rw))
109
		trace_f2fs_submit_read_bio(io->sbi->sb, fio, io->bio);
110
	else
111
		trace_f2fs_submit_write_bio(io->sbi->sb, fio, io->bio);
112

113
	submit_bio(fio->rw, io->bio);
114 115 116
	io->bio = NULL;
}

117 118
static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
						struct page *page, nid_t ino)
C
Chao Yu 已提交
119 120 121 122 123
{
	struct bio_vec *bvec;
	struct page *target;
	int i;

124
	if (!io->bio)
C
Chao Yu 已提交
125
		return false;
126 127 128

	if (!inode && !page && !ino)
		return true;
C
Chao Yu 已提交
129 130 131

	bio_for_each_segment_all(bvec, io->bio, i) {

132
		if (bvec->bv_page->mapping)
C
Chao Yu 已提交
133
			target = bvec->bv_page;
134 135
		else
			target = fscrypt_control_page(bvec->bv_page);
C
Chao Yu 已提交
136

137 138 139 140 141
		if (inode && inode == target->mapping->host)
			return true;
		if (page && page == target)
			return true;
		if (ino && ino == ino_of_node(target))
C
Chao Yu 已提交
142 143 144 145 146 147
			return true;
	}

	return false;
}

148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164
static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
						struct page *page, nid_t ino,
						enum page_type type)
{
	enum page_type btype = PAGE_TYPE_OF_BIO(type);
	struct f2fs_bio_info *io = &sbi->write_io[btype];
	bool ret;

	down_read(&io->io_rwsem);
	ret = __has_merged_page(io, inode, page, ino);
	up_read(&io->io_rwsem);
	return ret;
}

/*
 * Submit the pending merged bio selected by @rw and @type, but only if it
 * matches the @inode/@page/@ino filter (see __has_merged_page()).
 * The check and the submission both happen under io_rwsem held for write.
 */
static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, int rw)
{
	struct f2fs_bio_info *io;

	if (is_read_io(rw))
		io = &sbi->read_io;
	else
		io = &sbi->write_io[PAGE_TYPE_OF_BIO(type)];

	down_write(&io->io_rwsem);

	if (__has_merged_page(io, inode, page, ino)) {
		/* change META to META_FLUSH in the checkpoint procedure */
		if (type >= META_FLUSH) {
			io->fio.type = META_FLUSH;
			io->fio.rw = (test_opt(sbi, NOBARRIER) ?
					WRITE_FLUSH : WRITE_FLUSH_FUA) |
					REQ_META | REQ_PRIO;
		}
		__submit_merged_bio(io);
	}

	up_write(&io->io_rwsem);
}

189 190 191 192 193 194 195 196 197 198 199 200 201 202
void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
									int rw)
{
	__f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
}

/*
 * Submit the pending merged bio only if it contains a page matching the
 * @inode/@page/@ino filter.  A read-locked pre-check against the write bio
 * of @type avoids taking the write lock when there is nothing to flush.
 */
void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
				struct inode *inode, struct page *page,
				nid_t ino, enum page_type type, int rw)
{
	if (!has_merged_page(sbi, inode, page, ino, type))
		return;

	__f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
}

203 204 205 206 207 208 209
void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
{
	f2fs_submit_merged_bio(sbi, DATA, WRITE);
	f2fs_submit_merged_bio(sbi, NODE, WRITE);
	f2fs_submit_merged_bio(sbi, META, WRITE);
}

210 211 212 213
/*
 * Fill the locked page with data located in the block address.
 * Return unlocked page.
 */
214
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
215 216
{
	struct bio *bio;
217 218
	struct page *page = fio->encrypted_page ?
			fio->encrypted_page : fio->page;
219

220
	trace_f2fs_submit_page_bio(page, fio);
221
	f2fs_trace_ios(fio, 0);
222 223

	/* Allocate a new bio */
224
	bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw));
225

226
	if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
227 228 229 230
		bio_put(bio);
		return -EFAULT;
	}

231
	submit_bio(fio->rw, bio);
232 233 234
	return 0;
}

235
void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
236
{
237
	struct f2fs_sb_info *sbi = fio->sbi;
J
Jaegeuk Kim 已提交
238
	enum page_type btype = PAGE_TYPE_OF_BIO(fio->type);
239
	struct f2fs_bio_info *io;
240
	bool is_read = is_read_io(fio->rw);
241
	struct page *bio_page;
242

243
	io = is_read ? &sbi->read_io : &sbi->write_io[btype];
244

245 246 247
	if (fio->old_blkaddr != NEW_ADDR)
		verify_block_addr(sbi, fio->old_blkaddr);
	verify_block_addr(sbi, fio->new_blkaddr);
248

249
	down_write(&io->io_rwsem);
250

251
	if (!is_read)
252 253
		inc_page_count(sbi, F2FS_WRITEBACK);

254
	if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
J
Jaegeuk Kim 已提交
255 256
						io->fio.rw != fio->rw))
		__submit_merged_bio(io);
257 258
alloc_new:
	if (io->bio == NULL) {
J
Jaegeuk Kim 已提交
259
		int bio_blocks = MAX_BIO_BLOCKS(sbi);
260

261 262
		io->bio = __bio_alloc(sbi, fio->new_blkaddr,
						bio_blocks, is_read);
J
Jaegeuk Kim 已提交
263
		io->fio = *fio;
264 265
	}

266 267
	bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;

268 269
	if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
							PAGE_SIZE) {
J
Jaegeuk Kim 已提交
270
		__submit_merged_bio(io);
271 272 273
		goto alloc_new;
	}

274
	io->last_block_in_bio = fio->new_blkaddr;
275
	f2fs_trace_ios(fio, 0);
276

277
	up_write(&io->io_rwsem);
278
	trace_f2fs_submit_page_mbio(fio->page, fio);
279 280
}

J
Jaegeuk Kim 已提交
281
/*
282 283 284 285 286
 * Lock ordering for the change of data block address:
 * ->data_page
 *  ->node_page
 *    update block addresses in the node page
 */
287
void set_data_blkaddr(struct dnode_of_data *dn)
288 289 290 291 292 293
{
	struct f2fs_node *rn;
	__le32 *addr_array;
	struct page *node_page = dn->node_page;
	unsigned int ofs_in_node = dn->ofs_in_node;

294
	f2fs_wait_on_page_writeback(node_page, NODE, true);
295

296
	rn = F2FS_NODE(node_page);
297 298 299

	/* Get physical address of data block */
	addr_array = blkaddr_in_node(rn);
J
Jaegeuk Kim 已提交
300
	addr_array[ofs_in_node] = cpu_to_le32(dn->data_blkaddr);
301 302
	if (set_page_dirty(node_page))
		dn->node_changed = true;
303 304
}

305 306 307 308 309 310 311
void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
{
	dn->data_blkaddr = blkaddr;
	set_data_blkaddr(dn);
	f2fs_update_extent_cache(dn);
}

312 313
int reserve_new_block(struct dnode_of_data *dn)
{
314
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
315

316
	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
317
		return -EPERM;
318
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
319 320
		return -ENOSPC;

321 322
	trace_f2fs_reserve_new_block(dn->inode, dn->nid, dn->ofs_in_node);

323
	dn->data_blkaddr = NEW_ADDR;
324
	set_data_blkaddr(dn);
325
	mark_inode_dirty(dn->inode);
326 327 328 329
	sync_inode_page(dn);
	return 0;
}

330 331 332 333 334 335 336 337
int f2fs_reserve_block(struct dnode_of_data *dn, pgoff_t index)
{
	bool need_put = dn->inode_page ? false : true;
	int err;

	err = get_dnode_of_data(dn, index, ALLOC_NODE);
	if (err)
		return err;
338

339 340
	if (dn->data_blkaddr == NULL_ADDR)
		err = reserve_new_block(dn);
341
	if (err || need_put)
342 343 344 345
		f2fs_put_dnode(dn);
	return err;
}

346
int f2fs_get_block(struct dnode_of_data *dn, pgoff_t index)
347
{
348
	struct extent_info ei;
349
	struct inode *inode = dn->inode;
350

351 352 353
	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn->data_blkaddr = ei.blk + index - ei.fofs;
		return 0;
354
	}
355

356
	return f2fs_reserve_block(dn, index);
357 358
}

359 360
struct page *get_read_data_page(struct inode *inode, pgoff_t index,
						int rw, bool for_write)
361 362 363 364
{
	struct address_space *mapping = inode->i_mapping;
	struct dnode_of_data dn;
	struct page *page;
C
Chao Yu 已提交
365
	struct extent_info ei;
366
	int err;
367
	struct f2fs_io_info fio = {
368
		.sbi = F2FS_I_SB(inode),
369
		.type = DATA,
370
		.rw = rw,
371
		.encrypted_page = NULL,
372
	};
373

374 375 376
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return read_mapping_page(mapping, index, NULL);

377
	page = f2fs_grab_cache_page(mapping, index, for_write);
378 379 380
	if (!page)
		return ERR_PTR(-ENOMEM);

C
Chao Yu 已提交
381 382 383 384 385
	if (f2fs_lookup_extent_cache(inode, index, &ei)) {
		dn.data_blkaddr = ei.blk + index - ei.fofs;
		goto got_it;
	}

386
	set_new_dnode(&dn, inode, NULL, NULL, 0);
387
	err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
388 389
	if (err)
		goto put_err;
390 391
	f2fs_put_dnode(&dn);

392
	if (unlikely(dn.data_blkaddr == NULL_ADDR)) {
393 394
		err = -ENOENT;
		goto put_err;
395
	}
C
Chao Yu 已提交
396
got_it:
397 398
	if (PageUptodate(page)) {
		unlock_page(page);
399
		return page;
400
	}
401

J
Jaegeuk Kim 已提交
402 403 404 405 406 407 408
	/*
	 * A new dentry page is allocated but not able to be written, since its
	 * new inode page couldn't be allocated due to -ENOSPC.
	 * In such the case, its blkaddr can be remained as NEW_ADDR.
	 * see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
	 */
	if (dn.data_blkaddr == NEW_ADDR) {
409
		zero_user_segment(page, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
410
		SetPageUptodate(page);
411
		unlock_page(page);
J
Jaegeuk Kim 已提交
412 413
		return page;
	}
414

415
	fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
416 417
	fio.page = page;
	err = f2fs_submit_page_bio(&fio);
418
	if (err)
419
		goto put_err;
420
	return page;
421 422 423 424

put_err:
	f2fs_put_page(page, 1);
	return ERR_PTR(err);
425 426 427 428 429 430 431 432 433 434 435 436
}

/*
 * Return an uptodate, unlocked page for @index of @inode.
 *
 * The page cache is tried first; on a miss (or a stale page) the page is
 * read synchronously.  Returns the page, the ERR_PTR() from
 * get_read_data_page(), or ERR_PTR(-EIO) if the page is still not
 * uptodate once it has been unlocked.
 */
struct page *find_data_page(struct inode *inode, pgoff_t index)
{
	struct page *pg;

	pg = find_get_page(inode->i_mapping, index);
	if (pg && PageUptodate(pg))
		return pg;
	f2fs_put_page(pg, 0);

	pg = get_read_data_page(inode, index, READ_SYNC, false);
	if (IS_ERR(pg) || PageUptodate(pg))
		return pg;

	/* read in flight: the page stays locked until it completes */
	wait_on_page_locked(pg);
	if (likely(PageUptodate(pg)))
		return pg;

	f2fs_put_page(pg, 0);
	return ERR_PTR(-EIO);
}

/*
 * If it tries to access a hole, return an error.
 * Because, the callers, functions in dir.c and GC, should be able to know
 * whether this page exists or not.
 */
457 458
struct page *get_lock_data_page(struct inode *inode, pgoff_t index,
							bool for_write)
459 460 461 462
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
repeat:
463
	page = get_read_data_page(inode, index, READ_SYNC, for_write);
464 465
	if (IS_ERR(page))
		return page;
466

467
	/* wait for read completion */
468
	lock_page(page);
469
	if (unlikely(!PageUptodate(page))) {
470 471
		f2fs_put_page(page, 1);
		return ERR_PTR(-EIO);
472
	}
473
	if (unlikely(page->mapping != mapping)) {
474 475
		f2fs_put_page(page, 1);
		goto repeat;
476 477 478 479
	}
	return page;
}

J
Jaegeuk Kim 已提交
480
/*
481 482
 * Caller ensures that this data page is never allocated.
 * A new zero-filled data page is allocated in the page cache.
483
 *
C
Chao Yu 已提交
484 485
 * Also, caller should grab and release a rwsem by calling f2fs_lock_op() and
 * f2fs_unlock_op().
486 487
 * Note that, ipage is set only by make_empty_dir, and if any error occur,
 * ipage should be released by this function.
488
 */
489
struct page *get_new_data_page(struct inode *inode,
490
		struct page *ipage, pgoff_t index, bool new_i_size)
491 492 493 494 495
{
	struct address_space *mapping = inode->i_mapping;
	struct page *page;
	struct dnode_of_data dn;
	int err;
496

497
	page = f2fs_grab_cache_page(mapping, index, true);
498 499 500 501 502 503
	if (!page) {
		/*
		 * before exiting, we should make sure ipage will be released
		 * if any error occur.
		 */
		f2fs_put_page(ipage, 1);
504
		return ERR_PTR(-ENOMEM);
505
	}
506

507
	set_new_dnode(&dn, inode, ipage, NULL, 0);
508
	err = f2fs_reserve_block(&dn, index);
509 510
	if (err) {
		f2fs_put_page(page, 1);
511
		return ERR_PTR(err);
512
	}
513 514
	if (!ipage)
		f2fs_put_dnode(&dn);
515 516

	if (PageUptodate(page))
517
		goto got_it;
518 519

	if (dn.data_blkaddr == NEW_ADDR) {
520
		zero_user_segment(page, 0, PAGE_SIZE);
521
		SetPageUptodate(page);
522
	} else {
523
		f2fs_put_page(page, 1);
524

525 526 527
		/* if ipage exists, blkaddr should be NEW_ADDR */
		f2fs_bug_on(F2FS_I_SB(inode), ipage);
		page = get_lock_data_page(inode, index, true);
528
		if (IS_ERR(page))
529
			return page;
530
	}
531
got_it:
C
Chao Yu 已提交
532
	if (new_i_size && i_size_read(inode) <
533 534
				((loff_t)(index + 1) << PAGE_SHIFT)) {
		i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
535 536
		/* Only the directory inode sets new_i_size */
		set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
537 538 539 540
	}
	return page;
}

541 542
static int __allocate_data_block(struct dnode_of_data *dn)
{
543
	struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
544 545
	struct f2fs_summary sum;
	struct node_info ni;
546
	int seg = CURSEG_WARM_DATA;
547
	pgoff_t fofs;
548 549 550

	if (unlikely(is_inode_flag_set(F2FS_I(dn->inode), FI_NO_ALLOC)))
		return -EPERM;
551 552 553 554 555

	dn->data_blkaddr = datablock_addr(dn->node_page, dn->ofs_in_node);
	if (dn->data_blkaddr == NEW_ADDR)
		goto alloc;

556 557 558
	if (unlikely(!inc_valid_block_count(sbi, dn->inode, 1)))
		return -ENOSPC;

559
alloc:
560 561 562
	get_node_info(sbi, dn->nid, &ni);
	set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);

563 564 565
	if (dn->ofs_in_node == 0 && dn->inode_page == dn->node_page)
		seg = CURSEG_DIRECT_IO;

566 567
	allocate_data_block(sbi, NULL, dn->data_blkaddr, &dn->data_blkaddr,
								&sum, seg);
568
	set_data_blkaddr(dn);
569

570
	/* update i_size */
571
	fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
572
							dn->ofs_in_node;
573
	if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
C
Chao Yu 已提交
574
		i_size_write(dn->inode,
575
				((loff_t)(fofs + 1) << PAGE_SHIFT));
576 577 578
	return 0;
}

579
ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
580
{
581
	struct inode *inode = file_inode(iocb->ki_filp);
C
Chao Yu 已提交
582
	struct f2fs_map_blocks map;
583
	ssize_t ret = 0;
584

585
	map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos);
586
	map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from));
587
	map.m_next_pgofs = NULL;
588

589 590 591 592 593 594 595 596 597 598
	if (f2fs_encrypted_inode(inode))
		return 0;

	if (iocb->ki_flags & IOCB_DIRECT) {
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
	}
	if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
599 600 601 602
		ret = f2fs_convert_inline_inode(inode);
		if (ret)
			return ret;
	}
603 604
	if (!f2fs_has_inline_data(inode))
		return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
605
	return ret;
606 607
}

J
Jaegeuk Kim 已提交
608
/*
J
Jaegeuk Kim 已提交
609 610
 * f2fs_map_blocks() now supported readahead/bmap/rw direct_IO with
 * f2fs_map_blocks structure.
C
Chao Yu 已提交
611 612 613 614 615
 * If original data blocks are allocated, then give them to blockdev.
 * Otherwise,
 *     a. preallocate requested block addresses
 *     b. do not use extent cache for better performance
 *     c. give the block addresses to blockdev
616
 */
C
Chao Yu 已提交
617
int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
C
Chao Yu 已提交
618
						int create, int flag)
619
{
J
Jaegeuk Kim 已提交
620
	unsigned int maxblocks = map->m_len;
621
	struct dnode_of_data dn;
622
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
623 624 625
	int mode = create ? ALLOC_NODE : LOOKUP_NODE_RA;
	pgoff_t pgofs, end_offset;
	int err = 0, ofs = 1;
626
	struct extent_info ei;
627
	bool allocated = false;
628
	block_t blkaddr;
629

J
Jaegeuk Kim 已提交
630 631 632 633 634
	map->m_len = 0;
	map->m_flags = 0;

	/* it only supports block size == page size */
	pgofs =	(pgoff_t)map->m_lblk;
635

636
	if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
J
Jaegeuk Kim 已提交
637 638 639
		map->m_pblk = ei.blk + pgofs - ei.fofs;
		map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
		map->m_flags = F2FS_MAP_MAPPED;
640
		goto out;
641
	}
642

C
Chao Yu 已提交
643
next_dnode:
644
	if (create)
645
		f2fs_lock_op(sbi);
646 647 648

	/* When reading holes, we need its node page */
	set_new_dnode(&dn, inode, NULL, NULL, 0);
649
	err = get_dnode_of_data(&dn, pgofs, mode);
650
	if (err) {
651
		if (err == -ENOENT) {
652
			err = 0;
653 654 655 656
			if (map->m_next_pgofs)
				*map->m_next_pgofs =
					get_next_page_offset(&dn, pgofs);
		}
657
		goto unlock_out;
658
	}
C
Chao Yu 已提交
659

660
	end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
C
Chao Yu 已提交
661 662 663 664 665

next_block:
	blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);

	if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
C
Chao Yu 已提交
666
		if (create) {
667 668
			if (unlikely(f2fs_cp_error(sbi))) {
				err = -EIO;
C
Chao Yu 已提交
669
				goto sync_out;
670
			}
671 672 673 674 675 676
			if (flag == F2FS_GET_BLOCK_PRE_AIO) {
				if (blkaddr == NULL_ADDR)
					err = reserve_new_block(&dn);
			} else {
				err = __allocate_data_block(&dn);
			}
C
Chao Yu 已提交
677
			if (err)
C
Chao Yu 已提交
678
				goto sync_out;
C
Chao Yu 已提交
679 680
			allocated = true;
			map->m_flags = F2FS_MAP_NEW;
C
Chao Yu 已提交
681
			blkaddr = dn.data_blkaddr;
C
Chao Yu 已提交
682
		} else {
683 684 685 686 687
			if (flag == F2FS_GET_BLOCK_FIEMAP &&
						blkaddr == NULL_ADDR) {
				if (map->m_next_pgofs)
					*map->m_next_pgofs = pgofs + 1;
			}
C
Chao Yu 已提交
688
			if (flag != F2FS_GET_BLOCK_FIEMAP ||
C
Chao Yu 已提交
689
						blkaddr != NEW_ADDR) {
C
Chao Yu 已提交
690 691
				if (flag == F2FS_GET_BLOCK_BMAP)
					err = -ENOENT;
C
Chao Yu 已提交
692
				goto sync_out;
C
Chao Yu 已提交
693
			}
C
Chao Yu 已提交
694 695
		}
	}
696

C
Chao Yu 已提交
697 698 699 700 701 702 703 704 705 706
	if (map->m_len == 0) {
		/* preallocated unwritten block should be mapped for fiemap. */
		if (blkaddr == NEW_ADDR)
			map->m_flags |= F2FS_MAP_UNWRITTEN;
		map->m_flags |= F2FS_MAP_MAPPED;

		map->m_pblk = blkaddr;
		map->m_len = 1;
	} else if ((map->m_pblk != NEW_ADDR &&
			blkaddr == (map->m_pblk + ofs)) ||
707
			(map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
708 709
			flag == F2FS_GET_BLOCK_PRE_DIO ||
			flag == F2FS_GET_BLOCK_PRE_AIO) {
C
Chao Yu 已提交
710 711 712 713 714
		ofs++;
		map->m_len++;
	} else {
		goto sync_out;
	}
715 716 717 718

	dn.ofs_in_node++;
	pgofs++;

C
Chao Yu 已提交
719 720 721
	if (map->m_len < maxblocks) {
		if (dn.ofs_in_node < end_offset)
			goto next_block;
722

723 724 725 726
		if (allocated)
			sync_inode_page(&dn);
		f2fs_put_dnode(&dn);

727 728
		if (create) {
			f2fs_unlock_op(sbi);
729
			f2fs_balance_fs(sbi, allocated);
730
		}
731
		allocated = false;
C
Chao Yu 已提交
732
		goto next_dnode;
733
	}
734

735 736 737
sync_out:
	if (allocated)
		sync_inode_page(&dn);
738
	f2fs_put_dnode(&dn);
739
unlock_out:
740
	if (create) {
741
		f2fs_unlock_op(sbi);
742
		f2fs_balance_fs(sbi, allocated);
743
	}
744
out:
J
Jaegeuk Kim 已提交
745
	trace_f2fs_map_blocks(inode, map, err);
746
	return err;
747 748
}

J
Jaegeuk Kim 已提交
749
static int __get_data_block(struct inode *inode, sector_t iblock,
750 751
			struct buffer_head *bh, int create, int flag,
			pgoff_t *next_pgofs)
J
Jaegeuk Kim 已提交
752 753 754 755 756 757
{
	struct f2fs_map_blocks map;
	int ret;

	map.m_lblk = iblock;
	map.m_len = bh->b_size >> inode->i_blkbits;
758
	map.m_next_pgofs = next_pgofs;
J
Jaegeuk Kim 已提交
759

C
Chao Yu 已提交
760
	ret = f2fs_map_blocks(inode, &map, create, flag);
J
Jaegeuk Kim 已提交
761 762 763 764 765 766 767 768
	if (!ret) {
		map_bh(bh, inode->i_sb, map.m_pblk);
		bh->b_state = (bh->b_state & ~F2FS_MAP_FLAGS) | map.m_flags;
		bh->b_size = map.m_len << inode->i_blkbits;
	}
	return ret;
}

769
static int get_data_block(struct inode *inode, sector_t iblock,
770 771
			struct buffer_head *bh_result, int create, int flag,
			pgoff_t *next_pgofs)
C
Chao Yu 已提交
772
{
773 774
	return __get_data_block(inode, iblock, bh_result, create,
							flag, next_pgofs);
C
Chao Yu 已提交
775 776 777
}

/*
 * get_block-style callback that maps with F2FS_GET_BLOCK_DIO and no
 * next-offset reporting.
 */
static int get_data_block_dio(struct inode *inode, sector_t iblock,
			struct buffer_head *bh_result, int create)
{
	pgoff_t *no_next_pgofs = NULL;

	return __get_data_block(inode, iblock, bh_result, create,
					F2FS_GET_BLOCK_DIO, no_next_pgofs);
}

C
Chao Yu 已提交
784
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
785 786
			struct buffer_head *bh_result, int create)
{
787
	/* Block number less than F2FS MAX BLOCKS */
C
Chao Yu 已提交
788
	if (unlikely(iblock >= F2FS_I_SB(inode)->max_file_blocks))
789 790
		return -EFBIG;

C
Chao Yu 已提交
791
	return __get_data_block(inode, iblock, bh_result, create,
792
						F2FS_GET_BLOCK_BMAP, NULL);
793 794
}

795 796 797 798 799 800 801 802 803 804
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
{
	return (offset >> inode->i_blkbits);
}

/*
 * Convert a block index into a byte offset for @inode's block size.
 *
 * The value is widened to loff_t before shifting: sector_t may be only
 * 32 bits wide on some configurations, and shifting in that width would
 * truncate offsets of 4GiB and above.
 */
static inline loff_t blk_to_logical(struct inode *inode, sector_t blk)
{
	return ((loff_t)blk << inode->i_blkbits);
}

J
Jaegeuk Kim 已提交
805 806 807
int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
		u64 start, u64 len)
{
808 809
	struct buffer_head map_bh;
	sector_t start_blk, last_blk;
810
	pgoff_t next_pgofs;
811
	loff_t isize;
812 813 814 815 816 817 818 819
	u64 logical = 0, phys = 0, size = 0;
	u32 flags = 0;
	int ret = 0;

	ret = fiemap_check_flags(fieinfo, FIEMAP_FLAG_SYNC);
	if (ret)
		return ret;

J
Jaegeuk Kim 已提交
820 821 822 823 824 825
	if (f2fs_has_inline_data(inode)) {
		ret = f2fs_inline_data_fiemap(inode, fieinfo, start, len);
		if (ret != -EAGAIN)
			return ret;
	}

A
Al Viro 已提交
826
	inode_lock(inode);
827 828

	isize = i_size_read(inode);
829 830
	if (start >= isize)
		goto out;
831

832 833
	if (start + len > isize)
		len = isize - start;
834 835 836 837 838 839

	if (logical_to_blk(inode, len) == 0)
		len = blk_to_logical(inode, 1);

	start_blk = logical_to_blk(inode, start);
	last_blk = logical_to_blk(inode, start + len - 1);
840

841 842 843 844
next:
	memset(&map_bh, 0, sizeof(struct buffer_head));
	map_bh.b_size = len;

C
Chao Yu 已提交
845
	ret = get_data_block(inode, start_blk, &map_bh, 0,
846
					F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
847 848 849 850 851
	if (ret)
		goto out;

	/* HOLE */
	if (!buffer_mapped(&map_bh)) {
852
		start_blk = next_pgofs;
853
		/* Go through holes util pass the EOF */
854
		if (blk_to_logical(inode, start_blk) < isize)
855 856 857 858 859 860 861
			goto prep_next;
		/* Found a hole beyond isize means no more extents.
		 * Note that the premise is that filesystems don't
		 * punch holes beyond isize and keep size unchanged.
		 */
		flags |= FIEMAP_EXTENT_LAST;
	}
862

863 864 865 866
	if (size) {
		if (f2fs_encrypted_inode(inode))
			flags |= FIEMAP_EXTENT_DATA_ENCRYPTED;

867 868
		ret = fiemap_fill_next_extent(fieinfo, logical,
				phys, size, flags);
869
	}
870

871 872
	if (start_blk > last_blk || ret)
		goto out;
873

874 875 876 877 878 879
	logical = blk_to_logical(inode, start_blk);
	phys = blk_to_logical(inode, map_bh.b_blocknr);
	size = map_bh.b_size;
	flags = 0;
	if (buffer_unwritten(&map_bh))
		flags = FIEMAP_EXTENT_UNWRITTEN;
880

881
	start_blk += logical_to_blk(inode, size);
882

883
prep_next:
884 885 886 887 888 889 890 891 892
	cond_resched();
	if (fatal_signal_pending(current))
		ret = -EINTR;
	else
		goto next;
out:
	if (ret == 1)
		ret = 0;

A
Al Viro 已提交
893
	inode_unlock(inode);
894
	return ret;
J
Jaegeuk Kim 已提交
895 896
}

J
Jaegeuk Kim 已提交
897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921
/*
 * This function was originally taken from fs/mpage.c, and customized for f2fs.
 * Major change was from block_size == page_size in f2fs by default.
 */
static int f2fs_mpage_readpages(struct address_space *mapping,
			struct list_head *pages, struct page *page,
			unsigned nr_pages)
{
	struct bio *bio = NULL;
	unsigned page_idx;
	sector_t last_block_in_bio = 0;
	struct inode *inode = mapping->host;
	const unsigned blkbits = inode->i_blkbits;
	const unsigned blocksize = 1 << blkbits;
	sector_t block_in_file;
	sector_t last_block;
	sector_t last_block_in_file;
	sector_t block_nr;
	struct block_device *bdev = inode->i_sb->s_bdev;
	struct f2fs_map_blocks map;

	map.m_pblk = 0;
	map.m_lblk = 0;
	map.m_len = 0;
	map.m_flags = 0;
922
	map.m_next_pgofs = NULL;
J
Jaegeuk Kim 已提交
923 924 925 926 927 928 929 930 931 932 933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959

	for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {

		prefetchw(&page->flags);
		if (pages) {
			page = list_entry(pages->prev, struct page, lru);
			list_del(&page->lru);
			if (add_to_page_cache_lru(page, mapping,
						  page->index, GFP_KERNEL))
				goto next_page;
		}

		block_in_file = (sector_t)page->index;
		last_block = block_in_file + nr_pages;
		last_block_in_file = (i_size_read(inode) + blocksize - 1) >>
								blkbits;
		if (last_block > last_block_in_file)
			last_block = last_block_in_file;

		/*
		 * Map blocks using the previous result first.
		 */
		if ((map.m_flags & F2FS_MAP_MAPPED) &&
				block_in_file > map.m_lblk &&
				block_in_file < (map.m_lblk + map.m_len))
			goto got_it;

		/*
		 * Then do more f2fs_map_blocks() calls until we are
		 * done with this page.
		 */
		map.m_flags = 0;

		if (block_in_file < last_block) {
			map.m_lblk = block_in_file;
			map.m_len = last_block - block_in_file;

960
			if (f2fs_map_blocks(inode, &map, 0,
961
						F2FS_GET_BLOCK_READ))
J
Jaegeuk Kim 已提交
962 963 964 965 966 967 968 969 970 971 972 973
				goto set_error_page;
		}
got_it:
		if ((map.m_flags & F2FS_MAP_MAPPED)) {
			block_nr = map.m_pblk + block_in_file - map.m_lblk;
			SetPageMappedToDisk(page);

			if (!PageUptodate(page) && !cleancache_get_page(page)) {
				SetPageUptodate(page);
				goto confused;
			}
		} else {
974
			zero_user_segment(page, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
975 976 977 978 979 980 981 982 983 984 985 986 987 988 989
			SetPageUptodate(page);
			unlock_page(page);
			goto next_page;
		}

		/*
		 * This page will go to BIO.  Do we need to send this
		 * BIO off first?
		 */
		if (bio && (last_block_in_bio != block_nr - 1)) {
submit_and_realloc:
			submit_bio(READ, bio);
			bio = NULL;
		}
		if (bio == NULL) {
990
			struct fscrypt_ctx *ctx = NULL;
991 992 993 994

			if (f2fs_encrypted_inode(inode) &&
					S_ISREG(inode->i_mode)) {

995
				ctx = fscrypt_get_ctx(inode, GFP_NOFS);
996 997 998 999
				if (IS_ERR(ctx))
					goto set_error_page;

				/* wait the page to be moved by cleaning */
1000 1001
				f2fs_wait_on_encrypted_page_writeback(
						F2FS_I_SB(inode), block_nr);
1002 1003
			}

J
Jaegeuk Kim 已提交
1004
			bio = bio_alloc(GFP_KERNEL,
1005
				min_t(int, nr_pages, BIO_MAX_PAGES));
1006 1007
			if (!bio) {
				if (ctx)
1008
					fscrypt_release_ctx(ctx);
J
Jaegeuk Kim 已提交
1009
				goto set_error_page;
1010
			}
J
Jaegeuk Kim 已提交
1011 1012
			bio->bi_bdev = bdev;
			bio->bi_iter.bi_sector = SECTOR_FROM_BLOCK(block_nr);
1013
			bio->bi_end_io = f2fs_read_end_io;
1014
			bio->bi_private = ctx;
J
Jaegeuk Kim 已提交
1015 1016 1017 1018 1019 1020 1021 1022 1023
		}

		if (bio_add_page(bio, page, blocksize, 0) < blocksize)
			goto submit_and_realloc;

		last_block_in_bio = block_nr;
		goto next_page;
set_error_page:
		SetPageError(page);
1024
		zero_user_segment(page, 0, PAGE_SIZE);
J
Jaegeuk Kim 已提交
1025 1026 1027 1028 1029 1030 1031 1032 1033 1034
		unlock_page(page);
		goto next_page;
confused:
		if (bio) {
			submit_bio(READ, bio);
			bio = NULL;
		}
		unlock_page(page);
next_page:
		if (pages)
1035
			put_page(page);
J
Jaegeuk Kim 已提交
1036 1037 1038 1039 1040 1041 1042
	}
	BUG_ON(pages && !list_empty(pages));
	if (bio)
		submit_bio(READ, bio);
	return 0;
}

1043 1044
static int f2fs_read_data_page(struct file *file, struct page *page)
{
H
Huajun Li 已提交
1045
	struct inode *inode = page->mapping->host;
1046
	int ret = -EAGAIN;
H
Huajun Li 已提交
1047

1048 1049
	trace_f2fs_readpage(page, DATA);

A
arter97 已提交
1050
	/* If the file has inline data, try to read it directly */
H
Huajun Li 已提交
1051 1052
	if (f2fs_has_inline_data(inode))
		ret = f2fs_read_inline_data(inode, page);
1053
	if (ret == -EAGAIN)
J
Jaegeuk Kim 已提交
1054
		ret = f2fs_mpage_readpages(page->mapping, NULL, page, 1);
H
Huajun Li 已提交
1055
	return ret;
1056 1057 1058 1059 1060 1061
}

static int f2fs_read_data_pages(struct file *file,
			struct address_space *mapping,
			struct list_head *pages, unsigned nr_pages)
{
H
Huajun Li 已提交
1062
	struct inode *inode = file->f_mapping->host;
1063 1064 1065
	struct page *page = list_entry(pages->prev, struct page, lru);

	trace_f2fs_readpages(inode, page, nr_pages);
H
Huajun Li 已提交
1066 1067 1068 1069 1070

	/* If the file has inline data, skip readpages */
	if (f2fs_has_inline_data(inode))
		return 0;

J
Jaegeuk Kim 已提交
1071
	return f2fs_mpage_readpages(mapping, pages, NULL, nr_pages);
1072 1073
}

1074
int do_write_data_page(struct f2fs_io_info *fio)
1075
{
1076
	struct page *page = fio->page;
1077 1078 1079 1080 1081
	struct inode *inode = page->mapping->host;
	struct dnode_of_data dn;
	int err = 0;

	set_new_dnode(&dn, inode, NULL, NULL, 0);
1082
	err = get_dnode_of_data(&dn, page->index, LOOKUP_NODE);
1083 1084 1085
	if (err)
		return err;

1086
	fio->old_blkaddr = dn.data_blkaddr;
1087 1088

	/* This page is already truncated */
1089
	if (fio->old_blkaddr == NULL_ADDR) {
1090
		ClearPageUptodate(page);
1091
		goto out_writepage;
1092
	}
1093

1094
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
1095
		gfp_t gfp_flags = GFP_NOFS;
1096 1097 1098

		/* wait for GCed encrypted page writeback */
		f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
1099
							fio->old_blkaddr);
1100 1101 1102
retry_encrypt:
		fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
								gfp_flags);
1103 1104
		if (IS_ERR(fio->encrypted_page)) {
			err = PTR_ERR(fio->encrypted_page);
1105 1106 1107 1108 1109 1110 1111 1112
			if (err == -ENOMEM) {
				/* flush pending ios and wait for a while */
				f2fs_flush_merged_bios(F2FS_I_SB(inode));
				congestion_wait(BLK_RW_ASYNC, HZ/50);
				gfp_flags |= __GFP_NOFAIL;
				err = 0;
				goto retry_encrypt;
			}
1113 1114 1115 1116
			goto out_writepage;
		}
	}

1117 1118 1119 1120 1121 1122
	set_page_writeback(page);

	/*
	 * If current allocation needs SSR,
	 * it had better in-place writes for updated data.
	 */
1123
	if (unlikely(fio->old_blkaddr != NEW_ADDR &&
1124
			!is_cold_data(page) &&
1125
			!IS_ATOMIC_WRITTEN_PAGE(page) &&
1126
			need_inplace_update(inode))) {
1127
		rewrite_data_page(fio);
1128
		set_inode_flag(F2FS_I(inode), FI_UPDATE_WRITE);
1129
		trace_f2fs_do_write_data_page(page, IPU);
1130
	} else {
1131
		write_data_page(&dn, fio);
1132
		trace_f2fs_do_write_data_page(page, OPU);
1133
		set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
1134 1135
		if (page->index == 0)
			set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
1136 1137 1138 1139 1140 1141 1142 1143 1144 1145
	}
out_writepage:
	f2fs_put_dnode(&dn);
	return err;
}

/*
 * ->writepage handler for data pages.
 *
 * Pages entirely below i_size are written as they are.  The page that
 * straddles i_size has its tail zeroed first.  Pages at or beyond the end of
 * the file are not written at all; they only go through the common "out"
 * accounting below.
 *
 * Returns 0 once the page has been handled (written, or deliberately
 * skipped) and unlocked.  Returns AOP_WRITEPAGE_ACTIVATE after redirtying
 * the page when it cannot be written right now; on that path the page is
 * not unlocked here.
 */
static int f2fs_write_data_page(struct page *page,
					struct writeback_control *wbc)
{
	struct inode *inode = page->mapping->host;
1146
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1147 1148
	loff_t i_size = i_size_read(inode);
	/* index of the page that contains the byte at i_size */
	const pgoff_t end_index = ((unsigned long long) i_size)
1149
							>> PAGE_SHIFT;
H
Huajun Li 已提交
1150
	unsigned offset = 0;
1151
	bool need_balance_fs = false;
1152
	int err = 0;
J
Jaegeuk Kim 已提交
1153
	struct f2fs_io_info fio = {
1154
		.sbi = sbi,
J
Jaegeuk Kim 已提交
1155
		.type = DATA,
C
Chris Fries 已提交
1156
		.rw = (wbc->sync_mode == WB_SYNC_ALL) ? WRITE_SYNC : WRITE,
1157
		.page = page,
1158
		.encrypted_page = NULL,
J
Jaegeuk Kim 已提交
1159
	};
1160

1161 1162
	trace_f2fs_writepage(page, DATA);

1163
	if (page->index < end_index)
1164
		goto write;
1165 1166 1167 1168 1169

	/*
	 * If the offset is out-of-range of file size,
	 * this page does not have to be written to disk.
	 */
1170
	offset = i_size & (PAGE_SIZE - 1);
1171
	if ((page->index >= end_index + 1) || !offset)
1172
		goto out;
1173

1174
	/* page straddles i_size: clear everything from i_size to page end */
	zero_user_segment(page, offset, PAGE_SIZE);
1175
write:
1176
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
1177
		goto redirty_out;
1178 1179 1180 1181 1182
	if (f2fs_is_drop_cache(inode))
		goto out;
	if (f2fs_is_volatile_file(inode) && !wbc->for_reclaim &&
			available_free_memory(sbi, BASE_CHECK))
		goto redirty_out;
1183

1184
	/* Dentry blocks are controlled by checkpoint */
1185
	if (S_ISDIR(inode->i_mode)) {
1186 1187
		if (unlikely(f2fs_cp_error(sbi)))
			goto redirty_out;
1188
		/* directory pages are written without taking f2fs_lock_op() */
		err = do_write_data_page(&fio);
1189 1190
		goto done;
	}
H
Huajun Li 已提交
1191

1192 1193 1194
	/* we should bypass data pages to let the kworker jobs proceed */
	if (unlikely(f2fs_cp_error(sbi))) {
		SetPageError(page);
1195
		goto out;
1196 1197
	}

1198
	if (!wbc->for_reclaim)
1199
		need_balance_fs = true;
1200
	else if (has_not_enough_free_secs(sbi, 0))
1201
		goto redirty_out;
1202

1203
	/* -EAGAIN means "not handled as inline data, do a normal write" */
	err = -EAGAIN;
1204
	f2fs_lock_op(sbi);
1205 1206 1207
	if (f2fs_has_inline_data(inode))
		err = f2fs_write_inline_data(inode, page);
	if (err == -EAGAIN)
1208
		err = do_write_data_page(&fio);
1209 1210 1211 1212
	f2fs_unlock_op(sbi);
done:
	/* -ENOENT is tolerated here; any other error redirties the page */
	if (err && err != -ENOENT)
		goto redirty_out;
1213 1214

	clear_cold_data(page);
1215
out:
1216
	inode_dec_dirty_pages(inode);
1217 1218
	if (err)
		ClearPageUptodate(page);
1219 1220 1221 1222 1223 1224

	if (wbc->for_reclaim) {
		f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
		remove_dirty_inode(inode);
	}

1225
	unlock_page(page);
J
Jaegeuk Kim 已提交
1226
	f2fs_balance_fs(sbi, need_balance_fs);
1227 1228

	if (unlikely(f2fs_cp_error(sbi)))
1229
		f2fs_submit_merged_bio(sbi, DATA, WRITE);
1230

1231 1232 1233
	return 0;

redirty_out:
1234
	redirty_page_for_writepage(wbc, page);
1235
	return AOP_WRITEPAGE_ACTIVATE;
1236 1237
}

1238 1239 1240 1241 1242 1243 1244 1245 1246
/*
 * writepage_t adapter: @data carries the address space whose ->writepage is
 * invoked for @page.  The result is recorded on the mapping through
 * mapping_set_error() and handed back unchanged.
 */
static int __f2fs_writepage(struct page *page, struct writeback_control *wbc,
			void *data)
{
	struct address_space *as = data;
	int rc;

	rc = as->a_ops->writepage(page, wbc);
	mapping_set_error(as, rc);

	return rc;
}

C
Chao Yu 已提交
1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279
/*
 * This function was copied from write_cache_pages from mm/page-writeback.c.
 * The major change is making write step of cold data page separately from
 * warm/hot data page.
 *
 * The page range is walked twice, selected by 'step': a page is skipped in
 * a pass when is_cold_data(page) equals the current step value.
 * NOTE(review): assuming is_cold_data() yields 0 or 1, step 0 writes the
 * cold pages and step 1 writes the remaining ones - confirm.
 *
 * Returns 0, or the first result of @writepage that is neither 0 nor
 * AOP_WRITEPAGE_ACTIVATE.
 */
static int f2fs_write_cache_pages(struct address_space *mapping,
			struct writeback_control *wbc, writepage_t writepage,
			void *data)
{
	int ret = 0;
	int done = 0;
	struct pagevec pvec;
	int nr_pages;
	pgoff_t uninitialized_var(writeback_index);
	pgoff_t index;
	pgoff_t end;		/* Inclusive */
	pgoff_t done_index;
	int cycled;
	int range_whole = 0;
	int tag;
	int step = 0;

	pagevec_init(&pvec, 0);
next:
	/* (re)compute the scan range; runs once per step */
	if (wbc->range_cyclic) {
		writeback_index = mapping->writeback_index; /* prev offset */
		index = writeback_index;
		if (index == 0)
			cycled = 1;
		else
			cycled = 0;
		end = -1;
	} else {
1280 1281
		index = wbc->range_start >> PAGE_SHIFT;
		end = wbc->range_end >> PAGE_SHIFT;
C
Chao Yu 已提交
1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 1311 1312 1313 1314 1315 1316 1317 1318 1319 1320 1321 1322 1323 1324
		if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
			range_whole = 1;
		cycled = 1; /* ignore range_cyclic tests */
	}
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag = PAGECACHE_TAG_TOWRITE;
	else
		tag = PAGECACHE_TAG_DIRTY;
retry:
	if (wbc->sync_mode == WB_SYNC_ALL || wbc->tagged_writepages)
		tag_pages_for_writeback(mapping, index, end);
	done_index = index;
	while (!done && (index <= end)) {
		int i;

		nr_pages = pagevec_lookup_tag(&pvec, mapping, &index, tag,
			      min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1);
		if (nr_pages == 0)
			break;

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];

			if (page->index > end) {
				done = 1;
				break;
			}

			done_index = page->index;

			lock_page(page);

			/* page no longer belongs to this mapping: skip it */
			if (unlikely(page->mapping != mapping)) {
continue_unlock:
				unlock_page(page);
				continue;
			}

			if (!PageDirty(page)) {
				/* someone wrote it for us */
				goto continue_unlock;
			}

1325
			/* this page is left for the other step */
			if (step == is_cold_data(page))
C
Chao Yu 已提交
1326 1327 1328 1329
				goto continue_unlock;

			if (PageWriteback(page)) {
				if (wbc->sync_mode != WB_SYNC_NONE)
1330 1331
					f2fs_wait_on_page_writeback(page,
								DATA, true);
C
Chao Yu 已提交
1332 1333 1334 1335 1336 1337 1338 1339 1340 1341 1342 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 1354 1355 1356 1357 1358 1359 1360 1361 1362 1363 1364 1365 1366 1367 1368 1369 1370 1371 1372 1373 1374 1375 1376 1377 1378
				else
					goto continue_unlock;
			}

			BUG_ON(PageWriteback(page));
			if (!clear_page_dirty_for_io(page))
				goto continue_unlock;

			ret = (*writepage)(page, wbc, data);
			if (unlikely(ret)) {
				if (ret == AOP_WRITEPAGE_ACTIVATE) {
					/*
					 * the page was not written and is
					 * unlocked here; not treated as an
					 * error
					 */
					unlock_page(page);
					ret = 0;
				} else {
					/* real error: stop and remember where */
					done_index = page->index + 1;
					done = 1;
					break;
				}
			}

			if (--wbc->nr_to_write <= 0 &&
			    wbc->sync_mode == WB_SYNC_NONE) {
				done = 1;
				break;
			}
		}
		pagevec_release(&pvec);
		cond_resched();
	}

	/* first pass finished: run the second one over the same range */
	if (step < 1) {
		step++;
		goto next;
	}

	if (!cycled && !done) {
		/* wrap around: scan [0, writeback_index - 1] as well */
		cycled = 1;
		index = 0;
		end = writeback_index - 1;
		goto retry;
	}
	if (wbc->range_cyclic || (range_whole && wbc->nr_to_write > 0))
		mapping->writeback_index = done_index;

	return ret;
}

1379
static int f2fs_write_data_pages(struct address_space *mapping,
1380 1381 1382
			    struct writeback_control *wbc)
{
	struct inode *inode = mapping->host;
1383
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1384
	bool locked = false;
1385
	int ret;
1386
	long diff;
1387

P
P J P 已提交
1388 1389 1390 1391
	/* deal with chardevs and other special file */
	if (!mapping->a_ops->writepage)
		return 0;

1392 1393 1394 1395
	/* skip writing if there is no dirty page in this inode */
	if (!get_dirty_pages(inode) && wbc->sync_mode == WB_SYNC_NONE)
		return 0;

1396 1397 1398 1399 1400
	if (S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_NONE &&
			get_dirty_pages(inode) < nr_pages_to_skip(sbi, DATA) &&
			available_free_memory(sbi, DIRTY_DENTS))
		goto skip_write;

C
Chao Yu 已提交
1401 1402 1403 1404
	/* skip writing during file defragment */
	if (is_inode_flag_set(F2FS_I(inode), FI_DO_DEFRAG))
		goto skip_write;

1405 1406 1407 1408
	/* during POR, we don't need to trigger writepage at all. */
	if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
		goto skip_write;

Y
Yunlei He 已提交
1409 1410
	trace_f2fs_writepages(mapping->host, wbc, DATA);

1411
	diff = nr_pages_to_write(sbi, DATA, wbc);
1412

1413
	if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) {
1414 1415 1416
		mutex_lock(&sbi->writepages);
		locked = true;
	}
C
Chao Yu 已提交
1417
	ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
1418
	f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
1419 1420
	if (locked)
		mutex_unlock(&sbi->writepages);
J
Jaegeuk Kim 已提交
1421

1422
	remove_dirty_inode(inode);
1423

1424
	wbc->nr_to_write = max((long)0, wbc->nr_to_write - diff);
1425
	return ret;
1426 1427

skip_write:
1428
	wbc->pages_skipped += get_dirty_pages(inode);
Y
Yunlei He 已提交
1429
	trace_f2fs_writepages(mapping->host, wbc, DATA);
1430
	return 0;
1431 1432
}

1433 1434 1435
/*
 * Undo the effects of a failed write that was meant to end at offset @to:
 * if @to lies beyond the current i_size, the page cache and the blocks are
 * truncated back to i_size.
 */
static void f2fs_write_failed(struct address_space *mapping, loff_t to)
{
	struct inode *inode = mapping->host;
	loff_t isize = i_size_read(inode);

	/* The write did not reach past i_size: nothing to trim. */
	if (to <= isize)
		return;

	truncate_pagecache(inode, isize);
	truncate_blocks(inode, isize, true);
}

1444 1445 1446 1447 1448 1449 1450 1451
/*
 * Helper of f2fs_write_begin(): find out which block backs @page before the
 * write and report it through @blk_addr.
 *
 * @sbi:          filesystem the page belongs to
 * @page:         page cache page about to be written
 * @pos, @len:    byte range of the write
 * @blk_addr:     out: dn.data_blkaddr found for the page
 * @node_changed: out: dn.node_changed after the lookup
 *
 * Returns 0 or a negative errno.  The out parameters are left untouched on
 * the early "return 0" and when get_node_page() or
 * f2fs_convert_inline_page() fail.
 */
static int prepare_write_begin(struct f2fs_sb_info *sbi,
			struct page *page, loff_t pos, unsigned len,
			block_t *blk_addr, bool *node_changed)
{
	struct inode *inode = page->mapping->host;
	pgoff_t index = page->index;
	struct dnode_of_data dn;
	struct page *ipage;
1452 1453
	bool locked = false;
	struct extent_info ei;
1454 1455
	int err = 0;

1456 1457 1458 1459 1460
	/*
	 * we already allocated all the blocks, so we don't need to get
	 * the block addresses when there is no need to fill the page.
	 */
	if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
1461
					len == PAGE_SIZE)
1462 1463
		return 0;

1464
	/*
	 * Take f2fs_lock_op() up front for inline-data inodes and for pages
	 * that start at or beyond i_size.
	 */
	if (f2fs_has_inline_data(inode) ||
1465
			(pos & PAGE_MASK) >= i_size_read(inode)) {
1466 1467 1468 1469
		f2fs_lock_op(sbi);
		locked = true;
	}
restart:
1470 1471 1472 1473 1474 1475 1476 1477 1478 1479 1480 1481 1482
	/* check inline_data */
	ipage = get_node_page(sbi, inode->i_ino);
	if (IS_ERR(ipage)) {
		err = PTR_ERR(ipage);
		goto unlock_out;
	}

	set_new_dnode(&dn, inode, ipage, ipage, 0);

	if (f2fs_has_inline_data(inode)) {
		if (pos + len <= MAX_INLINE_DATA) {
			/* the write still fits inline: fill the page from it */
			read_inline_data(page, ipage);
			set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
1483
			set_inline_node(ipage);
1484 1485 1486
		} else {
			/* the write no longer fits inline: convert first */
			err = f2fs_convert_inline_page(&dn, page);
			if (err)
1487 1488 1489 1490 1491 1492 1493 1494 1495 1496 1497 1498
				goto out;
			if (dn.data_blkaddr == NULL_ADDR)
				err = f2fs_get_block(&dn, index);
		}
	} else if (locked) {
		err = f2fs_get_block(&dn, index);
	} else {
		if (f2fs_lookup_extent_cache(inode, index, &ei)) {
			/* block address computed from the cached extent */
			dn.data_blkaddr = ei.blk + index - ei.fofs;
		} else {
			/* hole case */
			err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
1499
			if (err || dn.data_blkaddr == NULL_ADDR) {
1500 1501 1502 1503 1504
				/*
				 * No block found without the lock: drop the
				 * dnode, take f2fs_lock_op() and start over so
				 * the "locked" branch above runs instead.
				 */
				f2fs_put_dnode(&dn);
				f2fs_lock_op(sbi);
				locked = true;
				goto restart;
			}
1505 1506
		}
	}
1507

1508 1509 1510
	/* convert_inline_page can make node_changed */
	*blk_addr = dn.data_blkaddr;
	*node_changed = dn.node_changed;
1511
out:
1512 1513
	f2fs_put_dnode(&dn);
unlock_out:
1514 1515
	if (locked)
		f2fs_unlock_op(sbi);
1516 1517 1518
	return err;
}

1519 1520 1521 1522 1523
/*
 * ->write_begin handler for data pages.
 *
 * Grabs the page cache page covering @pos, resolves its block address via
 * prepare_write_begin(), and for a partial-page write makes the page
 * contents valid, either by zeroing or by reading the block in.
 *
 * Returns 0 with *pagep pointing at the page on success.  On failure the
 * page is released, f2fs_write_failed() is called for the range end
 * @pos + @len, and the negative errno is returned.
 */
static int f2fs_write_begin(struct file *file, struct address_space *mapping,
		loff_t pos, unsigned len, unsigned flags,
		struct page **pagep, void **fsdata)
{
	struct inode *inode = mapping->host;
1524
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1525
	struct page *page = NULL;
1526
	pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
1527 1528
	bool need_balance = false;
	block_t blkaddr = NULL_ADDR;
1529 1530
	int err = 0;

1531 1532
	trace_f2fs_write_begin(inode, pos, len, flags);

1533 1534 1535 1536 1537 1538 1539 1540 1541 1542
	/*
	 * We should check this at this moment to avoid deadlock on inode page
	 * and #0 page. The locking rule for inline_data conversion should be:
	 * lock_page(page #0) -> lock_page(inode_page)
	 */
	if (index != 0) {
		err = f2fs_convert_inline_inode(inode);
		if (err)
			goto fail;
	}
1543
repeat:
1544
	page = grab_cache_page_write_begin(mapping, index, flags);
1545 1546 1547 1548
	if (!page) {
		err = -ENOMEM;
		goto fail;
	}
1549

1550 1551
	*pagep = page;

1552 1553
	/* need_balance receives the node_changed output */
	err = prepare_write_begin(sbi, page, pos, len,
					&blkaddr, &need_balance);
1554
	if (err)
1555
		goto fail;
1556

1557
	if (need_balance && has_not_enough_free_secs(sbi, 0)) {
1558
		/* balance with the page unlocked, then re-validate it */
		unlock_page(page);
J
Jaegeuk Kim 已提交
1559
		f2fs_balance_fs(sbi, true);
1560 1561 1562 1563 1564 1565 1566 1567
		lock_page(page);
		if (page->mapping != mapping) {
			/* The page got truncated from under us */
			f2fs_put_page(page, 1);
			goto repeat;
		}
	}

1568
	f2fs_wait_on_page_writeback(page, DATA, false);
1569

1570 1571
	/* wait for GCed encrypted page writeback */
	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
1572
		f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
1573

1574
	/* a full-page write needs no existing data in the page */
	if (len == PAGE_SIZE)
C
Chao Yu 已提交
1575 1576 1577
		goto out_update;
	if (PageUptodate(page))
		goto out_clear;
1578

1579 1580
	if ((pos & PAGE_MASK) >= i_size_read(inode)) {
		unsigned start = pos & (PAGE_SIZE - 1);
1581 1582 1583
		unsigned end = start + len;

		/* Reading beyond i_size is simple: memset to zero */
1584
		zero_user_segments(page, 0, start, end, PAGE_SIZE);
C
Chao Yu 已提交
1585
		goto out_update;
1586 1587
	}

1588
	if (blkaddr == NEW_ADDR) {
1589
		/* NEW_ADDR: the page is zero-filled instead of being read */
		zero_user_segment(page, 0, PAGE_SIZE);
1590
	} else {
1591
		struct f2fs_io_info fio = {
1592
			.sbi = sbi,
1593 1594
			.type = DATA,
			.rw = READ_SYNC,
1595 1596
			.old_blkaddr = blkaddr,
			.new_blkaddr = blkaddr,
1597
			.page = page,
1598
			.encrypted_page = NULL,
1599
		};
1600
		err = f2fs_submit_page_bio(&fio);
1601 1602
		if (err)
			goto fail;
1603

1604
		/*
		 * NOTE(review): presumably the read completion unlocks the
		 * page, so re-locking here waits for the read - confirm.
		 */
		lock_page(page);
1605
		if (unlikely(!PageUptodate(page))) {
1606 1607
			err = -EIO;
			goto fail;
1608
		}
1609
		if (unlikely(page->mapping != mapping)) {
1610 1611
			f2fs_put_page(page, 1);
			goto repeat;
1612
		}
1613 1614 1615

		/* avoid symlink page */
		if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
1616
			err = fscrypt_decrypt_page(page);
1617
			if (err)
1618 1619
				goto fail;
		}
1620
	}
C
Chao Yu 已提交
1621
out_update:
1622
	SetPageUptodate(page);
C
Chao Yu 已提交
1623
out_clear:
1624 1625
	clear_cold_data(page);
	return 0;
1626

1627
fail:
1628
	/* page is still NULL if f2fs_convert_inline_inode() or the grab failed */
	f2fs_put_page(page, 1);
1629 1630
	f2fs_write_failed(mapping, pos + len);
	return err;
1631 1632
}

1633 1634 1635 1636 1637 1638 1639
/*
 * ->write_end handler for data pages.
 *
 * Marks @page dirty, grows i_size when the copied data extends past it,
 * releases the page and refreshes the REQ_TIME timestamp.
 *
 * Returns @copied.
 */
static int f2fs_write_end(struct file *file,
			struct address_space *mapping,
			loff_t pos, unsigned len, unsigned copied,
			struct page *page, void *fsdata)
{
	struct inode *inode = page->mapping->host;
	loff_t new_end;

	trace_f2fs_write_end(inode, pos, len, copied);

	set_page_dirty(page);

	new_end = pos + copied;
	if (new_end > i_size_read(inode)) {
		/* the write extended the file */
		i_size_write(inode, new_end);
		mark_inode_dirty(inode);
	}

	f2fs_put_page(page, 1);
	f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);

	return copied;
}

1654 1655
/*
 * Validate a direct I/O request against the filesystem block size.
 *
 * Returns -EINVAL when either the file @offset or the alignment reported by
 * iov_iter_alignment() for @iter is not a multiple of the block size, and 0
 * otherwise.
 */
static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
			   loff_t offset)
{
	unsigned mask = inode->i_sb->s_blocksize - 1;

	if ((offset & mask) || (iov_iter_alignment(iter) & mask))
		return -EINVAL;

	return 0;
}

1668 1669
/*
 * ->direct_IO handler for data pages.
 *
 * Returns the check_direct_IO() error for a misaligned request, 0 for an
 * encrypted regular file (no direct I/O is issued), and otherwise the
 * result of blockdev_direct_IO().  A failed direct write is followed by
 * f2fs_write_failed() for the end of the requested range.
 */
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
			      loff_t offset)
{
	struct address_space *mapping = iocb->ki_filp->f_mapping;
	struct inode *inode = mapping->host;
	size_t nbytes = iov_iter_count(iter);
	int ret = check_direct_IO(inode, iter, offset);

	if (ret)
		return ret;

	if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
		return 0;

	trace_f2fs_direct_IO_enter(inode, offset, nbytes, iov_iter_rw(iter));

	ret = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);

	/* clean up after a write that failed */
	if (ret < 0 && iov_iter_rw(iter) == WRITE)
		f2fs_write_failed(mapping, offset + nbytes);

	trace_f2fs_direct_IO_exit(inode, offset, nbytes, iov_iter_rw(iter), ret);

	return ret;
}

1694 1695
void f2fs_invalidate_page(struct page *page, unsigned int offset,
							unsigned int length)
1696 1697
{
	struct inode *inode = page->mapping->host;
1698
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1699

1700
	if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
1701
		(offset % PAGE_SIZE || length != PAGE_SIZE))
1702 1703
		return;

1704 1705 1706 1707 1708 1709 1710 1711
	if (PageDirty(page)) {
		if (inode->i_ino == F2FS_META_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_META);
		else if (inode->i_ino == F2FS_NODE_INO(sbi))
			dec_page_count(sbi, F2FS_DIRTY_NODES);
		else
			inode_dec_dirty_pages(inode);
	}
C
Chao Yu 已提交
1712 1713 1714 1715 1716

	/* This is atomic written page, keep Private */
	if (IS_ATOMIC_WRITTEN_PAGE(page))
		return;

1717 1718 1719
	ClearPagePrivate(page);
}

1720
/*
 * ->releasepage handler.
 *
 * Returns 0 and leaves PagePrivate set when the page is dirty or is an
 * atomic written page; otherwise clears PagePrivate and returns 1.
 */
int f2fs_release_page(struct page *page, gfp_t wait)
{
	/* Dirty pages and atomic written pages keep PagePrivate. */
	if (PageDirty(page) || IS_ATOMIC_WRITTEN_PAGE(page))
		return 0;

	ClearPagePrivate(page);

	return 1;
}

static int f2fs_set_data_page_dirty(struct page *page)
{
	struct address_space *mapping = page->mapping;
	struct inode *inode = mapping->host;

1739 1740
	trace_f2fs_set_page_dirty(page, DATA);

1741
	SetPageUptodate(page);
1742

1743
	if (f2fs_is_atomic_file(inode)) {
C
Chao Yu 已提交
1744 1745 1746 1747 1748 1749 1750 1751 1752
		if (!IS_ATOMIC_WRITTEN_PAGE(page)) {
			register_inmem_page(inode, page);
			return 1;
		}
		/*
		 * Previously, this page has been registered, we just
		 * return here.
		 */
		return 0;
1753 1754
	}

1755 1756
	if (!PageDirty(page)) {
		__set_page_dirty_nobuffers(page);
1757
		update_dirty_page(inode, page);
1758 1759 1760 1761 1762
		return 1;
	}
	return 0;
}

J
Jaegeuk Kim 已提交
1763 1764
/*
 * ->bmap handler: map file block @block through generic_block_bmap() using
 * get_data_block_bmap.  Inodes with inline data always report 0.
 */
static sector_t f2fs_bmap(struct address_space *mapping, sector_t block)
{
	if (f2fs_has_inline_data(mapping->host))
		return 0;

	/* make sure allocating whole blocks */
	if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY))
		filemap_write_and_wait(mapping);

	return generic_block_bmap(mapping, block, get_data_block_bmap);
}

1777 1778 1779 1780 1781 1782
const struct address_space_operations f2fs_dblock_aops = {
	.readpage	= f2fs_read_data_page,
	.readpages	= f2fs_read_data_pages,
	.writepage	= f2fs_write_data_page,
	.writepages	= f2fs_write_data_pages,
	.write_begin	= f2fs_write_begin,
1783
	.write_end	= f2fs_write_end,
1784
	.set_page_dirty	= f2fs_set_data_page_dirty,
1785 1786
	.invalidatepage	= f2fs_invalidate_page,
	.releasepage	= f2fs_release_page,
1787
	.direct_IO	= f2fs_direct_IO,
J
Jaegeuk Kim 已提交
1788
	.bmap		= f2fs_bmap,
1789
};