/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

/*
 * Called when an inode is released. Note that this is different
 * from ext4_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&
			(atomic_read(&inode->i_writecount) == 1) &&
		        !EXT4_I(inode)->i_reserved_data_blocks)
	{
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	if (is_dx(inode) && filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

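/* Wait for all pending conversions of unwritten extents on this inode to complete. */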
static void ext4_unwritten_wait(struct inode *inode)
{
	wait_queue_head_t *wq = ext4_ioend_wq(inode);

	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;

	if (pos >= i_size_read(inode))
		return 0;

	if ((pos | iov_iter_alignment(from)) & blockmask)
		return 1;

	return 0;
}

static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct file *file = iocb->ki_filp;
	struct inode *inode = file_inode(iocb->ki_filp);
	struct mutex *aio_mutex = NULL;
	struct blk_plug plug;
	int o_direct = iocb->ki_flags & IOCB_DIRECT;
	int overwrite = 0;
	ssize_t ret;

	/*
	 * Unaligned direct AIO must be serialized; see comment above
	 * In the case of O_APPEND, assume that we must always serialize
	 */
	if (o_direct &&
	    ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
	    !is_sync_kiocb(iocb) &&
	    (iocb->ki_flags & IOCB_APPEND ||
	     ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
		aio_mutex = ext4_aio_mutex(inode);
		mutex_lock(aio_mutex);
		ext4_unwritten_wait(inode);
	}

	mutex_lock(&inode->i_mutex);
	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes) {
			ret = -EFBIG;
			goto out;
		}
		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
	}

	iocb->private = &overwrite;
	if (o_direct) {
		size_t length = iov_iter_count(from);
		loff_t pos = iocb->ki_pos;
		blk_start_plug(&plug);

		/*
		 * Check whether we are doing a DIO overwrite, i.e. writing
		 * over blocks that are already allocated and initialized;
		 * the direct IO path can handle such writes with less
		 * locking.
		 */
		if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
		    !file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
			struct ext4_map_blocks map;
			unsigned int blkbits = inode->i_blkbits;
			int err, len;

			map.m_lblk = pos >> blkbits;
			map.m_len = (EXT4_BLOCK_ALIGN(pos + length, blkbits) >> blkbits)
				- map.m_lblk;
			len = map.m_len;

			err = ext4_map_blocks(NULL, inode, &map, 0);
			/*
			 * 'err == len' means that all of the blocks have
			 * been preallocated, whether or not they are
			 * initialized.  To exclude unwritten extents, we
			 * need to check m_flags.  There are two conditions
			 * that indicate initialized extents: 1) if we hit
			 * the extent cache, the EXT4_MAP_MAPPED flag is
			 * returned; 2) if we do a real lookup, non-flags
			 * are returned.  So we should check these two
			 * conditions.
			 */
			if (err == len && (map.m_flags & EXT4_MAP_MAPPED))
				overwrite = 1;
		}
	}

	ret = __generic_file_write_iter(iocb, from);
	mutex_unlock(&inode->i_mutex);

	if (ret > 0) {
		ssize_t err;

		err = generic_write_sync(file, iocb->ki_pos - ret, ret);
		if (err < 0)
			ret = err;
	}
	if (o_direct)
		blk_finish_plug(&plug);
	if (aio_mutex)
		mutex_unlock(aio_mutex);
	return ret;

out:
	mutex_unlock(&inode->i_mutex);
	if (aio_mutex)
		mutex_unlock(aio_mutex);
	return ret;
}

#ifdef CONFIG_FS_DAX
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int result;
	handle_t *handle = NULL;
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

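	/*
	 * A write fault may need to allocate blocks, so start a journal
	 * handle; i_mmap_sem is taken in both branches to serialize the
	 * fault against truncate and hole punching.
	 */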
	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
		down_read(&EXT4_I(inode)->i_mmap_sem);
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
						EXT4_DATA_TRANS_BLOCKS(sb));
	} else
		down_read(&EXT4_I(inode)->i_mmap_sem);

	if (IS_ERR(handle))
		result = VM_FAULT_SIGBUS;
	else
		result = __dax_fault(vma, vmf, ext4_dax_mmap_get_block, NULL);

	if (write) {
		if (!IS_ERR(handle))
			ext4_journal_stop(handle);
		up_read(&EXT4_I(inode)->i_mmap_sem);
		sb_end_pagefault(sb);
	} else
		up_read(&EXT4_I(inode)->i_mmap_sem);

	return result;
}

static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
						pmd_t *pmd, unsigned int flags)
{
	int result;
	handle_t *handle = NULL;
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool write = flags & FAULT_FLAG_WRITE;

	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
		down_read(&EXT4_I(inode)->i_mmap_sem);
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
				ext4_chunk_trans_blocks(inode,
							PMD_SIZE / PAGE_SIZE));
	} else
		down_read(&EXT4_I(inode)->i_mmap_sem);

	if (IS_ERR(handle))
		result = VM_FAULT_SIGBUS;
	else
		result = __dax_pmd_fault(vma, addr, pmd, flags,
				ext4_dax_mmap_get_block, NULL);

	if (write) {
		if (!IS_ERR(handle))
			ext4_journal_stop(handle);
		up_read(&EXT4_I(inode)->i_mmap_sem);
		sb_end_pagefault(sb);
	} else
		up_read(&EXT4_I(inode)->i_mmap_sem);

	return result;
}

static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int err;
	struct inode *inode = file_inode(vma->vm_file);

	sb_start_pagefault(inode->i_sb);
	file_update_time(vma->vm_file);
	down_read(&EXT4_I(inode)->i_mmap_sem);
	err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
	up_read(&EXT4_I(inode)->i_mmap_sem);
	sb_end_pagefault(inode->i_sb);

	return err;
}

/*
 * Handle write faults for VM_MIXEDMAP mappings. Similarly to the
 * ext4_dax_mkwrite() handler, we check for races against truncate. Note that
 * since we cycle through i_mmap_sem, we are sure that any hole punching that
 * began before we were called has finished by now, so if it included part of
 * the file we are working on, our pte will get unmapped and the check for
 * pte_same() in wp_pfn_shared() fails. Thus the fault gets retried and things
 * work out as desired.
 */
static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	int ret = VM_FAULT_NOPAGE;
	loff_t size;

	sb_start_pagefault(sb);
	file_update_time(vma->vm_file);
	down_read(&EXT4_I(inode)->i_mmap_sem);
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		ret = VM_FAULT_SIGBUS;
	up_read(&EXT4_I(inode)->i_mmap_sem);
	sb_end_pagefault(sb);

	return ret;
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
	.fault		= ext4_dax_fault,
	.pmd_fault	= ext4_dax_pmd_fault,
	.page_mkwrite	= ext4_dax_mkwrite,
	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif

static const struct vm_operations_struct ext4_file_vm_ops = {
	.fault		= ext4_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite   = ext4_page_mkwrite,
};

static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_mapping->host;

	if (ext4_encrypted_inode(inode)) {
		int err = ext4_get_encryption_info(inode);
		if (err)
			return 0;
		if (ext4_encryption_info(inode) == NULL)
			return -ENOKEY;
	}
	file_accessed(file);
	if (IS_DAX(file_inode(file))) {
		vma->vm_ops = &ext4_dax_vm_ops;
		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
	} else {
		vma->vm_ops = &ext4_file_vm_ops;
	}
	return 0;
}

static int ext4_file_open(struct inode * inode, struct file * filp)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct vfsmount *mnt = filp->f_path.mnt;
	struct path path;
	char buf[64], *cp;
	int ret;

	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
		     !(sb->s_flags & MS_RDONLY))) {
		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
		/*
		 * Sample where the filesystem has been mounted and
		 * store it in the superblock for sysadmin convenience
		 * when trying to sort through large numbers of block
		 * devices or filesystem images.
		 */
		memset(buf, 0, sizeof(buf));
		path.mnt = mnt;
		path.dentry = mnt->mnt_root;
		cp = d_path(&path, buf, sizeof(buf));
		if (!IS_ERR(cp)) {
			handle_t *handle;
			int err;

			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
			if (IS_ERR(handle))
				return PTR_ERR(handle);
			BUFFER_TRACE(sbi->s_sbh, "get_write_access");
			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
			if (err) {
				ext4_journal_stop(handle);
				return err;
			}
			strlcpy(sbi->s_es->s_last_mounted, cp,
				sizeof(sbi->s_es->s_last_mounted));
			ext4_handle_dirty_super(handle, sb);
			ext4_journal_stop(handle);
		}
	}
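	/* Encrypted files can only be opened once the encryption key is available. */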
	if (ext4_encrypted_inode(inode)) {
		ret = ext4_get_encryption_info(inode);
		if (ret)
			return -EACCES;
		if (ext4_encryption_info(inode) == NULL)
			return -ENOKEY;
	}
	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
	 */
	if (filp->f_mode & FMODE_WRITE) {
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			return ret;
	}
	return dquot_file_open(inode, filp);
}

/*
 * Here we use ext4_map_blocks() to get a block mapping for an extent-based
 * file rather than ext4_ext_walk_space() because we can introduce
 * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped files in the same
 * function.  When the extent status tree has been fully implemented, it will
 * track all extent status for a file and we can directly use it to
 * retrieve the offset for SEEK_DATA/SEEK_HOLE.
 */

/*
 * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we need to look up
 * the page cache to check whether there is any data in the range
 * [startoff, endoff] because, if this range contains an unwritten extent,
 * we treat that extent as data or as a hole according to whether the
 * page cache has data or not.
 */
static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
				     struct ext4_map_blocks *map,
				     loff_t *offset)
{
	struct pagevec pvec;
	unsigned int blkbits;
	pgoff_t index;
	pgoff_t end;
	loff_t endoff;
	loff_t startoff;
	loff_t lastoff;
	int found = 0;

	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
	endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;

	index = startoff >> PAGE_CACHE_SHIFT;
	end = endoff >> PAGE_CACHE_SHIFT;

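	/*
	 * Scan the page cache over the range: an uptodate or unwritten
	 * buffer counts as data, anything else (including a gap in the
	 * page cache) counts as a hole.
	 */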
	pagevec_init(&pvec, 0);
	do {
		int i, num;
		unsigned long nr_pages;

		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  (pgoff_t)num);
		if (nr_pages == 0) {
			if (whence == SEEK_DATA)
				break;

			BUG_ON(whence != SEEK_HOLE);
			/*
			 * If this is the first pass through the loop and the
			 * offset is not beyond the end offset, there is a
			 * hole at this offset.
			 */
			if (lastoff == startoff || lastoff < endoff)
				found = 1;
			break;
		}

		/*
		 * If this is the first pass through the loop and the
		 * offset is smaller than the first page offset, there is a
		 * hole at this offset.
		 */
		if (lastoff == startoff && whence == SEEK_HOLE &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = 1;
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			/*
			 * If the current offset is not beyond the end of the given
			 * range, it will be a hole.
			 */
			if (lastoff < endoff && whence == SEEK_HOLE &&
			    page->index > end) {
				found = 1;
				*offset = lastoff;
				goto out;
			}

			lock_page(page);

			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			if (page_has_buffers(page)) {
				lastoff = page_offset(page);
				bh = head = page_buffers(page);
				do {
					if (buffer_uptodate(bh) ||
					    buffer_unwritten(bh)) {
						if (whence == SEEK_DATA)
							found = 1;
					} else {
						if (whence == SEEK_HOLE)
							found = 1;
					}
					if (found) {
						*offset = max_t(loff_t,
							startoff, lastoff);
						unlock_page(page);
						goto out;
					}
					lastoff += bh->b_size;
					bh = bh->b_this_page;
				} while (bh != head);
			}

			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		/*
		 * The number of pages is less than we asked for, so there
		 * must be a hole in the range.
		 */
		if (nr_pages < num && whence == SEEK_HOLE) {
			found = 1;
			*offset = lastoff;
			break;
		}

		index = pvec.pages[i - 1]->index + 1;
		pagevec_release(&pvec);
	} while (index <= end);

out:
	pagevec_release(&pvec);
	return found;
}

/*
 * ext4_seek_data() retrieves the offset for SEEK_DATA.
 */
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t dataoff, isize;
	int blkbits;
	int ret = 0;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);
	if (offset >= isize) {
		mutex_unlock(&inode->i_mutex);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	dataoff = offset;

	do {
		map.m_lblk = last;
		map.m_len = end - last + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
			if (last != start)
				dataoff = (loff_t)last << blkbits;
			break;
		}

		/*
		 * If there is a delayed extent at this offset,
		 * it is treated as data.
		 */
		ext4_es_find_delayed_extent_range(inode, last, last, &es);
		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
			if (last != start)
				dataoff = (loff_t)last << blkbits;
			break;
		}

		/*
		 * If there is an unwritten extent at this offset,
		 * it is treated as data or as a hole according to
		 * whether the page cache has data or not.
		 */
		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
			int unwritten;
			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
							      &map, &dataoff);
			if (unwritten)
				break;
		}
		last++;
		dataoff = (loff_t)last << blkbits;
	} while (last <= end);

	mutex_unlock(&inode->i_mutex);

	if (dataoff > isize)
		return -ENXIO;

	return vfs_setpos(file, dataoff, maxsize);
}

/*
 * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
 */
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct ext4_map_blocks map;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t holeoff, isize;
	int blkbits;
	int ret = 0;

	mutex_lock(&inode->i_mutex);

	isize = i_size_read(inode);
	if (offset >= isize) {
		mutex_unlock(&inode->i_mutex);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	holeoff = offset;
	do {
		map.m_lblk = last;
		map.m_len = end - last + 1;
		ret = ext4_map_blocks(NULL, inode, &map, 0);
		if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
			last += ret;
			holeoff = (loff_t)last << blkbits;
			continue;
		}
		/*
		 * If there is a delayed extent at this offset,
		 * we will skip this extent.
		 */
		ext4_es_find_delayed_extent_range(inode, last, last, &es);
		if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
			last = es.es_lblk + es.es_len;
			holeoff = (loff_t)last << blkbits;
			continue;
		}
		/*
		 * If there is an unwritten extent at this offset,
		 * it is treated as data or as a hole according to
		 * whether the page cache has data or not.
		 */
		if (map.m_flags & EXT4_MAP_UNWRITTEN) {
			int unwritten;
			unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
							      &map, &holeoff);
			if (!unwritten) {
				last += ret;
				holeoff = (loff_t)last << blkbits;
				continue;
			}
		}

		/* find a hole */
		break;
	} while (last <= end);

	mutex_unlock(&inode->i_mutex);

	if (holeoff > isize)
		holeoff = isize;

	return vfs_setpos(file, holeoff, maxsize);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
 * value for each.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes;

	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
	else
		maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_DATA:
		return ext4_seek_data(file, offset, maxbytes);
	case SEEK_HOLE:
		return ext4_seek_hole(file, offset, maxbytes);
	}

	return -EINVAL;
}

const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.mmap		= ext4_file_mmap,
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.setxattr	= generic_setxattr,
	.getxattr	= generic_getxattr,
	.listxattr	= ext4_listxattr,
	.removexattr	= generic_removexattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
};