/*
 *  linux/fs/ext4/file.c
 *
 * Copyright (C) 1992, 1993, 1994, 1995
 * Remy Card (card@masi.ibp.fr)
 * Laboratoire MASI - Institut Blaise Pascal
 * Universite Pierre et Marie Curie (Paris VI)
 *
 *  from
 *
 *  linux/fs/minix/file.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  ext4 fs regular file handling primitives
 *
 *  64-bit file support on 64-bit platforms by Jakub Jelinek
 *	(jj@sunsite.ms.mff.cuni.cz)
 */

#include <linux/time.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/path.h>
#include <linux/dax.h>
#include <linux/quotaops.h>
#include <linux/pagevec.h>
#include <linux/uio.h>
#include "ext4.h"
#include "ext4_jbd2.h"
#include "xattr.h"
#include "acl.h"

/*
 * Called when an inode is released. Note that this is different
 * from ext4_file_open: open gets called at every open, but release
 * gets called only when /all/ the files are closed.
 */
static int ext4_release_file(struct inode *inode, struct file *filp)
{
	if (ext4_test_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE)) {
		ext4_alloc_da_blocks(inode);
		ext4_clear_inode_state(inode, EXT4_STATE_DA_ALLOC_CLOSE);
	}
	/* if we are the last writer on the inode, drop the block reservation */
	if ((filp->f_mode & FMODE_WRITE) &&
			(atomic_read(&inode->i_writecount) == 1) &&
			!EXT4_I(inode)->i_reserved_data_blocks)
	{
		down_write(&EXT4_I(inode)->i_data_sem);
		ext4_discard_preallocations(inode);
		up_write(&EXT4_I(inode)->i_data_sem);
	}
	if (is_dx(inode) && filp->private_data)
		ext4_htree_free_dir_info(filp->private_data);

	return 0;
}

static void ext4_unwritten_wait(struct inode *inode)
{
	wait_queue_head_t *wq = ext4_ioend_wq(inode);

	wait_event(*wq, (atomic_read(&EXT4_I(inode)->i_unwritten) == 0));
}

/*
 * This tests whether the IO in question is block-aligned or not.
 * Ext4 utilizes unwritten extents when hole-filling during direct IO, and they
 * are converted to written only after the IO is complete.  Until they are
 * mapped, these blocks appear as holes, so dio_zero_block() will assume that
 * it needs to zero out portions of the start and/or end block.  If 2 AIO
 * threads are at work on the same unwritten block, they must be synchronized
 * or one thread will zero the other's data, causing corruption.
 */
static int
ext4_unaligned_aio(struct inode *inode, struct iov_iter *from, loff_t pos)
{
	struct super_block *sb = inode->i_sb;
	int blockmask = sb->s_blocksize - 1;

	if (pos >= i_size_read(inode))
		return 0;

	if ((pos | iov_iter_alignment(from)) & blockmask)
		return 1;

	return 0;
}
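
/*
 * Worked example (editorial note, not part of the original source): with a
 * 4096-byte block size, blockmask is 0xfff.  A 512-byte AIO write at
 * pos 0x1200 yields (0x1200 | 0x200) & 0xfff != 0, so the IO is treated as
 * unaligned and the caller serializes it via ext4_unwritten_wait().
 */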

/* Is IO overwriting allocated and initialized blocks? */
static bool ext4_overwrite_io(struct inode *inode, loff_t pos, loff_t len)
{
	struct ext4_map_blocks map;
	unsigned int blkbits = inode->i_blkbits;
	int err, blklen;

	if (pos + len > i_size_read(inode))
		return false;

	map.m_lblk = pos >> blkbits;
	map.m_len = EXT4_MAX_BLOCKS(len, pos, blkbits);
	blklen = map.m_len;

	err = ext4_map_blocks(NULL, inode, &map, 0);
	/*
	 * 'err == blklen' means that all of the blocks have been
	 * preallocated, regardless of whether they have been initialized
	 * or not. To exclude unwritten extents, we need to check m_flags.
	 */
	return err == blklen && (map.m_flags & EXT4_MAP_MAPPED);
}
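
/*
 * Illustrative example (editorial note): with 4 KiB blocks, a direct IO of
 * 8192 bytes at pos 4096 maps m_lblk = 1, m_len = 2; the write counts as an
 * overwrite only if ext4_map_blocks() reports both blocks as
 * EXT4_MAP_MAPPED, i.e. allocated and already written.
 */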

static ssize_t ext4_write_checks(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	ssize_t ret;

	ret = generic_write_checks(iocb, from);
	if (ret <= 0)
		return ret;
	/*
	 * If we have encountered a bitmap-format file, the size limit
	 * is smaller than s_maxbytes, which is for extent-mapped files.
	 */
	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS))) {
		struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);

		if (iocb->ki_pos >= sbi->s_bitmap_maxbytes)
			return -EFBIG;
		iov_iter_truncate(from, sbi->s_bitmap_maxbytes - iocb->ki_pos);
	}
	return iov_iter_count(from);
}
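
/*
 * Example of the truncation behaviour above (editorial note): if a
 * bitmap-format file has limit M = s_bitmap_maxbytes, an 8 KiB write
 * starting at M - 4096 is shortened to 4096 bytes here, while a write
 * starting at or beyond M fails outright with -EFBIG.
 */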

static ssize_t
ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
	struct inode *inode = file_inode(iocb->ki_filp);
	int o_direct = iocb->ki_flags & IOCB_DIRECT;
	int unaligned_aio = 0;
	int overwrite = 0;
	ssize_t ret;

	inode_lock(inode);
	ret = ext4_write_checks(iocb, from);
	if (ret <= 0)
		goto out;

	/*
	 * Unaligned direct AIO must be serialized among each other as zeroing
	 * of partial blocks of two competing unaligned AIOs can result in data
	 * corruption.
	 */
	if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
	    !is_sync_kiocb(iocb) &&
	    ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
		unaligned_aio = 1;
		ext4_unwritten_wait(inode);
	}

	iocb->private = &overwrite;
	/* Check whether we do a DIO overwrite or not */
	if (o_direct && ext4_should_dioread_nolock(inode) && !unaligned_aio &&
	    ext4_overwrite_io(inode, iocb->ki_pos, iov_iter_count(from)))
		overwrite = 1;

	ret = __generic_file_write_iter(iocb, from);
	inode_unlock(inode);

	if (ret > 0)
		ret = generic_write_sync(iocb, ret);

	return ret;

out:
	inode_unlock(inode);
	return ret;
}
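
/*
 * Locking note (editorial): i_rwsem is held only across ext4_write_checks()
 * and __generic_file_write_iter(); generic_write_sync(), which issues the
 * flush for O_SYNC/O_DSYNC writes, runs after inode_unlock().
 */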

#ifdef CONFIG_FS_DAX
static int ext4_dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
{
	int result;
	handle_t *handle = NULL;
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool write = vmf->flags & FAULT_FLAG_WRITE;

	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
		down_read(&EXT4_I(inode)->i_mmap_sem);
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
						EXT4_DATA_TRANS_BLOCKS(sb));
	} else
		down_read(&EXT4_I(inode)->i_mmap_sem);

	if (IS_ERR(handle))
		result = VM_FAULT_SIGBUS;
	else
		result = dax_fault(vma, vmf, ext4_dax_get_block);

	if (write) {
		if (!IS_ERR(handle))
			ext4_journal_stop(handle);
		up_read(&EXT4_I(inode)->i_mmap_sem);
		sb_end_pagefault(sb);
	} else
		up_read(&EXT4_I(inode)->i_mmap_sem);

	return result;
}
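
/*
 * Note (editorial): write faults start a journal handle before calling into
 * DAX so that block allocation during the fault is transactional; read
 * faults only take i_mmap_sem, which keeps the mapping stable against
 * truncate and hole punch.
 */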

static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
						pmd_t *pmd, unsigned int flags)
{
	int result;
	handle_t *handle = NULL;
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	bool write = flags & FAULT_FLAG_WRITE;

	if (write) {
		sb_start_pagefault(sb);
		file_update_time(vma->vm_file);
		down_read(&EXT4_I(inode)->i_mmap_sem);
		handle = ext4_journal_start_sb(sb, EXT4_HT_WRITE_PAGE,
				ext4_chunk_trans_blocks(inode,
							PMD_SIZE / PAGE_SIZE));
	} else
		down_read(&EXT4_I(inode)->i_mmap_sem);

	if (IS_ERR(handle))
		result = VM_FAULT_SIGBUS;
	else
		result = dax_pmd_fault(vma, addr, pmd, flags,
					 ext4_dax_get_block);

	if (write) {
		if (!IS_ERR(handle))
			ext4_journal_stop(handle);
		up_read(&EXT4_I(inode)->i_mmap_sem);
		sb_end_pagefault(sb);
	} else
		up_read(&EXT4_I(inode)->i_mmap_sem);

	return result;
}

/*
 * Handle write fault for VM_MIXEDMAP mappings. Similarly to the
 * ext4_dax_fault() handler, we check for races against truncate. Note that
 * since we cycle through i_mmap_sem, we are sure that any hole punching that
 * began before we were called is finished by now; so if it included part of
 * the file we are working on, our pte will get unmapped and the check for
 * pte_same() in wp_pfn_shared() fails. Thus the fault gets retried and
 * things work out as desired.
 */
static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
				struct vm_fault *vmf)
{
	struct inode *inode = file_inode(vma->vm_file);
	struct super_block *sb = inode->i_sb;
	loff_t size;
	int ret;

	sb_start_pagefault(sb);
	file_update_time(vma->vm_file);
	down_read(&EXT4_I(inode)->i_mmap_sem);
	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	if (vmf->pgoff >= size)
		ret = VM_FAULT_SIGBUS;
	else
		ret = dax_pfn_mkwrite(vma, vmf);
	up_read(&EXT4_I(inode)->i_mmap_sem);
	sb_end_pagefault(sb);

	return ret;
}

static const struct vm_operations_struct ext4_dax_vm_ops = {
	.fault		= ext4_dax_fault,
	.pmd_fault	= ext4_dax_pmd_fault,
	.page_mkwrite	= ext4_dax_fault,
	.pfn_mkwrite	= ext4_dax_pfn_mkwrite,
};
#else
#define ext4_dax_vm_ops	ext4_file_vm_ops
#endif
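
/*
 * Note (editorial): without CONFIG_FS_DAX, ext4_dax_vm_ops simply aliases
 * ext4_file_vm_ops, so ext4_file_mmap() below compiles unchanged and its
 * IS_DAX() branch is never taken.
 */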

static const struct vm_operations_struct ext4_file_vm_ops = {
	.fault		= ext4_filemap_fault,
	.map_pages	= filemap_map_pages,
	.page_mkwrite   = ext4_page_mkwrite,
};

static int ext4_file_mmap(struct file *file, struct vm_area_struct *vma)
{
	struct inode *inode = file->f_mapping->host;

	if (ext4_encrypted_inode(inode)) {
		int err = fscrypt_get_encryption_info(inode);
		if (err)
			return 0;
		if (!fscrypt_has_encryption_key(inode))
			return -ENOKEY;
	}
	file_accessed(file);
	if (IS_DAX(file_inode(file))) {
		vma->vm_ops = &ext4_dax_vm_ops;
		vma->vm_flags |= VM_MIXEDMAP | VM_HUGEPAGE;
	} else {
		vma->vm_ops = &ext4_file_vm_ops;
	}
	return 0;
}

static int ext4_file_open(struct inode * inode, struct file * filp)
{
	struct super_block *sb = inode->i_sb;
	struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
	struct vfsmount *mnt = filp->f_path.mnt;
	struct dentry *dir;
	struct path path;
	char buf[64], *cp;
	int ret;

	if (unlikely(!(sbi->s_mount_flags & EXT4_MF_MNTDIR_SAMPLED) &&
		     !(sb->s_flags & MS_RDONLY))) {
		sbi->s_mount_flags |= EXT4_MF_MNTDIR_SAMPLED;
		/*
		 * Sample where the filesystem has been mounted and
		 * store it in the superblock for sysadmin convenience
		 * when trying to sort through large numbers of block
		 * devices or filesystem images.
		 */
		memset(buf, 0, sizeof(buf));
		path.mnt = mnt;
		path.dentry = mnt->mnt_root;
		cp = d_path(&path, buf, sizeof(buf));
		if (!IS_ERR(cp)) {
			handle_t *handle;
			int err;

			handle = ext4_journal_start_sb(sb, EXT4_HT_MISC, 1);
			if (IS_ERR(handle))
				return PTR_ERR(handle);
			BUFFER_TRACE(sbi->s_sbh, "get_write_access");
			err = ext4_journal_get_write_access(handle, sbi->s_sbh);
			if (err) {
				ext4_journal_stop(handle);
				return err;
			}
			strlcpy(sbi->s_es->s_last_mounted, cp,
				sizeof(sbi->s_es->s_last_mounted));
			ext4_handle_dirty_super(handle, sb);
			ext4_journal_stop(handle);
		}
	}
	if (ext4_encrypted_inode(inode)) {
		ret = fscrypt_get_encryption_info(inode);
		if (ret)
			return -EACCES;
		if (!fscrypt_has_encryption_key(inode))
			return -ENOKEY;
	}

	dir = dget_parent(file_dentry(filp));
	if (ext4_encrypted_inode(d_inode(dir)) &&
			!fscrypt_has_permitted_context(d_inode(dir), inode)) {
		ext4_warning(inode->i_sb,
			     "Inconsistent encryption contexts: %lu/%lu",
			     (unsigned long) d_inode(dir)->i_ino,
			     (unsigned long) inode->i_ino);
		dput(dir);
		return -EPERM;
	}
	dput(dir);
	/*
	 * Set up the jbd2_inode if we are opening the inode for
	 * writing and the journal is present
	 */
	if (filp->f_mode & FMODE_WRITE) {
		ret = ext4_inode_attach_jinode(inode);
		if (ret < 0)
			return ret;
	}
	return dquot_file_open(inode, filp);
}

/*
 * Here we use ext4_map_blocks() to get a block mapping for an extent-based
 * file rather than ext4_ext_walk_space() because it lets us handle
 * SEEK_DATA/SEEK_HOLE for block-mapped and extent-mapped files in the same
 * function.  Once the extent status tree has been fully implemented, it will
 * track all extent status for a file and we can directly use it to
 * retrieve the offset for SEEK_DATA/SEEK_HOLE.
 */

/*
 * When we retrieve the offset for SEEK_DATA/SEEK_HOLE, we also need to
 * look up the page cache to check whether there is data in the range
 * [startoff, endoff]: if this range contains an unwritten extent, we treat
 * the extent as data or as a hole depending on whether the page cache has
 * data for it.
 */
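
/*
 * Userspace view (illustrative sketch, assuming a regular ext4 fd):
 *
 *	off_t data = lseek(fd, 0, SEEK_DATA);     // first data at/after 0
 *	off_t hole = lseek(fd, data, SEEK_HOLE);  // next hole after data
 *
 * Either call fails with errno == ENXIO when the offset is at or beyond
 * EOF, matching the -ENXIO returns below.
 */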
static int ext4_find_unwritten_pgoff(struct inode *inode,
				     int whence,
				     ext4_lblk_t end_blk,
				     loff_t *offset)
{
	struct pagevec pvec;
	unsigned int blkbits;
	pgoff_t index;
	pgoff_t end;
	loff_t endoff;
	loff_t startoff;
	loff_t lastoff;
	int found = 0;

	blkbits = inode->i_sb->s_blocksize_bits;
	startoff = *offset;
	lastoff = startoff;
	endoff = (loff_t)end_blk << blkbits;

	index = startoff >> PAGE_SHIFT;
	end = endoff >> PAGE_SHIFT;

	pagevec_init(&pvec, 0);
	do {
		int i, num;
		unsigned long nr_pages;

		num = min_t(pgoff_t, end - index, PAGEVEC_SIZE);
		nr_pages = pagevec_lookup(&pvec, inode->i_mapping, index,
					  (pgoff_t)num);
		if (nr_pages == 0) {
			if (whence == SEEK_DATA)
				break;

			BUG_ON(whence != SEEK_HOLE);
			/*
			 * If this is the first pass through the loop and
			 * the offset is not beyond the end offset, there is
			 * a hole at this offset.
			 */
			if (lastoff == startoff || lastoff < endoff)
				found = 1;
			break;
		}

		/*
		 * If this is the first pass through the loop and the offset
		 * is smaller than the first page offset, there is a hole at
		 * this offset.
		 */
		if (lastoff == startoff && whence == SEEK_HOLE &&
		    lastoff < page_offset(pvec.pages[0])) {
			found = 1;
			break;
		}

		for (i = 0; i < nr_pages; i++) {
			struct page *page = pvec.pages[i];
			struct buffer_head *bh, *head;

			/*
			 * If the current offset is not beyond the end of the
			 * given range, it will be a hole.
			 */
			if (lastoff < endoff && whence == SEEK_HOLE &&
			    page->index > end) {
				found = 1;
				*offset = lastoff;
				goto out;
			}

			lock_page(page);

			if (unlikely(page->mapping != inode->i_mapping)) {
				unlock_page(page);
				continue;
			}

			if (!page_has_buffers(page)) {
				unlock_page(page);
				continue;
			}

			if (page_has_buffers(page)) {
				lastoff = page_offset(page);
				bh = head = page_buffers(page);
				do {
					if (buffer_uptodate(bh) ||
					    buffer_unwritten(bh)) {
						if (whence == SEEK_DATA)
							found = 1;
					} else {
						if (whence == SEEK_HOLE)
							found = 1;
					}
					if (found) {
						*offset = max_t(loff_t,
							startoff, lastoff);
						unlock_page(page);
						goto out;
					}
					lastoff += bh->b_size;
					bh = bh->b_this_page;
				} while (bh != head);
			}

			lastoff = page_offset(page) + PAGE_SIZE;
			unlock_page(page);
		}

		/*
		 * Fewer pages were returned than requested, so the rest of
		 * the range is a hole.
		 */
		if (nr_pages < num && whence == SEEK_HOLE) {
			found = 1;
			*offset = lastoff;
			break;
		}

		index = pvec.pages[i - 1]->index + 1;
		pagevec_release(&pvec);
	} while (index <= end);

out:
	pagevec_release(&pvec);
	return found;
}
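
/*
 * Classification note (editorial): inside an unwritten extent, a buffer
 * that is uptodate or unwritten in the page cache counts as data, anything
 * else counts as a hole; the scan above reports whichever state the
 * caller's whence (SEEK_DATA or SEEK_HOLE) is looking for.
 */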

/*
 * ext4_seek_data() retrieves the offset for SEEK_DATA.
 */
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t dataoff, isize;
	int blkbits;
	int ret;

	inode_lock(inode);

	isize = i_size_read(inode);
	if (offset >= isize) {
		inode_unlock(inode);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	dataoff = offset;

	do {
		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
		if (ret <= 0) {
			/* No extent found -> no data */
			if (ret == 0)
				ret = -ENXIO;
			inode_unlock(inode);
			return ret;
		}

		last = es.es_lblk;
		if (last != start)
			dataoff = (loff_t)last << blkbits;
		if (!ext4_es_is_unwritten(&es))
			break;

		/*
		 * If there is an unwritten extent at this offset, treat it
		 * as data or as a hole depending on whether the page cache
		 * has data for it.
		 */
		if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
					      es.es_lblk + es.es_len, &dataoff))
			break;
		last += es.es_len;
		dataoff = (loff_t)last << blkbits;
		cond_resched();
	} while (last <= end);

	inode_unlock(inode);

	if (dataoff > isize)
		return -ENXIO;

	return vfs_setpos(file, dataoff, maxsize);
}

/*
 * ext4_seek_hole() retrieves the offset for SEEK_HOLE.
 */
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
	struct inode *inode = file->f_mapping->host;
	struct extent_status es;
	ext4_lblk_t start, last, end;
	loff_t holeoff, isize;
	int blkbits;
	int ret;

	inode_lock(inode);

	isize = i_size_read(inode);
	if (offset >= isize) {
		inode_unlock(inode);
		return -ENXIO;
	}

	blkbits = inode->i_sb->s_blocksize_bits;
	start = offset >> blkbits;
	last = start;
	end = isize >> blkbits;
	holeoff = offset;

	do {
		ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
		if (ret < 0) {
			inode_unlock(inode);
			return ret;
		}
		/* Found a hole? */
		if (ret == 0 || es.es_lblk > last) {
			if (last != start)
				holeoff = (loff_t)last << blkbits;
			break;
		}
		/*
		 * If there is an unwritten extent at this offset, treat it
		 * as data or as a hole depending on whether the page cache
		 * has data for it.
		 */
		if (ext4_es_is_unwritten(&es) &&
		    ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
					      last + es.es_len, &holeoff))
			break;

		last += es.es_len;
		holeoff = (loff_t)last << blkbits;
		cond_resched();
	} while (last <= end);

	inode_unlock(inode);

	if (holeoff > isize)
		holeoff = isize;

	return vfs_setpos(file, holeoff, maxsize);
}

/*
 * ext4_llseek() handles both block-mapped and extent-mapped maxbytes values
 * by calling generic_file_llseek_size() with the appropriate maxbytes
 * value for each.
 */
loff_t ext4_llseek(struct file *file, loff_t offset, int whence)
{
	struct inode *inode = file->f_mapping->host;
	loff_t maxbytes;

	if (!(ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)))
		maxbytes = EXT4_SB(inode->i_sb)->s_bitmap_maxbytes;
	else
		maxbytes = inode->i_sb->s_maxbytes;

	switch (whence) {
	case SEEK_SET:
	case SEEK_CUR:
	case SEEK_END:
		return generic_file_llseek_size(file, offset, whence,
						maxbytes, i_size_read(inode));
	case SEEK_DATA:
		return ext4_seek_data(file, offset, maxbytes);
	case SEEK_HOLE:
		return ext4_seek_hole(file, offset, maxbytes);
	}

	return -EINVAL;
}

const struct file_operations ext4_file_operations = {
	.llseek		= ext4_llseek,
	.read_iter	= generic_file_read_iter,
	.write_iter	= ext4_file_write_iter,
	.unlocked_ioctl = ext4_ioctl,
#ifdef CONFIG_COMPAT
	.compat_ioctl	= ext4_compat_ioctl,
#endif
	.mmap		= ext4_file_mmap,
	.open		= ext4_file_open,
	.release	= ext4_release_file,
	.fsync		= ext4_sync_file,
	.get_unmapped_area = thp_get_unmapped_area,
	.splice_read	= generic_file_splice_read,
	.splice_write	= iter_file_splice_write,
	.fallocate	= ext4_fallocate,
};

const struct inode_operations ext4_file_inode_operations = {
	.setattr	= ext4_setattr,
	.getattr	= ext4_getattr,
	.listxattr	= ext4_listxattr,
	.get_acl	= ext4_get_acl,
	.set_acl	= ext4_set_acl,
	.fiemap		= ext4_fiemap,
};