diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 14cdc101d165d94bb6114763989322ac1958848c..1b5f15653b1bb82ca0fc7801fe01fe8c4c75047a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -447,7 +447,6 @@ prototypes: int (*flush) (struct file *); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t start, loff_t end, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*readv) (struct file *, const struct iovec *, unsigned long, diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index d619c8d71966e255474b3bce54f2b277dd1b337d..b5039a00caafae44660514da3829994436a35e84 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -828,7 +828,6 @@ struct file_operations { int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); diff --git a/fs/aio.c b/fs/aio.c index 1157e13a36d681ecba8e926bb9e91943cfeb6723..428484f2f8413dc6972da9857b3392264b9e6421 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -1078,6 +1078,17 @@ static void aio_complete(struct kiocb *kiocb, long res, long res2) unsigned tail, pos, head; unsigned long flags; + if (kiocb->ki_flags & IOCB_WRITE) { + struct file *file = kiocb->ki_filp; + + /* + * Tell lockdep we inherited freeze protection from submission + * thread. + */ + __sb_writers_acquired(file_inode(file)->i_sb, SB_FREEZE_WRITE); + file_end_write(file); + } + /* * Special case handling for sync iocbs: * - events go directly into the iocb for fast handling @@ -1392,122 +1403,106 @@ SYSCALL_DEFINE1(io_destroy, aio_context_t, ctx) return -EINVAL; } -typedef ssize_t (rw_iter_op)(struct kiocb *, struct iov_iter *); - -static int aio_setup_vectored_rw(int rw, char __user *buf, size_t len, - struct iovec **iovec, - bool compat, - struct iov_iter *iter) +static int aio_setup_rw(int rw, struct iocb *iocb, struct iovec **iovec, + bool vectored, bool compat, struct iov_iter *iter) { + void __user *buf = (void __user *)(uintptr_t)iocb->aio_buf; + size_t len = iocb->aio_nbytes; + + if (!vectored) { + ssize_t ret = import_single_range(rw, buf, len, *iovec, iter); + *iovec = NULL; + return ret; + } #ifdef CONFIG_COMPAT if (compat) - return compat_import_iovec(rw, - (struct compat_iovec __user *)buf, - len, UIO_FASTIOV, iovec, iter); + return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec, + iter); #endif - return import_iovec(rw, (struct iovec __user *)buf, - len, UIO_FASTIOV, iovec, iter); + return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter); } -/* - * aio_run_iocb: - * Performs the initial checks and io submission. - */ -static ssize_t aio_run_iocb(struct kiocb *req, unsigned opcode, - char __user *buf, size_t len, bool compat) +static inline ssize_t aio_ret(struct kiocb *req, ssize_t ret) +{ + switch (ret) { + case -EIOCBQUEUED: + return ret; + case -ERESTARTSYS: + case -ERESTARTNOINTR: + case -ERESTARTNOHAND: + case -ERESTART_RESTARTBLOCK: + /* + * There's no easy way to restart the syscall since other AIO's + * may be already running. Just fail this IO with EINTR. + */ + ret = -EINTR; + /*FALLTHRU*/ + default: + aio_complete(req, ret, 0); + return 0; + } +} + +static ssize_t aio_read(struct kiocb *req, struct iocb *iocb, bool vectored, + bool compat) { struct file *file = req->ki_filp; - ssize_t ret; - int rw; - fmode_t mode; - rw_iter_op *iter_op; struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; struct iov_iter iter; + ssize_t ret; - switch (opcode) { - case IOCB_CMD_PREAD: - case IOCB_CMD_PREADV: - mode = FMODE_READ; - rw = READ; - iter_op = file->f_op->read_iter; - goto rw_common; - - case IOCB_CMD_PWRITE: - case IOCB_CMD_PWRITEV: - mode = FMODE_WRITE; - rw = WRITE; - iter_op = file->f_op->write_iter; - goto rw_common; -rw_common: - if (unlikely(!(file->f_mode & mode))) - return -EBADF; - - if (!iter_op) - return -EINVAL; - - if (opcode == IOCB_CMD_PREADV || opcode == IOCB_CMD_PWRITEV) - ret = aio_setup_vectored_rw(rw, buf, len, - &iovec, compat, &iter); - else { - ret = import_single_range(rw, buf, len, iovec, &iter); - iovec = NULL; - } - if (!ret) - ret = rw_verify_area(rw, file, &req->ki_pos, - iov_iter_count(&iter)); - if (ret < 0) { - kfree(iovec); - return ret; - } - - if (rw == WRITE) - file_start_write(file); - - ret = iter_op(req, &iter); - - if (rw == WRITE) - file_end_write(file); - kfree(iovec); - break; - - case IOCB_CMD_FDSYNC: - if (!file->f_op->aio_fsync) - return -EINVAL; - - ret = file->f_op->aio_fsync(req, 1); - break; + if (unlikely(!(file->f_mode & FMODE_READ))) + return -EBADF; + if (unlikely(!file->f_op->read_iter)) + return -EINVAL; - case IOCB_CMD_FSYNC: - if (!file->f_op->aio_fsync) - return -EINVAL; + ret = aio_setup_rw(READ, iocb, &iovec, vectored, compat, &iter); + if (ret) + return ret; + ret = rw_verify_area(READ, file, &req->ki_pos, iov_iter_count(&iter)); + if (!ret) + ret = aio_ret(req, file->f_op->read_iter(req, &iter)); + kfree(iovec); + return ret; +} - ret = file->f_op->aio_fsync(req, 0); - break; +static ssize_t aio_write(struct kiocb *req, struct iocb *iocb, bool vectored, + bool compat) +{ + struct file *file = req->ki_filp; + struct iovec inline_vecs[UIO_FASTIOV], *iovec = inline_vecs; + struct iov_iter iter; + ssize_t ret; - default: - pr_debug("EINVAL: no operation provided\n"); + if (unlikely(!(file->f_mode & FMODE_WRITE))) + return -EBADF; + if (unlikely(!file->f_op->write_iter)) return -EINVAL; - } - if (ret != -EIOCBQUEUED) { + ret = aio_setup_rw(WRITE, iocb, &iovec, vectored, compat, &iter); + if (ret) + return ret; + ret = rw_verify_area(WRITE, file, &req->ki_pos, iov_iter_count(&iter)); + if (!ret) { + req->ki_flags |= IOCB_WRITE; + file_start_write(file); + ret = aio_ret(req, file->f_op->write_iter(req, &iter)); /* - * There's no easy way to restart the syscall since other AIO's - * may be already running. Just fail this IO with EINTR. + * We release freeze protection in aio_complete(). Fool lockdep + * by telling it the lock got released so that it doesn't + * complain about held lock when we return to userspace. */ - if (unlikely(ret == -ERESTARTSYS || ret == -ERESTARTNOINTR || - ret == -ERESTARTNOHAND || - ret == -ERESTART_RESTARTBLOCK)) - ret = -EINTR; - aio_complete(req, ret, 0); + __sb_writers_release(file_inode(file)->i_sb, SB_FREEZE_WRITE); } - - return 0; + kfree(iovec); + return ret; } static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, struct iocb *iocb, bool compat) { struct aio_kiocb *req; + struct file *file; ssize_t ret; /* enforce forwards compatibility on users */ @@ -1530,7 +1525,7 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, if (unlikely(!req)) return -EAGAIN; - req->common.ki_filp = fget(iocb->aio_fildes); + req->common.ki_filp = file = fget(iocb->aio_fildes); if (unlikely(!req->common.ki_filp)) { ret = -EBADF; goto out_put_req; @@ -1565,13 +1560,29 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb, req->ki_user_iocb = user_iocb; req->ki_user_data = iocb->aio_data; - ret = aio_run_iocb(&req->common, iocb->aio_lio_opcode, - (char __user *)(unsigned long)iocb->aio_buf, - iocb->aio_nbytes, - compat); - if (ret) - goto out_put_req; + get_file(file); + switch (iocb->aio_lio_opcode) { + case IOCB_CMD_PREAD: + ret = aio_read(&req->common, iocb, false, compat); + break; + case IOCB_CMD_PWRITE: + ret = aio_write(&req->common, iocb, false, compat); + break; + case IOCB_CMD_PREADV: + ret = aio_read(&req->common, iocb, true, compat); + break; + case IOCB_CMD_PWRITEV: + ret = aio_write(&req->common, iocb, true, compat); + break; + default: + pr_debug("invalid aio operation %d\n", iocb->aio_lio_opcode); + ret = -EINVAL; + break; + } + fput(file); + if (ret && ret != -EIOCBQUEUED) + goto out_put_req; return 0; out_put_req: put_reqs_available(ctx, 1); diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c index a1861357900127e19182932c39322c55d17fe5fe..0ee19ecc982d4e4ef8137836ba4d753559eb7612 100644 --- a/fs/ntfs/dir.c +++ b/fs/ntfs/dir.c @@ -1544,8 +1544,6 @@ const struct file_operations ntfs_dir_ops = { .iterate = ntfs_readdir, /* Read directory contents. */ #ifdef NTFS_RW .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ - /*.aio_fsync = ,*/ /* Sync all outstanding async - i/o operations on a kiocb. */ #endif /* NTFS_RW */ /*.ioctl = ,*/ /* Perform function on the mounted filesystem. */ diff --git a/fs/splice.c b/fs/splice.c index 153d4f3bd441febd7004b1862cd218afc0ee6252..dcaf185a5731130ab67bd7076a9be26dd4f0d7f0 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -299,13 +299,8 @@ ssize_t generic_file_splice_read(struct file *in, loff_t *ppos, { struct iov_iter to; struct kiocb kiocb; - loff_t isize; int idx, ret; - isize = i_size_read(in->f_mapping->host); - if (unlikely(*ppos >= isize)) - return 0; - iov_iter_pipe(&to, ITER_PIPE | READ, pipe, len); idx = to.idx; init_sync_kiocb(&kiocb, in); diff --git a/include/linux/fs.h b/include/linux/fs.h index 16d2b6e874d679597478e653f3ef23974e0c33de..dc0478c07b2abd3887d7f5b1b84818a4ee24162e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -321,6 +321,7 @@ struct writeback_control; #define IOCB_HIPRI (1 << 3) #define IOCB_DSYNC (1 << 4) #define IOCB_SYNC (1 << 5) +#define IOCB_WRITE (1 << 6) struct kiocb { struct file *ki_filp; @@ -1709,7 +1710,6 @@ struct file_operations { int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); int (*fsync) (struct file *, loff_t, loff_t, int datasync); - int (*aio_fsync) (struct kiocb *, int datasync); int (*fasync) (int, struct file *, int); int (*lock) (struct file *, int, struct file_lock *); ssize_t (*sendpage) (struct file *, struct page *, int, size_t, loff_t *, int); diff --git a/mm/filemap.c b/mm/filemap.c index c7fe2f16503f9f906e3f01be0155818ef86c522e..50b52fe51937ca70e62a33ab1553aef9b77ad1a0 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1732,6 +1732,9 @@ static ssize_t do_generic_file_read(struct file *filp, loff_t *ppos, if (inode->i_blkbits == PAGE_SHIFT || !mapping->a_ops->is_partially_uptodate) goto page_not_up_to_date; + /* pipes can't handle partially uptodate pages */ + if (unlikely(iter->type & ITER_PIPE)) + goto page_not_up_to_date; if (!trylock_page(page)) goto page_not_up_to_date; /* Did it get truncated before we got the lock? */