提交 e6a4b6f5 编写于 作者: L Linus Torvalds

Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs

Pull vfs fixes from Al Viro.

Clean up file table accesses (get rid of fget_light() in favor of the
fdget() interface), add proper file position locking.

* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
  get rid of fget_light()
  sockfd_lookup_light(): switch to fdget^W^Waway from fget_light
  vfs: atomic f_pos accesses as per POSIX
  ocfs2 syncs the wrong range...
......@@ -683,35 +683,65 @@ EXPORT_SYMBOL(fget_raw);
* The fput_needed flag returned by fget_light should be passed to the
* corresponding fput_light.
*/
struct file *__fget_light(unsigned int fd, fmode_t mask, int *fput_needed)
static unsigned long __fget_light(unsigned int fd, fmode_t mask)
{
struct files_struct *files = current->files;
struct file *file;
*fput_needed = 0;
if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd);
if (file && (file->f_mode & mask))
file = NULL;
if (!file || unlikely(file->f_mode & mask))
return 0;
return (unsigned long)file;
} else {
file = __fget(fd, mask);
if (file)
*fput_needed = 1;
if (!file)
return 0;
return FDPUT_FPUT | (unsigned long)file;
}
return file;
}
struct file *fget_light(unsigned int fd, int *fput_needed)
unsigned long __fdget(unsigned int fd)
{
return __fget_light(fd, FMODE_PATH, fput_needed);
return __fget_light(fd, FMODE_PATH);
}
EXPORT_SYMBOL(fget_light);
EXPORT_SYMBOL(__fdget);
struct file *fget_raw_light(unsigned int fd, int *fput_needed)
unsigned long __fdget_raw(unsigned int fd)
{
return __fget_light(fd, 0, fput_needed);
return __fget_light(fd, 0);
}
unsigned long __fdget_pos(unsigned int fd)
{
struct files_struct *files = current->files;
struct file *file;
unsigned long v;
if (atomic_read(&files->count) == 1) {
file = __fcheck_files(files, fd);
v = 0;
} else {
file = __fget(fd, 0);
v = FDPUT_FPUT;
}
if (!file)
return 0;
if (file->f_mode & FMODE_ATOMIC_POS) {
if (file_count(file) > 1) {
v |= FDPUT_POS_UNLOCK;
mutex_lock(&file->f_pos_lock);
}
}
return v | (unsigned long)file;
}
/*
* We only lock f_pos if we have threads or if the file might be
* shared with another process. In both cases we'll have an elevated
* file count (done either by fdget() or by fork()).
*/
void set_close_on_exec(unsigned int fd, int flag)
{
struct files_struct *files = current->files;
......
......@@ -135,6 +135,7 @@ struct file *get_empty_filp(void)
atomic_long_set(&f->f_count, 1);
rwlock_init(&f->f_owner.lock);
spin_lock_init(&f->f_lock);
mutex_init(&f->f_pos_lock);
eventpoll_init_file(f);
/* f->f_version: 0 */
return f;
......
......@@ -1884,7 +1884,7 @@ static int path_init(int dfd, const char *name, unsigned int flags,
nd->path = f.file->f_path;
if (flags & LOOKUP_RCU) {
if (f.need_put)
if (f.flags & FDPUT_FPUT)
*fp = f.file;
nd->seq = __read_seqcount_begin(&nd->path.dentry->d_seq);
rcu_read_lock();
......
......@@ -2393,8 +2393,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
if (((file->f_flags & O_DSYNC) && !direct_io) || IS_SYNC(inode) ||
((file->f_flags & O_DIRECT) && !direct_io)) {
ret = filemap_fdatawrite_range(file->f_mapping, pos,
pos + count - 1);
ret = filemap_fdatawrite_range(file->f_mapping, *ppos,
*ppos + count - 1);
if (ret < 0)
written = ret;
......@@ -2407,8 +2407,8 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb,
}
if (!ret)
ret = filemap_fdatawait_range(file->f_mapping, pos,
pos + count - 1);
ret = filemap_fdatawait_range(file->f_mapping, *ppos,
*ppos + count - 1);
}
/*
......
......@@ -705,6 +705,10 @@ static int do_dentry_open(struct file *f,
return 0;
}
/* POSIX.1-2008/SUSv4 Section XSI 2.9.7 */
if (S_ISREG(inode->i_mode))
f->f_mode |= FMODE_ATOMIC_POS;
f->f_op = fops_get(inode->i_fop);
if (unlikely(WARN_ON(!f->f_op))) {
error = -ENODEV;
......
......@@ -264,10 +264,22 @@ loff_t vfs_llseek(struct file *file, loff_t offset, int whence)
}
EXPORT_SYMBOL(vfs_llseek);
static inline struct fd fdget_pos(int fd)
{
return __to_fd(__fdget_pos(fd));
}
static inline void fdput_pos(struct fd f)
{
if (f.flags & FDPUT_POS_UNLOCK)
mutex_unlock(&f.file->f_pos_lock);
fdput(f);
}
SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
{
off_t retval;
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
if (!f.file)
return -EBADF;
......@@ -278,7 +290,7 @@ SYSCALL_DEFINE3(lseek, unsigned int, fd, off_t, offset, unsigned int, whence)
if (res != (loff_t)retval)
retval = -EOVERFLOW; /* LFS: should only happen on 32 bit platforms */
}
fdput(f);
fdput_pos(f);
return retval;
}
......@@ -498,7 +510,7 @@ static inline void file_pos_write(struct file *file, loff_t pos)
SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
{
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
......@@ -506,7 +518,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
ret = vfs_read(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput(f);
fdput_pos(f);
}
return ret;
}
......@@ -514,7 +526,7 @@ SYSCALL_DEFINE3(read, unsigned int, fd, char __user *, buf, size_t, count)
SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
size_t, count)
{
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
......@@ -522,7 +534,7 @@ SYSCALL_DEFINE3(write, unsigned int, fd, const char __user *, buf,
ret = vfs_write(f.file, buf, count, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput(f);
fdput_pos(f);
}
return ret;
......@@ -797,7 +809,7 @@ EXPORT_SYMBOL(vfs_writev);
SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
......@@ -805,7 +817,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_readv(f.file, vec, vlen, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput(f);
fdput_pos(f);
}
if (ret > 0)
......@@ -817,7 +829,7 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
unsigned long, vlen)
{
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
......@@ -825,7 +837,7 @@ SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
ret = vfs_writev(f.file, vec, vlen, &pos);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput(f);
fdput_pos(f);
}
if (ret > 0)
......@@ -968,7 +980,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
const struct compat_iovec __user *,vec,
compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
......@@ -978,7 +990,7 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
ret = compat_readv(f.file, vec, vlen, &pos);
if (ret >= 0)
f.file->f_pos = pos;
fdput(f);
fdput_pos(f);
return ret;
}
......@@ -1035,7 +1047,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
const struct compat_iovec __user *, vec,
compat_ulong_t, vlen)
{
struct fd f = fdget(fd);
struct fd f = fdget_pos(fd);
ssize_t ret;
loff_t pos;
......@@ -1045,7 +1057,7 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
ret = compat_writev(f.file, vec, vlen, &pos);
if (ret >= 0)
f.file->f_pos = pos;
fdput(f);
fdput_pos(f);
return ret;
}
......
......@@ -28,33 +28,36 @@ static inline void fput_light(struct file *file, int fput_needed)
struct fd {
struct file *file;
int need_put;
unsigned int flags;
};
#define FDPUT_FPUT 1
#define FDPUT_POS_UNLOCK 2
static inline void fdput(struct fd fd)
{
if (fd.need_put)
if (fd.flags & FDPUT_FPUT)
fput(fd.file);
}
extern struct file *fget(unsigned int fd);
extern struct file *fget_light(unsigned int fd, int *fput_needed);
extern struct file *fget_raw(unsigned int fd);
extern unsigned long __fdget(unsigned int fd);
extern unsigned long __fdget_raw(unsigned int fd);
extern unsigned long __fdget_pos(unsigned int fd);
static inline struct fd fdget(unsigned int fd)
static inline struct fd __to_fd(unsigned long v)
{
int b;
struct file *f = fget_light(fd, &b);
return (struct fd){f,b};
return (struct fd){(struct file *)(v & ~3),v & 3};
}
extern struct file *fget_raw(unsigned int fd);
extern struct file *fget_raw_light(unsigned int fd, int *fput_needed);
static inline struct fd fdget(unsigned int fd)
{
return __to_fd(__fdget(fd));
}
static inline struct fd fdget_raw(unsigned int fd)
{
int b;
struct file *f = fget_raw_light(fd, &b);
return (struct fd){f,b};
return __to_fd(__fdget_raw(fd));
}
extern int f_dupfd(unsigned int from, struct file *file, unsigned flags);
......
......@@ -123,6 +123,9 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
/* File is opened with O_PATH; almost nothing can be done with it */
#define FMODE_PATH ((__force fmode_t)0x4000)
/* File needs atomic accesses to f_pos */
#define FMODE_ATOMIC_POS ((__force fmode_t)0x8000)
/* File was opened by fanotify and shouldn't generate fanotify events */
#define FMODE_NONOTIFY ((__force fmode_t)0x1000000)
......@@ -780,13 +783,14 @@ struct file {
const struct file_operations *f_op;
/*
* Protects f_ep_links, f_flags, f_pos vs i_size in lseek SEEK_CUR.
* Protects f_ep_links, f_flags.
* Must not be taken from IRQ context.
*/
spinlock_t f_lock;
atomic_long_t f_count;
unsigned int f_flags;
fmode_t f_mode;
struct mutex f_pos_lock;
loff_t f_pos;
struct fown_struct f_owner;
const struct cred *f_cred;
......@@ -808,7 +812,7 @@ struct file {
#ifdef CONFIG_DEBUG_WRITECOUNT
unsigned long f_mnt_write_state;
#endif
};
} __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */
struct file_handle {
__u32 handle_bytes;
......
......@@ -450,16 +450,17 @@ EXPORT_SYMBOL(sockfd_lookup);
static struct socket *sockfd_lookup_light(int fd, int *err, int *fput_needed)
{
struct file *file;
struct fd f = fdget(fd);
struct socket *sock;
*err = -EBADF;
file = fget_light(fd, fput_needed);
if (file) {
sock = sock_from_file(file, err);
if (sock)
if (f.file) {
sock = sock_from_file(f.file, err);
if (likely(sock)) {
*fput_needed = f.flags;
return sock;
fput_light(file, *fput_needed);
}
fdput(f);
}
return NULL;
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册