提交 b2858d7d 编写于 作者: M Miklos Szeredi 提交者: Jens Axboe

splice: fix kmaps in default_file_splice_write()

Unfortunately, multiple kmap() calls within a single thread can deadlock,
so writing out multiple buffers with writev() isn't possible.

Change the implementation so that it does a separate write() for each
buffer.  This actually simplifies the code a lot since the
splice_from_pipe() helper can be used.

This limitation is caused by HIGHMEM pages, and so only affects a
subset of architectures and configurations.  In the future it may be
worthwhile to implement default_file_splice_write() in a more efficient
way on configs that allow it.
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
上级 4fc981ef
...@@ -535,8 +535,8 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec, ...@@ -535,8 +535,8 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
return res; return res;
} }
static ssize_t kernel_writev(struct file *file, const struct iovec *vec, static ssize_t kernel_write(struct file *file, const char *buf, size_t count,
unsigned long vlen, loff_t *ppos) loff_t pos)
{ {
mm_segment_t old_fs; mm_segment_t old_fs;
ssize_t res; ssize_t res;
...@@ -544,7 +544,7 @@ static ssize_t kernel_writev(struct file *file, const struct iovec *vec, ...@@ -544,7 +544,7 @@ static ssize_t kernel_writev(struct file *file, const struct iovec *vec,
old_fs = get_fs(); old_fs = get_fs();
set_fs(get_ds()); set_fs(get_ds());
/* The cast to a user pointer is valid due to the set_fs() */ /* The cast to a user pointer is valid due to the set_fs() */
res = vfs_writev(file, (const struct iovec __user *)vec, vlen, ppos); res = vfs_write(file, (const char __user *)buf, count, &pos);
set_fs(old_fs); set_fs(old_fs);
return res; return res;
...@@ -1003,120 +1003,34 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out, ...@@ -1003,120 +1003,34 @@ generic_file_splice_write(struct pipe_inode_info *pipe, struct file *out,
EXPORT_SYMBOL(generic_file_splice_write); EXPORT_SYMBOL(generic_file_splice_write);
static struct pipe_buffer *nth_pipe_buf(struct pipe_inode_info *pipe, int n) static int write_pipe_buf(struct pipe_inode_info *pipe, struct pipe_buffer *buf,
{ struct splice_desc *sd)
return &pipe->bufs[(pipe->curbuf + n) % PIPE_BUFFERS];
}
static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
struct file *out, loff_t *ppos,
size_t len, unsigned int flags)
{ {
ssize_t ret = 0; int ret;
ssize_t total_len = 0; void *data;
int do_wakeup = 0;
pipe_lock(pipe);
while (len) {
struct pipe_buffer *buf;
void *data[PIPE_BUFFERS];
struct iovec vec[PIPE_BUFFERS];
unsigned int nr_pages = 0;
unsigned int write_len = 0;
unsigned int now_len = len;
unsigned int this_len;
int i;
BUG_ON(pipe->nrbufs > PIPE_BUFFERS);
for (i = 0; i < pipe->nrbufs && now_len; i++) {
buf = nth_pipe_buf(pipe, i);
ret = buf->ops->confirm(pipe, buf); ret = buf->ops->confirm(pipe, buf);
if (ret) if (ret)
break; return ret;
data[i] = buf->ops->map(pipe, buf, 0);
this_len = min(buf->len, now_len);
vec[i].iov_base = (void __user *) data[i] + buf->offset;
vec[i].iov_len = this_len;
now_len -= this_len;
write_len += this_len;
nr_pages++;
}
if (nr_pages) {
ret = kernel_writev(out, vec, nr_pages, ppos);
if (ret == 0)
ret = -EIO;
if (ret > 0) {
len -= ret;
total_len += ret;
}
}
for (i = 0; i < nr_pages; i++) {
buf = nth_pipe_buf(pipe, i);
buf->ops->unmap(pipe, buf, data[i]);
if (ret > 0) {
this_len = min_t(unsigned, vec[i].iov_len, ret);
buf->offset += this_len;
buf->len -= this_len;
ret -= this_len;
}
}
if (ret < 0)
break;
while (pipe->nrbufs) {
const struct pipe_buf_operations *ops;
buf = nth_pipe_buf(pipe, 0);
if (buf->len)
break;
ops = buf->ops;
buf->ops = NULL;
ops->release(pipe, buf);
pipe->curbuf = (pipe->curbuf + 1) % PIPE_BUFFERS;
pipe->nrbufs--;
if (pipe->inode)
do_wakeup = 1;
}
if (pipe->nrbufs)
continue;
if (!pipe->writers)
break;
if (!pipe->waiting_writers) {
if (total_len)
break;
}
if (flags & SPLICE_F_NONBLOCK) { data = buf->ops->map(pipe, buf, 0);
ret = -EAGAIN; ret = kernel_write(sd->u.file, data + buf->offset, sd->len, sd->pos);
break; buf->ops->unmap(pipe, buf, data);
}
if (signal_pending(current)) { return ret;
ret = -ERESTARTSYS; }
break;
}
if (do_wakeup) {
wakeup_pipe_writers(pipe);
do_wakeup = 0;
}
pipe_wait(pipe); static ssize_t default_file_splice_write(struct pipe_inode_info *pipe,
} struct file *out, loff_t *ppos,
pipe_unlock(pipe); size_t len, unsigned int flags)
{
ssize_t ret;
if (do_wakeup) ret = splice_from_pipe(pipe, out, ppos, len, flags, write_pipe_buf);
wakeup_pipe_writers(pipe); if (ret > 0)
*ppos += ret;
return total_len ? total_len : ret; return ret;
} }
/** /**
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册