提交 b3ae4755 编写于 作者: M Mike Marshall

Orangefs: implement .write_iter

Until now, orangefs_devreq_write_iter has just been a wrapper for
the old-fashioned orangefs_devreq_writev: Linux would call
.write_iter with "struct kiocb *iocb" and "struct iov_iter *iter",
and .write_iter would just:

        return orangefs_devreq_writev(iocb->ki_filp,
                                      iter->iov,
                                      iter->nr_segs,
                                      &iocb->ki_pos);
Signed-off-by: Mike Marshall <hubcap@omnibond.com>
上级 85096169
...@@ -245,202 +245,156 @@ static ssize_t orangefs_devreq_read(struct file *file, ...@@ -245,202 +245,156 @@ static ssize_t orangefs_devreq_read(struct file *file,
} }
/* /*
* Function for writev() callers into the device. Readdir related * Function for writev() callers into the device.
* operations have an extra iovec containing info about objects *
* contained in directories. * Userspace should have written:
* - __u32 version
* - __u32 magic
* - __u64 tag
* - struct orangefs_downcall_s
* - trailer buffer (in the case of READDIR operations)
*/ */
static ssize_t orangefs_devreq_writev(struct file *file, static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
const struct iovec *iov, struct iov_iter *iter)
size_t count,
loff_t *offset)
{ {
ssize_t ret;
struct orangefs_kernel_op_s *op = NULL; struct orangefs_kernel_op_s *op = NULL;
void *buffer = NULL; struct {
void *ptr = NULL; __u32 version;
unsigned long i = 0; __u32 magic;
int num_remaining = MAX_DEV_REQ_DOWNSIZE; __u64 tag;
int ret = 0; } head;
/* num elements in iovec without trailer */ int total = ret = iov_iter_count(iter);
int notrailer_count = 4; int n;
/* int downcall_size = sizeof(struct orangefs_downcall_s);
* If there's a trailer, its iov index will be equal to int head_size = sizeof(head);
* notrailer_count.
*/ gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n",
int trailer_index = notrailer_count; __func__,
int payload_size = 0; total,
int returned_downcall_size = 0; ret);
__s32 magic = 0;
__s32 proto_ver = 0;
__u64 tag = 0;
ssize_t total_returned_size = 0;
/* if (total < MAX_DEV_REQ_DOWNSIZE) {
* There will always be at least notrailer_count iovecs, and gossip_err("%s: total:%d: must be at least:%lu:\n",
* when there's a trailer, one more than notrailer_count. Check
* count's sanity.
*/
if (count != notrailer_count && count != (notrailer_count + 1)) {
gossip_err("%s: count:%zu: notrailer_count :%d:\n",
__func__, __func__,
count, total,
notrailer_count); MAX_DEV_REQ_DOWNSIZE);
return -EPROTO; ret = -EFAULT;
goto out;
} }
n = copy_from_iter(&head, head_size, iter);
/* Copy the non-trailer iovec data into a device request buffer. */ if (n < head_size) {
buffer = dev_req_alloc(); gossip_err("%s: failed to copy head.\n", __func__);
if (!buffer) { ret = -EFAULT;
gossip_err("%s: dev_req_alloc failed.\n", __func__); goto out;
return -ENOMEM;
}
ptr = buffer;
for (i = 0; i < notrailer_count; i++) {
if (iov[i].iov_len > num_remaining) {
gossip_err
("writev error: Freeing buffer and returning\n");
dev_req_release(buffer);
return -EMSGSIZE;
}
ret = copy_from_user(ptr, iov[i].iov_base, iov[i].iov_len);
if (ret) {
gossip_err("Failed to copy data from user space\n");
dev_req_release(buffer);
return -EIO;
}
num_remaining -= iov[i].iov_len;
ptr += iov[i].iov_len;
payload_size += iov[i].iov_len;
} }
total_returned_size = payload_size;
/* these elements are currently 8 byte aligned (8 bytes for (version +
* magic) 8 bytes for tag). If you add another element, either
* make it 8 bytes big, or use get_unaligned when assigning.
*/
ptr = buffer;
proto_ver = *((__s32 *) ptr); /* unused */
ptr += sizeof(__s32);
magic = *((__s32 *) ptr); if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) {
ptr += sizeof(__s32); gossip_err("%s: userspace claims version"
"%d, minimum version required: %d.\n",
tag = *((__u64 *) ptr); __func__,
ptr += sizeof(__u64); head.version,
ORANGEFS_MINIMUM_USERSPACE_VERSION);
ret = -EPROTO;
goto out;
}
if (magic != ORANGEFS_DEVREQ_MAGIC) { if (head.magic != ORANGEFS_DEVREQ_MAGIC) {
gossip_err("Error: Device magic number does not match.\n"); gossip_err("Error: Device magic number does not match.\n");
dev_req_release(buffer); ret = -EPROTO;
return -EPROTO; goto out;
} }
op = orangefs_devreq_remove_op(tag); op = orangefs_devreq_remove_op(head.tag);
if (op) { if (!op) {
/* Increase ref count! */ gossip_err("WARNING: No one's waiting for tag %llu\n",
get_op(op); llu(head.tag));
goto out;
/* calculate the size of the returned downcall. */
returned_downcall_size =
payload_size - (2 * sizeof(__s32) + sizeof(__u64));
/* copy the passed in downcall into the op */
if (returned_downcall_size ==
sizeof(struct orangefs_downcall_s)) {
memcpy(&op->downcall,
ptr,
sizeof(struct orangefs_downcall_s));
} else {
gossip_err("%s: returned downcall size:%d: \n",
__func__,
returned_downcall_size);
dev_req_release(buffer);
put_op(op);
return -EMSGSIZE;
} }
/* Don't tolerate an unexpected trailer iovec. */ get_op(op); /* increase ref count. */
if ((op->downcall.trailer_size == 0) &&
(count != notrailer_count)) { n = copy_from_iter(&op->downcall, downcall_size, iter);
gossip_err("%s: unexpected trailer iovec.\n", if (n != downcall_size) {
__func__); gossip_err("%s: failed to copy downcall.\n", __func__);
dev_req_release(buffer);
put_op(op); put_op(op);
return -EPROTO; ret = -EFAULT;
goto out;
} }
/* Don't consider the trailer if there's a bad status. */ if (op->downcall.status)
if (op->downcall.status != 0) goto wakeup;
goto no_trailer;
/* get the trailer if there is one. */
if (op->downcall.trailer_size == 0)
goto no_trailer;
gossip_debug(GOSSIP_DEV_DEBUG,
"%s: op->downcall.trailer_size %lld\n",
__func__,
op->downcall.trailer_size);
/* /*
* Bail if we think there should be a trailer, but * We've successfully peeled off the head and the downcall.
* there's no iovec for it. * Something has gone awry if total doesn't equal the
* sum of head_size, downcall_size and trailer_size.
*/ */
if (count != (notrailer_count + 1)) { if ((head_size + downcall_size + op->downcall.trailer_size) != total) {
gossip_err("%s: trailer_size:%lld: count:%zu:\n", gossip_err("%s: funky write, head_size:%d"
": downcall_size:%d: trailer_size:%lld"
": total size:%d:\n",
__func__, __func__,
head_size,
downcall_size,
op->downcall.trailer_size, op->downcall.trailer_size,
count); total);
dev_req_release(buffer);
put_op(op); put_op(op);
return -EPROTO; ret = -EFAULT;
goto out;
} }
/* Verify that trailer_size is accurate. */ /* Only READDIR operations should have trailers. */
if (op->downcall.trailer_size != iov[trailer_index].iov_len) { if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) &&
gossip_err("%s: trailer_size:%lld: != iov_len:%zd:\n", (op->downcall.trailer_size != 0)) {
gossip_err("%s: %x operation with trailer.",
__func__, __func__,
op->downcall.trailer_size, op->downcall.type);
iov[trailer_index].iov_len);
dev_req_release(buffer);
put_op(op); put_op(op);
return -EMSGSIZE; ret = -EFAULT;
goto out;
} }
total_returned_size += iov[trailer_index].iov_len; /* READDIR operations should always have trailers. */
if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) &&
(op->downcall.trailer_size == 0)) {
gossip_err("%s: %x operation with no trailer.",
__func__,
op->downcall.type);
put_op(op);
ret = -EFAULT;
goto out;
}
/* if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
* Allocate a buffer, copy the trailer bytes into it and goto wakeup;
* attach it to the downcall.
*/ op->downcall.trailer_buf =
op->downcall.trailer_buf = vmalloc(iov[trailer_index].iov_len); vmalloc(op->downcall.trailer_size);
if (op->downcall.trailer_buf != NULL) { if (op->downcall.trailer_buf == NULL) {
gossip_debug(GOSSIP_DEV_DEBUG, "vmalloc: %p\n", gossip_err("%s: failed trailer vmalloc.\n",
op->downcall.trailer_buf);
ret = copy_from_user(op->downcall.trailer_buf,
iov[trailer_index].iov_base,
iov[trailer_index].iov_len);
if (ret) {
gossip_err("%s: Failed to copy trailer.\n",
__func__); __func__);
dev_req_release(buffer);
gossip_debug(GOSSIP_DEV_DEBUG,
"vfree: %p\n",
op->downcall.trailer_buf);
vfree(op->downcall.trailer_buf);
op->downcall.trailer_buf = NULL;
put_op(op); put_op(op);
return -EIO; ret = -ENOMEM;
goto out;
} }
} else { memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
gossip_err("writev: could not vmalloc for trailer!\n"); n = copy_from_iter(op->downcall.trailer_buf,
dev_req_release(buffer); op->downcall.trailer_size,
iter);
if (n != op->downcall.trailer_size) {
gossip_err("%s: failed to copy trailer.\n", __func__);
vfree(op->downcall.trailer_buf);
put_op(op); put_op(op);
return -ENOMEM; ret = -EFAULT;
goto out;
} }
no_trailer: wakeup:
/* if this operation is an I/O operation we need to wait /*
* If this operation is an I/O operation we need to wait
* for all data to be copied before we can return to avoid * for all data to be copied before we can return to avoid
* buffer corruption and races that can pull the buffers * buffer corruption and races that can pull the buffers
* out from under us. * out from under us.
...@@ -450,7 +404,7 @@ static ssize_t orangefs_devreq_writev(struct file *file, ...@@ -450,7 +404,7 @@ static ssize_t orangefs_devreq_writev(struct file *file,
* application reading/writing this device to return until * application reading/writing this device to return until
* the buffers are done being used. * the buffers are done being used.
*/ */
if (op->upcall.type == ORANGEFS_VFS_OP_FILE_IO) { if (op->downcall.type == ORANGEFS_VFS_OP_FILE_IO) {
int timed_out = 0; int timed_out = 0;
DEFINE_WAIT(wait_entry); DEFINE_WAIT(wait_entry);
...@@ -509,7 +463,6 @@ static ssize_t orangefs_devreq_writev(struct file *file, ...@@ -509,7 +463,6 @@ static ssize_t orangefs_devreq_writev(struct file *file,
if (!timed_out) if (!timed_out)
op_release(op); op_release(op);
} else { } else {
/* /*
* tell the vfs op waiting on a waitqueue that * tell the vfs op waiting on a waitqueue that
* this op is done * this op is done
...@@ -524,25 +477,8 @@ static ssize_t orangefs_devreq_writev(struct file *file, ...@@ -524,25 +477,8 @@ static ssize_t orangefs_devreq_writev(struct file *file,
*/ */
wake_up_interruptible(&op->waitq); wake_up_interruptible(&op->waitq);
} }
} else { out:
/* ignore downcalls that we're not interested in */ return ret;
gossip_debug(GOSSIP_DEV_DEBUG,
"WARNING: No one's waiting for tag %llu\n",
llu(tag));
}
/* put_op? */
dev_req_release(buffer);
return total_returned_size;
}
static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
struct iov_iter *iter)
{
return orangefs_devreq_writev(iocb->ki_filp,
iter->iov,
iter->nr_segs,
&iocb->ki_pos);
} }
/* Returns whether any FS are still pending remounted */ /* Returns whether any FS are still pending remounted */
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册