提交 583bbf06 编写于 作者: Luke Hsiao 提交者: David S. Miller

io_uring: allow tcp ancillary data for __sys_recvmsg_sock()

For TCP tx zero-copy, the kernel notifies the process of completions by
queuing completion notifications on the socket error queue. This patch
allows reading these notifications via recvmsg to support TCP tx
zero-copy.

Ancillary data was originally disallowed due to privilege escalation
via io_uring's offloading of sendmsg() onto a kernel thread with kernel
credentials (https://crbug.com/project-zero/1975). So, we must ensure
that the socket type is one where the ancillary data types that are
delivered on recvmsg are plain data (no file descriptors or values that
are translated based on the identity of the calling process).

This was tested by using io_uring to call recvmsg on the MSG_ERRQUEUE
with tx zero-copy enabled. Before this patch, we received -EINVAL from
this specific code path. After this patch, we could read tcp tx
zero-copy completion notifications from the MSG_ERRQUEUE.
Signed-off-by: Soheil Hassas Yeganeh <soheil@google.com>
Signed-off-by: Arjun Roy <arjunroy@google.com>
Acked-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Jann Horn <jannh@google.com>
Reviewed-by: Jens Axboe <axboe@kernel.dk>
Signed-off-by: Luke Hsiao <lukehsiao@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 37a319b5
...@@ -41,6 +41,8 @@ struct net; ...@@ -41,6 +41,8 @@ struct net;
#define SOCK_PASSCRED 3 #define SOCK_PASSCRED 3
#define SOCK_PASSSEC 4 #define SOCK_PASSSEC 4
#define PROTO_CMSG_DATA_ONLY 0x0001
#ifndef ARCH_HAS_SOCKET_TYPES #ifndef ARCH_HAS_SOCKET_TYPES
/** /**
* enum sock_type - Socket types * enum sock_type - Socket types
...@@ -135,6 +137,7 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *, ...@@ -135,6 +137,7 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
struct proto_ops { struct proto_ops {
int family; int family;
unsigned int flags;
struct module *owner; struct module *owner;
int (*release) (struct socket *sock); int (*release) (struct socket *sock);
int (*bind) (struct socket *sock, int (*bind) (struct socket *sock,
......
...@@ -1017,6 +1017,7 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon ...@@ -1017,6 +1017,7 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
const struct proto_ops inet_stream_ops = { const struct proto_ops inet_stream_ops = {
.family = PF_INET, .family = PF_INET,
.flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.release = inet_release, .release = inet_release,
.bind = inet_bind, .bind = inet_bind,
......
...@@ -661,6 +661,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, ...@@ -661,6 +661,7 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
const struct proto_ops inet6_stream_ops = { const struct proto_ops inet6_stream_ops = {
.family = PF_INET6, .family = PF_INET6,
.flags = PROTO_CMSG_DATA_ONLY,
.owner = THIS_MODULE, .owner = THIS_MODULE,
.release = inet6_release, .release = inet6_release,
.bind = inet6_bind, .bind = inet6_bind,
......
...@@ -2628,9 +2628,11 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg, ...@@ -2628,9 +2628,11 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
struct user_msghdr __user *umsg, struct user_msghdr __user *umsg,
struct sockaddr __user *uaddr, unsigned int flags) struct sockaddr __user *uaddr, unsigned int flags)
{ {
/* disallow ancillary data requests from this path */ if (msg->msg_control || msg->msg_controllen) {
if (msg->msg_control || msg->msg_controllen) /* disallow ancillary data reqs unless cmsg is plain data */
return -EINVAL; if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
return -EINVAL;
}
return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0); return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册