提交 05c2828c 作者: Michael S. Tsirkin 提交者: David S. Miller

tun: export underlying socket

Tun device looks similar to a packet socket
in that both pass complete frames from/to userspace.

This patch fills in enough fields in the socket underlying the tun driver
to support sendmsg/recvmsg operations and the message flags
MSG_TRUNC and MSG_DONTWAIT, and exports access to this socket
to modules.  Regular read/write behaviour is unchanged.

This way, code that uses raw sockets to inject packets
into a physical device can support injecting
packets into the host network stack almost without modification.

The first user of this interface will be the vhost virtualization
accelerator.
Signed-off-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Herbert Xu <herbert@gondor.apana.org.au>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
上级 ad72c347
...@@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file) ...@@ -144,6 +144,7 @@ static int tun_attach(struct tun_struct *tun, struct file *file)
err = 0; err = 0;
tfile->tun = tun; tfile->tun = tun;
tun->tfile = tfile; tun->tfile = tfile;
tun->socket.file = file;
dev_hold(tun->dev); dev_hold(tun->dev);
sock_hold(tun->socket.sk); sock_hold(tun->socket.sk);
atomic_inc(&tfile->count); atomic_inc(&tfile->count);
...@@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun) ...@@ -158,6 +159,7 @@ static void __tun_detach(struct tun_struct *tun)
/* Detach from net device */ /* Detach from net device */
netif_tx_lock_bh(tun->dev); netif_tx_lock_bh(tun->dev);
tun->tfile = NULL; tun->tfile = NULL;
tun->socket.file = NULL;
netif_tx_unlock_bh(tun->dev); netif_tx_unlock_bh(tun->dev);
/* Drop read queue */ /* Drop read queue */
...@@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev) ...@@ -387,7 +389,8 @@ static netdev_tx_t tun_net_xmit(struct sk_buff *skb, struct net_device *dev)
/* Notify and wake up reader process */ /* Notify and wake up reader process */
if (tun->flags & TUN_FASYNC) if (tun->flags & TUN_FASYNC)
kill_fasync(&tun->fasync, SIGIO, POLL_IN); kill_fasync(&tun->fasync, SIGIO, POLL_IN);
wake_up_interruptible(&tun->socket.wait); wake_up_interruptible_poll(&tun->socket.wait, POLLIN |
POLLRDNORM | POLLRDBAND);
return NETDEV_TX_OK; return NETDEV_TX_OK;
drop: drop:
...@@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, ...@@ -743,7 +746,7 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
len = min_t(int, skb->len, len); len = min_t(int, skb->len, len);
skb_copy_datagram_const_iovec(skb, 0, iv, total, len); skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
total += len; total += skb->len;
tun->dev->stats.tx_packets++; tun->dev->stats.tx_packets++;
tun->dev->stats.tx_bytes += len; tun->dev->stats.tx_bytes += len;
...@@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun, ...@@ -751,34 +754,23 @@ static __inline__ ssize_t tun_put_user(struct tun_struct *tun,
return total; return total;
} }
static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, static ssize_t tun_do_read(struct tun_struct *tun,
unsigned long count, loff_t pos) struct kiocb *iocb, const struct iovec *iv,
ssize_t len, int noblock)
{ {
struct file *file = iocb->ki_filp;
struct tun_file *tfile = file->private_data;
struct tun_struct *tun = __tun_get(tfile);
DECLARE_WAITQUEUE(wait, current); DECLARE_WAITQUEUE(wait, current);
struct sk_buff *skb; struct sk_buff *skb;
ssize_t len, ret = 0; ssize_t ret = 0;
if (!tun)
return -EBADFD;
DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name); DBG(KERN_INFO "%s: tun_chr_read\n", tun->dev->name);
len = iov_length(iv, count);
if (len < 0) {
ret = -EINVAL;
goto out;
}
add_wait_queue(&tun->socket.wait, &wait); add_wait_queue(&tun->socket.wait, &wait);
while (len) { while (len) {
current->state = TASK_INTERRUPTIBLE; current->state = TASK_INTERRUPTIBLE;
/* Read frames from the queue */ /* Read frames from the queue */
if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) { if (!(skb=skb_dequeue(&tun->socket.sk->sk_receive_queue))) {
if (file->f_flags & O_NONBLOCK) { if (noblock) {
ret = -EAGAIN; ret = -EAGAIN;
break; break;
} }
...@@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv, ...@@ -805,6 +797,27 @@ static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
current->state = TASK_RUNNING; current->state = TASK_RUNNING;
remove_wait_queue(&tun->socket.wait, &wait); remove_wait_queue(&tun->socket.wait, &wait);
return ret;
}
static ssize_t tun_chr_aio_read(struct kiocb *iocb, const struct iovec *iv,
unsigned long count, loff_t pos)
{
struct file *file = iocb->ki_filp;
struct tun_file *tfile = file->private_data;
struct tun_struct *tun = __tun_get(tfile);
ssize_t len, ret;
if (!tun)
return -EBADFD;
len = iov_length(iv, count);
if (len < 0) {
ret = -EINVAL;
goto out;
}
ret = tun_do_read(tun, iocb, iv, len, file->f_flags & O_NONBLOCK);
ret = min_t(ssize_t, ret, len);
out: out:
tun_put(tun); tun_put(tun);
return ret; return ret;
...@@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk) ...@@ -847,7 +860,8 @@ static void tun_sock_write_space(struct sock *sk)
return; return;
if (sk->sk_sleep && waitqueue_active(sk->sk_sleep)) if (sk->sk_sleep && waitqueue_active(sk->sk_sleep))
wake_up_interruptible_sync(sk->sk_sleep); wake_up_interruptible_sync_poll(sk->sk_sleep, POLLOUT |
POLLWRNORM | POLLWRBAND);
tun = tun_sk(sk)->tun; tun = tun_sk(sk)->tun;
kill_fasync(&tun->fasync, SIGIO, POLL_OUT); kill_fasync(&tun->fasync, SIGIO, POLL_OUT);
...@@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk) ...@@ -858,6 +872,37 @@ static void tun_sock_destruct(struct sock *sk)
free_netdev(tun_sk(sk)->tun->dev); free_netdev(tun_sk(sk)->tun->dev);
} }
/*
 * sendmsg handler installed in tun_socket_ops.
 *
 * Recovers the tun_struct that embeds @sock and forwards the caller's iovec
 * and length to tun_get_user().  The last argument passed on is the
 * MSG_DONTWAIT bit of m->msg_flags.  @iocb is not used.
 *
 * Returns whatever tun_get_user() returns.
 */
static int tun_sendmsg(struct kiocb *iocb, struct socket *sock,
		       struct msghdr *m, size_t total_len)
{
	struct tun_struct *owner;

	owner = container_of(sock, struct tun_struct, socket);

	return tun_get_user(owner,
			    m->msg_iov,
			    total_len,
			    m->msg_flags & MSG_DONTWAIT);
}
/*
 * recvmsg handler installed in tun_socket_ops.
 *
 * Recovers the tun_struct that embeds @sock and reads from its receive queue
 * through tun_do_read() into m->msg_iov.  @flags may only contain
 * MSG_DONTWAIT and MSG_TRUNC; any other bit is rejected with -EINVAL.
 *
 * Return value:
 *   - a negative errno from tun_do_read() is passed through unchanged;
 *   - a length that fits in @total_len is returned as is;
 *   - a length larger than @total_len sets MSG_TRUNC in m->msg_flags and is
 *     clamped to @total_len, unless the caller passed MSG_TRUNC in @flags,
 *     in which case the unclamped length is returned.
 */
static int tun_recvmsg(struct kiocb *iocb, struct socket *sock,
		       struct msghdr *m, size_t total_len,
		       int flags)
{
	struct tun_struct *tun = container_of(sock, struct tun_struct, socket);
	int ret;

	if (flags & ~(MSG_DONTWAIT|MSG_TRUNC))
		return -EINVAL;

	ret = tun_do_read(tun, iocb, m->msg_iov, total_len,
			  flags & MSG_DONTWAIT);

	/*
	 * The comparison must be signed.  Against a bare size_t, a negative
	 * errno in ret would be converted to a huge unsigned value, take
	 * this branch, set MSG_TRUNC spuriously and (without MSG_TRUNC in
	 * flags) be replaced by total_len, hiding the error from the caller.
	 */
	if (ret > (ssize_t)total_len) {
		m->msg_flags |= MSG_TRUNC;
		ret = flags & MSG_TRUNC ? ret : total_len;
	}
	return ret;
}
/*
 * Operations table for the socket embedded in a tun device, so that it can
 * mimic a raw socket.  Only the two message entry points are filled in;
 * every other member is left zero-initialised.
 */
static const struct proto_ops tun_socket_ops = {
	.recvmsg	= tun_recvmsg,
	.sendmsg	= tun_sendmsg,
};
static struct proto tun_proto = { static struct proto tun_proto = {
.name = "tun", .name = "tun",
.owner = THIS_MODULE, .owner = THIS_MODULE,
...@@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr) ...@@ -986,6 +1031,7 @@ static int tun_set_iff(struct net *net, struct file *file, struct ifreq *ifr)
goto err_free_dev; goto err_free_dev;
init_waitqueue_head(&tun->socket.wait); init_waitqueue_head(&tun->socket.wait);
tun->socket.ops = &tun_socket_ops;
sock_init_data(&tun->socket, sk); sock_init_data(&tun->socket, sk);
sk->sk_write_space = tun_sock_write_space; sk->sk_write_space = tun_sock_write_space;
sk->sk_sndbuf = INT_MAX; sk->sk_sndbuf = INT_MAX;
...@@ -1525,6 +1571,23 @@ static void tun_cleanup(void) ...@@ -1525,6 +1571,23 @@ static void tun_cleanup(void)
rtnl_link_unregister(&tun_link_ops); rtnl_link_unregister(&tun_link_ops);
} }
/*
 * tun_get_socket - hand out the socket that backs a tun file.
 * @file: file that is expected to be an open tun character device
 *
 * Returns ERR_PTR(-EINVAL) when @file does not use tun_fops, and
 * ERR_PTR(-EBADFD) when tun_get() finds no device attached to it.
 * Otherwise returns the socket embedded in the attached tun_struct; that
 * object behaves like a packet socket and may be passed to
 * sock_sendmsg()/sock_recvmsg().
 *
 * The tun reference taken for the lookup is dropped again before returning,
 * so the caller must keep its own reference to @file for as long as it uses
 * the socket.
 */
struct socket *tun_get_socket(struct file *file)
{
	struct tun_struct *tun;
	struct socket *sock;

	if (file->f_op == &tun_fops) {
		tun = tun_get(file);
		if (tun) {
			/* Only the address is taken; nothing is dereferenced
			 * after the reference is dropped. */
			sock = &tun->socket;
			tun_put(tun);
		} else {
			sock = ERR_PTR(-EBADFD);
		}
	} else {
		sock = ERR_PTR(-EINVAL);
	}

	return sock;
}
EXPORT_SYMBOL_GPL(tun_get_socket);
module_init(tun_init); module_init(tun_init);
module_exit(tun_cleanup); module_exit(tun_cleanup);
MODULE_DESCRIPTION(DRV_DESCRIPTION); MODULE_DESCRIPTION(DRV_DESCRIPTION);
......
...@@ -86,4 +86,18 @@ struct tun_filter { ...@@ -86,4 +86,18 @@ struct tun_filter {
__u8 addr[0][ETH_ALEN]; __u8 addr[0][ETH_ALEN];
}; };
/*
 * Kernel-only interface: tun_get_socket() is declared here for in-kernel
 * users and is hidden when __KERNEL__ is not defined.
 */
#ifdef __KERNEL__
#if defined(CONFIG_TUN) || defined(CONFIG_TUN_MODULE)
/* tun is built in or built as a module: only the prototype is provided here. */
struct socket *tun_get_socket(struct file *);
#else
#include <linux/err.h>
#include <linux/errno.h>
/* Forward declarations; only pointers to these types are used below. */
struct file;
struct socket;
/*
 * Stub used when tun is not configured: always fails with
 * ERR_PTR(-EINVAL), so callers can keep a single code path and simply
 * check the returned pointer for an error.
 */
static inline struct socket *tun_get_socket(struct file *f)
{
return ERR_PTR(-EINVAL);
}
#endif /* CONFIG_TUN */
#endif /* __KERNEL__ */
#endif /* __IF_TUN_H */ #endif /* __IF_TUN_H */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册