提交 77424a45 编写于 作者: P Peter Maydell

Merge remote-tracking branch 'remotes/mst/tags/for_upstream' into staging

virtio, vhost, pc: fixes

Here are some bugfixes that didn't make 2.8.
Signed-off-by: NMichael S. Tsirkin <mst@redhat.com>

# gpg: Signature made Fri 16 Dec 2016 21:13:43 GMT
# gpg:                using RSA key 0x281F0DB8D28D5469
# gpg: Good signature from "Michael S. Tsirkin <mst@kernel.org>"
# gpg:                 aka "Michael S. Tsirkin <mst@redhat.com>"
# Primary key fingerprint: 0270 606B 6F3C DF3D 0B17  0970 C350 3912 AFBE 8E67
#      Subkey fingerprint: 5D09 FD08 71C8 F85B 94CA  8A0D 281F 0DB8 D28D 5469

* remotes/mst/tags/for_upstream:
  virtio: avoid using guest_notifier_mask in vhost-user mode
  pci: fix error message for express slots
  i386: amd_iommu: fix MMIO register count and access
  tests/vhost-user-bridge: use contrib/libvhost-user
  contrib: add libvhost-user
  tests/vhost-user-bridge: do not accept more than one connection
  tests/vhost-user-bridge: indicate peer disconnected
  tests/vhost-user-bridge: remove unnecessary dispatcher_remove
  tests/vhost-user-bridge: remove false comment
Signed-off-by: NPeter Maydell <peter.maydell@linaro.org>
......@@ -149,6 +149,7 @@ dummy := $(call unnest-vars,, \
qga-obj-y \
ivshmem-client-obj-y \
ivshmem-server-obj-y \
libvhost-user-obj-y \
qga-vss-dll-obj-y \
block-obj-y \
block-obj-m \
......
......@@ -115,7 +115,7 @@ qga-vss-dll-obj-y = qga/
# contrib
ivshmem-client-obj-y = contrib/ivshmem-client/
ivshmem-server-obj-y = contrib/ivshmem-server/
libvhost-user-obj-y = contrib/libvhost-user/
######################################################################
trace-events-y = trace-events
......
libvhost-user-obj-y = libvhost-user.o
/*
* Vhost User library
*
* Copyright IBM, Corp. 2007
* Copyright (c) 2016 Red Hat, Inc.
*
* Authors:
* Anthony Liguori <aliguori@us.ibm.com>
* Marc-André Lureau <mlureau@redhat.com>
* Victor Kaplansky <victork@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#include <qemu/osdep.h>
#include <sys/eventfd.h>
#include <linux/vhost.h>
#include "qemu/atomic.h"
#include "libvhost-user.h"
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
/* The version of the protocol we support */
#define VHOST_USER_VERSION 1
#define LIBVHOST_USER_DEBUG 0
#define DPRINT(...) \
do { \
if (LIBVHOST_USER_DEBUG) { \
fprintf(stderr, __VA_ARGS__); \
} \
} while (0)
static const char *
vu_request_to_string(int req)
{
#define REQ(req) [req] = #req
static const char *vu_request_str[] = {
REQ(VHOST_USER_NONE),
REQ(VHOST_USER_GET_FEATURES),
REQ(VHOST_USER_SET_FEATURES),
REQ(VHOST_USER_NONE),
REQ(VHOST_USER_GET_FEATURES),
REQ(VHOST_USER_SET_FEATURES),
REQ(VHOST_USER_SET_OWNER),
REQ(VHOST_USER_RESET_OWNER),
REQ(VHOST_USER_SET_MEM_TABLE),
REQ(VHOST_USER_SET_LOG_BASE),
REQ(VHOST_USER_SET_LOG_FD),
REQ(VHOST_USER_SET_VRING_NUM),
REQ(VHOST_USER_SET_VRING_ADDR),
REQ(VHOST_USER_SET_VRING_BASE),
REQ(VHOST_USER_GET_VRING_BASE),
REQ(VHOST_USER_SET_VRING_KICK),
REQ(VHOST_USER_SET_VRING_CALL),
REQ(VHOST_USER_SET_VRING_ERR),
REQ(VHOST_USER_GET_PROTOCOL_FEATURES),
REQ(VHOST_USER_SET_PROTOCOL_FEATURES),
REQ(VHOST_USER_GET_QUEUE_NUM),
REQ(VHOST_USER_SET_VRING_ENABLE),
REQ(VHOST_USER_SEND_RARP),
REQ(VHOST_USER_INPUT_GET_CONFIG),
REQ(VHOST_USER_MAX),
};
#undef REQ
if (req < VHOST_USER_MAX) {
return vu_request_str[req];
} else {
return "unknown";
}
}
static void
vu_panic(VuDev *dev, const char *msg, ...)
{
char *buf = NULL;
va_list ap;
va_start(ap, msg);
(void)vasprintf(&buf, msg, ap);
va_end(ap);
dev->broken = true;
dev->panic(dev, buf);
free(buf);
/* FIXME: find a way to call virtio_error? */
}
/* Translate guest physical address to our virtual address. */
void *
vu_gpa_to_va(VuDev *dev, uint64_t guest_addr)
{
int i;
/* Find matching memory region. */
for (i = 0; i < dev->nregions; i++) {
VuDevRegion *r = &dev->regions[i];
if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
return (void *)(uintptr_t)
guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
}
}
return NULL;
}
/* Translate qemu virtual address to our virtual address. */
static void *
qva_to_va(VuDev *dev, uint64_t qemu_addr)
{
int i;
/* Find matching memory region. */
for (i = 0; i < dev->nregions; i++) {
VuDevRegion *r = &dev->regions[i];
if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
return (void *)(uintptr_t)
qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
}
}
return NULL;
}
static void
vmsg_close_fds(VhostUserMsg *vmsg)
{
int i;
for (i = 0; i < vmsg->fd_num; i++) {
close(vmsg->fds[i]);
}
}
static bool
vu_message_read(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
{
char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
struct iovec iov = {
.iov_base = (char *)vmsg,
.iov_len = VHOST_USER_HDR_SIZE,
};
struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = control,
.msg_controllen = sizeof(control),
};
size_t fd_size;
struct cmsghdr *cmsg;
int rc;
do {
rc = recvmsg(conn_fd, &msg, 0);
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
if (rc <= 0) {
vu_panic(dev, "Error while recvmsg: %s", strerror(errno));
return false;
}
vmsg->fd_num = 0;
for (cmsg = CMSG_FIRSTHDR(&msg);
cmsg != NULL;
cmsg = CMSG_NXTHDR(&msg, cmsg))
{
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
fd_size = cmsg->cmsg_len - CMSG_LEN(0);
vmsg->fd_num = fd_size / sizeof(int);
memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
break;
}
}
if (vmsg->size > sizeof(vmsg->payload)) {
vu_panic(dev,
"Error: too big message request: %d, size: vmsg->size: %u, "
"while sizeof(vmsg->payload) = %zu\n",
vmsg->request, vmsg->size, sizeof(vmsg->payload));
goto fail;
}
if (vmsg->size) {
do {
rc = read(conn_fd, &vmsg->payload, vmsg->size);
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
if (rc <= 0) {
vu_panic(dev, "Error while reading: %s", strerror(errno));
goto fail;
}
assert(rc == vmsg->size);
}
return true;
fail:
vmsg_close_fds(vmsg);
return false;
}
static bool
vu_message_write(VuDev *dev, int conn_fd, VhostUserMsg *vmsg)
{
int rc;
uint8_t *p = (uint8_t *)vmsg;
/* Set the version in the flags when sending the reply */
vmsg->flags &= ~VHOST_USER_VERSION_MASK;
vmsg->flags |= VHOST_USER_VERSION;
vmsg->flags |= VHOST_USER_REPLY_MASK;
do {
rc = write(conn_fd, p, VHOST_USER_HDR_SIZE);
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
do {
if (vmsg->data) {
rc = write(conn_fd, vmsg->data, vmsg->size);
} else {
rc = write(conn_fd, p + VHOST_USER_HDR_SIZE, vmsg->size);
}
} while (rc < 0 && (errno == EINTR || errno == EAGAIN));
if (rc <= 0) {
vu_panic(dev, "Error while writing: %s", strerror(errno));
return false;
}
return true;
}
/* Kick the log_call_fd if required. */
static void
vu_log_kick(VuDev *dev)
{
if (dev->log_call_fd != -1) {
DPRINT("Kicking the QEMU's log...\n");
if (eventfd_write(dev->log_call_fd, 1) < 0) {
vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
}
}
}
static void
vu_log_page(uint8_t *log_table, uint64_t page)
{
DPRINT("Logged dirty guest page: %"PRId64"\n", page);
atomic_or(&log_table[page / 8], 1 << (page % 8));
}
static void
vu_log_write(VuDev *dev, uint64_t address, uint64_t length)
{
uint64_t page;
if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
!dev->log_table || !length) {
return;
}
assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
page = address / VHOST_LOG_PAGE;
while (page * VHOST_LOG_PAGE < address + length) {
vu_log_page(dev->log_table, page);
page += VHOST_LOG_PAGE;
}
vu_log_kick(dev);
}
static void
vu_kick_cb(VuDev *dev, int condition, void *data)
{
int index = (intptr_t)data;
VuVirtq *vq = &dev->vq[index];
int sock = vq->kick_fd;
eventfd_t kick_data;
ssize_t rc;
rc = eventfd_read(sock, &kick_data);
if (rc == -1) {
vu_panic(dev, "kick eventfd_read(): %s", strerror(errno));
dev->remove_watch(dev, dev->vq[index].kick_fd);
} else {
DPRINT("Got kick_data: %016"PRIx64" handler:%p idx:%d\n",
kick_data, vq->handler, index);
if (vq->handler) {
vq->handler(dev, index);
}
}
}
static bool
vu_get_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 =
1ULL << VHOST_F_LOG_ALL |
1ULL << VHOST_USER_F_PROTOCOL_FEATURES;
if (dev->iface->get_features) {
vmsg->payload.u64 |= dev->iface->get_features(dev);
}
vmsg->size = sizeof(vmsg->payload.u64);
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
return true;
}
static void
vu_set_enable_all_rings(VuDev *dev, bool enabled)
{
int i;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
dev->vq[i].enable = enabled;
}
}
static bool
vu_set_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
dev->features = vmsg->payload.u64;
if (!(dev->features & VHOST_USER_F_PROTOCOL_FEATURES)) {
vu_set_enable_all_rings(dev, true);
}
if (dev->iface->set_features) {
dev->iface->set_features(dev, dev->features);
}
return false;
}
static bool
vu_set_owner_exec(VuDev *dev, VhostUserMsg *vmsg)
{
return false;
}
static void
vu_close_log(VuDev *dev)
{
if (dev->log_table) {
if (munmap(dev->log_table, dev->log_size) != 0) {
perror("close log munmap() error");
}
dev->log_table = NULL;
}
if (dev->log_call_fd != -1) {
close(dev->log_call_fd);
dev->log_call_fd = -1;
}
}
static bool
vu_reset_device_exec(VuDev *dev, VhostUserMsg *vmsg)
{
vu_set_enable_all_rings(dev, false);
return false;
}
static bool
vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int i;
VhostUserMemory *memory = &vmsg->payload.memory;
dev->nregions = memory->nregions;
DPRINT("Nregions: %d\n", memory->nregions);
for (i = 0; i < dev->nregions; i++) {
void *mmap_addr;
VhostUserMemoryRegion *msg_region = &memory->regions[i];
VuDevRegion *dev_region = &dev->regions[i];
DPRINT("Region %d\n", i);
DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
msg_region->guest_phys_addr);
DPRINT(" memory_size: 0x%016"PRIx64"\n",
msg_region->memory_size);
DPRINT(" userspace_addr 0x%016"PRIx64"\n",
msg_region->userspace_addr);
DPRINT(" mmap_offset 0x%016"PRIx64"\n",
msg_region->mmap_offset);
dev_region->gpa = msg_region->guest_phys_addr;
dev_region->size = msg_region->memory_size;
dev_region->qva = msg_region->userspace_addr;
dev_region->mmap_offset = msg_region->mmap_offset;
/* We don't use offset argument of mmap() since the
* mapped address has to be page aligned, and we use huge
* pages. */
mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
PROT_READ | PROT_WRITE, MAP_SHARED,
vmsg->fds[i], 0);
if (mmap_addr == MAP_FAILED) {
vu_panic(dev, "region mmap error: %s", strerror(errno));
} else {
dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
DPRINT(" mmap_addr: 0x%016"PRIx64"\n",
dev_region->mmap_addr);
}
close(vmsg->fds[i]);
}
return false;
}
static bool
vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int fd;
uint64_t log_mmap_size, log_mmap_offset;
void *rc;
if (vmsg->fd_num != 1 ||
vmsg->size != sizeof(vmsg->payload.log)) {
vu_panic(dev, "Invalid log_base message");
return true;
}
fd = vmsg->fds[0];
log_mmap_offset = vmsg->payload.log.mmap_offset;
log_mmap_size = vmsg->payload.log.mmap_size;
DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
log_mmap_offset);
if (rc == MAP_FAILED) {
perror("log mmap error");
}
dev->log_table = rc;
dev->log_size = log_mmap_size;
vmsg->size = sizeof(vmsg->payload.u64);
return true;
}
static bool
vu_set_log_fd_exec(VuDev *dev, VhostUserMsg *vmsg)
{
if (vmsg->fd_num != 1) {
vu_panic(dev, "Invalid log_fd message");
return false;
}
if (dev->log_call_fd != -1) {
close(dev->log_call_fd);
}
dev->log_call_fd = vmsg->fds[0];
DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
return false;
}
static bool
vu_set_vring_num_exec(VuDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
unsigned int num = vmsg->payload.state.num;
DPRINT("State.index: %d\n", index);
DPRINT("State.num: %d\n", num);
dev->vq[index].vring.num = num;
return false;
}
static bool
vu_set_vring_addr_exec(VuDev *dev, VhostUserMsg *vmsg)
{
struct vhost_vring_addr *vra = &vmsg->payload.addr;
unsigned int index = vra->index;
VuVirtq *vq = &dev->vq[index];
DPRINT("vhost_vring_addr:\n");
DPRINT(" index: %d\n", vra->index);
DPRINT(" flags: %d\n", vra->flags);
DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
vq->vring.flags = vra->flags;
vq->vring.desc = qva_to_va(dev, vra->desc_user_addr);
vq->vring.used = qva_to_va(dev, vra->used_user_addr);
vq->vring.avail = qva_to_va(dev, vra->avail_user_addr);
vq->vring.log_guest_addr = vra->log_guest_addr;
DPRINT("Setting virtq addresses:\n");
DPRINT(" vring_desc at %p\n", vq->vring.desc);
DPRINT(" vring_used at %p\n", vq->vring.used);
DPRINT(" vring_avail at %p\n", vq->vring.avail);
if (!(vq->vring.desc && vq->vring.used && vq->vring.avail)) {
vu_panic(dev, "Invalid vring_addr message");
return false;
}
vq->used_idx = vq->vring.used->idx;
return false;
}
static bool
vu_set_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
unsigned int num = vmsg->payload.state.num;
DPRINT("State.index: %d\n", index);
DPRINT("State.num: %d\n", num);
dev->vq[index].shadow_avail_idx = dev->vq[index].last_avail_idx = num;
return false;
}
static bool
vu_get_vring_base_exec(VuDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
DPRINT("State.index: %d\n", index);
vmsg->payload.state.num = dev->vq[index].last_avail_idx;
vmsg->size = sizeof(vmsg->payload.state);
dev->vq[index].started = false;
if (dev->iface->queue_set_started) {
dev->iface->queue_set_started(dev, index, false);
}
if (dev->vq[index].call_fd != -1) {
close(dev->vq[index].call_fd);
dev->vq[index].call_fd = -1;
}
if (dev->vq[index].kick_fd != -1) {
dev->remove_watch(dev, dev->vq[index].kick_fd);
close(dev->vq[index].kick_fd);
dev->vq[index].kick_fd = -1;
}
return true;
}
static bool
vu_check_queue_msg_file(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
if (index >= VHOST_MAX_NR_VIRTQUEUE) {
vmsg_close_fds(vmsg);
vu_panic(dev, "Invalid queue index: %u", index);
return false;
}
if (vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK ||
vmsg->fd_num != 1) {
vmsg_close_fds(vmsg);
vu_panic(dev, "Invalid fds in request: %d", vmsg->request);
return false;
}
return true;
}
static bool
vu_set_vring_kick_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
if (!vu_check_queue_msg_file(dev, vmsg)) {
return false;
}
if (dev->vq[index].kick_fd != -1) {
dev->remove_watch(dev, dev->vq[index].kick_fd);
close(dev->vq[index].kick_fd);
dev->vq[index].kick_fd = -1;
}
if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
dev->vq[index].kick_fd = vmsg->fds[0];
DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
}
dev->vq[index].started = true;
if (dev->iface->queue_set_started) {
dev->iface->queue_set_started(dev, index, true);
}
if (dev->vq[index].kick_fd != -1 && dev->vq[index].handler) {
dev->set_watch(dev, dev->vq[index].kick_fd, VU_WATCH_IN,
vu_kick_cb, (void *)(long)index);
DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
dev->vq[index].kick_fd, index);
}
return false;
}
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
vu_queue_handler_cb handler)
{
int qidx = vq - dev->vq;
vq->handler = handler;
if (vq->kick_fd >= 0) {
if (handler) {
dev->set_watch(dev, vq->kick_fd, VU_WATCH_IN,
vu_kick_cb, (void *)(long)qidx);
} else {
dev->remove_watch(dev, vq->kick_fd);
}
}
}
static bool
vu_set_vring_call_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
if (!vu_check_queue_msg_file(dev, vmsg)) {
return false;
}
if (dev->vq[index].call_fd != -1) {
close(dev->vq[index].call_fd);
dev->vq[index].call_fd = -1;
}
if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
dev->vq[index].call_fd = vmsg->fds[0];
}
DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
return false;
}
static bool
vu_set_vring_err_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int index = vmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
if (!vu_check_queue_msg_file(dev, vmsg)) {
return false;
}
if (dev->vq[index].err_fd != -1) {
close(dev->vq[index].err_fd);
dev->vq[index].err_fd = -1;
}
if (!(vmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK)) {
dev->vq[index].err_fd = vmsg->fds[0];
}
return false;
}
static bool
vu_get_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
uint64_t features = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
if (dev->iface->get_protocol_features) {
features |= dev->iface->get_protocol_features(dev);
}
vmsg->payload.u64 = features;
vmsg->size = sizeof(vmsg->payload.u64);
return true;
}
static bool
vu_set_protocol_features_exec(VuDev *dev, VhostUserMsg *vmsg)
{
uint64_t features = vmsg->payload.u64;
DPRINT("u64: 0x%016"PRIx64"\n", features);
dev->protocol_features = vmsg->payload.u64;
if (dev->iface->set_protocol_features) {
dev->iface->set_protocol_features(dev, features);
}
return false;
}
static bool
vu_get_queue_num_exec(VuDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
return false;
}
static bool
vu_set_vring_enable_exec(VuDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
unsigned int enable = vmsg->payload.state.num;
DPRINT("State.index: %d\n", index);
DPRINT("State.enable: %d\n", enable);
if (index >= VHOST_MAX_NR_VIRTQUEUE) {
vu_panic(dev, "Invalid vring_enable index: %u", index);
return false;
}
dev->vq[index].enable = enable;
return false;
}
static bool
vu_process_message(VuDev *dev, VhostUserMsg *vmsg)
{
int do_reply = 0;
/* Print out generic part of the request. */
DPRINT("================ Vhost user message ================\n");
DPRINT("Request: %s (%d)\n", vu_request_to_string(vmsg->request),
vmsg->request);
DPRINT("Flags: 0x%x\n", vmsg->flags);
DPRINT("Size: %d\n", vmsg->size);
if (vmsg->fd_num) {
int i;
DPRINT("Fds:");
for (i = 0; i < vmsg->fd_num; i++) {
DPRINT(" %d", vmsg->fds[i]);
}
DPRINT("\n");
}
if (dev->iface->process_msg &&
dev->iface->process_msg(dev, vmsg, &do_reply)) {
return do_reply;
}
switch (vmsg->request) {
case VHOST_USER_GET_FEATURES:
return vu_get_features_exec(dev, vmsg);
case VHOST_USER_SET_FEATURES:
return vu_set_features_exec(dev, vmsg);
case VHOST_USER_GET_PROTOCOL_FEATURES:
return vu_get_protocol_features_exec(dev, vmsg);
case VHOST_USER_SET_PROTOCOL_FEATURES:
return vu_set_protocol_features_exec(dev, vmsg);
case VHOST_USER_SET_OWNER:
return vu_set_owner_exec(dev, vmsg);
case VHOST_USER_RESET_OWNER:
return vu_reset_device_exec(dev, vmsg);
case VHOST_USER_SET_MEM_TABLE:
return vu_set_mem_table_exec(dev, vmsg);
case VHOST_USER_SET_LOG_BASE:
return vu_set_log_base_exec(dev, vmsg);
case VHOST_USER_SET_LOG_FD:
return vu_set_log_fd_exec(dev, vmsg);
case VHOST_USER_SET_VRING_NUM:
return vu_set_vring_num_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ADDR:
return vu_set_vring_addr_exec(dev, vmsg);
case VHOST_USER_SET_VRING_BASE:
return vu_set_vring_base_exec(dev, vmsg);
case VHOST_USER_GET_VRING_BASE:
return vu_get_vring_base_exec(dev, vmsg);
case VHOST_USER_SET_VRING_KICK:
return vu_set_vring_kick_exec(dev, vmsg);
case VHOST_USER_SET_VRING_CALL:
return vu_set_vring_call_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ERR:
return vu_set_vring_err_exec(dev, vmsg);
case VHOST_USER_GET_QUEUE_NUM:
return vu_get_queue_num_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ENABLE:
return vu_set_vring_enable_exec(dev, vmsg);
default:
vmsg_close_fds(vmsg);
vu_panic(dev, "Unhandled request: %d", vmsg->request);
}
return false;
}
bool
vu_dispatch(VuDev *dev)
{
VhostUserMsg vmsg = { 0, };
int reply_requested;
bool success = false;
if (!vu_message_read(dev, dev->sock, &vmsg)) {
goto end;
}
reply_requested = vu_process_message(dev, &vmsg);
if (!reply_requested) {
success = true;
goto end;
}
if (!vu_message_write(dev, dev->sock, &vmsg)) {
goto end;
}
success = true;
end:
g_free(vmsg.data);
return success;
}
void
vu_deinit(VuDev *dev)
{
int i;
for (i = 0; i < dev->nregions; i++) {
VuDevRegion *r = &dev->regions[i];
void *m = (void *) (uintptr_t) r->mmap_addr;
if (m != MAP_FAILED) {
munmap(m, r->size + r->mmap_offset);
}
}
dev->nregions = 0;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
VuVirtq *vq = &dev->vq[i];
if (vq->call_fd != -1) {
close(vq->call_fd);
vq->call_fd = -1;
}
if (vq->kick_fd != -1) {
close(vq->kick_fd);
vq->kick_fd = -1;
}
if (vq->err_fd != -1) {
close(vq->err_fd);
vq->err_fd = -1;
}
}
vu_close_log(dev);
if (dev->sock != -1) {
close(dev->sock);
}
}
void
vu_init(VuDev *dev,
int socket,
vu_panic_cb panic,
vu_set_watch_cb set_watch,
vu_remove_watch_cb remove_watch,
const VuDevIface *iface)
{
int i;
assert(socket >= 0);
assert(set_watch);
assert(remove_watch);
assert(iface);
assert(panic);
memset(dev, 0, sizeof(*dev));
dev->sock = socket;
dev->panic = panic;
dev->set_watch = set_watch;
dev->remove_watch = remove_watch;
dev->iface = iface;
dev->log_call_fd = -1;
for (i = 0; i < VHOST_MAX_NR_VIRTQUEUE; i++) {
dev->vq[i] = (VuVirtq) {
.call_fd = -1, .kick_fd = -1, .err_fd = -1,
.notification = true,
};
}
}
VuVirtq *
vu_get_queue(VuDev *dev, int qidx)
{
assert(qidx < VHOST_MAX_NR_VIRTQUEUE);
return &dev->vq[qidx];
}
bool
vu_queue_enabled(VuDev *dev, VuVirtq *vq)
{
return vq->enable;
}
static inline uint16_t
vring_avail_flags(VuVirtq *vq)
{
return vq->vring.avail->flags;
}
static inline uint16_t
vring_avail_idx(VuVirtq *vq)
{
vq->shadow_avail_idx = vq->vring.avail->idx;
return vq->shadow_avail_idx;
}
static inline uint16_t
vring_avail_ring(VuVirtq *vq, int i)
{
return vq->vring.avail->ring[i];
}
static inline uint16_t
vring_get_used_event(VuVirtq *vq)
{
return vring_avail_ring(vq, vq->vring.num);
}
static int
virtqueue_num_heads(VuDev *dev, VuVirtq *vq, unsigned int idx)
{
uint16_t num_heads = vring_avail_idx(vq) - idx;
/* Check it isn't doing very strange things with descriptor numbers. */
if (num_heads > vq->vring.num) {
vu_panic(dev, "Guest moved used index from %u to %u",
idx, vq->shadow_avail_idx);
return -1;
}
if (num_heads) {
/* On success, callers read a descriptor at vq->last_avail_idx.
* Make sure descriptor read does not bypass avail index read. */
smp_rmb();
}
return num_heads;
}
static bool
virtqueue_get_head(VuDev *dev, VuVirtq *vq,
unsigned int idx, unsigned int *head)
{
/* Grab the next descriptor number they're advertising, and increment
* the index we've seen. */
*head = vring_avail_ring(vq, idx % vq->vring.num);
/* If their number is silly, that's a fatal mistake. */
if (*head >= vq->vring.num) {
vu_panic(dev, "Guest says index %u is available", head);
return false;
}
return true;
}
enum {
VIRTQUEUE_READ_DESC_ERROR = -1,
VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */
VIRTQUEUE_READ_DESC_MORE = 1, /* more buffers in chain */
};
static int
virtqueue_read_next_desc(VuDev *dev, struct vring_desc *desc,
int i, unsigned int max, unsigned int *next)
{
/* If this descriptor says it doesn't chain, we're done. */
if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
return VIRTQUEUE_READ_DESC_DONE;
}
/* Check they're not leading us off end of descriptors. */
*next = desc[i].next;
/* Make sure compiler knows to grab that: we don't want it changing! */
smp_wmb();
if (*next >= max) {
vu_panic(dev, "Desc next is %u", next);
return VIRTQUEUE_READ_DESC_ERROR;
}
return VIRTQUEUE_READ_DESC_MORE;
}
void
vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes,
unsigned int *out_bytes,
unsigned max_in_bytes, unsigned max_out_bytes)
{
unsigned int idx;
unsigned int total_bufs, in_total, out_total;
int rc;
idx = vq->last_avail_idx;
total_bufs = in_total = out_total = 0;
while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) {
unsigned int max, num_bufs, indirect = 0;
struct vring_desc *desc;
unsigned int i;
max = vq->vring.num;
num_bufs = total_bufs;
if (!virtqueue_get_head(dev, vq, idx++, &i)) {
goto err;
}
desc = vq->vring.desc;
if (desc[i].flags & VRING_DESC_F_INDIRECT) {
if (desc[i].len % sizeof(struct vring_desc)) {
vu_panic(dev, "Invalid size for indirect buffer table");
goto err;
}
/* If we've got too many, that implies a descriptor loop. */
if (num_bufs >= max) {
vu_panic(dev, "Looped descriptor");
goto err;
}
/* loop over the indirect descriptor table */
indirect = 1;
max = desc[i].len / sizeof(struct vring_desc);
desc = vu_gpa_to_va(dev, desc[i].addr);
num_bufs = i = 0;
}
do {
/* If we've got too many, that implies a descriptor loop. */
if (++num_bufs > max) {
vu_panic(dev, "Looped descriptor");
goto err;
}
if (desc[i].flags & VRING_DESC_F_WRITE) {
in_total += desc[i].len;
} else {
out_total += desc[i].len;
}
if (in_total >= max_in_bytes && out_total >= max_out_bytes) {
goto done;
}
rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
goto err;
}
if (!indirect) {
total_bufs = num_bufs;
} else {
total_bufs++;
}
}
if (rc < 0) {
goto err;
}
done:
if (in_bytes) {
*in_bytes = in_total;
}
if (out_bytes) {
*out_bytes = out_total;
}
return;
err:
in_total = out_total = 0;
goto done;
}
bool
vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
unsigned int out_bytes)
{
unsigned int in_total, out_total;
vu_queue_get_avail_bytes(dev, vq, &in_total, &out_total,
in_bytes, out_bytes);
return in_bytes <= in_total && out_bytes <= out_total;
}
/* Fetch avail_idx from VQ memory only when we really need to know if
* guest has added some buffers. */
int
vu_queue_empty(VuDev *dev, VuVirtq *vq)
{
if (vq->shadow_avail_idx != vq->last_avail_idx) {
return 0;
}
return vring_avail_idx(vq) == vq->last_avail_idx;
}
static inline
bool has_feature(uint64_t features, unsigned int fbit)
{
assert(fbit < 64);
return !!(features & (1ULL << fbit));
}
static inline
bool vu_has_feature(VuDev *dev,
unsigned int fbit)
{
return has_feature(dev->features, fbit);
}
static bool
vring_notify(VuDev *dev, VuVirtq *vq)
{
uint16_t old, new;
bool v;
/* We need to expose used array entries before checking used event. */
smp_mb();
/* Always notify when queue is empty (when feature acknowledge) */
if (vu_has_feature(dev, VIRTIO_F_NOTIFY_ON_EMPTY) &&
!vq->inuse && vu_queue_empty(dev, vq)) {
return true;
}
if (!vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
return !(vring_avail_flags(vq) & VRING_AVAIL_F_NO_INTERRUPT);
}
v = vq->signalled_used_valid;
vq->signalled_used_valid = true;
old = vq->signalled_used;
new = vq->signalled_used = vq->used_idx;
return !v || vring_need_event(vring_get_used_event(vq), new, old);
}
void
vu_queue_notify(VuDev *dev, VuVirtq *vq)
{
if (unlikely(dev->broken)) {
return;
}
if (!vring_notify(dev, vq)) {
DPRINT("skipped notify...\n");
return;
}
if (eventfd_write(vq->call_fd, 1) < 0) {
vu_panic(dev, "Error writing eventfd: %s", strerror(errno));
}
}
static inline void
vring_used_flags_set_bit(VuVirtq *vq, int mask)
{
uint16_t *flags;
flags = (uint16_t *)((char*)vq->vring.used +
offsetof(struct vring_used, flags));
*flags |= mask;
}
static inline void
vring_used_flags_unset_bit(VuVirtq *vq, int mask)
{
uint16_t *flags;
flags = (uint16_t *)((char*)vq->vring.used +
offsetof(struct vring_used, flags));
*flags &= ~mask;
}
static inline void
vring_set_avail_event(VuVirtq *vq, uint16_t val)
{
if (!vq->notification) {
return;
}
*((uint16_t *) &vq->vring.used->ring[vq->vring.num]) = val;
}
void
vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable)
{
vq->notification = enable;
if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vring_avail_idx(vq));
} else if (enable) {
vring_used_flags_unset_bit(vq, VRING_USED_F_NO_NOTIFY);
} else {
vring_used_flags_set_bit(vq, VRING_USED_F_NO_NOTIFY);
}
if (enable) {
/* Expose avail event/used flags before caller checks the avail idx. */
smp_mb();
}
}
static void
virtqueue_map_desc(VuDev *dev,
unsigned int *p_num_sg, struct iovec *iov,
unsigned int max_num_sg, bool is_write,
uint64_t pa, size_t sz)
{
unsigned num_sg = *p_num_sg;
assert(num_sg <= max_num_sg);
if (!sz) {
vu_panic(dev, "virtio: zero sized buffers are not allowed");
return;
}
iov[num_sg].iov_base = vu_gpa_to_va(dev, pa);
iov[num_sg].iov_len = sz;
num_sg++;
*p_num_sg = num_sg;
}
/* Round number down to multiple */
#define ALIGN_DOWN(n, m) ((n) / (m) * (m))
/* Round number up to multiple */
#define ALIGN_UP(n, m) ALIGN_DOWN((n) + (m) - 1, (m))
static void *
virtqueue_alloc_element(size_t sz,
unsigned out_num, unsigned in_num)
{
VuVirtqElement *elem;
size_t in_sg_ofs = ALIGN_UP(sz, __alignof__(elem->in_sg[0]));
size_t out_sg_ofs = in_sg_ofs + in_num * sizeof(elem->in_sg[0]);
size_t out_sg_end = out_sg_ofs + out_num * sizeof(elem->out_sg[0]);
assert(sz >= sizeof(VuVirtqElement));
elem = malloc(out_sg_end);
elem->out_num = out_num;
elem->in_num = in_num;
elem->in_sg = (void *)elem + in_sg_ofs;
elem->out_sg = (void *)elem + out_sg_ofs;
return elem;
}
void *
vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz)
{
unsigned int i, head, max;
VuVirtqElement *elem;
unsigned out_num, in_num;
struct iovec iov[VIRTQUEUE_MAX_SIZE];
struct vring_desc *desc;
int rc;
if (unlikely(dev->broken)) {
return NULL;
}
if (vu_queue_empty(dev, vq)) {
return NULL;
}
/* Needed after virtio_queue_empty(), see comment in
* virtqueue_num_heads(). */
smp_rmb();
/* When we start there are none of either input nor output. */
out_num = in_num = 0;
max = vq->vring.num;
if (vq->inuse >= vq->vring.num) {
vu_panic(dev, "Virtqueue size exceeded");
return NULL;
}
if (!virtqueue_get_head(dev, vq, vq->last_avail_idx++, &head)) {
return NULL;
}
if (vu_has_feature(dev, VIRTIO_RING_F_EVENT_IDX)) {
vring_set_avail_event(vq, vq->last_avail_idx);
}
i = head;
desc = vq->vring.desc;
if (desc[i].flags & VRING_DESC_F_INDIRECT) {
if (desc[i].len % sizeof(struct vring_desc)) {
vu_panic(dev, "Invalid size for indirect buffer table");
}
/* loop over the indirect descriptor table */
max = desc[i].len / sizeof(struct vring_desc);
desc = vu_gpa_to_va(dev, desc[i].addr);
i = 0;
}
/* Collect all the descriptors */
do {
if (desc[i].flags & VRING_DESC_F_WRITE) {
virtqueue_map_desc(dev, &in_num, iov + out_num,
VIRTQUEUE_MAX_SIZE - out_num, true,
desc[i].addr, desc[i].len);
} else {
if (in_num) {
vu_panic(dev, "Incorrect order for descriptors");
return NULL;
}
virtqueue_map_desc(dev, &out_num, iov,
VIRTQUEUE_MAX_SIZE, false,
desc[i].addr, desc[i].len);
}
/* If we've got too many, that implies a descriptor loop. */
if ((in_num + out_num) > max) {
vu_panic(dev, "Looped descriptor");
}
rc = virtqueue_read_next_desc(dev, desc, i, max, &i);
} while (rc == VIRTQUEUE_READ_DESC_MORE);
if (rc == VIRTQUEUE_READ_DESC_ERROR) {
return NULL;
}
/* Now copy what we have collected and mapped */
elem = virtqueue_alloc_element(sz, out_num, in_num);
elem->index = head;
for (i = 0; i < out_num; i++) {
elem->out_sg[i] = iov[i];
}
for (i = 0; i < in_num; i++) {
elem->in_sg[i] = iov[out_num + i];
}
vq->inuse++;
return elem;
}
bool
vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num)
{
if (num > vq->inuse) {
return false;
}
vq->last_avail_idx -= num;
vq->inuse -= num;
return true;
}
static inline
void vring_used_write(VuDev *dev, VuVirtq *vq,
struct vring_used_elem *uelem, int i)
{
struct vring_used *used = vq->vring.used;
used->ring[i] = *uelem;
vu_log_write(dev, vq->vring.log_guest_addr +
offsetof(struct vring_used, ring[i]),
sizeof(used->ring[i]));
}
static void
vu_log_queue_fill(VuDev *dev, VuVirtq *vq,
const VuVirtqElement *elem,
unsigned int len)
{
struct vring_desc *desc = vq->vring.desc;
unsigned int i, max, min;
unsigned num_bufs = 0;
max = vq->vring.num;
i = elem->index;
if (desc[i].flags & VRING_DESC_F_INDIRECT) {
if (desc[i].len % sizeof(struct vring_desc)) {
vu_panic(dev, "Invalid size for indirect buffer table");
}
/* loop over the indirect descriptor table */
max = desc[i].len / sizeof(struct vring_desc);
desc = vu_gpa_to_va(dev, desc[i].addr);
i = 0;
}
do {
if (++num_bufs > max) {
vu_panic(dev, "Looped descriptor");
return;
}
if (desc[i].flags & VRING_DESC_F_WRITE) {
min = MIN(desc[i].len, len);
vu_log_write(dev, desc[i].addr, min);
len -= min;
}
} while (len > 0 &&
(virtqueue_read_next_desc(dev, desc, i, max, &i)
== VIRTQUEUE_READ_DESC_MORE));
}
void
vu_queue_fill(VuDev *dev, VuVirtq *vq,
const VuVirtqElement *elem,
unsigned int len, unsigned int idx)
{
struct vring_used_elem uelem;
if (unlikely(dev->broken)) {
return;
}
vu_log_queue_fill(dev, vq, elem, len);
idx = (idx + vq->used_idx) % vq->vring.num;
uelem.id = elem->index;
uelem.len = len;
vring_used_write(dev, vq, &uelem, idx);
}
static inline
void vring_used_idx_set(VuDev *dev, VuVirtq *vq, uint16_t val)
{
vq->vring.used->idx = val;
vu_log_write(dev,
vq->vring.log_guest_addr + offsetof(struct vring_used, idx),
sizeof(vq->vring.used->idx));
vq->used_idx = val;
}
void
vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int count)
{
uint16_t old, new;
if (unlikely(dev->broken)) {
return;
}
/* Make sure buffer is written before we update index. */
smp_wmb();
old = vq->used_idx;
new = old + count;
vring_used_idx_set(dev, vq, new);
vq->inuse -= count;
if (unlikely((int16_t)(new - vq->signalled_used) < (uint16_t)(new - old))) {
vq->signalled_used_valid = false;
}
}
void
vu_queue_push(VuDev *dev, VuVirtq *vq,
const VuVirtqElement *elem, unsigned int len)
{
vu_queue_fill(dev, vq, elem, len, 0);
vu_queue_flush(dev, vq, 1);
}
/*
* Vhost User library
*
* Copyright (c) 2016 Red Hat, Inc.
*
* Authors:
* Victor Kaplansky <victork@redhat.com>
* Marc-André Lureau <mlureau@redhat.com>
*
* This work is licensed under the terms of the GNU GPL, version 2 or
* later. See the COPYING file in the top-level directory.
*/
#ifndef LIBVHOST_USER_H
#define LIBVHOST_USER_H
#include <stdint.h>
#include <stdbool.h>
#include <stddef.h>
#include <linux/vhost.h>
#include "standard-headers/linux/virtio_ring.h"
/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
#define VHOST_LOG_PAGE 4096
#define VHOST_MAX_NR_VIRTQUEUE 8
#define VIRTQUEUE_MAX_SIZE 1024
#define VHOST_MEMORY_MAX_NREGIONS 8
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
VHOST_USER_PROTOCOL_F_RARP = 2,
VHOST_USER_PROTOCOL_F_MAX
};
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
VHOST_USER_GET_FEATURES = 1,
VHOST_USER_SET_FEATURES = 2,
VHOST_USER_SET_OWNER = 3,
VHOST_USER_RESET_OWNER = 4,
VHOST_USER_SET_MEM_TABLE = 5,
VHOST_USER_SET_LOG_BASE = 6,
VHOST_USER_SET_LOG_FD = 7,
VHOST_USER_SET_VRING_NUM = 8,
VHOST_USER_SET_VRING_ADDR = 9,
VHOST_USER_SET_VRING_BASE = 10,
VHOST_USER_GET_VRING_BASE = 11,
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_INPUT_GET_CONFIG = 20,
VHOST_USER_MAX
} VhostUserRequest;
typedef struct VhostUserMemoryRegion {
uint64_t guest_phys_addr;
uint64_t memory_size;
uint64_t userspace_addr;
uint64_t mmap_offset;
} VhostUserMemoryRegion;
typedef struct VhostUserMemory {
uint32_t nregions;
uint32_t padding;
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
typedef struct VhostUserLog {
uint64_t mmap_size;
uint64_t mmap_offset;
} VhostUserLog;
#if defined(_WIN32)
# define VU_PACKED __attribute__((gcc_struct, packed))
#else
# define VU_PACKED __attribute__((packed))
#endif
typedef struct VhostUserMsg {
VhostUserRequest request;
#define VHOST_USER_VERSION_MASK (0x3)
#define VHOST_USER_REPLY_MASK (0x1 << 2)
uint32_t flags;
uint32_t size; /* the following payload size */
union {
#define VHOST_USER_VRING_IDX_MASK (0xff)
#define VHOST_USER_VRING_NOFD_MASK (0x1 << 8)
uint64_t u64;
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLog log;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int fd_num;
uint8_t *data;
} VU_PACKED VhostUserMsg;
typedef struct VuDevRegion {
/* Guest Physical address. */
uint64_t gpa;
/* Memory region size. */
uint64_t size;
/* QEMU virtual address (userspace). */
uint64_t qva;
/* Starting offset in our mmaped space. */
uint64_t mmap_offset;
/* Start address of mmaped space. */
uint64_t mmap_addr;
} VuDevRegion;
typedef struct VuDev VuDev;
typedef uint64_t (*vu_get_features_cb) (VuDev *dev);
typedef void (*vu_set_features_cb) (VuDev *dev, uint64_t features);
typedef int (*vu_process_msg_cb) (VuDev *dev, VhostUserMsg *vmsg,
int *do_reply);
typedef void (*vu_queue_set_started_cb) (VuDev *dev, int qidx, bool started);
typedef struct VuDevIface {
/* called by VHOST_USER_GET_FEATURES to get the features bitmask */
vu_get_features_cb get_features;
/* enable vhost implementation features */
vu_set_features_cb set_features;
/* get the protocol feature bitmask from the underlying vhost
* implementation */
vu_get_features_cb get_protocol_features;
/* enable protocol features in the underlying vhost implementation. */
vu_set_features_cb set_protocol_features;
/* process_msg is called for each vhost-user message received */
/* skip libvhost-user processing if return value != 0 */
vu_process_msg_cb process_msg;
/* tells when queues can be processed */
vu_queue_set_started_cb queue_set_started;
} VuDevIface;
typedef void (*vu_queue_handler_cb) (VuDev *dev, int qidx);
typedef struct VuRing {
unsigned int num;
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
uint64_t log_guest_addr;
uint32_t flags;
} VuRing;
typedef struct VuVirtq {
VuRing vring;
/* Next head to pop */
uint16_t last_avail_idx;
/* Last avail_idx read from VQ. */
uint16_t shadow_avail_idx;
uint16_t used_idx;
/* Last used index value we have signalled on */
uint16_t signalled_used;
/* Last used index value we have signalled on */
bool signalled_used_valid;
/* Notification enabled? */
bool notification;
int inuse;
vu_queue_handler_cb handler;
int call_fd;
int kick_fd;
int err_fd;
unsigned int enable;
bool started;
} VuVirtq;
enum VuWatchCondtion {
VU_WATCH_IN = 1 << 0,
VU_WATCH_OUT = 1 << 1,
VU_WATCH_PRI = 1 << 2,
VU_WATCH_ERR = 1 << 3,
VU_WATCH_HUP = 1 << 4,
};
typedef void (*vu_panic_cb) (VuDev *dev, const char *err);
typedef void (*vu_watch_cb) (VuDev *dev, int condition, void *data);
typedef void (*vu_set_watch_cb) (VuDev *dev, int fd, int condition,
vu_watch_cb cb, void *data);
typedef void (*vu_remove_watch_cb) (VuDev *dev, int fd);
struct VuDev {
int sock;
uint32_t nregions;
VuDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VuVirtq vq[VHOST_MAX_NR_VIRTQUEUE];
int log_call_fd;
uint64_t log_size;
uint8_t *log_table;
uint64_t features;
uint64_t protocol_features;
bool broken;
/* @set_watch: add or update the given fd to the watch set,
* call cb when condition is met */
vu_set_watch_cb set_watch;
/* @remove_watch: remove the given fd from the watch set */
vu_remove_watch_cb remove_watch;
/* @panic: encountered an unrecoverable error, you may try to
* re-initialize */
vu_panic_cb panic;
const VuDevIface *iface;
};
typedef struct VuVirtqElement {
unsigned int index;
unsigned int out_num;
unsigned int in_num;
struct iovec *in_sg;
struct iovec *out_sg;
} VuVirtqElement;
/**
* vu_init:
* @dev: a VuDev context
* @socket: the socket connected to vhost-user master
* @panic: a panic callback
* @set_watch: a set_watch callback
* @remove_watch: a remove_watch callback
* @iface: a VuDevIface structure with vhost-user device callbacks
*
* Intializes a VuDev vhost-user context.
**/
void vu_init(VuDev *dev,
int socket,
vu_panic_cb panic,
vu_set_watch_cb set_watch,
vu_remove_watch_cb remove_watch,
const VuDevIface *iface);
/**
* vu_deinit:
* @dev: a VuDev context
*
* Cleans up the VuDev context
*/
void vu_deinit(VuDev *dev);
/**
* vu_dispatch:
* @dev: a VuDev context
*
* Process one vhost-user message.
*
* Returns: TRUE on success, FALSE on failure.
*/
bool vu_dispatch(VuDev *dev);
/**
* vu_gpa_to_va:
* @dev: a VuDev context
* @guest_addr: guest address
*
* Translate a guest address to a pointer. Returns NULL on failure.
*/
void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr);
/**
* vu_get_queue:
* @dev: a VuDev context
* @qidx: queue index
*
* Returns the queue number @qidx.
*/
VuVirtq *vu_get_queue(VuDev *dev, int qidx);
/**
* vu_set_queue_handler:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @handler: the queue handler callback
*
* Set the queue handler. This function may be called several times
* for the same queue. If called with NULL @handler, the handler is
* removed.
*/
void vu_set_queue_handler(VuDev *dev, VuVirtq *vq,
vu_queue_handler_cb handler);
/**
* vu_queue_set_notification:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @enable: state
*
* Set whether the queue notifies (via event index or interrupt)
*/
void vu_queue_set_notification(VuDev *dev, VuVirtq *vq, int enable);
/**
* vu_queue_enabled:
* @dev: a VuDev context
* @vq: a VuVirtq queue
*
* Returns: whether the queue is enabled.
*/
bool vu_queue_enabled(VuDev *dev, VuVirtq *vq);
/**
* vu_queue_enabled:
* @dev: a VuDev context
* @vq: a VuVirtq queue
*
* Returns: whether the queue is empty.
*/
int vu_queue_empty(VuDev *dev, VuVirtq *vq);
/**
* vu_queue_notify:
* @dev: a VuDev context
* @vq: a VuVirtq queue
*
* Request to notify the queue via callfd (skipped if unnecessary)
*/
void vu_queue_notify(VuDev *dev, VuVirtq *vq);
/**
* vu_queue_pop:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @sz: the size of struct to return (must be >= VuVirtqElement)
*
* Returns: a VuVirtqElement filled from the queue or NULL.
*/
void *vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz);
/**
* vu_queue_rewind:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @num: number of elements to push back
*
* Pretend that elements weren't popped from the virtqueue. The next
* virtqueue_pop() will refetch the oldest element.
*
* Returns: true on success, false if @num is greater than the number of in use
* elements.
*/
bool vu_queue_rewind(VuDev *dev, VuVirtq *vq, unsigned int num);
/**
* vu_queue_fill:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @elem: a VuVirtqElement
* @len: length in bytes to write
* @idx: optional offset for the used ring index (0 in general)
*
* Fill the used ring with @elem element.
*/
void vu_queue_fill(VuDev *dev, VuVirtq *vq,
const VuVirtqElement *elem,
unsigned int len, unsigned int idx);
/**
* vu_queue_push:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @elem: a VuVirtqElement
* @len: length in bytes to write
*
* Helper that combines vu_queue_fill() with a vu_queue_flush().
*/
void vu_queue_push(VuDev *dev, VuVirtq *vq,
const VuVirtqElement *elem, unsigned int len);
/**
* vu_queue_flush:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @num: number of elements to flush
*
* Mark the last number of elements as done (used.idx is updated by
* num elements).
*/
void vu_queue_flush(VuDev *dev, VuVirtq *vq, unsigned int num);
/**
* vu_queue_get_avail_bytes:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @in_bytes: in bytes
* @out_bytes: out bytes
* @max_in_bytes: stop counting after max_in_bytes
* @max_out_bytes: stop counting after max_out_bytes
*
* Count the number of available bytes, up to max_in_bytes/max_out_bytes.
*/
void vu_queue_get_avail_bytes(VuDev *vdev, VuVirtq *vq, unsigned int *in_bytes,
unsigned int *out_bytes,
unsigned max_in_bytes, unsigned max_out_bytes);
/**
* vu_queue_avail_bytes:
* @dev: a VuDev context
* @vq: a VuVirtq queue
* @in_bytes: expected in bytes
* @out_bytes: expected out bytes
*
* Returns: true if in_bytes <= in_total && out_bytes <= out_total
*/
bool vu_queue_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int in_bytes,
unsigned int out_bytes);
#endif /* LIBVHOST_USER_H */
......@@ -562,7 +562,7 @@ static void amdvi_mmio_trace(hwaddr addr, unsigned size)
trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
} else {
index = index >= AMDVI_MMIO_REGS_LOW ? AMDVI_MMIO_REGS_LOW : index;
trace_amdvi_mmio_read(amdvi_mmio_high[index], addr, size, addr & ~0x07);
trace_amdvi_mmio_read(amdvi_mmio_low[index], addr, size, addr & ~0x07);
}
}
......
......@@ -49,8 +49,8 @@
#define AMDVI_CAPAB_INIT_TYPE (3 << 16)
/* No. of used MMIO registers */
#define AMDVI_MMIO_REGS_HIGH 8
#define AMDVI_MMIO_REGS_LOW 7
#define AMDVI_MMIO_REGS_HIGH 7
#define AMDVI_MMIO_REGS_LOW 8
/* MMIO registers */
#define AMDVI_MMIO_DEVICE_TABLE 0x0000
......
......@@ -982,8 +982,8 @@ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, PCIBus *bus,
pci_get_function_0(pci_dev)) {
error_setg(errp, "PCI: slot %d function 0 already ocuppied by %s,"
" new func %s cannot be exposed to guest.",
PCI_SLOT(devfn),
bus->devices[PCI_DEVFN(PCI_SLOT(devfn), 0)]->name,
PCI_SLOT(pci_get_function_0(pci_dev)->devfn),
pci_get_function_0(pci_dev)->name,
name);
return NULL;
......
......@@ -1098,7 +1098,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
* We do not support individual masking for channel devices, so we
* need to manually trigger any guest masking callbacks here.
*/
if (k->guest_notifier_mask) {
if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
k->guest_notifier_mask(vdev, n, false);
}
/* get lost events and re-inject */
......@@ -1107,7 +1107,7 @@ static int virtio_ccw_set_guest_notifier(VirtioCcwDevice *dev, int n,
event_notifier_set(notifier);
}
} else {
if (k->guest_notifier_mask) {
if (k->guest_notifier_mask && vdev->use_guest_notifier_mask) {
k->guest_notifier_mask(vdev, n, true);
}
if (with_irqfd) {
......
......@@ -402,7 +402,7 @@ static int virtio_mmio_set_guest_notifier(DeviceState *d, int n, bool assign,
event_notifier_cleanup(notifier);
}
if (vdc->guest_notifier_mask) {
if (vdc->guest_notifier_mask && vdev->use_guest_notifier_mask) {
vdc->guest_notifier_mask(vdev, n, !assign);
}
......
......@@ -689,7 +689,7 @@ tests/test-filter-mirror$(EXESUF): tests/test-filter-mirror.o $(qtest-obj-y)
tests/test-filter-redirector$(EXESUF): tests/test-filter-redirector.o $(qtest-obj-y)
tests/test-x86-cpuid-compat$(EXESUF): tests/test-x86-cpuid-compat.o $(qtest-obj-y)
tests/ivshmem-test$(EXESUF): tests/ivshmem-test.o contrib/ivshmem-server/ivshmem-server.o $(libqos-pc-obj-y)
tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o
tests/vhost-user-bridge$(EXESUF): tests/vhost-user-bridge.o contrib/libvhost-user/libvhost-user.o $(test-util-obj-y)
tests/test-uuid$(EXESUF): tests/test-uuid.o $(test-util-obj-y)
tests/test-arm-mptimer$(EXESUF): tests/test-arm-mptimer.o
......
......@@ -30,17 +30,9 @@
#define _FILE_OFFSET_BITS 64
#include "qemu/osdep.h"
#include <sys/socket.h>
#include <sys/un.h>
#include <sys/unistd.h>
#include <sys/eventfd.h>
#include <arpa/inet.h>
#include <netdb.h>
#include <linux/vhost.h>
#include "qemu/atomic.h"
#include "qemu/iov.h"
#include "standard-headers/linux/virtio_net.h"
#include "standard-headers/linux/virtio_ring.h"
#include "contrib/libvhost-user/libvhost-user.h"
#define VHOST_USER_BRIDGE_DEBUG 1
......@@ -64,6 +56,17 @@ typedef struct Dispatcher {
Event events[FD_SETSIZE];
} Dispatcher;
typedef struct VubrDev {
VuDev vudev;
Dispatcher dispatcher;
int backend_udp_sock;
struct sockaddr_in backend_udp_dest;
int hdrlen;
int sock;
int ready;
int quit;
} VubrDev;
static void
vubr_die(const char *s)
{
......@@ -101,8 +104,6 @@ dispatcher_add(Dispatcher *dispr, int sock, void *ctx, CallbackFunc cb)
return 0;
}
/* dispatcher_remove() is not currently in use but may be useful
* in the future. */
static int
dispatcher_remove(Dispatcher *dispr, int sock)
{
......@@ -157,1039 +158,313 @@ dispatcher_wait(Dispatcher *dispr, uint32_t timeout)
return 0;
}
typedef struct VubrVirtq {
int call_fd;
int kick_fd;
uint32_t size;
uint16_t last_avail_index;
uint16_t last_used_index;
struct vring_desc *desc;
struct vring_avail *avail;
struct vring_used *used;
uint64_t log_guest_addr;
int enable;
} VubrVirtq;
/* Based on qemu/hw/virtio/vhost-user.c */
#define VHOST_MEMORY_MAX_NREGIONS 8
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* v1.0 compliant. */
#define VIRTIO_F_VERSION_1 32
#define VHOST_LOG_PAGE 4096
enum VhostUserProtocolFeature {
VHOST_USER_PROTOCOL_F_MQ = 0,
VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
VHOST_USER_PROTOCOL_F_RARP = 2,
VHOST_USER_PROTOCOL_F_MAX
};
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
typedef enum VhostUserRequest {
VHOST_USER_NONE = 0,
VHOST_USER_GET_FEATURES = 1,
VHOST_USER_SET_FEATURES = 2,
VHOST_USER_SET_OWNER = 3,
VHOST_USER_RESET_OWNER = 4,
VHOST_USER_SET_MEM_TABLE = 5,
VHOST_USER_SET_LOG_BASE = 6,
VHOST_USER_SET_LOG_FD = 7,
VHOST_USER_SET_VRING_NUM = 8,
VHOST_USER_SET_VRING_ADDR = 9,
VHOST_USER_SET_VRING_BASE = 10,
VHOST_USER_GET_VRING_BASE = 11,
VHOST_USER_SET_VRING_KICK = 12,
VHOST_USER_SET_VRING_CALL = 13,
VHOST_USER_SET_VRING_ERR = 14,
VHOST_USER_GET_PROTOCOL_FEATURES = 15,
VHOST_USER_SET_PROTOCOL_FEATURES = 16,
VHOST_USER_GET_QUEUE_NUM = 17,
VHOST_USER_SET_VRING_ENABLE = 18,
VHOST_USER_SEND_RARP = 19,
VHOST_USER_MAX
} VhostUserRequest;
typedef struct VhostUserMemoryRegion {
uint64_t guest_phys_addr;
uint64_t memory_size;
uint64_t userspace_addr;
uint64_t mmap_offset;
} VhostUserMemoryRegion;
typedef struct VhostUserMemory {
uint32_t nregions;
uint32_t padding;
VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;
typedef struct VhostUserLog {
uint64_t mmap_size;
uint64_t mmap_offset;
} VhostUserLog;
typedef struct VhostUserMsg {
VhostUserRequest request;
#define VHOST_USER_VERSION_MASK (0x3)
#define VHOST_USER_REPLY_MASK (0x1<<2)
uint32_t flags;
uint32_t size; /* the following payload size */
union {
#define VHOST_USER_VRING_IDX_MASK (0xff)
#define VHOST_USER_VRING_NOFD_MASK (0x1<<8)
uint64_t u64;
struct vhost_vring_state state;
struct vhost_vring_addr addr;
VhostUserMemory memory;
VhostUserLog log;
} payload;
int fds[VHOST_MEMORY_MAX_NREGIONS];
int fd_num;
} QEMU_PACKED VhostUserMsg;
#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64)
/* The version of the protocol we support */
#define VHOST_USER_VERSION (0x1)
#define MAX_NR_VIRTQUEUE (8)
typedef struct VubrDevRegion {
/* Guest Physical address. */
uint64_t gpa;
/* Memory region size. */
uint64_t size;
/* QEMU virtual address (userspace). */
uint64_t qva;
/* Starting offset in our mmaped space. */
uint64_t mmap_offset;
/* Start address of mmaped space. */
uint64_t mmap_addr;
} VubrDevRegion;
typedef struct VubrDev {
int sock;
Dispatcher dispatcher;
uint32_t nregions;
VubrDevRegion regions[VHOST_MEMORY_MAX_NREGIONS];
VubrVirtq vq[MAX_NR_VIRTQUEUE];
int log_call_fd;
uint64_t log_size;
uint8_t *log_table;
int backend_udp_sock;
struct sockaddr_in backend_udp_dest;
int ready;
uint64_t features;
int hdrlen;
} VubrDev;
static const char *vubr_request_str[] = {
[VHOST_USER_NONE] = "VHOST_USER_NONE",
[VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES",
[VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES",
[VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER",
[VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER",
[VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE",
[VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE",
[VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD",
[VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM",
[VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR",
[VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE",
[VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE",
[VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK",
[VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL",
[VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR",
[VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES",
[VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES",
[VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM",
[VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE",
[VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP",
[VHOST_USER_MAX] = "VHOST_USER_MAX",
};
static void
print_buffer(uint8_t *buf, size_t len)
{
int i;
printf("Raw buffer:\n");
for (i = 0; i < len; i++) {
if (i % 16 == 0) {
printf("\n");
}
if (i % 4 == 0) {
printf(" ");
}
printf("%02x ", buf[i]);
}
printf("\n............................................................\n");
}
/* Translate guest physical address to our virtual address. */
static uint64_t
gpa_to_va(VubrDev *dev, uint64_t guest_addr)
{
int i;
/* Find matching memory region. */
for (i = 0; i < dev->nregions; i++) {
VubrDevRegion *r = &dev->regions[i];
if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) {
return guest_addr - r->gpa + r->mmap_addr + r->mmap_offset;
}
}
assert(!"address not found in regions");
return 0;
}
/* Translate qemu virtual address to our virtual address. */
static uint64_t
qva_to_va(VubrDev *dev, uint64_t qemu_addr)
{
int i;
/* Find matching memory region. */
for (i = 0; i < dev->nregions; i++) {
VubrDevRegion *r = &dev->regions[i];
if ((qemu_addr >= r->qva) && (qemu_addr < (r->qva + r->size))) {
return qemu_addr - r->qva + r->mmap_addr + r->mmap_offset;
}
}
assert(!"address not found in regions");
return 0;
}
static void
vubr_message_read(int conn_fd, VhostUserMsg *vmsg)
vubr_handle_tx(VuDev *dev, int qidx)
{
char control[CMSG_SPACE(VHOST_MEMORY_MAX_NREGIONS * sizeof(int))] = { };
struct iovec iov = {
.iov_base = (char *)vmsg,
.iov_len = VHOST_USER_HDR_SIZE,
};
struct msghdr msg = {
.msg_iov = &iov,
.msg_iovlen = 1,
.msg_control = control,
.msg_controllen = sizeof(control),
};
size_t fd_size;
struct cmsghdr *cmsg;
int rc;
VuVirtq *vq = vu_get_queue(dev, qidx);
VubrDev *vubr = container_of(dev, VubrDev, vudev);
int hdrlen = vubr->hdrlen;
VuVirtqElement *elem = NULL;
rc = recvmsg(conn_fd, &msg, 0);
assert(qidx % 2);
if (rc == 0) {
vubr_die("recvmsg");
fprintf(stderr, "Peer disconnected.\n");
exit(1);
}
if (rc < 0) {
vubr_die("recvmsg");
}
for (;;) {
ssize_t ret;
unsigned int out_num;
struct iovec sg[VIRTQUEUE_MAX_SIZE], *out_sg;
vmsg->fd_num = 0;
for (cmsg = CMSG_FIRSTHDR(&msg);
cmsg != NULL;
cmsg = CMSG_NXTHDR(&msg, cmsg))
{
if (cmsg->cmsg_level == SOL_SOCKET && cmsg->cmsg_type == SCM_RIGHTS) {
fd_size = cmsg->cmsg_len - CMSG_LEN(0);
vmsg->fd_num = fd_size / sizeof(int);
memcpy(vmsg->fds, CMSG_DATA(cmsg), fd_size);
elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
if (!elem) {
break;
}
}
if (vmsg->size > sizeof(vmsg->payload)) {
fprintf(stderr,
"Error: too big message request: %d, size: vmsg->size: %u, "
"while sizeof(vmsg->payload) = %zu\n",
vmsg->request, vmsg->size, sizeof(vmsg->payload));
exit(1);
}
if (vmsg->size) {
rc = read(conn_fd, &vmsg->payload, vmsg->size);
if (rc == 0) {
vubr_die("recvmsg");
fprintf(stderr, "Peer disconnected.\n");
exit(1);
out_num = elem->out_num;
out_sg = elem->out_sg;
if (out_num < 1) {
fprintf(stderr, "virtio-net header not in first element\n");
break;
}
if (rc < 0) {
vubr_die("recvmsg");
if (VHOST_USER_BRIDGE_DEBUG) {
iov_hexdump(out_sg, out_num, stderr, "TX:", 1024);
}
assert(rc == vmsg->size);
if (hdrlen) {
unsigned sg_num = iov_copy(sg, ARRAY_SIZE(sg),
out_sg, out_num,
hdrlen, -1);
out_num = sg_num;
out_sg = sg;
}
}
static void
vubr_message_write(int conn_fd, VhostUserMsg *vmsg)
{
int rc;
struct msghdr msg = {
.msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
.msg_namelen = sizeof(struct sockaddr_in),
.msg_iov = out_sg,
.msg_iovlen = out_num,
};
do {
rc = write(conn_fd, vmsg, VHOST_USER_HDR_SIZE + vmsg->size);
} while (rc < 0 && errno == EINTR);
if (rc < 0) {
vubr_die("write");
}
}
ret = sendmsg(vubr->backend_udp_sock, &msg, 0);
} while (ret == -1 && (errno == EAGAIN || errno == EINTR));
static void
vubr_backend_udp_sendbuf(VubrDev *dev, uint8_t *buf, size_t len)
{
int slen = sizeof(struct sockaddr_in);
if (sendto(dev->backend_udp_sock, buf, len, 0,
(struct sockaddr *) &dev->backend_udp_dest, slen) == -1) {
vubr_die("sendto()");
if (ret == -1) {
vubr_die("sendmsg()");
}
}
static int
vubr_backend_udp_recvbuf(VubrDev *dev, uint8_t *buf, size_t buflen)
{
int slen = sizeof(struct sockaddr_in);
int rc;
vu_queue_push(dev, vq, elem, 0);
vu_queue_notify(dev, vq);
rc = recvfrom(dev->backend_udp_sock, buf, buflen, 0,
(struct sockaddr *) &dev->backend_udp_dest,
(socklen_t *)&slen);
if (rc == -1) {
vubr_die("recvfrom()");
free(elem);
elem = NULL;
}
return rc;
free(elem);
}
static void
vubr_consume_raw_packet(VubrDev *dev, uint8_t *buf, uint32_t len)
iov_restore_front(struct iovec *front, struct iovec *iov, size_t bytes)
{
int hdrlen = dev->hdrlen;
DPRINT(" hdrlen = %d\n", dev->hdrlen);
struct iovec *cur;
if (VHOST_USER_BRIDGE_DEBUG) {
print_buffer(buf, len);
for (cur = front; front != iov; cur++) {
bytes -= cur->iov_len;
}
vubr_backend_udp_sendbuf(dev, buf + hdrlen, len - hdrlen);
}
/* Kick the log_call_fd if required. */
static void
vubr_log_kick(VubrDev *dev)
{
if (dev->log_call_fd != -1) {
DPRINT("Kicking the QEMU's log...\n");
eventfd_write(dev->log_call_fd, 1);
}
cur->iov_base -= bytes;
cur->iov_len += bytes;
}
/* Kick the guest if necessary. */
static void
vubr_virtqueue_kick(VubrVirtq *vq)
iov_truncate(struct iovec *iov, unsigned iovc, size_t bytes)
{
if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT)) {
DPRINT("Kicking the guest...\n");
eventfd_write(vq->call_fd, 1);
}
}
static void
vubr_log_page(uint8_t *log_table, uint64_t page)
{
DPRINT("Logged dirty guest page: %"PRId64"\n", page);
atomic_or(&log_table[page / 8], 1 << (page % 8));
}
unsigned i;
static void
vubr_log_write(VubrDev *dev, uint64_t address, uint64_t length)
{
uint64_t page;
if (!(dev->features & (1ULL << VHOST_F_LOG_ALL)) ||
!dev->log_table || !length) {
for (i = 0; i < iovc; i++, iov++) {
if (bytes < iov->iov_len) {
iov->iov_len = bytes;
return;
}
assert(dev->log_size > ((address + length - 1) / VHOST_LOG_PAGE / 8));
page = address / VHOST_LOG_PAGE;
while (page * VHOST_LOG_PAGE < address + length) {
vubr_log_page(dev->log_table, page);
page += VHOST_LOG_PAGE;
bytes -= iov->iov_len;
}
vubr_log_kick(dev);
assert(!"couldn't truncate iov");
}
static void
vubr_post_buffer(VubrDev *dev, VubrVirtq *vq, uint8_t *buf, int32_t len)
vubr_backend_recv_cb(int sock, void *ctx)
{
struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
uint64_t log_guest_addr = vq->log_guest_addr;
int32_t remaining_len = len;
unsigned int size = vq->size;
uint16_t avail_index = atomic_mb_read(&avail->idx);
/* We check the available descriptors before posting the
* buffer, so here we assume that enough available
* descriptors. */
assert(vq->last_avail_index != avail_index);
uint16_t a_index = vq->last_avail_index % size;
uint16_t u_index = vq->last_used_index % size;
uint16_t d_index = avail->ring[a_index];
int i = d_index;
uint32_t written_len = 0;
do {
DPRINT("Post packet to guest on vq:\n");
DPRINT(" size = %d\n", vq->size);
DPRINT(" last_avail_index = %d\n", vq->last_avail_index);
DPRINT(" last_used_index = %d\n", vq->last_used_index);
DPRINT(" a_index = %d\n", a_index);
DPRINT(" u_index = %d\n", u_index);
DPRINT(" d_index = %d\n", d_index);
DPRINT(" desc[%d].addr = 0x%016"PRIx64"\n", i, desc[i].addr);
DPRINT(" desc[%d].len = %d\n", i, desc[i].len);
DPRINT(" desc[%d].flags = %d\n", i, desc[i].flags);
DPRINT(" avail->idx = %d\n", avail_index);
DPRINT(" used->idx = %d\n", used->idx);
if (!(desc[i].flags & VRING_DESC_F_WRITE)) {
/* FIXME: we should find writable descriptor. */
fprintf(stderr, "Error: descriptor is not writable. Exiting.\n");
exit(1);
}
void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
uint32_t chunk_len = desc[i].len;
uint32_t chunk_write_len = MIN(remaining_len, chunk_len);
memcpy(chunk_start, buf + written_len, chunk_write_len);
vubr_log_write(dev, desc[i].addr, chunk_write_len);
remaining_len -= chunk_write_len;
written_len += chunk_write_len;
if ((remaining_len == 0) || !(desc[i].flags & VRING_DESC_F_NEXT)) {
break;
}
VubrDev *vubr = (VubrDev *) ctx;
VuDev *dev = &vubr->vudev;
VuVirtq *vq = vu_get_queue(dev, 0);
VuVirtqElement *elem = NULL;
struct iovec mhdr_sg[VIRTQUEUE_MAX_SIZE];
struct virtio_net_hdr_mrg_rxbuf mhdr;
unsigned mhdr_cnt = 0;
int hdrlen = vubr->hdrlen;
int i = 0;
struct virtio_net_hdr hdr = {
.flags = 0,
.gso_type = VIRTIO_NET_HDR_GSO_NONE
};
i = desc[i].next;
} while (1);
DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
DPRINT(" hdrlen = %d\n", hdrlen);
if (remaining_len > 0) {
fprintf(stderr,
"Too long packet for RX, remaining_len = %d, Dropping...\n",
remaining_len);
if (!vu_queue_enabled(dev, vq) ||
!vu_queue_avail_bytes(dev, vq, hdrlen, 0)) {
DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
return;
}
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
vubr_log_write(dev,
log_guest_addr + offsetof(struct vring_used, ring[u_index]),
sizeof(used->ring[u_index]));
vq->last_avail_index++;
vq->last_used_index++;
atomic_mb_set(&used->idx, vq->last_used_index);
vubr_log_write(dev,
log_guest_addr + offsetof(struct vring_used, idx),
sizeof(used->idx));
/* Kick the guest if necessary. */
vubr_virtqueue_kick(vq);
}
static int
vubr_process_desc(VubrDev *dev, VubrVirtq *vq)
{
struct vring_desc *desc = vq->desc;
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
uint64_t log_guest_addr = vq->log_guest_addr;
unsigned int size = vq->size;
uint16_t a_index = vq->last_avail_index % size;
uint16_t u_index = vq->last_used_index % size;
uint16_t d_index = avail->ring[a_index];
uint32_t i, len = 0;
size_t buf_size = 4096;
uint8_t buf[4096];
DPRINT("Chunks: ");
i = d_index;
do {
void *chunk_start = (void *)(uintptr_t)gpa_to_va(dev, desc[i].addr);
uint32_t chunk_len = desc[i].len;
struct iovec *sg;
ssize_t ret, total = 0;
unsigned int num;
assert(!(desc[i].flags & VRING_DESC_F_WRITE));
if (len + chunk_len < buf_size) {
memcpy(buf + len, chunk_start, chunk_len);
DPRINT("%d ", chunk_len);
} else {
fprintf(stderr, "Error: too long packet. Dropping...\n");
elem = vu_queue_pop(dev, vq, sizeof(VuVirtqElement));
if (!elem) {
break;
}
len += chunk_len;
if (!(desc[i].flags & VRING_DESC_F_NEXT)) {
if (elem->in_num < 1) {
fprintf(stderr, "virtio-net contains no in buffers\n");
break;
}
i = desc[i].next;
} while (1);
DPRINT("\n");
if (!len) {
return -1;
sg = elem->in_sg;
num = elem->in_num;
if (i == 0) {
if (hdrlen == 12) {
mhdr_cnt = iov_copy(mhdr_sg, ARRAY_SIZE(mhdr_sg),
sg, elem->in_num,
offsetof(typeof(mhdr), num_buffers),
sizeof(mhdr.num_buffers));
}
iov_from_buf(sg, elem->in_num, 0, &hdr, sizeof hdr);
total += hdrlen;
assert(iov_discard_front(&sg, &num, hdrlen) == hdrlen);
}
/* Add descriptor to the used ring. */
used->ring[u_index].id = d_index;
used->ring[u_index].len = len;
vubr_log_write(dev,
log_guest_addr + offsetof(struct vring_used, ring[u_index]),
sizeof(used->ring[u_index]));
vubr_consume_raw_packet(dev, buf, len);
return 0;
}
struct msghdr msg = {
.msg_name = (struct sockaddr *) &vubr->backend_udp_dest,
.msg_namelen = sizeof(struct sockaddr_in),
.msg_iov = sg,
.msg_iovlen = elem->in_num,
.msg_flags = MSG_DONTWAIT,
};
do {
ret = recvmsg(vubr->backend_udp_sock, &msg, 0);
} while (ret == -1 && (errno == EINTR));
static void
vubr_process_avail(VubrDev *dev, VubrVirtq *vq)
{
struct vring_avail *avail = vq->avail;
struct vring_used *used = vq->used;
uint64_t log_guest_addr = vq->log_guest_addr;
while (vq->last_avail_index != atomic_mb_read(&avail->idx)) {
vubr_process_desc(dev, vq);
vq->last_avail_index++;
vq->last_used_index++;
if (i == 0) {
iov_restore_front(elem->in_sg, sg, hdrlen);
}
atomic_mb_set(&used->idx, vq->last_used_index);
vubr_log_write(dev,
log_guest_addr + offsetof(struct vring_used, idx),
sizeof(used->idx));
}
if (ret == -1) {
if (errno == EWOULDBLOCK) {
vu_queue_rewind(dev, vq, 1);
break;
}
static void
vubr_backend_recv_cb(int sock, void *ctx)
{
VubrDev *dev = (VubrDev *) ctx;
VubrVirtq *rx_vq = &dev->vq[0];
uint8_t buf[4096];
struct virtio_net_hdr_v1 *hdr = (struct virtio_net_hdr_v1 *)buf;
int hdrlen = dev->hdrlen;
int buflen = sizeof(buf);
int len;
if (!dev->ready) {
return;
vubr_die("recvmsg()");
}
DPRINT("\n\n *** IN UDP RECEIVE CALLBACK ***\n\n");
DPRINT(" hdrlen = %d\n", hdrlen);
total += ret;
iov_truncate(elem->in_sg, elem->in_num, total);
vu_queue_fill(dev, vq, elem, total, i++);
uint16_t avail_index = atomic_mb_read(&rx_vq->avail->idx);
free(elem);
elem = NULL;
} while (false); /* could loop if DONTWAIT worked? */
/* If there is no available descriptors, just do nothing.
* The buffer will be handled by next arrived UDP packet,
* or next kick on receive virtq. */
if (rx_vq->last_avail_index == avail_index) {
DPRINT("Got UDP packet, but no available descriptors on RX virtq.\n");
return;
if (mhdr_cnt) {
mhdr.num_buffers = i;
iov_from_buf(mhdr_sg, mhdr_cnt,
0,
&mhdr.num_buffers, sizeof mhdr.num_buffers);
}
memset(buf, 0, hdrlen);
/* TODO: support mergeable buffers. */
if (hdrlen == 12)
hdr->num_buffers = 1;
len = vubr_backend_udp_recvbuf(dev, buf + hdrlen, buflen - hdrlen);
vu_queue_flush(dev, vq, i);
vu_queue_notify(dev, vq);
vubr_post_buffer(dev, rx_vq, buf, len + hdrlen);
free(elem);
}
static void
vubr_kick_cb(int sock, void *ctx)
vubr_receive_cb(int sock, void *ctx)
{
VubrDev *dev = (VubrDev *) ctx;
eventfd_t kick_data;
ssize_t rc;
VubrDev *vubr = (VubrDev *)ctx;
rc = eventfd_read(sock, &kick_data);
if (rc == -1) {
vubr_die("eventfd_read()");
} else {
DPRINT("Got kick_data: %016"PRIx64"\n", kick_data);
vubr_process_avail(dev, &dev->vq[1]);
if (!vu_dispatch(&vubr->vudev)) {
fprintf(stderr, "Error while dispatching\n");
}
}
static int
vubr_none_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
}
typedef struct WatchData {
VuDev *dev;
vu_watch_cb cb;
void *data;
} WatchData;
static int
vubr_get_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
static void
watch_cb(int sock, void *ctx)
{
vmsg->payload.u64 =
((1ULL << VIRTIO_NET_F_MRG_RXBUF) |
(1ULL << VHOST_F_LOG_ALL) |
(1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) |
(1ULL << VHOST_USER_F_PROTOCOL_FEATURES));
vmsg->size = sizeof(vmsg->payload.u64);
struct WatchData *wd = ctx;
DPRINT("Sending back to guest u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
/* Reply */
return 1;
wd->cb(wd->dev, VU_WATCH_IN, wd->data);
}
static int
vubr_set_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
static void
vubr_set_watch(VuDev *dev, int fd, int condition,
vu_watch_cb cb, void *data)
{
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
dev->features = vmsg->payload.u64;
if ((dev->features & (1ULL << VIRTIO_F_VERSION_1)) ||
(dev->features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
dev->hdrlen = 12;
} else {
dev->hdrlen = 10;
}
VubrDev *vubr = container_of(dev, VubrDev, vudev);
static WatchData watches[FD_SETSIZE];
struct WatchData *wd = &watches[fd];
return 0;
}
static int
vubr_set_owner_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
return 0;
wd->cb = cb;
wd->data = data;
wd->dev = dev;
dispatcher_add(&vubr->dispatcher, fd, wd, watch_cb);
}
static void
vubr_close_log(VubrDev *dev)
vubr_remove_watch(VuDev *dev, int fd)
{
if (dev->log_table) {
if (munmap(dev->log_table, dev->log_size) != 0) {
vubr_die("munmap()");
}
VubrDev *vubr = container_of(dev, VubrDev, vudev);
dev->log_table = 0;
}
if (dev->log_call_fd != -1) {
close(dev->log_call_fd);
dev->log_call_fd = -1;
}
}
static int
vubr_reset_device_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
vubr_close_log(dev);
dev->ready = 0;
dev->features = 0;
return 0;
dispatcher_remove(&vubr->dispatcher, fd);
}
static int
vubr_set_mem_table_exec(VubrDev *dev, VhostUserMsg *vmsg)
vubr_send_rarp_exec(VuDev *dev, VhostUserMsg *vmsg)
{
int i;
VhostUserMemory *memory = &vmsg->payload.memory;
dev->nregions = memory->nregions;
DPRINT("Nregions: %d\n", memory->nregions);
for (i = 0; i < dev->nregions; i++) {
void *mmap_addr;
VhostUserMemoryRegion *msg_region = &memory->regions[i];
VubrDevRegion *dev_region = &dev->regions[i];
DPRINT("Region %d\n", i);
DPRINT(" guest_phys_addr: 0x%016"PRIx64"\n",
msg_region->guest_phys_addr);
DPRINT(" memory_size: 0x%016"PRIx64"\n",
msg_region->memory_size);
DPRINT(" userspace_addr 0x%016"PRIx64"\n",
msg_region->userspace_addr);
DPRINT(" mmap_offset 0x%016"PRIx64"\n",
msg_region->mmap_offset);
dev_region->gpa = msg_region->guest_phys_addr;
dev_region->size = msg_region->memory_size;
dev_region->qva = msg_region->userspace_addr;
dev_region->mmap_offset = msg_region->mmap_offset;
/* We don't use offset argument of mmap() since the
* mapped address has to be page aligned, and we use huge
* pages. */
mmap_addr = mmap(0, dev_region->size + dev_region->mmap_offset,
PROT_READ | PROT_WRITE, MAP_SHARED,
vmsg->fds[i], 0);
if (mmap_addr == MAP_FAILED) {
vubr_die("mmap");
}
dev_region->mmap_addr = (uint64_t)(uintptr_t)mmap_addr;
DPRINT(" mmap_addr: 0x%016"PRIx64"\n", dev_region->mmap_addr);
close(vmsg->fds[i]);
}
DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
}
static int
vubr_set_log_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
vubr_process_msg(VuDev *dev, VhostUserMsg *vmsg, int *do_reply)
{
int fd;
uint64_t log_mmap_size, log_mmap_offset;
void *rc;
assert(vmsg->fd_num == 1);
fd = vmsg->fds[0];
assert(vmsg->size == sizeof(vmsg->payload.log));
log_mmap_offset = vmsg->payload.log.mmap_offset;
log_mmap_size = vmsg->payload.log.mmap_size;
DPRINT("Log mmap_offset: %"PRId64"\n", log_mmap_offset);
DPRINT("Log mmap_size: %"PRId64"\n", log_mmap_size);
rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
log_mmap_offset);
if (rc == MAP_FAILED) {
vubr_die("mmap");
}
dev->log_table = rc;
dev->log_size = log_mmap_size;
vmsg->size = sizeof(vmsg->payload.u64);
/* Reply */
switch (vmsg->request) {
case VHOST_USER_SEND_RARP:
*do_reply = vubr_send_rarp_exec(dev, vmsg);
return 1;
}
static int
vubr_set_log_fd_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
assert(vmsg->fd_num == 1);
dev->log_call_fd = vmsg->fds[0];
DPRINT("Got log_call_fd: %d\n", vmsg->fds[0]);
return 0;
}
static int
vubr_set_vring_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
unsigned int num = vmsg->payload.state.num;
DPRINT("State.index: %d\n", index);
DPRINT("State.num: %d\n", num);
dev->vq[index].size = num;
default:
/* let the library handle the rest */
return 0;
}
static int
vubr_set_vring_addr_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
struct vhost_vring_addr *vra = &vmsg->payload.addr;
unsigned int index = vra->index;
VubrVirtq *vq = &dev->vq[index];
DPRINT("vhost_vring_addr:\n");
DPRINT(" index: %d\n", vra->index);
DPRINT(" flags: %d\n", vra->flags);
DPRINT(" desc_user_addr: 0x%016llx\n", vra->desc_user_addr);
DPRINT(" used_user_addr: 0x%016llx\n", vra->used_user_addr);
DPRINT(" avail_user_addr: 0x%016llx\n", vra->avail_user_addr);
DPRINT(" log_guest_addr: 0x%016llx\n", vra->log_guest_addr);
vq->desc = (struct vring_desc *)(uintptr_t)qva_to_va(dev, vra->desc_user_addr);
vq->used = (struct vring_used *)(uintptr_t)qva_to_va(dev, vra->used_user_addr);
vq->avail = (struct vring_avail *)(uintptr_t)qva_to_va(dev, vra->avail_user_addr);
vq->log_guest_addr = vra->log_guest_addr;
DPRINT("Setting virtq addresses:\n");
DPRINT(" vring_desc at %p\n", vq->desc);
DPRINT(" vring_used at %p\n", vq->used);
DPRINT(" vring_avail at %p\n", vq->avail);
vq->last_used_index = vq->used->idx;
if (vq->last_avail_index != vq->used->idx) {
DPRINT("Last avail index != used index: %d != %d, resuming",
vq->last_avail_index, vq->used->idx);
vq->last_avail_index = vq->used->idx;
}
return 0;
}
static int
vubr_set_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
static void
vubr_set_features(VuDev *dev, uint64_t features)
{
unsigned int index = vmsg->payload.state.index;
unsigned int num = vmsg->payload.state.num;
DPRINT("State.index: %d\n", index);
DPRINT("State.num: %d\n", num);
dev->vq[index].last_avail_index = num;
VubrDev *vubr = container_of(dev, VubrDev, vudev);
return 0;
}
static int
vubr_get_vring_base_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
unsigned int index = vmsg->payload.state.index;
DPRINT("State.index: %d\n", index);
vmsg->payload.state.num = dev->vq[index].last_avail_index;
vmsg->size = sizeof(vmsg->payload.state);
/* FIXME: this is a work-around for a bug in QEMU enabling
* too early vrings. When protocol features are enabled,
* we have to respect * VHOST_USER_SET_VRING_ENABLE request. */
dev->ready = 0;
if (dev->vq[index].call_fd != -1) {
close(dev->vq[index].call_fd);
dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
dev->vq[index].call_fd = -1;
}
if (dev->vq[index].kick_fd != -1) {
close(dev->vq[index].kick_fd);
dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
dev->vq[index].kick_fd = -1;
if ((features & (1ULL << VIRTIO_F_VERSION_1)) ||
(features & (1ULL << VIRTIO_NET_F_MRG_RXBUF))) {
vubr->hdrlen = 12;
} else {
vubr->hdrlen = 10;
}
/* Reply */
return 1;
}
static int
vubr_set_vring_kick_exec(VubrDev *dev, VhostUserMsg *vmsg)
static uint64_t
vubr_get_features(VuDev *dev)
{
uint64_t u64_arg = vmsg->payload.u64;
int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
assert(vmsg->fd_num == 1);
if (dev->vq[index].kick_fd != -1) {
close(dev->vq[index].kick_fd);
dispatcher_remove(&dev->dispatcher, dev->vq[index].kick_fd);
}
dev->vq[index].kick_fd = vmsg->fds[0];
DPRINT("Got kick_fd: %d for vq: %d\n", vmsg->fds[0], index);
if (index % 2 == 1) {
/* TX queue. */
dispatcher_add(&dev->dispatcher, dev->vq[index].kick_fd,
dev, vubr_kick_cb);
DPRINT("Waiting for kicks on fd: %d for vq: %d\n",
dev->vq[index].kick_fd, index);
}
/* We temporarily use this hack to determine that both TX and RX
* queues are set up and ready for processing.
* FIXME: we need to rely in VHOST_USER_SET_VRING_ENABLE and
* actual kicks. */
if (dev->vq[0].kick_fd != -1 &&
dev->vq[1].kick_fd != -1) {
dev->ready = 1;
DPRINT("vhost-user-bridge is ready for processing queues.\n");
}
return 0;
return 1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE |
1ULL << VIRTIO_NET_F_MRG_RXBUF;
}
static int
vubr_set_vring_call_exec(VubrDev *dev, VhostUserMsg *vmsg)
static void
vubr_queue_set_started(VuDev *dev, int qidx, bool started)
{
uint64_t u64_arg = vmsg->payload.u64;
int index = u64_arg & VHOST_USER_VRING_IDX_MASK;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
assert((u64_arg & VHOST_USER_VRING_NOFD_MASK) == 0);
assert(vmsg->fd_num == 1);
VuVirtq *vq = vu_get_queue(dev, qidx);
if (dev->vq[index].call_fd != -1) {
close(dev->vq[index].call_fd);
dispatcher_remove(&dev->dispatcher, dev->vq[index].call_fd);
if (qidx % 2 == 1) {
vu_set_queue_handler(dev, vq, started ? vubr_handle_tx : NULL);
}
dev->vq[index].call_fd = vmsg->fds[0];
DPRINT("Got call_fd: %d for vq: %d\n", vmsg->fds[0], index);
return 0;
}
static int
vubr_set_vring_err_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
return 0;
}
static int
vubr_get_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
vmsg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD;
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
vmsg->size = sizeof(vmsg->payload.u64);
/* Reply */
return 1;
}
static int
vubr_set_protocol_features_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
/* FIXME: unimplented */
DPRINT("u64: 0x%016"PRIx64"\n", vmsg->payload.u64);
return 0;
}
static int
vubr_get_queue_num_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
}
static int
vubr_set_vring_enable_exec(VubrDev *dev, VhostUserMsg *vmsg)
static void
vubr_panic(VuDev *dev, const char *msg)
{
unsigned int index = vmsg->payload.state.index;
unsigned int enable = vmsg->payload.state.num;
VubrDev *vubr = container_of(dev, VubrDev, vudev);
DPRINT("State.index: %d\n", index);
DPRINT("State.enable: %d\n", enable);
dev->vq[index].enable = enable;
return 0;
}
fprintf(stderr, "PANIC: %s\n", msg);
static int
vubr_send_rarp_exec(VubrDev *dev, VhostUserMsg *vmsg)
{
DPRINT("Function %s() not implemented yet.\n", __func__);
return 0;
dispatcher_remove(&vubr->dispatcher, dev->sock);
vubr->quit = 1;
}
static int
vubr_execute_request(VubrDev *dev, VhostUserMsg *vmsg)
{
/* Print out generic part of the request. */
DPRINT(
"================== Vhost user message from QEMU ==================\n");
DPRINT("Request: %s (%d)\n", vubr_request_str[vmsg->request],
vmsg->request);
DPRINT("Flags: 0x%x\n", vmsg->flags);
DPRINT("Size: %d\n", vmsg->size);
if (vmsg->fd_num) {
int i;
DPRINT("Fds:");
for (i = 0; i < vmsg->fd_num; i++) {
DPRINT(" %d", vmsg->fds[i]);
}
DPRINT("\n");
}
switch (vmsg->request) {
case VHOST_USER_NONE:
return vubr_none_exec(dev, vmsg);
case VHOST_USER_GET_FEATURES:
return vubr_get_features_exec(dev, vmsg);
case VHOST_USER_SET_FEATURES:
return vubr_set_features_exec(dev, vmsg);
case VHOST_USER_SET_OWNER:
return vubr_set_owner_exec(dev, vmsg);
case VHOST_USER_RESET_OWNER:
return vubr_reset_device_exec(dev, vmsg);
case VHOST_USER_SET_MEM_TABLE:
return vubr_set_mem_table_exec(dev, vmsg);
case VHOST_USER_SET_LOG_BASE:
return vubr_set_log_base_exec(dev, vmsg);
case VHOST_USER_SET_LOG_FD:
return vubr_set_log_fd_exec(dev, vmsg);
case VHOST_USER_SET_VRING_NUM:
return vubr_set_vring_num_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ADDR:
return vubr_set_vring_addr_exec(dev, vmsg);
case VHOST_USER_SET_VRING_BASE:
return vubr_set_vring_base_exec(dev, vmsg);
case VHOST_USER_GET_VRING_BASE:
return vubr_get_vring_base_exec(dev, vmsg);
case VHOST_USER_SET_VRING_KICK:
return vubr_set_vring_kick_exec(dev, vmsg);
case VHOST_USER_SET_VRING_CALL:
return vubr_set_vring_call_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ERR:
return vubr_set_vring_err_exec(dev, vmsg);
case VHOST_USER_GET_PROTOCOL_FEATURES:
return vubr_get_protocol_features_exec(dev, vmsg);
case VHOST_USER_SET_PROTOCOL_FEATURES:
return vubr_set_protocol_features_exec(dev, vmsg);
case VHOST_USER_GET_QUEUE_NUM:
return vubr_get_queue_num_exec(dev, vmsg);
case VHOST_USER_SET_VRING_ENABLE:
return vubr_set_vring_enable_exec(dev, vmsg);
case VHOST_USER_SEND_RARP:
return vubr_send_rarp_exec(dev, vmsg);
case VHOST_USER_MAX:
assert(vmsg->request != VHOST_USER_MAX);
}
return 0;
}
static void
vubr_receive_cb(int sock, void *ctx)
{
VubrDev *dev = (VubrDev *) ctx;
VhostUserMsg vmsg;
int reply_requested;
vubr_message_read(sock, &vmsg);
reply_requested = vubr_execute_request(dev, &vmsg);
if (reply_requested) {
/* Set the version in the flags when sending the reply */
vmsg.flags &= ~VHOST_USER_VERSION_MASK;
vmsg.flags |= VHOST_USER_VERSION;
vmsg.flags |= VHOST_USER_REPLY_MASK;
vubr_message_write(sock, &vmsg);
}
}
static const VuDevIface vuiface = {
.get_features = vubr_get_features,
.set_features = vubr_set_features,
.process_msg = vubr_process_msg,
.queue_set_started = vubr_queue_set_started,
};
static void
vubr_accept_cb(int sock, void *ctx)
......@@ -1204,36 +479,26 @@ vubr_accept_cb(int sock, void *ctx)
vubr_die("accept()");
}
DPRINT("Got connection from remote peer on sock %d\n", conn_fd);
vu_init(&dev->vudev,
conn_fd,
vubr_panic,
vubr_set_watch,
vubr_remove_watch,
&vuiface);
dispatcher_add(&dev->dispatcher, conn_fd, ctx, vubr_receive_cb);
dispatcher_remove(&dev->dispatcher, sock);
}
static VubrDev *
vubr_new(const char *path, bool client)
{
VubrDev *dev = (VubrDev *) calloc(1, sizeof(VubrDev));
dev->nregions = 0;
int i;
struct sockaddr_un un;
CallbackFunc cb;
size_t len;
for (i = 0; i < MAX_NR_VIRTQUEUE; i++) {
dev->vq[i] = (VubrVirtq) {
.call_fd = -1, .kick_fd = -1,
.size = 0,
.last_avail_index = 0, .last_used_index = 0,
.desc = 0, .avail = 0, .used = 0,
.enable = 0,
};
}
/* Init log */
dev->log_call_fd = -1;
dev->log_size = 0;
dev->log_table = 0;
dev->ready = 0;
dev->features = 0;
/* Get a UNIX socket. */
dev->sock = socket(AF_UNIX, SOCK_STREAM, 0);
if (dev->sock == -1) {
......@@ -1261,10 +526,17 @@ vubr_new(const char *path, bool client)
if (connect(dev->sock, (struct sockaddr *)&un, len) == -1) {
vubr_die("connect");
}
vu_init(&dev->vudev,
dev->sock,
vubr_panic,
vubr_set_watch,
vubr_remove_watch,
&vuiface);
cb = vubr_receive_cb;
}
dispatcher_init(&dev->dispatcher);
dispatcher_add(&dev->dispatcher, dev->sock, (void *)dev, cb);
return dev;
......@@ -1345,7 +617,7 @@ vubr_backend_udp_setup(VubrDev *dev,
static void
vubr_run(VubrDev *dev)
{
while (1) {
while (!dev->quit) {
/* timeout 200ms */
dispatcher_wait(&dev->dispatcher, 200000);
/* Here one can try polling strategy. */
......@@ -1421,6 +693,9 @@ main(int argc, char *argv[])
vubr_backend_udp_setup(dev, lhost, lport, rhost, rport);
vubr_run(dev);
vu_deinit(&dev->vudev);
return 0;
out:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册