/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
#include "sysemu/char.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
#include "exec/ram_addr.h"

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>

/* Capacity of the regions[] array in a VhostUserMemory payload. */
#define VHOST_MEMORY_MAX_NREGIONS    8
/*
 * Bit number tested in the feature word returned for
 * VHOST_USER_GET_FEATURES to decide whether protocol features are queried.
 */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
/* Protocol feature bits kept from the backend's reply before acking them. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK 0x1ULL

/* Protocol feature bit number: backend supports multiple queues. */
#define VHOST_USER_PROTOCOL_F_MQ    0

/*
 * Request codes carried in the 'request' field of a VhostUserMsg.
 *
 * Values 1..14 have a vhost ioctl counterpart listed in
 * ioctl_to_vhost_user_request[]; the later values have no entry in that
 * table.  The numeric values are written out explicitly and must not change.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_DEVICE = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    /* One past the last request code; used as an array size and loop bound. */
    VHOST_USER_MAX
} VhostUserRequest;

/* One memory region entry of a VhostUserMemory payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    /*
     * Filled in by vhost_user_call() as userspace_addr minus the host
     * pointer of the RAM block that contains the region.
     */
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

/*
 * Memory table payload: a region count followed by a fixed-capacity array of
 * region descriptors.  vhost_user_call() fills it for
 * VHOST_USER_SET_MEM_TABLE and sends only the first 'nregions' entries.
 */
typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

/*
 * One vhost-user message: a fixed header (request, flags, size) followed by
 * a payload.  Which union member is meaningful depends on the request; see
 * vhost_user_call().  The struct is packed, so the header fields and payload
 * are laid out back to back.
 */
typedef struct VhostUserMsg {
    VhostUserRequest request;

/*
 * Bits of 'flags'.  Replies are expected to carry exactly
 * VHOST_USER_REPLY_MASK | VHOST_USER_VERSION (see vhost_user_read()).
 */
#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
/*
 * Bits of 'u64' for the SET_VRING_KICK/CALL/ERR requests: the low bits hold
 * the vring index, and NOFD is set when no file descriptor accompanies the
 * message.
 */
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
    };
} QEMU_PACKED VhostUserMsg;

/*
 * Dummy instance whose only visible use is as a sizeof() operand, here and in
 * the functions below.
 */
static VhostUserMsg m __attribute__ ((unused));
/* Size of the fixed message header: the request, flags and size fields. */
#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
                            + sizeof(m.flags) \
                            + sizeof(m.size))

/* Largest payload a message can carry: the whole struct minus its header. */
#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

/* Report whether KVM is enabled and, if so, whether its eventfds are too. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}

static unsigned long int ioctl_to_vhost_user_request[VHOST_USER_MAX] = {
    -1,                     /* VHOST_USER_NONE */
    VHOST_GET_FEATURES,     /* VHOST_USER_GET_FEATURES */
    VHOST_SET_FEATURES,     /* VHOST_USER_SET_FEATURES */
    VHOST_SET_OWNER,        /* VHOST_USER_SET_OWNER */
104
    VHOST_RESET_DEVICE,      /* VHOST_USER_RESET_DEVICE */
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137
    VHOST_SET_MEM_TABLE,    /* VHOST_USER_SET_MEM_TABLE */
    VHOST_SET_LOG_BASE,     /* VHOST_USER_SET_LOG_BASE */
    VHOST_SET_LOG_FD,       /* VHOST_USER_SET_LOG_FD */
    VHOST_SET_VRING_NUM,    /* VHOST_USER_SET_VRING_NUM */
    VHOST_SET_VRING_ADDR,   /* VHOST_USER_SET_VRING_ADDR */
    VHOST_SET_VRING_BASE,   /* VHOST_USER_SET_VRING_BASE */
    VHOST_GET_VRING_BASE,   /* VHOST_USER_GET_VRING_BASE */
    VHOST_SET_VRING_KICK,   /* VHOST_USER_SET_VRING_KICK */
    VHOST_SET_VRING_CALL,   /* VHOST_USER_SET_VRING_CALL */
    VHOST_SET_VRING_ERR     /* VHOST_USER_SET_VRING_ERR */
};

/*
 * Look up the vhost-user request whose table entry equals the given ioctl
 * number.  Returns VHOST_USER_NONE when no entry matches.
 */
static VhostUserRequest vhost_user_request_translate(unsigned long int request)
{
    VhostUserRequest candidate = 0;

    while (candidate < VHOST_USER_MAX) {
        if (ioctl_to_vhost_user_request[candidate] == request) {
            return candidate;
        }
        candidate++;
    }

    return VHOST_USER_NONE;
}

/*
 * Read one reply message from the backend into *msg.
 *
 * The fixed-size header is read first and validated: its flags must be
 * exactly VHOST_USER_REPLY_MASK | VHOST_USER_VERSION and its payload size
 * must not exceed VHOST_USER_PAYLOAD_SIZE.  The payload, if any, is then read
 * into the bytes of *msg that follow the header.
 *
 * Returns 0 on success, or -1 after reporting the error on a short read or
 * an invalid header.
 */
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    CharDriverState *chr = dev->opaque;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d.", r,
                size);
        return -1;
    }

    /* validate received flags */
    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                " Flags 0x%x instead of 0x%x.", msg->flags,
                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -1;
    }

    /* validate message size is sane; msg->size is unsigned, hence %u */
    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                " Size %u exceeds the maximum %zu.", msg->size,
                VHOST_USER_PAYLOAD_SIZE);
        return -1;
    }

    if (msg->size) {
        /* The payload lands directly after the header inside *msg. */
        p += VHOST_USER_HDR_SIZE;
        size = msg->size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %u.", r, msg->size);
            return -1;
        }
    }

    return 0;
}

/*
 * Send the header and msg->size payload bytes of *msg to the backend.  When
 * fd_num is non-zero, the descriptors in fds[] are first attached to the
 * character device via qemu_chr_fe_set_msgfds().
 *
 * Returns 0 when the whole message was written, -1 otherwise.
 */
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    CharDriverState *chr = dev->opaque;
    int total = VHOST_USER_HDR_SIZE + msg->size;

    if (fd_num) {
        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
    }

    if (qemu_chr_fe_write_all(chr, (const uint8_t *) msg, total) != total) {
        return -1;
    }

    return 0;
}

static int vhost_user_call(struct vhost_dev *dev, unsigned long int request,
        void *arg)
{
    VhostUserMsg msg;
    VhostUserRequest msg_request;
    struct vhost_vring_file *file = 0;
    int need_reply = 0;
    int fds[VHOST_MEMORY_MAX_NREGIONS];
198
    int i, fd;
199 200 201 202
    size_t fd_num = 0;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

203 204 205 206 207 208 209
    /* only translate vhost ioctl requests */
    if (request > VHOST_USER_MAX) {
        msg_request = vhost_user_request_translate(request);
    } else {
        msg_request = request;
    }

210 211 212 213
    msg.request = msg_request;
    msg.flags = VHOST_USER_VERSION;
    msg.size = 0;

214 215
    switch (msg_request) {
    case VHOST_USER_GET_FEATURES:
216
    case VHOST_USER_GET_PROTOCOL_FEATURES:
217
    case VHOST_USER_GET_QUEUE_NUM:
218 219 220
        need_reply = 1;
        break;

221 222
    case VHOST_USER_SET_FEATURES:
    case VHOST_USER_SET_LOG_BASE:
223
    case VHOST_USER_SET_PROTOCOL_FEATURES:
224 225 226 227
        msg.u64 = *((__u64 *) arg);
        msg.size = sizeof(m.u64);
        break;

228
    case VHOST_USER_SET_OWNER:
229
    case VHOST_USER_RESET_DEVICE:
230 231
        break;

232
    case VHOST_USER_SET_MEM_TABLE:
233 234
        for (i = 0; i < dev->mem->nregions; ++i) {
            struct vhost_memory_region *reg = dev->mem->regions + i;
235
            ram_addr_t ram_addr;
M
Michael S. Tsirkin 已提交
236 237 238

            assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
            qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr, &ram_addr);
239
            fd = qemu_get_ram_fd(ram_addr);
240 241 242 243 244
            if (fd > 0) {
                msg.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
                msg.memory.regions[fd_num].memory_size  = reg->memory_size;
                msg.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
                msg.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
245
                    (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
246 247
                assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
                fds[fd_num++] = fd;
248 249 250 251 252 253
            }
        }

        msg.memory.nregions = fd_num;

        if (!fd_num) {
254 255
            error_report("Failed initializing vhost-user memory map, "
                    "consider using -object memory-backend-file share=on");
256 257 258 259 260 261 262 263 264
            return -1;
        }

        msg.size = sizeof(m.memory.nregions);
        msg.size += sizeof(m.memory.padding);
        msg.size += fd_num * sizeof(VhostUserMemoryRegion);

        break;

265
    case VHOST_USER_SET_LOG_FD:
266 267 268
        fds[fd_num++] = *((int *) arg);
        break;

269 270
    case VHOST_USER_SET_VRING_NUM:
    case VHOST_USER_SET_VRING_BASE:
271 272 273 274
        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
        msg.size = sizeof(m.state);
        break;

275
    case VHOST_USER_GET_VRING_BASE:
276 277 278 279 280
        memcpy(&msg.state, arg, sizeof(struct vhost_vring_state));
        msg.size = sizeof(m.state);
        need_reply = 1;
        break;

281
    case VHOST_USER_SET_VRING_ADDR:
282 283 284 285
        memcpy(&msg.addr, arg, sizeof(struct vhost_vring_addr));
        msg.size = sizeof(m.addr);
        break;

286 287 288
    case VHOST_USER_SET_VRING_KICK:
    case VHOST_USER_SET_VRING_CALL:
    case VHOST_USER_SET_VRING_ERR:
289
        file = arg;
290
        msg.u64 = file->index & VHOST_USER_VRING_IDX_MASK;
291 292 293 294 295 296 297 298
        msg.size = sizeof(m.u64);
        if (ioeventfd_enabled() && file->fd > 0) {
            fds[fd_num++] = file->fd;
        } else {
            msg.u64 |= VHOST_USER_VRING_NOFD_MASK;
        }
        break;
    default:
299
        error_report("vhost-user trying to send unhandled ioctl");
300 301 302 303 304 305 306 307 308 309 310 311 312 313 314
        return -1;
        break;
    }

    if (vhost_user_write(dev, &msg, fds, fd_num) < 0) {
        return 0;
    }

    if (need_reply) {
        if (vhost_user_read(dev, &msg) < 0) {
            return 0;
        }

        if (msg_request != msg.request) {
            error_report("Received unexpected msg type."
315
                    " Expected %d received %d", msg_request, msg.request);
316 317 318 319 320
            return -1;
        }

        switch (msg_request) {
        case VHOST_USER_GET_FEATURES:
321
        case VHOST_USER_GET_PROTOCOL_FEATURES:
322
        case VHOST_USER_GET_QUEUE_NUM:
323
            if (msg.size != sizeof(m.u64)) {
324
                error_report("Received bad msg size.");
325 326 327 328 329 330
                return -1;
            }
            *((__u64 *) arg) = msg.u64;
            break;
        case VHOST_USER_GET_VRING_BASE:
            if (msg.size != sizeof(m.state)) {
331
                error_report("Received bad msg size.");
332 333 334 335 336
                return -1;
            }
            memcpy(arg, &msg.state, sizeof(struct vhost_vring_state));
            break;
        default:
337
            error_report("Received unexpected msg type.");
338 339 340 341 342 343 344 345 346 347
            return -1;
            break;
        }
    }

    return 0;
}

static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
348 349 350
    unsigned long long features;
    int err;

351 352 353 354
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = opaque;

355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373
    err = vhost_user_call(dev, VHOST_USER_GET_FEATURES, &features);
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

        err = vhost_user_call(dev, VHOST_USER_GET_PROTOCOL_FEATURES, &features);
        if (err < 0) {
            return err;
        }

        dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
        err = vhost_user_call(dev, VHOST_USER_SET_PROTOCOL_FEATURES,
                              &dev->protocol_features);
        if (err < 0) {
            return err;
        }
374 375 376 377 378 379 380 381

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
            err = vhost_user_call(dev, VHOST_USER_GET_QUEUE_NUM, &dev->max_queues);
            if (err < 0) {
                return err;
            }
        }
382 383
    }

384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
    return 0;
}

/*
 * Detach the backend from the device by clearing dev->opaque.
 * Always returns 0.
 */
static int vhost_user_cleanup(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = NULL;
    return 0;
}

/* Operations table that plugs the functions above into the vhost core. */
const VhostOps user_ops = {
    .backend_type = VHOST_BACKEND_TYPE_USER,
    .vhost_call = vhost_user_call,
    .vhost_backend_init = vhost_user_init,
    .vhost_backend_cleanup = vhost_user_cleanup,
};