vhost-user.c 17.9 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
13
#include "hw/virtio/virtio-net.h"
14 15 16 17
#include "sysemu/char.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
18
#include "exec/ram_addr.h"
19
#include "migration/migration.h"
20 21 22 23 24 25 26 27 28

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>

#define VHOST_MEMORY_MAX_NREGIONS    8
29
#define VHOST_USER_F_PROTOCOL_FEATURES 30
30

31 32 33 34 35 36 37 38 39
/*
 * Bit positions of the optional vhost-user protocol features.  Each
 * enumerator is a bit index (it is used as a shift count and passed to
 * virtio_has_feature() in this file), not a mask.
 * VHOST_USER_PROTOCOL_F_MAX must stay last: it is the number of feature
 * bits known here and is used to build the mask below.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/*
 * Mask with one bit set for every feature below VHOST_USER_PROTOCOL_F_MAX.
 * The shift is done on a 64-bit unsigned constant so the mask has the same
 * width as the uint64_t feature words it is ANDed with, and so adding more
 * feature bits cannot overflow a signed int shift.
 */
#define VHOST_USER_PROTOCOL_FEATURE_MASK \
    ((1ULL << VHOST_USER_PROTOCOL_F_MAX) - 1)
40 41 42 43 44 45

/*
 * Request codes stored in VhostUserMsg.request.  The numeric values are
 * spelled out explicitly because they are transmitted to the backend as
 * part of the message, so they must not shift when entries are added.
 * VHOST_USER_MAX must stay last.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_MAX
} VhostUserRequest;

/*
 * Description of one memory region as carried in a VhostUserMemory
 * payload.  All four fields are filled in by vhost_user_set_mem_table().
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    /* offset of userspace_addr from the start of its RAM block mapping */
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

/*
 * Payload of VHOST_USER_SET_MEM_TABLE: a region count followed by a
 * fixed-capacity array.  Only the first nregions entries are meaningful;
 * the sender sizes the message accordingly.
 */
typedef struct VhostUserMemory {
    uint32_t nregions;
    uint32_t padding;
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

78 79 80 81 82
/*
 * Payload of VHOST_USER_SET_LOG_BASE: size and offset of the log area.
 * vhost_user_set_log_base() sets mmap_size to the log size in bytes and
 * mmap_offset to 0.
 */
typedef struct VhostUserLog {
    uint64_t mmap_size;
    uint64_t mmap_offset;
} VhostUserLog;

83 84 85 86 87 88 89 90 91 92 93 94 95 96
typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
97
        VhostUserLog log;
98
    } payload;
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
} QEMU_PACKED VhostUserMsg;

/*
 * Dummy instance that exists only so the macros below can apply sizeof to
 * individual members; it is never read or written at run time.
 */
static VhostUserMsg m __attribute__ ((unused));
/* Number of bytes in the fixed message header (request + flags + size). */
#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
                            + sizeof(m.flags) \
                            + sizeof(m.size))

/* Largest payload a VhostUserMsg can hold: everything after the header. */
#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

/*
 * Report whether eventfds can be handed to the backend: KVM must be
 * enabled and must report eventfd support.
 */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}

/*
 * Read one reply from the backend into *msg.
 *
 * The header is read first and validated: flags must be exactly the reply
 * bit plus the supported version, and the announced payload size must fit
 * in the payload union.  The payload, if any, is then read directly behind
 * the header.
 *
 * On entry *msg normally still holds the request that was just sent; its
 * request field is quoted in the error message when the header read fails.
 *
 * Returns 0 on success, -1 on a short read or an invalid header.
 */
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    CharDriverState *chr = dev->opaque;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->request);
        return -1;
    }

    /* validate received flags */
    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                " Flags 0x%x instead of 0x%x.", msg->flags,
                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -1;
    }

    /* validate message size is sane */
    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
        /* size is unsigned: print it as such so huge values stay readable */
        error_report("Failed to read msg header."
                " Size %u exceeds the maximum %zu.", msg->size,
                VHOST_USER_PAYLOAD_SIZE);
        return -1;
    }

    if (msg->size) {
        p += VHOST_USER_HDR_SIZE;
        /* bounded by VHOST_USER_PAYLOAD_SIZE above, so it fits in an int */
        size = msg->size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, size);
            return -1;
        }
    }

    return 0;
}

162 163 164 165
/*
 * Tell whether @request is one of the requests that this file sends only
 * for the vhost_dev whose vq_index is 0 (see vhost_user_write() and
 * vhost_user_get_u64()); for other devices such requests are skipped.
 */
static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
176 177 178 179 180 181
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    CharDriverState *chr = dev->opaque;
    int size = VHOST_USER_HDR_SIZE + msg->size;

182 183 184 185 186 187 188 189 190
    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we just need send it once in the first time. For later such
     * request, we just ignore it.
     */
    if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
        return 0;
    }

191 192 193 194 195 196 197 198
    if (fd_num) {
        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
    }

    return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
            0 : -1;
}

199 200
static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
201
{
202 203 204 205 206 207 208
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_LOG_BASE,
        .flags = VHOST_USER_VERSION,
M
Michael S. Tsirkin 已提交
209
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
210 211
        .payload.log.mmap_offset = 0,
        .size = sizeof(msg.payload.log),
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    vhost_user_write(dev, &msg, fds, fd_num);

    if (shmfd) {
        msg.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return 0;
        }

        if (msg.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.request);
            return -1;
        }
232
    }
233 234

    return 0;
235 236
}

237 238
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
239 240
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
241
    int i, fd;
242
    size_t fd_num = 0;
243 244 245 246
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_MEM_TABLE,
        .flags = VHOST_USER_VERSION,
    };
247

248 249 250 251 252 253 254 255 256
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        ram_addr_t ram_addr;

        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
        qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
                                &ram_addr);
        fd = qemu_get_ram_fd(ram_addr);
        if (fd > 0) {
257 258 259 260
            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
            msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
261 262 263 264
                (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
            fds[fd_num++] = fd;
        }
265 266
    }

267
    msg.payload.memory.nregions = fd_num;
268 269 270 271 272

    if (!fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
273 274
    }

275 276
    msg.size = sizeof(msg.payload.memory.nregions);
    msg.size += sizeof(msg.payload.memory.padding);
277
    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
278

279
    vhost_user_write(dev, &msg, fds, fd_num);
280

281 282
    return 0;
}
283

284 285 286 287 288 289
/*
 * Send VHOST_USER_SET_VRING_ADDR with a copy of *addr as payload.
 * Always returns 0; the write status is not checked.
 */
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_VRING_ADDR,
        .flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .size = sizeof(msg.payload.addr),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}
298

299 300 301 302 303 304
/*
 * Setting the vring endianness has no vhost-user message in this file:
 * report the attempt and fail.  Neither argument is used.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    const int unsupported = -1;

    error_report("vhost-user trying to send unhandled ioctl");

    return unsupported;
}
305

306 307 308 309 310 311 312
/*
 * Common helper for requests whose payload is a vhost_vring_state:
 * send @request with a copy of *ring.
 * Always returns 0; the write status is not checked.
 */
static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .size = sizeof(msg.payload.state),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

/* Forward *ring to the backend as a VHOST_USER_SET_VRING_NUM request. */
static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    int rc = vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);

    return rc;
}

/* Forward *ring to the backend as a VHOST_USER_SET_VRING_BASE request. */
static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int rc = vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);

    return rc;
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
336
    int i;
337

338
    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
339 340 341
        return -1;
    }

342 343 344 345 346 347 348 349
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num   = enable,
        };

        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
    }
350

351 352
    return 0;
}
353 354 355 356 357 358 359

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_GET_VRING_BASE,
        .flags = VHOST_USER_VERSION,
360
        .payload.state = *ring,
361
        .size = sizeof(msg.payload.state),
362 363 364 365 366
    };

    vhost_user_write(dev, &msg, NULL, 0);

    if (vhost_user_read(dev, &msg) < 0) {
367 368 369
        return 0;
    }

370 371 372 373 374
    if (msg.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.request);
        return -1;
    }
375

376
    if (msg.size != sizeof(msg.payload.state)) {
377 378
        error_report("Received bad msg size.");
        return -1;
379 380
    }

381
    *ring = msg.payload.state;
382

383 384 385
    return 0;
}

386 387 388
/*
 * Common helper for the vring kick/call requests.
 *
 * The u64 payload carries the ring index (masked with
 * VHOST_USER_VRING_IDX_MASK).  The file descriptor is attached to the
 * message only when ioeventfd is available and file->fd is positive;
 * otherwise the VHOST_USER_VRING_NOFD_MASK bit is set in the payload and
 * no descriptor is sent.
 *
 * Always returns 0; the write status is not checked.
 */
static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    vhost_user_write(dev, &msg, fds, fd_num);

    return 0;
}
409

410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426
/* Pass *file to the backend as a VHOST_USER_SET_VRING_KICK request. */
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    int rc = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);

    return rc;
}

/* Pass *file to the backend as a VHOST_USER_SET_VRING_CALL request. */
static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    int rc = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);

    return rc;
}

/*
 * Common helper for requests whose payload is a single 64-bit value:
 * send @request carrying @u64.
 * Always returns 0; the write status is not checked.
 */
static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .size = sizeof(msg.payload.u64),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

/* Send @features to the backend in a VHOST_USER_SET_FEATURES request. */
static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    int rc = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);

    return rc;
}

/*
 * Send @features to the backend in a VHOST_USER_SET_PROTOCOL_FEATURES
 * request.
 */
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    int rc = vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES,
                                features);

    return rc;
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
457
    }
458

459 460 461 462 463 464 465 466 467 468 469 470
    vhost_user_write(dev, &msg, NULL, 0);

    if (vhost_user_read(dev, &msg) < 0) {
        return 0;
    }

    if (msg.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.request);
        return -1;
    }

471
    if (msg.size != sizeof(msg.payload.u64)) {
472 473 474 475
        error_report("Received bad msg size.");
        return -1;
    }

476
    *u64 = msg.payload.u64;
477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500

    return 0;
}

/*
 * Ask the backend for its feature bits with VHOST_USER_GET_FEATURES and
 * store them in *features.
 */
static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    int rc = vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);

    return rc;
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_OWNER,
        .flags = VHOST_USER_VERSION,
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
501
        .request = VHOST_USER_RESET_OWNER,
502 503 504 505 506
        .flags = VHOST_USER_VERSION,
    };

    vhost_user_write(dev, &msg, NULL, 0);

507 508 509
    return 0;
}

510 511
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
512
    uint64_t features;
513 514
    int err;

515 516 517 518
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = opaque;

519
    err = vhost_user_get_features(dev, &features);
520 521 522 523 524 525 526
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

527 528
        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &features);
529 530 531 532 533
        if (err < 0) {
            return err;
        }

        dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
534
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
535 536 537
        if (err < 0) {
            return err;
        }
538 539 540

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
541 542
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
543 544 545 546
            if (err < 0) {
                return err;
            }
        }
547 548
    }

549 550 551 552 553 554 555 556
    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

557 558 559 560 561 562 563 564 565 566 567 568
    return 0;
}

/*
 * Backend cleanup hook: forget the opaque pointer stored at init time.
 * Always returns 0.
 */
static int vhost_user_cleanup(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = NULL;

    return 0;
}

569 570 571 572 573 574 575
/*
 * Map a virtqueue index to the index used towards the backend.  For
 * vhost-user the index is passed through unchanged; the assert checks
 * that @idx lies in [dev->vq_index, dev->vq_index + dev->nvqs).
 */
static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return idx;
}

576 577 578 579 580
/*
 * Report how many memory regions this backend accepts: the capacity of
 * the regions array in VhostUserMemory.  @dev is not consulted.
 */
static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    (void)dev;

    return VHOST_MEMORY_MAX_NREGIONS;
}

581 582 583 584 585 586 587 588
/*
 * Tell whether the negotiated protocol features include
 * VHOST_USER_PROTOCOL_F_LOG_SHMFD.
 */
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    bool has_log_shmfd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    has_log_shmfd = virtio_has_feature(dev->protocol_features,
                                       VHOST_USER_PROTOCOL_F_LOG_SHMFD);

    return has_log_shmfd;
}

589 590 591 592 593 594 595 596 597 598 599 600 601 602 603 604 605
static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
    VhostUserMsg msg = { 0 };
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.request = VHOST_USER_SEND_RARP;
        msg.flags = VHOST_USER_VERSION;
606
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
607
        msg.size = sizeof(msg.payload.u64);
608 609 610 611 612 613 614

        err = vhost_user_write(dev, &msg, NULL, 0);
        return err;
    }
    return -1;
}

615 616 617
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_init,
618
        .vhost_backend_cleanup = vhost_user_cleanup,
619
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
620 621 622 623 624 625 626 627 628 629 630 631 632 633 634
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
635
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
636
        .vhost_migration_done = vhost_user_migration_done,
637
};