vhost-user.c 17.7 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12
/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
13
#include "hw/virtio/virtio-net.h"
14 15 16 17
#include "sysemu/char.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
18
#include "exec/ram_addr.h"
19
#include "migration/migration.h"
20 21 22 23 24 25 26 27 28

#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>

/* Number of region slots in a VhostUserMemory payload. */
#define VHOST_MEMORY_MAX_NREGIONS    8
/* Feature bit number tested to see whether protocol features are offered. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
30

31 32 33 34 35 36 37 38 39
/*
 * Bit numbers of the vhost-user protocol features.  Each enumerator is a
 * bit position inside a protocol-features word.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ        = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP      = 2,

    /* One past the highest feature bit defined above. */
    VHOST_USER_PROTOCOL_F_MAX
};

/* One bit set for every feature bit below VHOST_USER_PROTOCOL_F_MAX. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK \
    ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
40 41 42 43 44 45

/*
 * Request codes placed in the 'request' field of a VhostUserMsg.
 * The numeric values are spelled out explicitly so that they cannot shift
 * when entries are added.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    /* One past the highest request code defined above. */
    VHOST_USER_MAX
} VhostUserRequest;

/* Description of one memory region as carried in a VhostUserMemory payload. */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;   /* guest physical address of the region */
    uint64_t memory_size;       /* size of the region */
    uint64_t userspace_addr;    /* address of the region in QEMU's address space */
    uint64_t mmap_offset;       /* offset of the region inside its RAM block */
} VhostUserMemoryRegion;

/* Payload describing a table of memory regions. */
typedef struct VhostUserMemory {
    uint32_t nregions;  /* number of entries of 'regions' that are filled in */
    uint32_t padding;   /* counted in the message size, not otherwise used */
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

78 79 80 81 82
/* Payload describing the log area: its mmap size and mmap offset. */
typedef struct VhostUserLog {
    uint64_t mmap_size;     /* size of the log mapping */
    uint64_t mmap_offset;   /* offset of the log inside the mapping */
} VhostUserLog;

83 84 85 86 87 88 89 90 91 92 93 94 95 96
typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
97
        VhostUserLog log;
98
    } payload;
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123
} QEMU_PACKED VhostUserMsg;

/* Instance that exists only so the sizeof() expressions below can name fields. */
static VhostUserMsg m __attribute__ ((unused));

/* Combined size of the request, flags and size fields. */
#define VHOST_USER_HDR_SIZE \
    (sizeof(m.request) + sizeof(m.flags) + sizeof(m.size))

/* Size of a whole message minus its header. */
#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

/* True only when both kvm_enabled() and kvm_eventfds_enabled() hold. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}

/*
 * Read one reply from the char device stored in dev->opaque into *msg.
 *
 * The header is read and validated first: 'flags' must be exactly the reply
 * bit combined with the supported version, and 'size' must not exceed
 * VHOST_USER_PAYLOAD_SIZE.  A non-empty payload is then read in place right
 * behind the header.
 *
 * Returns 0 on success, -1 on a short read or an invalid header (an error
 * is reported in either case).
 */
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    CharDriverState *chr = dev->opaque;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d.", r,
                     size);
        return -1;
    }

    /* validate received flags */
    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                     " Flags 0x%x instead of 0x%x.", msg->flags,
                     VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        return -1;
    }

    /* validate message size is sane; 'size' is unsigned, so print it as such */
    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                     " Size %u exceeds the maximum %zu.", msg->size,
                     VHOST_USER_PAYLOAD_SIZE);
        return -1;
    }

    if (msg->size) {
        p += VHOST_USER_HDR_SIZE;
        /* bounded by VHOST_USER_PAYLOAD_SIZE above, so it fits in an int */
        size = msg->size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %d.", r, size);
            return -1;
        }
    }

    return 0;
}

162 163 164 165
/*
 * Tell whether 'request' is one of the requests that is not specific to a
 * vring: SET_OWNER, RESET_OWNER, SET_MEM_TABLE or GET_QUEUE_NUM.
 */
static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
176 177 178 179 180 181
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    CharDriverState *chr = dev->opaque;
    int size = VHOST_USER_HDR_SIZE + msg->size;

182 183 184 185 186 187 188 189 190
    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we just need send it once in the first time. For later such
     * request, we just ignore it.
     */
    if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
        return 0;
    }

191 192 193 194 195 196 197 198
    if (fd_num) {
        qemu_chr_fe_set_msgfds(chr, fds, fd_num);
    }

    return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
            0 : -1;
}

199 200
static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
201
{
202 203 204 205 206 207 208
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_LOG_BASE,
        .flags = VHOST_USER_VERSION,
209 210 211
        .payload.log.mmap_size = log->size,
        .payload.log.mmap_offset = 0,
        .size = sizeof(msg.payload.log),
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    vhost_user_write(dev, &msg, fds, fd_num);

    if (shmfd) {
        msg.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return 0;
        }

        if (msg.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.request);
            return -1;
        }
232
    }
233 234

    return 0;
235 236
}

237 238
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
239 240
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
241
    int i, fd;
242
    size_t fd_num = 0;
243 244 245 246
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_MEM_TABLE,
        .flags = VHOST_USER_VERSION,
    };
247

248 249 250 251 252 253 254 255 256
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
        ram_addr_t ram_addr;

        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
        qemu_ram_addr_from_host((void *)(uintptr_t)reg->userspace_addr,
                                &ram_addr);
        fd = qemu_get_ram_fd(ram_addr);
        if (fd > 0) {
257 258 259 260
            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
            msg.payload.memory.regions[fd_num].mmap_offset = reg->userspace_addr -
261 262 263 264
                (uintptr_t) qemu_get_ram_block_host_ptr(ram_addr);
            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
            fds[fd_num++] = fd;
        }
265 266
    }

267
    msg.payload.memory.nregions = fd_num;
268 269 270 271 272

    if (!fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
273 274
    }

275 276
    msg.size = sizeof(msg.payload.memory.nregions);
    msg.size += sizeof(msg.payload.memory.padding);
277
    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
278

279
    vhost_user_write(dev, &msg, fds, fd_num);
280

281 282
    return 0;
}
283

284 285 286 287 288 289
/*
 * Send VHOST_USER_SET_VRING_ADDR with a copy of *addr as payload.
 * The result of the write is not checked; always returns 0.
 */
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_VRING_ADDR,
        .flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .size = sizeof(msg.payload.addr),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}
298

299 300 301 302 303 304
/*
 * Setting the vring endianness is not handled by this backend: an error is
 * reported and -1 is returned without sending anything.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    const int unsupported = -1;

    error_report("vhost-user trying to send unhandled ioctl");
    return unsupported;
}
305

306 307 308 309 310 311 312
/*
 * Send 'request' with a copy of *ring as the vring-state payload.
 * The result of the write is not checked; always returns 0.
 */
static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .size = sizeof(msg.payload.state),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

/* Forward *ring to the backend as a VHOST_USER_SET_VRING_NUM request. */
static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_NUM, ring);

    return ret;
}

/* Forward *ring to the backend as a VHOST_USER_SET_VRING_BASE request. */
static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    int ret = vhost_set_vring(dev, VHOST_USER_SET_VRING_BASE, ring);

    return ret;
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
    struct vhost_vring_state state = {
        .index = dev->vq_index,
        .num   = enable,
    };

341
    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
342 343 344
        return -1;
    }

345 346 347 348 349 350 351 352 353 354
    return vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
}


static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_GET_VRING_BASE,
        .flags = VHOST_USER_VERSION,
355
        .payload.state = *ring,
356
        .size = sizeof(msg.payload.state),
357 358 359 360 361
    };

    vhost_user_write(dev, &msg, NULL, 0);

    if (vhost_user_read(dev, &msg) < 0) {
362 363 364
        return 0;
    }

365 366 367 368 369
    if (msg.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.request);
        return -1;
    }
370

371
    if (msg.size != sizeof(msg.payload.state)) {
372 373
        error_report("Received bad msg size.");
        return -1;
374 375
    }

376
    *ring = msg.payload.state;
377

378 379 380
    return 0;
}

381 382 383
/*
 * Send 'request' with the vring index of *file in the u64 payload.
 *
 * When ioeventfd_enabled() holds and file->fd is greater than 0, the
 * descriptor is passed along with the message; otherwise the
 * VHOST_USER_VRING_NOFD_MASK bit is set in the payload instead.
 *
 * The result of the write is not checked; always returns 0.
 */
static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    vhost_user_write(dev, &msg, fds, fd_num);

    return 0;
}
404

405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421
/* Forward *file to the backend as a VHOST_USER_SET_VRING_KICK request. */
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    int ret = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);

    return ret;
}

/* Forward *file to the backend as a VHOST_USER_SET_VRING_CALL request. */
static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    int ret = vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);

    return ret;
}

/*
 * Send 'request' with 'u64' as its 64-bit payload.
 * The result of the write is not checked; always returns 0.
 */
static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .size = sizeof(msg.payload.u64),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

/* Send 'features' to the backend as a VHOST_USER_SET_FEATURES request. */
static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    int ret = vhost_user_set_u64(dev, VHOST_USER_SET_FEATURES, features);

    return ret;
}

/* Send 'features' as a VHOST_USER_SET_PROTOCOL_FEATURES request. */
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    int ret = vhost_user_set_u64(dev, VHOST_USER_SET_PROTOCOL_FEATURES,
                                 features);

    return ret;
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
452
    }
453

454 455 456 457 458 459 460 461 462 463 464 465
    vhost_user_write(dev, &msg, NULL, 0);

    if (vhost_user_read(dev, &msg) < 0) {
        return 0;
    }

    if (msg.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.request);
        return -1;
    }

466
    if (msg.size != sizeof(msg.payload.u64)) {
467 468 469 470
        error_report("Received bad msg size.");
        return -1;
    }

471
    *u64 = msg.payload.u64;
472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495

    return 0;
}

/* Query the backend with VHOST_USER_GET_FEATURES; result goes to *features. */
static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    int ret = vhost_user_get_u64(dev, VHOST_USER_GET_FEATURES, features);

    return ret;
}

static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_OWNER,
        .flags = VHOST_USER_VERSION,
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
496
        .request = VHOST_USER_RESET_OWNER,
497 498 499 500 501
        .flags = VHOST_USER_VERSION,
    };

    vhost_user_write(dev, &msg, NULL, 0);

502 503 504
    return 0;
}

505 506
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
507
    uint64_t features;
508 509
    int err;

510 511 512 513
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = opaque;

514
    err = vhost_user_get_features(dev, &features);
515 516 517 518 519 520 521
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

522 523
        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &features);
524 525 526 527 528
        if (err < 0) {
            return err;
        }

        dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
529
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
530 531 532
        if (err < 0) {
            return err;
        }
533 534 535

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
536 537
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
538 539 540 541
            if (err < 0) {
                return err;
            }
        }
542 543
    }

544 545 546 547 548 549 550 551
    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

552 553 554 555 556 557 558 559 560 561 562 563
    return 0;
}

/* Drop the reference kept in dev->opaque.  Always returns 0. */
static int vhost_user_cleanup(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = NULL;
    return 0;
}

564 565 566 567 568 569 570
/*
 * Return 'idx' unchanged after asserting that it lies in the range
 * [dev->vq_index, dev->vq_index + dev->nvqs).
 */
static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    /* the index must belong to this device's slice of virtqueues */
    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);
    return idx;
}

571 572 573 574 575
/* The limit is the fixed number of region slots in a memory-table payload. */
static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    const int limit = VHOST_MEMORY_MAX_NREGIONS;

    return limit;
}

576 577 578 579 580 581 582 583
/*
 * Report whether dev->protocol_features contains
 * VHOST_USER_PROTOCOL_F_LOG_SHMFD.
 */
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    bool shm_log;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    shm_log = virtio_has_feature(dev->protocol_features,
                                 VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    return shm_log;
}

584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
    VhostUserMsg msg = { 0 };
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.request = VHOST_USER_SEND_RARP;
        msg.flags = VHOST_USER_VERSION;
601
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
602
        msg.size = sizeof(msg.payload.u64);
603 604 605 606 607 608 609

        err = vhost_user_write(dev, &msg, NULL, 0);
        return err;
    }
    return -1;
}

610 611 612
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_init,
613
        .vhost_backend_cleanup = vhost_user_cleanup,
614
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
615 616 617 618 619 620 621 622 623 624 625 626 627 628 629
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
630
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
631
        .vhost_migration_done = vhost_user_migration_done,
632
};