vhost-user.c 18.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10
/*
 * vhost-user
 *
 * Copyright (c) 2013 Virtual Open Systems Sarl.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 *
 */

P
Peter Maydell 已提交
11
#include "qemu/osdep.h"
12
#include "qapi/error.h"
13 14
#include "hw/virtio/vhost.h"
#include "hw/virtio/vhost-backend.h"
15
#include "hw/virtio/virtio-net.h"
16 17 18 19
#include "sysemu/char.h"
#include "sysemu/kvm.h"
#include "qemu/error-report.h"
#include "qemu/sockets.h"
20
#include "migration/migration.h"
21 22 23 24 25 26 27

#include <sys/ioctl.h>
#include <sys/socket.h>
#include <sys/un.h>
#include <linux/vhost.h>

/* Capacity of the regions[] table in VhostUserMemory and of the fd arrays. */
#define VHOST_MEMORY_MAX_NREGIONS    8
/* Feature bit tested in vhost_user_init() before protocol features are used. */
#define VHOST_USER_F_PROTOCOL_FEATURES 30
29

30 31 32 33 34 35 36 37 38
/*
 * Bit positions inside the protocol-features word exchanged with the
 * backend.  VHOST_USER_PROTOCOL_F_MAX is one past the highest known bit.
 */
enum VhostUserProtocolFeature {
    VHOST_USER_PROTOCOL_F_MQ        = 0,
    VHOST_USER_PROTOCOL_F_LOG_SHMFD = 1,
    VHOST_USER_PROTOCOL_F_RARP      = 2,

    VHOST_USER_PROTOCOL_F_MAX
};

/* Mask with one bit set for every feature below VHOST_USER_PROTOCOL_F_MAX. */
#define VHOST_USER_PROTOCOL_FEATURE_MASK ((1 << VHOST_USER_PROTOCOL_F_MAX) - 1)
39 40 41 42 43 44

/*
 * Request codes placed in VhostUserMsg.request.  The numeric values are
 * spelled out explicitly because they are sent to the backend as-is.
 */
typedef enum VhostUserRequest {
    VHOST_USER_NONE = 0,
    VHOST_USER_GET_FEATURES = 1,
    VHOST_USER_SET_FEATURES = 2,
    VHOST_USER_SET_OWNER = 3,
    VHOST_USER_RESET_OWNER = 4,
    VHOST_USER_SET_MEM_TABLE = 5,
    VHOST_USER_SET_LOG_BASE = 6,
    VHOST_USER_SET_LOG_FD = 7,
    VHOST_USER_SET_VRING_NUM = 8,
    VHOST_USER_SET_VRING_ADDR = 9,
    VHOST_USER_SET_VRING_BASE = 10,
    VHOST_USER_GET_VRING_BASE = 11,
    VHOST_USER_SET_VRING_KICK = 12,
    VHOST_USER_SET_VRING_CALL = 13,
    VHOST_USER_SET_VRING_ERR = 14,
    VHOST_USER_GET_PROTOCOL_FEATURES = 15,
    VHOST_USER_SET_PROTOCOL_FEATURES = 16,
    VHOST_USER_GET_QUEUE_NUM = 17,
    VHOST_USER_SET_VRING_ENABLE = 18,
    VHOST_USER_SEND_RARP = 19,
    VHOST_USER_MAX      /* one past the last valid request code */
} VhostUserRequest;

/*
 * One entry of the memory table sent with VHOST_USER_SET_MEM_TABLE.
 * Filled in by vhost_user_set_mem_table() from a vhost_memory_region plus
 * the offset returned by memory_region_from_host().
 */
typedef struct VhostUserMemoryRegion {
    uint64_t guest_phys_addr;
    uint64_t memory_size;
    uint64_t userspace_addr;
    uint64_t mmap_offset;
} VhostUserMemoryRegion;

/*
 * Payload of VHOST_USER_SET_MEM_TABLE: a count followed by a fixed-capacity
 * region table.  Only the first nregions entries are accounted for in the
 * message size computed by vhost_user_set_mem_table().
 */
typedef struct VhostUserMemory {
    uint32_t nregions;  /* number of valid entries in regions[] */
    uint32_t padding;   /* counted in the message size, never assigned here */
    VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS];
} VhostUserMemory;

77 78 79 80 81
/* Payload of VHOST_USER_SET_LOG_BASE, filled in by vhost_user_set_log_base(). */
typedef struct VhostUserLog {
    uint64_t mmap_size;    /* set to log->size * sizeof(*log->log) */
    uint64_t mmap_offset;  /* always sent as 0 by this file */
} VhostUserLog;

82 83 84 85 86 87 88 89 90 91 92 93 94 95
typedef struct VhostUserMsg {
    VhostUserRequest request;

#define VHOST_USER_VERSION_MASK     (0x3)
#define VHOST_USER_REPLY_MASK       (0x1<<2)
    uint32_t flags;
    uint32_t size; /* the following payload size */
    union {
#define VHOST_USER_VRING_IDX_MASK   (0xff)
#define VHOST_USER_VRING_NOFD_MASK  (0x1<<8)
        uint64_t u64;
        struct vhost_vring_state state;
        struct vhost_vring_addr addr;
        VhostUserMemory memory;
96
        VhostUserLog log;
97
    } payload;
98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
} QEMU_PACKED VhostUserMsg;

/* Dummy instance that exists only so the sizeof expressions below have an object. */
static VhostUserMsg m __attribute__ ((unused));
/* Bytes occupied by the request, flags and size fields preceding the payload. */
#define VHOST_USER_HDR_SIZE (sizeof(m.request) \
                            + sizeof(m.flags) \
                            + sizeof(m.size))

/* Largest payload a message can carry; vhost_user_read() rejects bigger sizes. */
#define VHOST_USER_PAYLOAD_SIZE (sizeof(m) - VHOST_USER_HDR_SIZE)

/* The version of the protocol we support */
#define VHOST_USER_VERSION    (0x1)

/* True only when KVM is enabled and it reports eventfd support. */
static bool ioeventfd_enabled(void)
{
    if (!kvm_enabled()) {
        return false;
    }

    return kvm_eventfds_enabled();
}

/*
 * Read one reply from the backend's character device into @msg.
 *
 * The fixed-size header is read first and validated: the flags must be
 * exactly VHOST_USER_REPLY_MASK | VHOST_USER_VERSION and the advertised
 * payload size must not exceed VHOST_USER_PAYLOAD_SIZE.  The payload, if
 * any, is then read directly behind the header.
 *
 * Returns 0 on success, -1 on a short read or an invalid header (an error
 * is reported in each case).
 */
static int vhost_user_read(struct vhost_dev *dev, VhostUserMsg *msg)
{
    CharDriverState *chr = dev->opaque;
    uint8_t *p = (uint8_t *) msg;
    int r, size = VHOST_USER_HDR_SIZE;

    r = qemu_chr_fe_read_all(chr, p, size);
    if (r != size) {
        error_report("Failed to read msg header. Read %d instead of %d."
                     " Original request %d.", r, size, msg->request);
        goto fail;
    }

    /* validate received flags */
    if (msg->flags != (VHOST_USER_REPLY_MASK | VHOST_USER_VERSION)) {
        error_report("Failed to read msg header."
                " Flags 0x%x instead of 0x%x.", msg->flags,
                VHOST_USER_REPLY_MASK | VHOST_USER_VERSION);
        goto fail;
    }

    /* validate message size is sane; size is unsigned, so print it as such */
    if (msg->size > VHOST_USER_PAYLOAD_SIZE) {
        error_report("Failed to read msg header."
                " Size %u exceeds the maximum %zu.", msg->size,
                VHOST_USER_PAYLOAD_SIZE);
        goto fail;
    }

    if (msg->size) {
        p += VHOST_USER_HDR_SIZE;
        /* Safe narrowing: size was bounded by VHOST_USER_PAYLOAD_SIZE above. */
        size = msg->size;
        r = qemu_chr_fe_read_all(chr, p, size);
        if (r != size) {
            error_report("Failed to read msg payload."
                         " Read %d instead of %u.", r, msg->size);
            goto fail;
        }
    }

    return 0;

fail:
    return -1;
}

161 162 163 164
/*
 * Tell whether @request is one of the requests that this file sends only
 * for the device whose vq_index is 0 (see vhost_user_write() and
 * vhost_user_get_u64()).
 */
static bool vhost_user_one_time_request(VhostUserRequest request)
{
    switch (request) {
    case VHOST_USER_SET_OWNER:
    case VHOST_USER_RESET_OWNER:
    case VHOST_USER_SET_MEM_TABLE:
    case VHOST_USER_GET_QUEUE_NUM:
        return true;
    default:
        return false;
    }
}

/* most non-init callers ignore the error */
175 176 177 178 179 180
static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg,
                            int *fds, int fd_num)
{
    CharDriverState *chr = dev->opaque;
    int size = VHOST_USER_HDR_SIZE + msg->size;

181 182 183 184 185 186 187 188 189
    /*
     * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE,
     * we just need send it once in the first time. For later such
     * request, we just ignore it.
     */
    if (vhost_user_one_time_request(msg->request) && dev->vq_index != 0) {
        return 0;
    }

190
    qemu_chr_fe_set_msgfds(chr, fds, fd_num);
191 192 193 194 195

    return qemu_chr_fe_write_all(chr, (const uint8_t *) msg, size) == size ?
            0 : -1;
}

196 197
static int vhost_user_set_log_base(struct vhost_dev *dev, uint64_t base,
                                   struct vhost_log *log)
198
{
199 200 201 202 203 204 205
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    bool shmfd = virtio_has_feature(dev->protocol_features,
                                    VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_LOG_BASE,
        .flags = VHOST_USER_VERSION,
M
Michael S. Tsirkin 已提交
206
        .payload.log.mmap_size = log->size * sizeof(*(log->log)),
207 208
        .payload.log.mmap_offset = 0,
        .size = sizeof(msg.payload.log),
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
    };

    if (shmfd && log->fd != -1) {
        fds[fd_num++] = log->fd;
    }

    vhost_user_write(dev, &msg, fds, fd_num);

    if (shmfd) {
        msg.size = 0;
        if (vhost_user_read(dev, &msg) < 0) {
            return 0;
        }

        if (msg.request != VHOST_USER_SET_LOG_BASE) {
            error_report("Received unexpected msg type. "
                         "Expected %d received %d",
                         VHOST_USER_SET_LOG_BASE, msg.request);
            return -1;
        }
229
    }
230 231

    return 0;
232 233
}

234 235
static int vhost_user_set_mem_table(struct vhost_dev *dev,
                                    struct vhost_memory *mem)
236 237
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
238
    int i, fd;
239
    size_t fd_num = 0;
240 241 242 243
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_MEM_TABLE,
        .flags = VHOST_USER_VERSION,
    };
244

245 246
    for (i = 0; i < dev->mem->nregions; ++i) {
        struct vhost_memory_region *reg = dev->mem->regions + i;
247
        ram_addr_t offset;
248
        MemoryRegion *mr;
249 250

        assert((uintptr_t)reg->userspace_addr == reg->userspace_addr);
251 252
        mr = memory_region_from_host((void *)(uintptr_t)reg->userspace_addr,
                                     &offset);
253
        fd = memory_region_get_fd(mr);
254
        if (fd > 0) {
255 256 257
            msg.payload.memory.regions[fd_num].userspace_addr = reg->userspace_addr;
            msg.payload.memory.regions[fd_num].memory_size  = reg->memory_size;
            msg.payload.memory.regions[fd_num].guest_phys_addr = reg->guest_phys_addr;
258
            msg.payload.memory.regions[fd_num].mmap_offset = offset;
259 260 261
            assert(fd_num < VHOST_MEMORY_MAX_NREGIONS);
            fds[fd_num++] = fd;
        }
262 263
    }

264
    msg.payload.memory.nregions = fd_num;
265 266 267 268 269

    if (!fd_num) {
        error_report("Failed initializing vhost-user memory map, "
                     "consider using -object memory-backend-file share=on");
        return -1;
270 271
    }

272 273
    msg.size = sizeof(msg.payload.memory.nregions);
    msg.size += sizeof(msg.payload.memory.padding);
274
    msg.size += fd_num * sizeof(VhostUserMemoryRegion);
275

276
    vhost_user_write(dev, &msg, fds, fd_num);
277

278 279
    return 0;
}
280

281 282 283 284 285 286
/*
 * Send VHOST_USER_SET_VRING_ADDR with a copy of @addr as payload.
 * Always returns 0; the write result is not checked.
 */
static int vhost_user_set_vring_addr(struct vhost_dev *dev,
                                     struct vhost_vring_addr *addr)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_SET_VRING_ADDR,
        .flags = VHOST_USER_VERSION,
        .payload.addr = *addr,
        .size = sizeof(msg.payload.addr),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}
295

296 297 298 299 300 301
/*
 * Not supported by this backend: nothing is sent, an error is reported and
 * -1 is returned unconditionally.
 */
static int vhost_user_set_vring_endian(struct vhost_dev *dev,
                                       struct vhost_vring_state *ring)
{
    const int unsupported = -1;

    error_report("vhost-user trying to send unhandled ioctl");

    return unsupported;
}
302

303 304 305 306 307 308 309
/*
 * Send @request with a copy of @ring as payload and no file descriptors.
 * Always returns 0; the write result is not checked.
 */
static int vhost_set_vring(struct vhost_dev *dev,
                           unsigned long int request,
                           struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.state = *ring,
        .size = sizeof(msg.payload.state),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

/* Forward @ring to the backend as a VHOST_USER_SET_VRING_NUM request. */
static int vhost_user_set_vring_num(struct vhost_dev *dev,
                                    struct vhost_vring_state *ring)
{
    const unsigned long int req = VHOST_USER_SET_VRING_NUM;

    return vhost_set_vring(dev, req, ring);
}

/* Forward @ring to the backend as a VHOST_USER_SET_VRING_BASE request. */
static int vhost_user_set_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    const unsigned long int req = VHOST_USER_SET_VRING_BASE;

    return vhost_set_vring(dev, req, ring);
}

static int vhost_user_set_vring_enable(struct vhost_dev *dev, int enable)
{
333
    int i;
334

335
    if (!virtio_has_feature(dev->features, VHOST_USER_F_PROTOCOL_FEATURES)) {
336 337 338
        return -1;
    }

339 340 341 342 343 344 345 346
    for (i = 0; i < dev->nvqs; ++i) {
        struct vhost_vring_state state = {
            .index = dev->vq_index + i,
            .num   = enable,
        };

        vhost_set_vring(dev, VHOST_USER_SET_VRING_ENABLE, &state);
    }
347

348 349
    return 0;
}
350 351 352 353 354 355 356

static int vhost_user_get_vring_base(struct vhost_dev *dev,
                                     struct vhost_vring_state *ring)
{
    VhostUserMsg msg = {
        .request = VHOST_USER_GET_VRING_BASE,
        .flags = VHOST_USER_VERSION,
357
        .payload.state = *ring,
358
        .size = sizeof(msg.payload.state),
359 360 361 362 363
    };

    vhost_user_write(dev, &msg, NULL, 0);

    if (vhost_user_read(dev, &msg) < 0) {
364 365 366
        return 0;
    }

367 368 369 370 371
    if (msg.request != VHOST_USER_GET_VRING_BASE) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     VHOST_USER_GET_VRING_BASE, msg.request);
        return -1;
    }
372

373
    if (msg.size != sizeof(msg.payload.state)) {
374 375
        error_report("Received bad msg size.");
        return -1;
376 377
    }

378
    *ring = msg.payload.state;
379

380 381 382
    return 0;
}

383 384 385
/*
 * Send @request for the ring named by @file.
 *
 * The payload's low byte (VHOST_USER_VRING_IDX_MASK) carries file->index.
 * When ioeventfds are available and file->fd is greater than 0 the
 * descriptor is attached; otherwise VHOST_USER_VRING_NOFD_MASK is set in
 * the payload and no descriptor is sent.
 *
 * Always returns 0; the write result is not checked.
 */
static int vhost_set_vring_file(struct vhost_dev *dev,
                                VhostUserRequest request,
                                struct vhost_vring_file *file)
{
    int fds[VHOST_MEMORY_MAX_NREGIONS];
    size_t fd_num = 0;
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.u64 = file->index & VHOST_USER_VRING_IDX_MASK,
        .size = sizeof(msg.payload.u64),
    };

    if (ioeventfd_enabled() && file->fd > 0) {
        fds[fd_num++] = file->fd;
    } else {
        msg.payload.u64 |= VHOST_USER_VRING_NOFD_MASK;
    }

    vhost_user_write(dev, &msg, fds, fd_num);

    return 0;
}
406

407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423
static int vhost_user_set_vring_kick(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_KICK, file);
}

static int vhost_user_set_vring_call(struct vhost_dev *dev,
                                     struct vhost_vring_file *file)
{
    return vhost_set_vring_file(dev, VHOST_USER_SET_VRING_CALL, file);
}

/*
 * Send @request with the 64-bit value @u64 as its payload.
 * Always returns 0; the write result is not checked.
 */
static int vhost_user_set_u64(struct vhost_dev *dev, int request, uint64_t u64)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
        .payload.u64 = u64,
        .size = sizeof(msg.payload.u64),
    };

    vhost_user_write(dev, &msg, NULL, 0);

    return 0;
}

/* Send @features to the backend in a VHOST_USER_SET_FEATURES request. */
static int vhost_user_set_features(struct vhost_dev *dev,
                                   uint64_t features)
{
    const int req = VHOST_USER_SET_FEATURES;

    return vhost_user_set_u64(dev, req, features);
}

/* Send @features in a VHOST_USER_SET_PROTOCOL_FEATURES request. */
static int vhost_user_set_protocol_features(struct vhost_dev *dev,
                                            uint64_t features)
{
    const int req = VHOST_USER_SET_PROTOCOL_FEATURES;

    return vhost_user_set_u64(dev, req, features);
}

static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64)
{
    VhostUserMsg msg = {
        .request = request,
        .flags = VHOST_USER_VERSION,
    };

    if (vhost_user_one_time_request(request) && dev->vq_index != 0) {
        return 0;
454
    }
455

456 457 458 459 460 461 462 463 464 465 466 467
    vhost_user_write(dev, &msg, NULL, 0);

    if (vhost_user_read(dev, &msg) < 0) {
        return 0;
    }

    if (msg.request != request) {
        error_report("Received unexpected msg type. Expected %d received %d",
                     request, msg.request);
        return -1;
    }

468
    if (msg.size != sizeof(msg.payload.u64)) {
469 470 471 472
        error_report("Received bad msg size.");
        return -1;
    }

473
    *u64 = msg.payload.u64;
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497

    return 0;
}

/* Query the backend with VHOST_USER_GET_FEATURES; result goes to @features. */
static int vhost_user_get_features(struct vhost_dev *dev, uint64_t *features)
{
    const int req = VHOST_USER_GET_FEATURES;

    return vhost_user_get_u64(dev, req, features);
}

/*
 * Send a payload-less VHOST_USER_SET_OWNER request.
 * Always returns 0; the write result is not checked.
 */
static int vhost_user_set_owner(struct vhost_dev *dev)
{
    VhostUserMsg owner_msg = {
        .flags = VHOST_USER_VERSION,
        .request = VHOST_USER_SET_OWNER,
    };

    (void)vhost_user_write(dev, &owner_msg, NULL, 0);
    return 0;
}

static int vhost_user_reset_device(struct vhost_dev *dev)
{
    VhostUserMsg msg = {
498
        .request = VHOST_USER_RESET_OWNER,
499 500 501 502 503
        .flags = VHOST_USER_VERSION,
    };

    vhost_user_write(dev, &msg, NULL, 0);

504 505 506
    return 0;
}

507 508
static int vhost_user_init(struct vhost_dev *dev, void *opaque)
{
509
    uint64_t features;
510 511
    int err;

512 513 514 515
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = opaque;

516
    err = vhost_user_get_features(dev, &features);
517 518 519 520 521 522 523
    if (err < 0) {
        return err;
    }

    if (virtio_has_feature(features, VHOST_USER_F_PROTOCOL_FEATURES)) {
        dev->backend_features |= 1ULL << VHOST_USER_F_PROTOCOL_FEATURES;

524 525
        err = vhost_user_get_u64(dev, VHOST_USER_GET_PROTOCOL_FEATURES,
                                 &features);
526 527 528 529 530
        if (err < 0) {
            return err;
        }

        dev->protocol_features = features & VHOST_USER_PROTOCOL_FEATURE_MASK;
531
        err = vhost_user_set_protocol_features(dev, dev->protocol_features);
532 533 534
        if (err < 0) {
            return err;
        }
535 536 537

        /* query the max queues we support if backend supports Multiple Queue */
        if (dev->protocol_features & (1ULL << VHOST_USER_PROTOCOL_F_MQ)) {
538 539
            err = vhost_user_get_u64(dev, VHOST_USER_GET_QUEUE_NUM,
                                     &dev->max_queues);
540 541 542 543
            if (err < 0) {
                return err;
            }
        }
544 545
    }

546 547 548 549 550 551 552 553
    if (dev->migration_blocker == NULL &&
        !virtio_has_feature(dev->protocol_features,
                            VHOST_USER_PROTOCOL_F_LOG_SHMFD)) {
        error_setg(&dev->migration_blocker,
                   "Migration disabled: vhost-user backend lacks "
                   "VHOST_USER_PROTOCOL_F_LOG_SHMFD feature.");
    }

554 555 556 557 558 559 560 561 562 563 564 565
    return 0;
}

/* Backend cleanup hook: clear the channel pointer stored by vhost_user_init(). */
static int vhost_user_cleanup(struct vhost_dev *dev)
{
    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    dev->opaque = NULL;
    return 0;
}

566 567 568 569 570 571 572
/*
 * Map a queue index to the index used on the wire.  For this backend the
 * index is passed through unchanged; it is only asserted to lie within
 * [dev->vq_index, dev->vq_index + dev->nvqs).
 */
static int vhost_user_get_vq_index(struct vhost_dev *dev, int idx)
{
    const int wire_idx = idx;

    assert(idx >= dev->vq_index && idx < dev->vq_index + dev->nvqs);

    return wire_idx;
}

573 574 575 576 577
/* Report how many memory slots this backend accepts; @dev is not consulted. */
static int vhost_user_memslots_limit(struct vhost_dev *dev)
{
    const int limit = VHOST_MEMORY_MAX_NREGIONS;

    return limit;
}

578 579 580 581 582 583 584 585
/*
 * Tell whether the backend negotiated VHOST_USER_PROTOCOL_F_LOG_SHMFD.
 */
static bool vhost_user_requires_shm_log(struct vhost_dev *dev)
{
    bool has_shmfd;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    has_shmfd = virtio_has_feature(dev->protocol_features,
                                   VHOST_USER_PROTOCOL_F_LOG_SHMFD);
    return has_shmfd;
}

586 587 588 589 590 591 592 593 594 595 596 597 598 599 600 601 602
static int vhost_user_migration_done(struct vhost_dev *dev, char* mac_addr)
{
    VhostUserMsg msg = { 0 };
    int err;

    assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_USER);

    /* If guest supports GUEST_ANNOUNCE do nothing */
    if (virtio_has_feature(dev->acked_features, VIRTIO_NET_F_GUEST_ANNOUNCE)) {
        return 0;
    }

    /* if backend supports VHOST_USER_PROTOCOL_F_RARP ask it to send the RARP */
    if (virtio_has_feature(dev->protocol_features,
                           VHOST_USER_PROTOCOL_F_RARP)) {
        msg.request = VHOST_USER_SEND_RARP;
        msg.flags = VHOST_USER_VERSION;
603
        memcpy((char *)&msg.payload.u64, mac_addr, 6);
604
        msg.size = sizeof(msg.payload.u64);
605 606 607 608 609 610 611

        err = vhost_user_write(dev, &msg, NULL, 0);
        return err;
    }
    return -1;
}

612 613 614 615
static bool vhost_user_can_merge(struct vhost_dev *dev,
                                 uint64_t start1, uint64_t size1,
                                 uint64_t start2, uint64_t size2)
{
616
    ram_addr_t offset;
617 618 619
    int mfd, rfd;
    MemoryRegion *mr;

620
    mr = memory_region_from_host((void *)(uintptr_t)start1, &offset);
621
    mfd = memory_region_get_fd(mr);
622

623
    mr = memory_region_from_host((void *)(uintptr_t)start2, &offset);
624
    rfd = memory_region_get_fd(mr);
625 626 627 628

    return mfd == rfd;
}

629 630 631
const VhostOps user_ops = {
        .backend_type = VHOST_BACKEND_TYPE_USER,
        .vhost_backend_init = vhost_user_init,
632
        .vhost_backend_cleanup = vhost_user_cleanup,
633
        .vhost_backend_memslots_limit = vhost_user_memslots_limit,
634 635 636 637 638 639 640 641 642 643 644 645 646 647 648
        .vhost_set_log_base = vhost_user_set_log_base,
        .vhost_set_mem_table = vhost_user_set_mem_table,
        .vhost_set_vring_addr = vhost_user_set_vring_addr,
        .vhost_set_vring_endian = vhost_user_set_vring_endian,
        .vhost_set_vring_num = vhost_user_set_vring_num,
        .vhost_set_vring_base = vhost_user_set_vring_base,
        .vhost_get_vring_base = vhost_user_get_vring_base,
        .vhost_set_vring_kick = vhost_user_set_vring_kick,
        .vhost_set_vring_call = vhost_user_set_vring_call,
        .vhost_set_features = vhost_user_set_features,
        .vhost_get_features = vhost_user_get_features,
        .vhost_set_owner = vhost_user_set_owner,
        .vhost_reset_device = vhost_user_reset_device,
        .vhost_get_vq_index = vhost_user_get_vq_index,
        .vhost_set_vring_enable = vhost_user_set_vring_enable,
649
        .vhost_requires_shm_log = vhost_user_requires_shm_log,
650
        .vhost_migration_done = vhost_user_migration_done,
651
        .vhost_backend_can_merge = vhost_user_can_merge,
652
};