server.c 25.6 KB
Newer Older
1
/*
B
bellard 已提交
2 3
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
F
Fam Zheng 已提交
4
 *  Network Block Device Server Side
B
bellard 已提交
5 6 7 8 9 10 11 12 13 14 15
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
16
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
17
 */
B
bellard 已提交
18

F
Fam Zheng 已提交
19
#include "nbd-internal.h"
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43

/*
 * Translate a host errno value into the error code sent to NBD clients.
 *
 * @err: host errno value (positive), or 0 for success.
 *
 * Returns the matching NBD_* code.  Any value without a dedicated mapping
 * is reported as NBD_EINVAL.
 */
static int system_errno_to_nbd_errno(int err)
{
    switch (err) {
    case 0:
        return NBD_SUCCESS;
    case EPERM:
    case EROFS:
        /* nbd_trip() uses EROFS for writes to a read-only export; report
         * that as "not permitted" instead of letting it fall through to
         * the generic NBD_EINVAL below. */
        return NBD_EPERM;
    case EIO:
        return NBD_EIO;
    case ENOMEM:
        return NBD_ENOMEM;
#ifdef EDQUOT
    case EDQUOT:
#endif
    case EFBIG:
    case ENOSPC:
        /* All "out of room" conditions share one wire code. */
        return NBD_ENOSPC;
    case EINVAL:
    default:
        return NBD_EINVAL;
    }
}

44 45 46 47 48 49 50 51 52 53 54
/* Definitions for opaque data types */

typedef struct NBDRequest NBDRequest;

/* Per-request state, created by nbd_request_get() and destroyed by
 * nbd_request_put(). */
struct NBDRequest {
    /* Linkage for a QSIMPLEQ of requests (no user of this field is visible
     * in this part of the file). */
    QSIMPLEQ_ENTRY(NBDRequest) entry;
    /* Client the request belongs to; nbd_request_get() takes a reference
     * on it and nbd_request_put() drops that reference. */
    NBDClient *client;
    /* Payload buffer, or NULL when the request carries none.  Allocated
     * with blk_blockalign() for read and write commands and released with
     * qemu_vfree() in nbd_request_put(). */
    uint8_t *data;
};

struct NBDExport {
55
    int refcount;
56 57
    void (*close)(NBDExport *exp);

M
Max Reitz 已提交
58
    BlockBackend *blk;
P
Paolo Bonzini 已提交
59
    char *name;
60 61 62
    off_t dev_offset;
    off_t size;
    uint32_t nbdflags;
63
    QTAILQ_HEAD(, NBDClient) clients;
P
Paolo Bonzini 已提交
64
    QTAILQ_ENTRY(NBDExport) next;
M
Max Reitz 已提交
65 66

    AioContext *ctx;
67 68
};

P
Paolo Bonzini 已提交
69 70
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);

71 72 73 74 75 76 77 78 79 80 81 82
struct NBDClient {
    int refcount;
    void (*close)(NBDClient *client);

    NBDExport *exp;
    int sock;

    Coroutine *recv_coroutine;

    CoMutex send_lock;
    Coroutine *send_coroutine;

M
Max Reitz 已提交
83 84
    bool can_read;

85
    QTAILQ_ENTRY(NBDClient) next;
86
    int nb_requests;
87
    bool closing;
88 89
};

B
bellard 已提交
90 91
/* That's all folks */

M
Max Reitz 已提交
92 93 94 95
static void nbd_set_handlers(NBDClient *client);
static void nbd_unset_handlers(NBDClient *client);
static void nbd_update_can_read(NBDClient *client);

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115
/*
 * Read and discard @size bytes from @fd.
 *
 * @fd:   descriptor to read from.
 * @size: number of bytes to throw away.
 *
 * Returns the number of bytes actually discarded, or the negative value
 * returned by read_sync() on error.  The result is smaller than @size when
 * the stream ends early; callers compare the result against @size to detect
 * that.
 */
static ssize_t drop_sync(int fd, size_t size)
{
    ssize_t ret, dropped = 0;
    /* Bounce buffer is capped at 64 KiB regardless of how much is dropped. */
    uint8_t *buffer = g_malloc(MIN(65536, size));

    while (size > 0) {
        ret = read_sync(fd, buffer, MIN(65536, size));
        if (ret < 0) {
            dropped = ret;
            break;
        }
        if (ret == 0) {
            /* read_sync() can return a short, non-negative count (other
             * callers in this file check for that separately from errors).
             * Without this check a zero-length read would leave @size
             * unchanged and the loop would never terminate. */
            break;
        }

        assert((size_t)ret <= size);
        size -= ret;
        dropped += ret;
    }

    g_free(buffer);
    return dropped;
}

116
/* Basic flow for negotiation
B
bellard 已提交
117 118 119

   Server         Client
   Negotiate
120 121 122 123 124 125 126 127 128 129 130 131 132

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
B
bellard 已提交
133 134 135 136 137 138 139
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)
140

B
bellard 已提交
141 142
*/

143
static int nbd_send_rep(int csock, uint32_t type, uint32_t opt)
144 145
{
    uint64_t magic;
146
    uint32_t len;
147

148 149 150 151
    magic = cpu_to_be64(NBD_REP_MAGIC);
    if (write_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
        LOG("write failed (rep magic)");
        return -EINVAL;
152
    }
153 154 155 156
    opt = cpu_to_be32(opt);
    if (write_sync(csock, &opt, sizeof(opt)) != sizeof(opt)) {
        LOG("write failed (rep opt)");
        return -EINVAL;
157
    }
158 159 160 161
    type = cpu_to_be32(type);
    if (write_sync(csock, &type, sizeof(type)) != sizeof(type)) {
        LOG("write failed (rep type)");
        return -EINVAL;
162
    }
163 164 165 166
    len = cpu_to_be32(0);
    if (write_sync(csock, &len, sizeof(len)) != sizeof(len)) {
        LOG("write failed (rep data length)");
        return -EINVAL;
167
    }
168 169
    return 0;
}
170

171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215
/*
 * Send one NBD_REP_SERVER reply describing @exp in answer to NBD_OPT_LIST.
 *
 * Fields are written to @csock in this order, all big-endian:
 *   reply magic (64 bit), option (32 bit), reply type (32 bit),
 *   payload length (32 bit, name length + 4), name length (32 bit),
 *   export name (not NUL-terminated).
 *
 * Returns 0 on success and -EINVAL as soon as any write comes up short.
 */
static int nbd_send_rep_list(int csock, NBDExport *exp)
{
    uint64_t name_len = strlen(exp->name);
    uint64_t rep_magic = cpu_to_be64(NBD_REP_MAGIC);
    uint32_t rep_opt = cpu_to_be32(NBD_OPT_LIST);
    uint32_t rep_type = cpu_to_be32(NBD_REP_SERVER);
    uint32_t payload_len = cpu_to_be32(name_len + sizeof(payload_len));
    uint32_t wire_name_len = cpu_to_be32(name_len);

    if (write_sync(csock, &rep_magic, sizeof(rep_magic))
        != sizeof(rep_magic)) {
        LOG("write failed (magic)");
    } else if (write_sync(csock, &rep_opt, sizeof(rep_opt))
               != sizeof(rep_opt)) {
        LOG("write failed (opt)");
    } else if (write_sync(csock, &rep_type, sizeof(rep_type))
               != sizeof(rep_type)) {
        LOG("write failed (reply type)");
    } else if (write_sync(csock, &payload_len, sizeof(payload_len))
               != sizeof(payload_len)) {
        LOG("write failed (length)");
    } else if (write_sync(csock, &wire_name_len, sizeof(wire_name_len))
               != sizeof(wire_name_len)) {
        LOG("write failed (length)");
    } else if (write_sync(csock, exp->name, name_len) != name_len) {
        LOG("write failed (buffer)");
    } else {
        return 0;
    }

    return -EINVAL;
}

static int nbd_handle_list(NBDClient *client, uint32_t length)
{
    int csock;
    NBDExport *exp;

    csock = client->sock;
    if (length) {
216 217 218
        if (drop_sync(csock, length) != length) {
            return -EIO;
        }
219 220 221 222 223 224 225 226 227 228 229 230 231
        return nbd_send_rep(csock, NBD_REP_ERR_INVALID, NBD_OPT_LIST);
    }

    /* For each export, send a NBD_REP_SERVER reply. */
    QTAILQ_FOREACH(exp, &exports, next) {
        if (nbd_send_rep_list(csock, exp)) {
            return -EINVAL;
        }
    }
    /* Finish with a NBD_REP_ACK. */
    return nbd_send_rep(csock, NBD_REP_ACK, NBD_OPT_LIST);
}

232 233 234 235
static int nbd_handle_export_name(NBDClient *client, uint32_t length)
{
    int rc = -EINVAL, csock = client->sock;
    char name[256];
236

237 238 239
    /* Client sends:
        [20 ..  xx]   export name (length bytes)
     */
240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
    TRACE("Checking length");
    if (length > 255) {
        LOG("Bad length received");
        goto fail;
    }
    if (read_sync(csock, name, length) != length) {
        LOG("read failed");
        goto fail;
    }
    name[length] = '\0';

    client->exp = nbd_export_find(name);
    if (!client->exp) {
        LOG("export not found");
        goto fail;
    }

    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
    nbd_export_get(client->exp);
    rc = 0;
fail:
    return rc;
}

264 265
static int nbd_receive_options(NBDClient *client)
{
M
Max Reitz 已提交
266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293
    int csock = client->sock;
    uint32_t flags;

    /* Client sends:
        [ 0 ..   3]   client flags

        [ 0 ..   7]   NBD_OPTS_MAGIC
        [ 8 ..  11]   NBD option
        [12 ..  15]   Data length
        ...           Rest of request

        [ 0 ..   7]   NBD_OPTS_MAGIC
        [ 8 ..  11]   Second NBD option
        [12 ..  15]   Data length
        ...           Rest of request
    */

    if (read_sync(csock, &flags, sizeof(flags)) != sizeof(flags)) {
        LOG("read failed");
        return -EIO;
    }
    TRACE("Checking client flags");
    be32_to_cpus(&flags);
    if (flags != 0 && flags != NBD_FLAG_C_FIXED_NEWSTYLE) {
        LOG("Bad client flags received");
        return -EIO;
    }

294
    while (1) {
M
Max Reitz 已提交
295
        int ret;
296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321
        uint32_t tmp, length;
        uint64_t magic;

        if (read_sync(csock, &magic, sizeof(magic)) != sizeof(magic)) {
            LOG("read failed");
            return -EINVAL;
        }
        TRACE("Checking opts magic");
        if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
            LOG("Bad magic received");
            return -EINVAL;
        }

        if (read_sync(csock, &tmp, sizeof(tmp)) != sizeof(tmp)) {
            LOG("read failed");
            return -EINVAL;
        }

        if (read_sync(csock, &length, sizeof(length)) != sizeof(length)) {
            LOG("read failed");
            return -EINVAL;
        }
        length = be32_to_cpu(length);

        TRACE("Checking option");
        switch (be32_to_cpu(tmp)) {
322
        case NBD_OPT_LIST:
323 324 325
            ret = nbd_handle_list(client, length);
            if (ret < 0) {
                return ret;
326 327 328
            }
            break;

329 330 331 332 333 334 335 336 337 338 339 340 341 342 343
        case NBD_OPT_ABORT:
            return -EINVAL;

        case NBD_OPT_EXPORT_NAME:
            return nbd_handle_export_name(client, length);

        default:
            tmp = be32_to_cpu(tmp);
            LOG("Unsupported option 0x%x", tmp);
            nbd_send_rep(client->sock, NBD_REP_ERR_UNSUP, tmp);
            return -EINVAL;
        }
    }
}

344
static int nbd_send_negotiate(NBDClient *client)
B
bellard 已提交
345
{
346
    int csock = client->sock;
N
Nick Thomas 已提交
347
    char buf[8 + 8 + 8 + 128];
348
    int rc;
349 350
    const int myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
                         NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA);
N
Nick Thomas 已提交
351

352 353 354
    /* Negotiation header without options:
        [ 0 ..   7]   passwd       ("NBDMAGIC")
        [ 8 ..  15]   magic        (NBD_CLIENT_MAGIC)
N
Nick Thomas 已提交
355
        [16 ..  23]   size
356
        [24 ..  25]   server flags (0)
H
Hani Benhabiles 已提交
357
        [26 ..  27]   export flags
358 359 360 361 362 363 364 365 366 367 368
        [28 .. 151]   reserved     (0)

       Negotiation header with options, part 1:
        [ 0 ..   7]   passwd       ("NBDMAGIC")
        [ 8 ..  15]   magic        (NBD_OPTS_MAGIC)
        [16 ..  17]   server flags (0)

       part 2 (after options are sent):
        [18 ..  25]   size
        [26 ..  27]   export flags
        [28 .. 151]   reserved     (0)
N
Nick Thomas 已提交
369 370
     */

371
    qemu_set_block(csock);
372 373
    rc = -EINVAL;

N
Nick Thomas 已提交
374
    TRACE("Beginning negotiation.");
375
    memset(buf, 0, sizeof(buf));
N
Nick Thomas 已提交
376
    memcpy(buf, "NBDMAGIC", 8);
377 378 379 380 381 382 383
    if (client->exp) {
        assert ((client->exp->nbdflags & ~65535) == 0);
        cpu_to_be64w((uint64_t*)(buf + 8), NBD_CLIENT_MAGIC);
        cpu_to_be64w((uint64_t*)(buf + 16), client->exp->size);
        cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
    } else {
        cpu_to_be64w((uint64_t*)(buf + 8), NBD_OPTS_MAGIC);
384
        cpu_to_be16w((uint16_t *)(buf + 16), NBD_FLAG_FIXED_NEWSTYLE);
385
    }
N
Nick Thomas 已提交
386

387 388 389 390 391 392 393 394 395 396 397
    if (client->exp) {
        if (write_sync(csock, buf, sizeof(buf)) != sizeof(buf)) {
            LOG("write failed");
            goto fail;
        }
    } else {
        if (write_sync(csock, buf, 18) != 18) {
            LOG("write failed");
            goto fail;
        }
        rc = nbd_receive_options(client);
398
        if (rc != 0) {
399 400 401 402 403 404 405 406 407 408 409
            LOG("option negotiation failed");
            goto fail;
        }

        assert ((client->exp->nbdflags & ~65535) == 0);
        cpu_to_be64w((uint64_t*)(buf + 18), client->exp->size);
        cpu_to_be16w((uint16_t*)(buf + 26), client->exp->nbdflags | myflags);
        if (write_sync(csock, buf + 18, sizeof(buf) - 18) != sizeof(buf) - 18) {
            LOG("write failed");
            goto fail;
        }
N
Nick Thomas 已提交
410 411
    }

D
Dong Xu Wang 已提交
412
    TRACE("Negotiation succeeded.");
413 414
    rc = 0;
fail:
415
    qemu_set_nonblock(csock);
416
    return rc;
B
bellard 已提交
417 418
}

P
Paolo Bonzini 已提交
419
#ifdef __linux__
B
bellard 已提交
420 421 422

int nbd_disconnect(int fd)
{
N
Nick Thomas 已提交
423 424 425 426
    ioctl(fd, NBD_CLEAR_QUE);
    ioctl(fd, NBD_DISCONNECT);
    ioctl(fd, NBD_CLEAR_SOCK);
    return 0;
B
bellard 已提交
427 428
}

429 430 431 432
#else

int nbd_disconnect(int fd)
{
433
    return -ENOTSUP;
434 435
}
#endif
B
bellard 已提交
436

P
Paolo Bonzini 已提交
437
static ssize_t nbd_receive_request(int csock, struct nbd_request *request)
438
{
P
Paolo Bonzini 已提交
439
    uint8_t buf[NBD_REQUEST_SIZE];
N
Nick Thomas 已提交
440
    uint32_t magic;
441
    ssize_t ret;
N
Nick Thomas 已提交
442

443 444 445 446 447 448
    ret = read_sync(csock, buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    if (ret != sizeof(buf)) {
N
Nick Thomas 已提交
449
        LOG("read failed");
450
        return -EINVAL;
N
Nick Thomas 已提交
451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472
    }

    /* Request
       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
       [ 4 ..  7]   type    (0 == READ, 1 == WRITE)
       [ 8 .. 15]   handle
       [16 .. 23]   from
       [24 .. 27]   len
     */

    magic = be32_to_cpup((uint32_t*)buf);
    request->type  = be32_to_cpup((uint32_t*)(buf + 4));
    request->handle = be64_to_cpup((uint64_t*)(buf + 8));
    request->from  = be64_to_cpup((uint64_t*)(buf + 16));
    request->len   = be32_to_cpup((uint32_t*)(buf + 24));

    TRACE("Got request: "
          "{ magic = 0x%x, .type = %d, from = %" PRIu64" , len = %u }",
          magic, request->type, request->from, request->len);

    if (magic != NBD_REQUEST_MAGIC) {
        LOG("invalid magic (got 0x%x)", magic);
473
        return -EINVAL;
N
Nick Thomas 已提交
474 475
    }
    return 0;
476 477
}

P
Paolo Bonzini 已提交
478
static ssize_t nbd_send_reply(int csock, struct nbd_reply *reply)
479
{
P
Paolo Bonzini 已提交
480
    uint8_t buf[NBD_REPLY_SIZE];
481
    ssize_t ret;
N
Nick Thomas 已提交
482

483 484
    reply->error = system_errno_to_nbd_errno(reply->error);

N
Nick Thomas 已提交
485 486 487 488 489 490 491 492 493 494 495
    /* Reply
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
       [ 4 ..  7]    error   (0 == no error)
       [ 8 .. 15]    handle
     */
    cpu_to_be32w((uint32_t*)buf, NBD_REPLY_MAGIC);
    cpu_to_be32w((uint32_t*)(buf + 4), reply->error);
    cpu_to_be64w((uint64_t*)(buf + 8), reply->handle);

    TRACE("Sending response to client");

496 497 498 499 500 501
    ret = write_sync(csock, buf, sizeof(buf));
    if (ret < 0) {
        return ret;
    }

    if (ret != sizeof(buf)) {
N
Nick Thomas 已提交
502
        LOG("writing to socket failed");
503
        return -EINVAL;
N
Nick Thomas 已提交
504 505
    }
    return 0;
506
}
B
bellard 已提交
507

P
Paolo Bonzini 已提交
508 509
#define MAX_NBD_REQUESTS 16

510
void nbd_client_get(NBDClient *client)
511 512 513 514
{
    client->refcount++;
}

515
void nbd_client_put(NBDClient *client)
516 517
{
    if (--client->refcount == 0) {
518
        /* The last reference should be dropped by client->close,
M
Max Reitz 已提交
519
         * which is called by client_close.
520 521 522
         */
        assert(client->closing);

M
Max Reitz 已提交
523
        nbd_unset_handlers(client);
524 525
        close(client->sock);
        client->sock = -1;
526 527 528 529
        if (client->exp) {
            QTAILQ_REMOVE(&client->exp->clients, client, next);
            nbd_export_put(client->exp);
        }
530 531 532 533
        g_free(client);
    }
}

M
Max Reitz 已提交
534
static void client_close(NBDClient *client)
535
{
536 537 538 539 540 541 542 543 544 545 546 547
    if (client->closing) {
        return;
    }

    client->closing = true;

    /* Force requests to finish.  They will drop their own references,
     * then we'll close the socket and free the NBDClient.
     */
    shutdown(client->sock, 2);

    /* Also tell the client, so that they release their reference.  */
548 549 550 551 552
    if (client->close) {
        client->close(client);
    }
}

553
static NBDRequest *nbd_request_get(NBDClient *client)
P
Paolo Bonzini 已提交
554 555
{
    NBDRequest *req;
556

P
Paolo Bonzini 已提交
557 558
    assert(client->nb_requests <= MAX_NBD_REQUESTS - 1);
    client->nb_requests++;
M
Max Reitz 已提交
559
    nbd_update_can_read(client);
P
Paolo Bonzini 已提交
560

561
    req = g_new0(NBDRequest, 1);
562 563
    nbd_client_get(client);
    req->client = client;
P
Paolo Bonzini 已提交
564 565 566
    return req;
}

567
static void nbd_request_put(NBDRequest *req)
P
Paolo Bonzini 已提交
568
{
569
    NBDClient *client = req->client;
570

571 572 573
    if (req->data) {
        qemu_vfree(req->data);
    }
574
    g_free(req);
575

M
Max Reitz 已提交
576 577
    client->nb_requests--;
    nbd_update_can_read(client);
578
    nbd_client_put(client);
P
Paolo Bonzini 已提交
579 580
}

M
Max Reitz 已提交
581
static void blk_aio_attached(AioContext *ctx, void *opaque)
M
Max Reitz 已提交
582 583 584 585 586 587 588 589 590 591 592 593 594
{
    NBDExport *exp = opaque;
    NBDClient *client;

    TRACE("Export %s: Attaching clients to AIO context %p\n", exp->name, ctx);

    exp->ctx = ctx;

    QTAILQ_FOREACH(client, &exp->clients, next) {
        nbd_set_handlers(client);
    }
}

M
Max Reitz 已提交
595
static void blk_aio_detach(void *opaque)
M
Max Reitz 已提交
596 597 598 599 600 601 602 603 604 605 606 607 608
{
    NBDExport *exp = opaque;
    NBDClient *client;

    TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);

    QTAILQ_FOREACH(client, &exp->clients, next) {
        nbd_unset_handlers(client);
    }

    exp->ctx = NULL;
}

609
NBDExport *nbd_export_new(BlockBackend *blk, off_t dev_offset, off_t size,
M
Max Reitz 已提交
610 611
                          uint32_t nbdflags, void (*close)(NBDExport *),
                          Error **errp)
P
Paolo Bonzini 已提交
612 613
{
    NBDExport *exp = g_malloc0(sizeof(NBDExport));
614
    exp->refcount = 1;
615
    QTAILQ_INIT(&exp->clients);
M
Max Reitz 已提交
616
    exp->blk = blk;
P
Paolo Bonzini 已提交
617 618
    exp->dev_offset = dev_offset;
    exp->nbdflags = nbdflags;
M
Max Reitz 已提交
619 620 621 622 623 624 625 626
    exp->size = size < 0 ? blk_getlength(blk) : size;
    if (exp->size < 0) {
        error_setg_errno(errp, -exp->size,
                         "Failed to determine the NBD export's length");
        goto fail;
    }
    exp->size -= exp->size % BDRV_SECTOR_SIZE;

627
    exp->close = close;
M
Max Reitz 已提交
628 629 630
    exp->ctx = blk_get_aio_context(blk);
    blk_ref(blk);
    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
631 632 633 634 635
    /*
     * NBD exports are used for non-shared storage migration.  Make sure
     * that BDRV_O_INCOMING is cleared and the image is ready for write
     * access since the export could be available before migration handover.
     */
M
Max Reitz 已提交
636
    blk_invalidate_cache(blk, NULL);
P
Paolo Bonzini 已提交
637
    return exp;
M
Max Reitz 已提交
638 639 640 641

fail:
    g_free(exp);
    return NULL;
P
Paolo Bonzini 已提交
642 643
}

P
Paolo Bonzini 已提交
644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676
/*
 * Look up a named export.
 *
 * Walks the global "exports" list and returns the first export whose name
 * equals @name, or NULL when there is no such export.  No reference is
 * taken on the result.
 */
NBDExport *nbd_export_find(const char *name)
{
    NBDExport *candidate;

    QTAILQ_FOREACH(candidate, &exports, next) {
        if (!strcmp(candidate->name, name)) {
            break;
        }
    }

    /* The cursor is NULL here when the list was exhausted without a match. */
    return candidate;
}

/*
 * Rename @exp, or make it anonymous when @name is NULL.
 *
 * A named export is linked into the global "exports" list, and that list
 * membership holds one reference on the export.  The string in @name is
 * copied; the caller keeps ownership of it.
 */
void nbd_export_set_name(NBDExport *exp, const char *name)
{
    /* Pointer comparison only: covers NULL -> NULL and being handed the
     * export's own name string back. */
    if (exp->name == name) {
        return;
    }

    /* Hold a temporary reference so that dropping the list's reference
     * below cannot release the export while it is still in use here. */
    nbd_export_get(exp);
    if (exp->name != NULL) {
        /* Drop the old name together with the list membership and the
         * reference that went with it. */
        g_free(exp->name);
        exp->name = NULL;
        QTAILQ_REMOVE(&exports, exp, next);
        nbd_export_put(exp);
    }
    if (name != NULL) {
        /* Reference owned by the "exports" list for as long as the export
         * stays named. */
        nbd_export_get(exp);
        exp->name = g_strdup(name);
        QTAILQ_INSERT_TAIL(&exports, exp, next);
    }
    /* Release the temporary reference; if it was the last one,
     * nbd_export_put() closes and frees the export. */
    nbd_export_put(exp);
}

P
Paolo Bonzini 已提交
677 678
void nbd_export_close(NBDExport *exp)
{
679
    NBDClient *client, *next;
680

681 682
    nbd_export_get(exp);
    QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
M
Max Reitz 已提交
683
        client_close(client);
684
    }
P
Paolo Bonzini 已提交
685
    nbd_export_set_name(exp, NULL);
686
    nbd_export_put(exp);
687 688 689 690 691 692 693 694 695 696 697 698 699
}

/*
 * Take an additional reference on @exp.
 *
 * The export must still be alive, i.e. already hold at least one
 * reference; this is asserted.
 */
void nbd_export_get(NBDExport *exp)
{
    assert(exp->refcount > 0);
    ++exp->refcount;
}

void nbd_export_put(NBDExport *exp)
{
    assert(exp->refcount > 0);
    if (exp->refcount == 1) {
        nbd_export_close(exp);
P
Paolo Bonzini 已提交
700 701
    }

702
    if (--exp->refcount == 0) {
P
Paolo Bonzini 已提交
703 704
        assert(exp->name == NULL);

705 706 707 708
        if (exp->close) {
            exp->close(exp);
        }

709 710 711 712 713 714 715
        if (exp->blk) {
            blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                            blk_aio_detach, exp);
            blk_unref(exp->blk);
            exp->blk = NULL;
        }

716 717
        g_free(exp);
    }
P
Paolo Bonzini 已提交
718 719
}

720
BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
P
Paolo Bonzini 已提交
721
{
M
Max Reitz 已提交
722
    return exp->blk;
P
Paolo Bonzini 已提交
723 724
}

P
Paolo Bonzini 已提交
725 726 727 728 729 730 731 732 733
void nbd_export_close_all(void)
{
    NBDExport *exp, *next;

    QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
        nbd_export_close(exp);
    }
}

P
Paolo Bonzini 已提交
734 735
static ssize_t nbd_co_send_reply(NBDRequest *req, struct nbd_reply *reply,
                                 int len)
736
{
737 738
    NBDClient *client = req->client;
    int csock = client->sock;
P
Paolo Bonzini 已提交
739
    ssize_t rc, ret;
740

P
Paolo Bonzini 已提交
741 742
    qemu_co_mutex_lock(&client->send_lock);
    client->send_coroutine = qemu_coroutine_self();
M
Max Reitz 已提交
743
    nbd_set_handlers(client);
P
Paolo Bonzini 已提交
744

745 746 747 748 749
    if (!len) {
        rc = nbd_send_reply(csock, reply);
    } else {
        socket_set_cork(csock, 1);
        rc = nbd_send_reply(csock, reply);
750
        if (rc >= 0) {
P
Paolo Bonzini 已提交
751
            ret = qemu_co_send(csock, req->data, len);
752
            if (ret != len) {
753
                rc = -EIO;
754 755 756 757
            }
        }
        socket_set_cork(csock, 0);
    }
P
Paolo Bonzini 已提交
758 759

    client->send_coroutine = NULL;
M
Max Reitz 已提交
760
    nbd_set_handlers(client);
P
Paolo Bonzini 已提交
761
    qemu_co_mutex_unlock(&client->send_lock);
762 763 764
    return rc;
}

P
Paolo Bonzini 已提交
765
static ssize_t nbd_co_receive_request(NBDRequest *req, struct nbd_request *request)
766
{
767 768
    NBDClient *client = req->client;
    int csock = client->sock;
769
    uint32_t command;
P
Paolo Bonzini 已提交
770
    ssize_t rc;
771

P
Paolo Bonzini 已提交
772
    client->recv_coroutine = qemu_coroutine_self();
M
Max Reitz 已提交
773 774
    nbd_update_can_read(client);

775 776 777 778 779
    rc = nbd_receive_request(csock, request);
    if (rc < 0) {
        if (rc != -EAGAIN) {
            rc = -EIO;
        }
780 781 782
        goto out;
    }

783
    if (request->len > NBD_MAX_BUFFER_SIZE) {
784
        LOG("len (%u) is larger than max len (%u)",
785
            request->len, NBD_MAX_BUFFER_SIZE);
786 787 788 789 790 791 792 793 794 795 796 797 798
        rc = -EINVAL;
        goto out;
    }

    if ((request->from + request->len) < request->from) {
        LOG("integer overflow detected! "
            "you're probably being attacked");
        rc = -EINVAL;
        goto out;
    }

    TRACE("Decoding type");

799 800
    command = request->type & NBD_CMD_MASK_COMMAND;
    if (command == NBD_CMD_READ || command == NBD_CMD_WRITE) {
M
Max Reitz 已提交
801
        req->data = blk_blockalign(client->exp->blk, request->len);
802 803
    }
    if (command == NBD_CMD_WRITE) {
804 805
        TRACE("Reading %u byte(s)", request->len);

P
Paolo Bonzini 已提交
806
        if (qemu_co_recv(csock, req->data, request->len) != request->len) {
807 808 809 810 811 812 813 814
            LOG("reading from socket failed");
            rc = -EIO;
            goto out;
        }
    }
    rc = 0;

out:
P
Paolo Bonzini 已提交
815
    client->recv_coroutine = NULL;
M
Max Reitz 已提交
816 817
    nbd_update_can_read(client);

818 819 820
    return rc;
}

P
Paolo Bonzini 已提交
821
static void nbd_trip(void *opaque)
822
{
P
Paolo Bonzini 已提交
823
    NBDClient *client = opaque;
824
    NBDExport *exp = client->exp;
825
    NBDRequest *req;
N
Nick Thomas 已提交
826 827
    struct nbd_request request;
    struct nbd_reply reply;
P
Paolo Bonzini 已提交
828
    ssize_t ret;
829
    uint32_t command;
N
Nick Thomas 已提交
830 831

    TRACE("Reading request.");
832 833 834
    if (client->closing) {
        return;
    }
N
Nick Thomas 已提交
835

836
    req = nbd_request_get(client);
P
Paolo Bonzini 已提交
837
    ret = nbd_co_receive_request(req, &request);
838 839 840
    if (ret == -EAGAIN) {
        goto done;
    }
841
    if (ret == -EIO) {
P
Paolo Bonzini 已提交
842
        goto out;
843
    }
N
Nick Thomas 已提交
844

845 846 847
    reply.handle = request.handle;
    reply.error = 0;

848 849 850
    if (ret < 0) {
        reply.error = -ret;
        goto error_reply;
N
Nick Thomas 已提交
851
    }
852 853
    command = request.type & NBD_CMD_MASK_COMMAND;
    if (command != NBD_CMD_DISC && (request.from + request.len) > exp->size) {
N
Nick Thomas 已提交
854 855
            LOG("From: %" PRIu64 ", Len: %u, Size: %" PRIu64
            ", Offset: %" PRIu64 "\n",
P
Paolo Bonzini 已提交
856
                    request.from, request.len,
S
Stefan Weil 已提交
857
                    (uint64_t)exp->size, (uint64_t)exp->dev_offset);
N
Nick Thomas 已提交
858
        LOG("requested operation past EOF--bad client?");
859
        goto invalid_request;
N
Nick Thomas 已提交
860 861
    }

862 863 864 865 866 867 868 869
    if (client->closing) {
        /*
         * The client may be closed when we are blocked in
         * nbd_co_receive_request()
         */
        goto done;
    }

870
    switch (command) {
N
Nick Thomas 已提交
871 872 873
    case NBD_CMD_READ:
        TRACE("Request type is READ");

P
Paolo Bonzini 已提交
874
        if (request.type & NBD_CMD_FLAG_FUA) {
M
Max Reitz 已提交
875
            ret = blk_co_flush(exp->blk);
P
Paolo Bonzini 已提交
876 877 878 879 880 881 882
            if (ret < 0) {
                LOG("flush failed");
                reply.error = -ret;
                goto error_reply;
            }
        }

M
Max Reitz 已提交
883 884 885
        ret = blk_read(exp->blk,
                       (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
                       req->data, request.len / BDRV_SECTOR_SIZE);
886
        if (ret < 0) {
N
Nick Thomas 已提交
887
            LOG("reading from file failed");
888
            reply.error = -ret;
889
            goto error_reply;
N
Nick Thomas 已提交
890 891 892
        }

        TRACE("Read %u byte(s)", request.len);
P
Paolo Bonzini 已提交
893
        if (nbd_co_send_reply(req, &reply, request.len) < 0)
P
Paolo Bonzini 已提交
894
            goto out;
N
Nick Thomas 已提交
895 896 897 898
        break;
    case NBD_CMD_WRITE:
        TRACE("Request type is WRITE");

P
Paolo Bonzini 已提交
899
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
N
Nick Thomas 已提交
900
            TRACE("Server is read-only, return error");
901 902 903 904 905 906
            reply.error = EROFS;
            goto error_reply;
        }

        TRACE("Writing to device");

M
Max Reitz 已提交
907 908 909
        ret = blk_write(exp->blk,
                        (request.from + exp->dev_offset) / BDRV_SECTOR_SIZE,
                        req->data, request.len / BDRV_SECTOR_SIZE);
910 911 912 913 914
        if (ret < 0) {
            LOG("writing to file failed");
            reply.error = -ret;
            goto error_reply;
        }
N
Nick Thomas 已提交
915

916
        if (request.type & NBD_CMD_FLAG_FUA) {
M
Max Reitz 已提交
917
            ret = blk_co_flush(exp->blk);
918
            if (ret < 0) {
919
                LOG("flush failed");
920
                reply.error = -ret;
921
                goto error_reply;
922
            }
N
Nick Thomas 已提交
923 924
        }

925
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
926
            goto out;
927
        }
N
Nick Thomas 已提交
928 929 930 931
        break;
    case NBD_CMD_DISC:
        TRACE("Request type is DISCONNECT");
        errno = 0;
P
Paolo Bonzini 已提交
932
        goto out;
P
Paolo Bonzini 已提交
933 934 935
    case NBD_CMD_FLUSH:
        TRACE("Request type is FLUSH");

M
Max Reitz 已提交
936
        ret = blk_co_flush(exp->blk);
P
Paolo Bonzini 已提交
937 938 939 940
        if (ret < 0) {
            LOG("flush failed");
            reply.error = -ret;
        }
941
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
942
            goto out;
943
        }
P
Paolo Bonzini 已提交
944 945 946
        break;
    case NBD_CMD_TRIM:
        TRACE("Request type is TRIM");
M
Max Reitz 已提交
947 948 949
        ret = blk_co_discard(exp->blk, (request.from + exp->dev_offset)
                                       / BDRV_SECTOR_SIZE,
                             request.len / BDRV_SECTOR_SIZE);
P
Paolo Bonzini 已提交
950 951 952 953
        if (ret < 0) {
            LOG("discard failed");
            reply.error = -ret;
        }
954
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
955
            goto out;
956
        }
P
Paolo Bonzini 已提交
957
        break;
N
Nick Thomas 已提交
958 959
    default:
        LOG("invalid request type (%u) received", request.type);
960
    invalid_request:
Y
Yik Fang 已提交
961
        reply.error = EINVAL;
962
    error_reply:
963
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
964
            goto out;
965
        }
966
        break;
N
Nick Thomas 已提交
967 968 969 970
    }

    TRACE("Request/Reply complete");

971
done:
P
Paolo Bonzini 已提交
972 973 974
    nbd_request_put(req);
    return;

P
Paolo Bonzini 已提交
975
out:
976
    nbd_request_put(req);
M
Max Reitz 已提交
977
    client_close(client);
B
bellard 已提交
978
}
P
Paolo Bonzini 已提交
979

980 981 982 983
static void nbd_read(void *opaque)
{
    NBDClient *client = opaque;

P
Paolo Bonzini 已提交
984 985 986 987
    if (client->recv_coroutine) {
        qemu_coroutine_enter(client->recv_coroutine, NULL);
    } else {
        qemu_coroutine_enter(qemu_coroutine_create(nbd_trip), client);
988 989 990
    }
}

P
Paolo Bonzini 已提交
991 992 993 994 995 996 997
static void nbd_restart_write(void *opaque)
{
    NBDClient *client = opaque;

    qemu_coroutine_enter(client->send_coroutine, NULL);
}

M
Max Reitz 已提交
998 999 1000 1001
static void nbd_set_handlers(NBDClient *client)
{
    if (client->exp && client->exp->ctx) {
        aio_set_fd_handler(client->exp->ctx, client->sock,
1002
                           true,
M
Max Reitz 已提交
1003 1004 1005 1006 1007 1008 1009 1010 1011
                           client->can_read ? nbd_read : NULL,
                           client->send_coroutine ? nbd_restart_write : NULL,
                           client);
    }
}

static void nbd_unset_handlers(NBDClient *client)
{
    if (client->exp && client->exp->ctx) {
1012
        aio_set_fd_handler(client->exp->ctx, client->sock,
1013
                           true, NULL, NULL, NULL);
M
Max Reitz 已提交
1014 1015 1016 1017 1018 1019 1020 1021 1022 1023 1024 1025 1026 1027 1028 1029 1030
    }
}

/*
 * Recompute whether the client socket should be polled for input and
 * reinstall the fd handlers when the answer changed.
 *
 * Reading is allowed while a receive coroutine is recorded on the client,
 * or while fewer than MAX_NBD_REQUESTS requests are outstanding.
 */
static void nbd_update_can_read(NBDClient *client)
{
    bool readable = true;

    if (!client->recv_coroutine &&
        client->nb_requests >= MAX_NBD_REQUESTS) {
        readable = false;
    }

    if (readable == client->can_read) {
        return;
    }

    client->can_read = readable;
    /* aio_set_fd_handler(), reached through nbd_set_handlers(), already
     * takes care of notifying the AIO context, so no explicit aio_notify()
     * is needed here. */
    nbd_set_handlers(client);
}

1031
/*
 * Create a client for the connection @csock and run the NBD negotiation.
 *
 * @exp:      export to serve, or NULL to let the client pick one by name
 *            during option negotiation.
 * @csock:    connected client socket.
 * @close_fn: callback invoked when the client is closed; it is expected to
 *            drop the initial reference with nbd_client_put().
 *
 * The new client starts with one reference.  If negotiation fails the
 * client is closed through client_close(), which marks it as closing,
 * shuts the socket down and then invokes @close_fn.
 */
void nbd_client_new(NBDExport *exp, int csock, void (*close_fn)(NBDClient *))
{
    NBDClient *client;

    client = g_malloc0(sizeof(NBDClient));
    client->refcount = 1;
    client->exp = exp;
    client->sock = csock;
    client->can_read = true;
    /* Set before negotiating so that client_close() can reach the callback
     * on the failure path below. */
    client->close = close_fn;

    if (nbd_send_negotiate(client)) {
        if (exp) {
            /* A caller-supplied export has not been linked or referenced
             * yet; detach it so that nbd_client_put() does not unlink and
             * unref something this client never acquired. */
            client->exp = NULL;
        }
        /* nbd_client_put() asserts client->closing when the last reference
         * goes away, so the callback must not be invoked directly. */
        client_close(client);
        return;
    }

    qemu_co_mutex_init(&client->send_lock);
    nbd_set_handlers(client);

    if (exp) {
        QTAILQ_INSERT_TAIL(&exp->clients, client, next);
        nbd_export_get(exp);
    }
}