server.c 39.0 KB
Newer Older
1
/*
2
 *  Copyright (C) 2016 Red Hat, Inc.
B
bellard 已提交
3 4
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
F
Fam Zheng 已提交
5
 *  Network Block Device Server Side
B
bellard 已提交
6 7 8 9 10 11 12 13 14 15 16
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
17
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
18
 */
B
bellard 已提交
19

P
Peter Maydell 已提交
20
#include "qemu/osdep.h"
21
#include "qapi/error.h"
F
Fam Zheng 已提交
22
#include "nbd-internal.h"
23 24 25 26 27 28 29

static int system_errno_to_nbd_errno(int err)
{
    switch (err) {
    case 0:
        return NBD_SUCCESS;
    case EPERM:
30
    case EROFS:
31 32 33 34 35 36 37 38 39 40 41
        return NBD_EPERM;
    case EIO:
        return NBD_EIO;
    case ENOMEM:
        return NBD_ENOMEM;
#ifdef EDQUOT
    case EDQUOT:
#endif
    case EFBIG:
    case ENOSPC:
        return NBD_ENOSPC;
42 43
    case ESHUTDOWN:
        return NBD_ESHUTDOWN;
44 45 46 47 48 49
    case EINVAL:
    default:
        return NBD_EINVAL;
    }
}

50 51
/* Definitions for opaque data types */

52
typedef struct NBDRequestData NBDRequestData;
53

54 55
struct NBDRequestData {
    QSIMPLEQ_ENTRY(NBDRequestData) entry;
56 57
    NBDClient *client;
    uint8_t *data;
58
    bool complete;
59 60 61
};

struct NBDExport {
62
    int refcount;
63 64
    void (*close)(NBDExport *exp);

M
Max Reitz 已提交
65
    BlockBackend *blk;
P
Paolo Bonzini 已提交
66
    char *name;
67
    char *description;
68 69
    off_t dev_offset;
    off_t size;
E
Eric Blake 已提交
70
    uint16_t nbdflags;
71
    QTAILQ_HEAD(, NBDClient) clients;
P
Paolo Bonzini 已提交
72
    QTAILQ_ENTRY(NBDExport) next;
M
Max Reitz 已提交
73 74

    AioContext *ctx;
75

76
    BlockBackend *eject_notifier_blk;
77
    Notifier eject_notifier;
78 79
};

P
Paolo Bonzini 已提交
80 81
/* Named exports; nbd_export_set_name() inserts and removes the entries. */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);

82 83
struct NBDClient {
    int refcount;
84
    void (*close_fn)(NBDClient *client, bool negotiated);
85

E
Eric Blake 已提交
86
    bool no_zeroes;
87
    NBDExport *exp;
88 89
    QCryptoTLSCreds *tlscreds;
    char *tlsaclname;
90 91
    QIOChannelSocket *sioc; /* The underlying data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
92 93 94 95 96 97

    Coroutine *recv_coroutine;

    CoMutex send_lock;
    Coroutine *send_coroutine;

98
    QTAILQ_ENTRY(NBDClient) next;
99
    int nb_requests;
100
    bool closing;
101 102
};

B
bellard 已提交
103 104
/* That's all folks */

105
/* Forward declaration: nbd_request_put() below calls this function. */
static void nbd_client_receive_next_request(NBDClient *client);
M
Max Reitz 已提交
106

107 108 109
/*
 * Channel-watch callback used during negotiation.  @opaque is the
 * coroutine that installed the watch; it is re-entered each time the
 * watched condition fires.  Always returns TRUE; the installer removes
 * the watch itself with g_source_remove().
 */
static gboolean nbd_negotiate_continue(QIOChannel *ioc,
                                       GIOCondition condition,
                                       void *opaque)
{
    Coroutine *waiter = opaque;

    qemu_coroutine_enter(waiter);

    return TRUE;
}

115
/*
 * Read @size bytes from @ioc into @buffer on behalf of the negotiating
 * coroutine.  A G_IO_IN watch that re-enters this coroutine is kept
 * installed for the duration of the read.
 * Returns the result of nbd_read().
 */
static int nbd_negotiate_read(QIOChannel *ioc, void *buffer, size_t size)
{
    guint watch_id;
    ssize_t nread;

    assert(qemu_in_coroutine());

    /* Negotiation always runs in the main loop. */
    watch_id = qio_channel_add_watch(ioc, G_IO_IN, nbd_negotiate_continue,
                                     qemu_coroutine_self(), NULL);
    nread = nbd_read(ioc, buffer, size, NULL);
    g_source_remove(watch_id);

    return nread;
}

133
/*
 * Write @size bytes from @buffer to @ioc on behalf of the negotiating
 * coroutine.  A G_IO_OUT watch that re-enters this coroutine is kept
 * installed for the duration of the write.
 * Returns the result of nbd_write().
 */
static int nbd_negotiate_write(QIOChannel *ioc, const void *buffer, size_t size)
{
    guint watch_id;
    ssize_t nwritten;

    assert(qemu_in_coroutine());

    /* Negotiation always runs in the main loop. */
    watch_id = qio_channel_add_watch(ioc, G_IO_OUT, nbd_negotiate_continue,
                                     qemu_coroutine_self(), NULL);
    nwritten = nbd_write(ioc, buffer, size, NULL);
    g_source_remove(watch_id);

    return nwritten;
}

150
/*
 * Read and discard @size bytes from @ioc, at most 64 KiB per read.
 * Returns 0 once everything was consumed, or the negative value
 * reported by the failing nbd_negotiate_read().
 */
static int nbd_negotiate_drop_sync(QIOChannel *ioc, size_t size)
{
    uint8_t *scratch = g_malloc(MIN(65536, size));
    size_t remaining = size;
    ssize_t rc = 0;

    while (remaining > 0) {
        size_t chunk = MIN(65536, remaining);

        rc = nbd_negotiate_read(ioc, scratch, chunk);
        if (rc < 0) {
            break;
        }
        remaining -= chunk;
        rc = 0;
    }

    g_free(scratch);
    return rc;
}

170
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)

*/

197 198 199 200
/* Send a reply header, including length, but no payload.
 *
 * The header is four big-endian fields written in this order:
 * NBD_REP_MAGIC (64 bit), the option being answered @opt (32 bit), the
 * reply type @type (32 bit) and the payload length @len (32 bit).
 *
 * Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
                                      uint32_t opt, uint32_t len)
{
    uint64_t magic;

    /* Arguments must follow the order of the labels in the format string. */
    TRACE("Reply opt=%" PRIx32 " type=%" PRIx32 " len=%" PRIu32,
          opt, type, len);

    magic = cpu_to_be64(NBD_REP_MAGIC);
    if (nbd_negotiate_write(ioc, &magic, sizeof(magic)) < 0) {
        LOG("write failed (rep magic)");
        return -EINVAL;
    }
    opt = cpu_to_be32(opt);
    if (nbd_negotiate_write(ioc, &opt, sizeof(opt)) < 0) {
        LOG("write failed (rep opt)");
        return -EINVAL;
    }
    type = cpu_to_be32(type);
    if (nbd_negotiate_write(ioc, &type, sizeof(type)) < 0) {
        LOG("write failed (rep type)");
        return -EINVAL;
    }
    len = cpu_to_be32(len);
    if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) {
        LOG("write failed (rep data length)");
        return -EINVAL;
    }
    return 0;
}
229

230 231 232 233 234 235 236
/* Send a reply header that announces an empty payload.
 * Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt)
{
    const uint32_t no_payload = 0;

    return nbd_negotiate_send_rep_len(ioc, type, opt, no_payload);
}

237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
/* Send an error reply: a reply header of kind @type for option @opt,
 * followed by the printf-style formatted message as payload (sent
 * without a terminating NUL).  The formatted message must be shorter
 * than 4096 bytes.
 * Return -errno on error, 0 on success. */
static int GCC_FMT_ATTR(4, 5)
nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
                           uint32_t opt, const char *fmt, ...)
{
    va_list args;
    char *text;
    size_t text_len;
    int rc;

    va_start(args, fmt);
    text = g_strdup_vprintf(fmt, args);
    va_end(args);

    text_len = strlen(text);
    assert(text_len < 4096);
    TRACE("sending error message \"%s\"", text);

    rc = nbd_negotiate_send_rep_len(ioc, type, opt, text_len);
    if (rc >= 0) {
        if (nbd_negotiate_write(ioc, text, text_len) < 0) {
            LOG("write failed (error message)");
            rc = -EIO;
        } else {
            rc = 0;
        }
    }

    g_free(text);
    return rc;
}

269 270
/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
 * Return -errno on error, 0 on success. */
271
static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp)
272
{
273
    size_t name_len, desc_len;
274
    uint32_t len;
275 276
    const char *name = exp->name ? exp->name : "";
    const char *desc = exp->description ? exp->description : "";
277
    int rc;
278

279 280 281
    TRACE("Advertising export name '%s' description '%s'", name, desc);
    name_len = strlen(name);
    desc_len = strlen(desc);
282 283 284 285
    len = name_len + desc_len + sizeof(len);
    rc = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len);
    if (rc < 0) {
        return rc;
286
    }
287

288
    len = cpu_to_be32(name_len);
289
    if (nbd_negotiate_write(ioc, &len, sizeof(len)) < 0) {
290 291 292
        LOG("write failed (name length)");
        return -EINVAL;
    }
293
    if (nbd_negotiate_write(ioc, name, name_len) < 0) {
294
        LOG("write failed (name buffer)");
295 296
        return -EINVAL;
    }
297
    if (nbd_negotiate_write(ioc, desc, desc_len) < 0) {
298
        LOG("write failed (description buffer)");
299 300 301 302 303
        return -EINVAL;
    }
    return 0;
}

304 305
/* Process the NBD_OPT_LIST command, with a potential series of replies.
 * Return -errno on error, 0 on success. */
306
static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length)
307 308 309 310
{
    NBDExport *exp;

    if (length) {
311
        if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
312 313
            return -EIO;
        }
314 315 316
        return nbd_negotiate_send_rep_err(client->ioc,
                                          NBD_REP_ERR_INVALID, NBD_OPT_LIST,
                                          "OPT_LIST should not have length");
317 318 319 320
    }

    /* For each export, send a NBD_REP_SERVER reply. */
    QTAILQ_FOREACH(exp, &exports, next) {
321
        if (nbd_negotiate_send_rep_list(client->ioc, exp)) {
322 323 324 325
            return -EINVAL;
        }
    }
    /* Finish with a NBD_REP_ACK. */
326
    return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST);
327 328
}

329
/*
 * Handle NBD_OPT_EXPORT_NAME: read the @length-byte export name, look the
 * export up and attach @client to it (taking a reference on the export).
 * Returns 0 on success, -EINVAL if the name is too long, cannot be read,
 * or does not match any export.
 */
static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length)
{
    char name[NBD_MAX_NAME_SIZE + 1];

    /* Client sends:
        [20 ..  xx]   export name (length bytes)
     */
    TRACE("Checking length");
    if (length >= sizeof(name)) {
        LOG("Bad length received");
        return -EINVAL;
    }
    if (nbd_negotiate_read(client->ioc, name, length) < 0) {
        LOG("read failed");
        return -EINVAL;
    }
    /* The name arrives without a terminator. */
    name[length] = '\0';

    TRACE("Client requested export '%s'", name);

    client->exp = nbd_export_find(name);
    if (client->exp == NULL) {
        LOG("export not found");
        return -EINVAL;
    }

    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
    nbd_export_get(client->exp);
    return 0;
}

363 364
/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
 * new channel for all further (now-encrypted) communication. */
365 366 367 368 369 370 371 372 373 374
static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
                                                 uint32_t length)
{
    QIOChannel *ioc;
    QIOChannelTLS *tioc;
    struct NBDTLSHandshakeData data = { 0 };

    TRACE("Setting up TLS");
    ioc = client->ioc;
    if (length) {
375
        if (nbd_negotiate_drop_sync(ioc, length) < 0) {
376 377
            return NULL;
        }
378 379
        nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
                                   "OPT_STARTTLS should not have length");
380 381 382
        return NULL;
    }

383 384 385 386
    if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
                               NBD_OPT_STARTTLS) < 0) {
        return NULL;
    }
387 388 389 390 391 392 393 394 395

    tioc = qio_channel_tls_new_server(ioc,
                                      client->tlscreds,
                                      client->tlsaclname,
                                      NULL);
    if (!tioc) {
        return NULL;
    }

396
    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417
    TRACE("Starting TLS handshake");
    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
    qio_channel_tls_handshake(tioc,
                              nbd_tls_handshake,
                              &data,
                              NULL);

    if (!data.complete) {
        g_main_loop_run(data.loop);
    }
    g_main_loop_unref(data.loop);
    if (data.error) {
        object_unref(OBJECT(tioc));
        error_free(data.error);
        return NULL;
    }

    return QIO_CHANNEL(tioc);
}


418 419
/* Process all NBD_OPT_* client option commands.
 * Return -errno on error, 0 on success. */
420
static int nbd_negotiate_options(NBDClient *client)
421
{
M
Max Reitz 已提交
422
    uint32_t flags;
423
    bool fixedNewstyle = false;
M
Max Reitz 已提交
424 425 426 427 428 429 430 431 432 433 434 435 436 437 438

    /* Client sends:
        [ 0 ..   3]   client flags

        [ 0 ..   7]   NBD_OPTS_MAGIC
        [ 8 ..  11]   NBD option
        [12 ..  15]   Data length
        ...           Rest of request

        [ 0 ..   7]   NBD_OPTS_MAGIC
        [ 8 ..  11]   Second NBD option
        [12 ..  15]   Data length
        ...           Rest of request
    */

439
    if (nbd_negotiate_read(client->ioc, &flags, sizeof(flags)) < 0) {
M
Max Reitz 已提交
440 441 442 443 444
        LOG("read failed");
        return -EIO;
    }
    TRACE("Checking client flags");
    be32_to_cpus(&flags);
445
    if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
446
        TRACE("Client supports fixed newstyle handshake");
447 448 449
        fixedNewstyle = true;
        flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
    }
E
Eric Blake 已提交
450 451 452 453 454
    if (flags & NBD_FLAG_C_NO_ZEROES) {
        TRACE("Client supports no zeroes at handshake end");
        client->no_zeroes = true;
        flags &= ~NBD_FLAG_C_NO_ZEROES;
    }
455
    if (flags != 0) {
456
        TRACE("Unknown client flags 0x%" PRIx32 " received", flags);
M
Max Reitz 已提交
457 458 459
        return -EIO;
    }

460
    while (1) {
M
Max Reitz 已提交
461
        int ret;
462
        uint32_t clientflags, length;
463 464
        uint64_t magic;

465
        if (nbd_negotiate_read(client->ioc, &magic, sizeof(magic)) < 0) {
466 467 468 469 470 471 472 473 474
            LOG("read failed");
            return -EINVAL;
        }
        TRACE("Checking opts magic");
        if (magic != be64_to_cpu(NBD_OPTS_MAGIC)) {
            LOG("Bad magic received");
            return -EINVAL;
        }

475
        if (nbd_negotiate_read(client->ioc, &clientflags,
476 477
                               sizeof(clientflags)) < 0)
        {
478 479 480
            LOG("read failed");
            return -EINVAL;
        }
481
        clientflags = be32_to_cpu(clientflags);
482

483
        if (nbd_negotiate_read(client->ioc, &length, sizeof(length)) < 0) {
484 485 486 487 488
            LOG("read failed");
            return -EINVAL;
        }
        length = be32_to_cpu(length);

489
        TRACE("Checking option 0x%" PRIx32, clientflags);
490 491 492 493
        if (client->tlscreds &&
            client->ioc == (QIOChannel *)client->sioc) {
            QIOChannel *tioc;
            if (!fixedNewstyle) {
494
                TRACE("Unsupported option 0x%" PRIx32, clientflags);
495 496 497 498 499 500 501 502 503 504 505 506
                return -EINVAL;
            }
            switch (clientflags) {
            case NBD_OPT_STARTTLS:
                tioc = nbd_negotiate_handle_starttls(client, length);
                if (!tioc) {
                    return -EIO;
                }
                object_unref(OBJECT(client->ioc));
                client->ioc = QIO_CHANNEL(tioc);
                break;

507 508 509 510 511
            case NBD_OPT_EXPORT_NAME:
                /* No way to return an error to client, so drop connection */
                TRACE("Option 0x%x not permitted before TLS", clientflags);
                return -EINVAL;

512
            default:
513
                if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
514 515
                    return -EIO;
                }
516 517 518 519 520 521
                ret = nbd_negotiate_send_rep_err(client->ioc,
                                                 NBD_REP_ERR_TLS_REQD,
                                                 clientflags,
                                                 "Option 0x%" PRIx32
                                                 "not permitted before TLS",
                                                 clientflags);
522 523 524
                if (ret < 0) {
                    return ret;
                }
525 526 527 528
                /* Let the client keep trying, unless they asked to quit */
                if (clientflags == NBD_OPT_ABORT) {
                    return -EINVAL;
                }
529
                break;
530 531
            }
        } else if (fixedNewstyle) {
532 533 534 535 536 537 538 539 540
            switch (clientflags) {
            case NBD_OPT_LIST:
                ret = nbd_negotiate_handle_list(client, length);
                if (ret < 0) {
                    return ret;
                }
                break;

            case NBD_OPT_ABORT:
541 542 543 544
                /* NBD spec says we must try to reply before
                 * disconnecting, but that we must also tolerate
                 * guests that don't wait for our reply. */
                nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, clientflags);
545 546 547 548 549
                return -EINVAL;

            case NBD_OPT_EXPORT_NAME:
                return nbd_negotiate_handle_export_name(client, length);

550
            case NBD_OPT_STARTTLS:
551
                if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
552 553
                    return -EIO;
                }
554
                if (client->tlscreds) {
555 556 557 558
                    ret = nbd_negotiate_send_rep_err(client->ioc,
                                                     NBD_REP_ERR_INVALID,
                                                     clientflags,
                                                     "TLS already enabled");
559
                } else {
560 561 562 563
                    ret = nbd_negotiate_send_rep_err(client->ioc,
                                                     NBD_REP_ERR_POLICY,
                                                     clientflags,
                                                     "TLS not configured");
564 565 566
                }
                if (ret < 0) {
                    return ret;
567
                }
568
                break;
569
            default:
570
                if (nbd_negotiate_drop_sync(client->ioc, length) < 0) {
571 572
                    return -EIO;
                }
573 574 575 576 577 578
                ret = nbd_negotiate_send_rep_err(client->ioc,
                                                 NBD_REP_ERR_UNSUP,
                                                 clientflags,
                                                 "Unsupported option 0x%"
                                                 PRIx32,
                                                 clientflags);
579 580 581
                if (ret < 0) {
                    return ret;
                }
582
                break;
583 584 585 586 587 588 589 590 591 592 593
            }
        } else {
            /*
             * If broken new-style we should drop the connection
             * for anything except NBD_OPT_EXPORT_NAME
             */
            switch (clientflags) {
            case NBD_OPT_EXPORT_NAME:
                return nbd_negotiate_handle_export_name(client, length);

            default:
594
                TRACE("Unsupported option 0x%" PRIx32, clientflags);
595
                return -EINVAL;
596
            }
597 598 599 600
        }
    }
}

601 602 603 604 605 606
/* Argument bundle passed to nbd_negotiate(). */
typedef struct {
    NBDClient *client;  /* connection to negotiate; read by nbd_negotiate() */
    Coroutine *co;      /* NOTE(review): presumably the coroutine that runs
                         * the negotiation -- confirm where this is filled */
} NBDClientNewData;

/*
 * Run the server side of the NBD handshake for data->client.
 *
 * The oldstyle handshake is used when the client already has an export
 * and no TLS credentials are configured; otherwise the newstyle greeting
 * is sent, options are negotiated, and the export information is sent
 * afterwards.
 *
 * Returns 0 on success.  On failure returns a negative errno value:
 * -EINVAL for a failed write, or whatever nbd_negotiate_options()
 * reported.
 */
static coroutine_fn int nbd_negotiate(NBDClientNewData *data)
{
    NBDClient *client = data->client;
    char buf[8 + 8 + 8 + 128];
    int rc;
    const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
                              NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
                              NBD_FLAG_SEND_WRITE_ZEROES);
    bool oldStyle;
    size_t len;

    /* Old style negotiation header without options
        [ 0 ..   7]   passwd       ("NBDMAGIC")
        [ 8 ..  15]   magic        (NBD_CLIENT_MAGIC)
        [16 ..  23]   size
        [24 ..  25]   server flags (0)
        [26 ..  27]   export flags
        [28 .. 151]   reserved     (0)

       New style negotiation header with options
        [ 0 ..   7]   passwd       ("NBDMAGIC")
        [ 8 ..  15]   magic        (NBD_OPTS_MAGIC)
        [16 ..  17]   server flags (0)
        ....options sent....
        [18 ..  25]   size
        [26 ..  27]   export flags
        [28 .. 151]   reserved     (0, omit if no_zeroes)
     */

    qio_channel_set_blocking(client->ioc, false, NULL);
    rc = -EINVAL;

    TRACE("Beginning negotiation.");
    memset(buf, 0, sizeof(buf));
    memcpy(buf, "NBDMAGIC", 8);

    oldStyle = client->exp != NULL && !client->tlscreds;
    if (oldStyle) {
        TRACE("advertising size %" PRIu64 " and flags %x",
              client->exp->size, client->exp->nbdflags | myflags);
        stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
        stq_be_p(buf + 16, client->exp->size);
        stw_be_p(buf + 26, client->exp->nbdflags | myflags);
    } else {
        stq_be_p(buf + 8, NBD_OPTS_MAGIC);
        stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);
    }

    if (oldStyle) {
        /* Defensive: oldStyle already implies that tlscreds is unset. */
        if (client->tlscreds) {
            TRACE("TLS cannot be enabled with oldstyle protocol");
            goto fail;
        }
        if (nbd_negotiate_write(client->ioc, buf, sizeof(buf)) < 0) {
            LOG("write failed");
            goto fail;
        }
    } else {
        /* Greeting only: passwd, magic and the 16-bit server flags. */
        if (nbd_negotiate_write(client->ioc, buf, 18) < 0) {
            LOG("write failed");
            goto fail;
        }
        rc = nbd_negotiate_options(client);
        if (rc != 0) {
            LOG("option negotiation failed");
            goto fail;
        }

        TRACE("advertising size %" PRIu64 " and flags %x",
              client->exp->size, client->exp->nbdflags | myflags);
        stq_be_p(buf + 18, client->exp->size);
        stw_be_p(buf + 26, client->exp->nbdflags | myflags);
        /* 10 = size (8) + export flags (2); the zero padding is optional. */
        len = client->no_zeroes ? 10 : sizeof(buf) - 18;
        if (nbd_negotiate_write(client->ioc, buf + 18, len) < 0) {
            LOG("write failed");
            /* rc is 0 here after the successful option phase; without
             * this the failed write would be reported as success. */
            rc = -EINVAL;
            goto fail;
        }
    }

    TRACE("Negotiation succeeded.");
    rc = 0;
fail:
    return rc;
}

691
/*
 * Read one fixed-size request header from @ioc and decode it into
 * @request.  Returns 0 on success, the negative nbd_read() result if the
 * read fails, or -EINVAL if the magic number is wrong (in which case
 * @request has still been filled in from the received bytes).
 */
static ssize_t nbd_receive_request(QIOChannel *ioc, NBDRequest *request)
{
    uint8_t raw[NBD_REQUEST_SIZE];
    uint32_t magic;
    ssize_t rc;

    rc = nbd_read(ioc, raw, sizeof(raw), NULL);
    if (rc < 0) {
        return rc;
    }

    /* Request
       [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
       [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
       [ 6 ..  7]   type    (NBD_CMD_READ, ...)
       [ 8 .. 15]   handle
       [16 .. 23]   from
       [24 .. 27]   len
     */
    magic           = ldl_be_p(raw);
    request->flags  = lduw_be_p(raw + 4);
    request->type   = lduw_be_p(raw + 6);
    request->handle = ldq_be_p(raw + 8);
    request->from   = ldq_be_p(raw + 16);
    request->len    = ldl_be_p(raw + 24);

    TRACE("Got request: { magic = 0x%" PRIx32 ", .flags = %" PRIx16
          ", .type = %" PRIx16 ", from = %" PRIu64 ", len = %" PRIu32 " }",
          magic, request->flags, request->type, request->from, request->len);

    if (magic == NBD_REQUEST_MAGIC) {
        return 0;
    }

    LOG("invalid magic (got 0x%" PRIx32 ")", magic);
    return -EINVAL;
}

729
/*
 * Serialise @reply and write it to @ioc.  reply->error is converted in
 * place from a host errno to its NBD wire value before it is sent.
 * Returns the result of nbd_write().
 */
static ssize_t nbd_send_reply(QIOChannel *ioc, NBDReply *reply)
{
    uint8_t wire[NBD_REPLY_SIZE];

    reply->error = system_errno_to_nbd_errno(reply->error);

    TRACE("Sending response to client: { .error = %" PRId32
          ", handle = %" PRIu64 " }",
          reply->error, reply->handle);

    /* Reply
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
       [ 4 ..  7]    error   (0 == no error)
       [ 8 .. 15]    handle
     */
    stl_be_p(wire, NBD_REPLY_MAGIC);
    stl_be_p(wire + 4, reply->error);
    stq_be_p(wire + 8, reply->handle);

    return nbd_write(ioc, wire, sizeof(wire), NULL);
}
B
bellard 已提交
750

P
Paolo Bonzini 已提交
751 752
/* Cap on client->nb_requests; nbd_request_get() asserts it stays below this. */
#define MAX_NBD_REQUESTS 16

753
/* Take an additional reference on @client. */
void nbd_client_get(NBDClient *client)
{
    client->refcount += 1;
}

758
/*
 * Drop one reference on @client.  When the count reaches zero the client
 * must already be closing; its channels, TLS state and export membership
 * are released and the structure is freed.
 */
void nbd_client_put(NBDClient *client)
{
    client->refcount--;
    if (client->refcount != 0) {
        return;
    }

    /* The last reference should be dropped by client->close,
     * which is called by client_close.
     */
    assert(client->closing);

    qio_channel_detach_aio_context(client->ioc);
    object_unref(OBJECT(client->sioc));
    object_unref(OBJECT(client->ioc));
    if (client->tlscreds) {
        object_unref(OBJECT(client->tlscreds));
    }
    g_free(client->tlsaclname);

    if (client->exp) {
        /* Leave the export's client list and give back its reference. */
        QTAILQ_REMOVE(&client->exp->clients, client, next);
        nbd_export_put(client->exp);
    }

    g_free(client);
}

781
/*
 * Begin closing @client; repeated calls after the first are no-ops.
 * @negotiated is forwarded unchanged to the client's close_fn hook.
 */
static void client_close(NBDClient *client, bool negotiated)
{
    if (!client->closing) {
        client->closing = true;

        /* Force requests to finish.  They will drop their own references,
         * then we'll close the socket and free the NBDClient.
         */
        qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH,
                             NULL);

        /* Also tell the client, so that they release their reference.  */
        if (client->close_fn) {
            client->close_fn(client, negotiated);
        }
    }
}

801
/*
 * Allocate a zeroed request record for @client.  The client's in-flight
 * counter is bumped (it must be below MAX_NBD_REQUESTS beforehand) and
 * the record holds a client reference until nbd_request_put().
 */
static NBDRequestData *nbd_request_get(NBDClient *client)
{
    NBDRequestData *request;

    assert(client->nb_requests < MAX_NBD_REQUESTS);
    client->nb_requests += 1;

    request = g_new0(NBDRequestData, 1);
    nbd_client_get(client);
    request->client = client;

    return request;
}

814
/*
 * Release a request record obtained from nbd_request_get(): free its data
 * buffer (if any) and the record itself, decrement the owner's in-flight
 * counter, let the client look for its next request, and finally drop
 * the client reference the record held.
 */
static void nbd_request_put(NBDRequestData *req)
{
    /* Remember the owner; req is freed before the owner is used. */
    NBDClient *owner = req->client;

    if (req->data != NULL) {
        qemu_vfree(req->data);
    }
    g_free(req);

    owner->nb_requests -= 1;
    nbd_client_receive_next_request(owner);

    nbd_client_put(owner);
}

M
Max Reitz 已提交
829
/*
 * AioContext notifier registered in nbd_export_new(): the export's
 * backend has been attached to @ctx.  @opaque is the NBDExport.
 *
 * Records the new context and, for every client of the export, attaches
 * its channel to @ctx and reschedules any recorded receive/send
 * coroutine there.
 */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    NBDExport *exp = opaque;
    NBDClient *client;

    /* No trailing newline, matching every other TRACE call in this file. */
    TRACE("Export %s: Attaching clients to AIO context %p", exp->name, ctx);

    exp->ctx = ctx;

    QTAILQ_FOREACH(client, &exp->clients, next) {
        qio_channel_attach_aio_context(client->ioc, ctx);
        if (client->recv_coroutine) {
            aio_co_schedule(ctx, client->recv_coroutine);
        }
        if (client->send_coroutine) {
            aio_co_schedule(ctx, client->send_coroutine);
        }
    }
}

M
Max Reitz 已提交
849
static void blk_aio_detach(void *opaque)
M
Max Reitz 已提交
850 851 852 853 854 855 856
{
    NBDExport *exp = opaque;
    NBDClient *client;

    TRACE("Export %s: Detaching clients from AIO context %p\n", exp->name, exp->ctx);

    QTAILQ_FOREACH(client, &exp->clients, next) {
857
        qio_channel_detach_aio_context(client->ioc);
M
Max Reitz 已提交
858 859 860 861 862
    }

    exp->ctx = NULL;
}

863 864 865 866 867 868
/*
 * Notifier callback installed by nbd_export_new() on the on_eject_blk
 * backend: closes the export that embeds @n.  @data is not used.
 */
static void nbd_eject_notifier(Notifier *n, void *data)
{
    nbd_export_close(container_of(n, NBDExport, eject_notifier));
}

869
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
E
Eric Blake 已提交
870
                          uint16_t nbdflags, void (*close)(NBDExport *),
871
                          bool writethrough, BlockBackend *on_eject_blk,
M
Max Reitz 已提交
872
                          Error **errp)
P
Paolo Bonzini 已提交
873
{
874
    BlockBackend *blk;
P
Paolo Bonzini 已提交
875
    NBDExport *exp = g_malloc0(sizeof(NBDExport));
876
    uint64_t perm;
877
    int ret;
878

879 880 881 882 883 884 885 886
    /* Don't allow resize while the NBD server is running, otherwise we don't
     * care what happens with the node. */
    perm = BLK_PERM_CONSISTENT_READ;
    if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
        perm |= BLK_PERM_WRITE;
    }
    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                        BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
887 888 889 890
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        goto fail;
    }
891 892
    blk_set_enable_write_cache(blk, !writethrough);

893
    exp->refcount = 1;
894
    QTAILQ_INIT(&exp->clients);
M
Max Reitz 已提交
895
    exp->blk = blk;
P
Paolo Bonzini 已提交
896 897
    exp->dev_offset = dev_offset;
    exp->nbdflags = nbdflags;
M
Max Reitz 已提交
898 899 900 901 902 903 904 905
    exp->size = size < 0 ? blk_getlength(blk) : size;
    if (exp->size < 0) {
        error_setg_errno(errp, -exp->size,
                         "Failed to determine the NBD export's length");
        goto fail;
    }
    exp->size -= exp->size % BDRV_SECTOR_SIZE;

906
    exp->close = close;
M
Max Reitz 已提交
907 908
    exp->ctx = blk_get_aio_context(blk);
    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
909

910 911 912 913 914 915
    if (on_eject_blk) {
        blk_ref(on_eject_blk);
        exp->eject_notifier_blk = on_eject_blk;
        exp->eject_notifier.notify = nbd_eject_notifier;
        blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
    }
916

917 918
    /*
     * NBD exports are used for non-shared storage migration.  Make sure
919
     * that BDRV_O_INACTIVE is cleared and the image is ready for write
920 921
     * access since the export could be available before migration handover.
     */
922
    aio_context_acquire(exp->ctx);
M
Max Reitz 已提交
923
    blk_invalidate_cache(blk, NULL);
924
    aio_context_release(exp->ctx);
P
Paolo Bonzini 已提交
925
    return exp;
M
Max Reitz 已提交
926 927

fail:
928
    blk_unref(blk);
M
Max Reitz 已提交
929 930
    g_free(exp);
    return NULL;
P
Paolo Bonzini 已提交
931 932
}

P
Paolo Bonzini 已提交
933 934 935 936 937 938 939 940 941 942 943 944 945 946 947 948 949 950 951 952 953 954 955 956 957 958 959 960 961 962 963 964 965
/*
 * Look up an export by exact name on the 'exports' list.
 * Returns the first match, or NULL if no export carries that name.
 */
NBDExport *nbd_export_find(const char *name)
{
    NBDExport *candidate;

    QTAILQ_FOREACH(candidate, &exports, next) {
        if (!strcmp(candidate->name, name)) {
            return candidate;
        }
    }

    return NULL;
}

/*
 * Change the name under which @exp is published.
 *
 * An export is linked into the 'exports' list exactly while it has a
 * non-NULL name, and that list membership holds one reference.  Passing
 * NULL for @name therefore unpublishes the export.  A temporary
 * reference is held across the whole operation so that dropping the
 * list's reference cannot free @exp in the middle of the update.
 */
void nbd_export_set_name(NBDExport *exp, const char *name)
{
    if (exp->name != name) {
        nbd_export_get(exp);

        if (exp->name) {
            /* Unpublish: forget the old name and drop the list's reference. */
            g_free(exp->name);
            exp->name = NULL;
            QTAILQ_REMOVE(&exports, exp, next);
            nbd_export_put(exp);
        }

        if (name) {
            /* Publish under the new name; the list keeps its own reference. */
            nbd_export_get(exp);
            exp->name = g_strdup(name);
            QTAILQ_INSERT_TAIL(&exports, exp, next);
        }

        nbd_export_put(exp);
    }
}

966 967 968 969 970 971
/*
 * Replace the description string of @exp with a private copy of
 * @description.  Passing NULL clears the description.
 *
 * The new string is duplicated before the old one is released, so the
 * call stays well-defined even if @description points at (or into) the
 * string currently stored in the export.
 */
void nbd_export_set_description(NBDExport *exp, const char *description)
{
    char *old = exp->description;

    exp->description = g_strdup(description);
    g_free(old);
}

P
Paolo Bonzini 已提交
972 973
/*
 * Shut down an export: close every client attached to it, then clear
 * its name and description.
 *
 * A temporary reference is taken for the duration of the call so that
 * the references dropped while tearing things down (for instance by
 * clearing the name) cannot free @exp before this function is done
 * with it.
 */
void nbd_export_close(NBDExport *exp)
{
    NBDClient *client, *next;

    nbd_export_get(exp);
    /* _SAFE variant: the successor is fetched before each client is closed. */
    QTAILQ_FOREACH_SAFE(client, &exp->clients, next, next) {
        client_close(client, true);
    }
    nbd_export_set_name(exp, NULL);
    nbd_export_set_description(exp, NULL);
    nbd_export_put(exp);
}

/*
 * Take an additional reference on @exp.  The export must already be
 * referenced (refcount > 0); this is asserted.
 */
void nbd_export_get(NBDExport *exp)
{
    assert(exp->refcount > 0);
    exp->refcount += 1;
}

/*
 * Drop one reference on @exp and destroy the export once the count
 * reaches zero.
 *
 * When the caller holds the last reference, the export is closed first
 * (nbd_export_close() takes and releases its own temporary reference),
 * which leaves name and description cleared before destruction.
 *
 * Destruction invokes the optional exp->close callback, removes the
 * eject notifier and the AIO context notifier, releases the block
 * backend references and finally frees the structure.
 */
void nbd_export_put(NBDExport *exp)
{
    assert(exp->refcount > 0);
    if (exp->refcount == 1) {
        nbd_export_close(exp);
    }

    if (--exp->refcount == 0) {
        /* nbd_export_close() above must have cleared both strings. */
        assert(exp->name == NULL);
        assert(exp->description == NULL);

        if (exp->close) {
            exp->close(exp);
        }

        if (exp->blk) {
            if (exp->eject_notifier_blk) {
                notifier_remove(&exp->eject_notifier);
                blk_unref(exp->eject_notifier_blk);
            }
            blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                            blk_aio_detach, exp);
            blk_unref(exp->blk);
            exp->blk = NULL;
        }

        g_free(exp);
    }
}

1021
/*
 * Return the BlockBackend stored in @exp.  No additional reference is
 * taken on the returned backend.
 */
BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
{
    return exp->blk;
}

P
Paolo Bonzini 已提交
1026 1027 1028 1029 1030 1031 1032 1033 1034
void nbd_export_close_all(void)
{
    NBDExport *exp, *next;

    QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
        nbd_export_close(exp);
    }
}

1035
/*
 * Send @reply to the client that owns @req, followed by @len bytes of
 * payload taken from req->data when @len is non-zero.
 *
 * Must be called from coroutine context.  client->send_lock is held for
 * the whole transmission and client->send_coroutine records the sending
 * coroutine while it is in progress.
 *
 * Returns the result of nbd_send_reply(), or -EIO if the header was
 * sent but writing the payload failed.
 */
static ssize_t nbd_co_send_reply(NBDRequestData *req, NBDReply *reply,
                                 int len)
{
    NBDClient *client = req->client;
    ssize_t rc;

    g_assert(qemu_in_coroutine());
    qemu_co_mutex_lock(&client->send_lock);
    client->send_coroutine = qemu_coroutine_self();

    if (!len) {
        rc = nbd_send_reply(client->ioc, reply);
    } else {
        /* Cork the channel around the header + payload pair. */
        qio_channel_set_cork(client->ioc, true);
        rc = nbd_send_reply(client->ioc, reply);
        if (rc >= 0) {
            if (nbd_write(client->ioc, req->data, len, NULL) < 0) {
                rc = -EIO;
            }
        }
        qio_channel_set_cork(client->ioc, false);
    }

    client->send_coroutine = NULL;
    qemu_co_mutex_unlock(&client->send_lock);
    return rc;
}

1064 1065 1066 1067 1068
/* Collect a client request.  Return 0 if request looks valid, -EAGAIN
 * to keep trying the collection, -EIO to drop connection right away,
 * and any other negative value to report an error to the client
 * (although the caller may still need to disconnect after reporting
 * the error).  */
1069
static ssize_t nbd_co_receive_request(NBDRequestData *req,
1070
                                      NBDRequest *request)
1071
{
1072
    NBDClient *client = req->client;
P
Paolo Bonzini 已提交
1073
    ssize_t rc;
1074

1075
    g_assert(qemu_in_coroutine());
1076
    assert(client->recv_coroutine == qemu_coroutine_self());
1077
    rc = nbd_receive_request(client->ioc, request);
1078 1079 1080 1081
    if (rc < 0) {
        if (rc != -EAGAIN) {
            rc = -EIO;
        }
1082 1083 1084
        goto out;
    }

1085 1086
    TRACE("Decoding type");

1087
    if (request->type != NBD_CMD_WRITE) {
1088 1089 1090 1091
        /* No payload, we are ready to read the next request.  */
        req->complete = true;
    }

1092
    if (request->type == NBD_CMD_DISC) {
1093 1094 1095 1096 1097 1098 1099 1100 1101 1102
        /* Special case: we're going to disconnect without a reply,
         * whether or not flags, from, or len are bogus */
        TRACE("Request type is DISCONNECT");
        rc = -EIO;
        goto out;
    }

    /* Check for sanity in the parameters, part 1.  Defer as many
     * checks as possible until after reading any NBD_CMD_WRITE
     * payload, so we can try and keep the connection alive.  */
1103
    if ((request->from + request->len) < request->from) {
1104
        LOG("integer overflow detected, you're probably being attacked");
1105 1106 1107 1108
        rc = -EINVAL;
        goto out;
    }

1109
    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
1110
        if (request->len > NBD_MAX_BUFFER_SIZE) {
1111
            LOG("len (%" PRIu32" ) is larger than max len (%u)",
1112 1113 1114 1115 1116
                request->len, NBD_MAX_BUFFER_SIZE);
            rc = -EINVAL;
            goto out;
        }

1117 1118 1119 1120 1121
        req->data = blk_try_blockalign(client->exp->blk, request->len);
        if (req->data == NULL) {
            rc = -ENOMEM;
            goto out;
        }
1122
    }
1123
    if (request->type == NBD_CMD_WRITE) {
1124
        TRACE("Reading %" PRIu32 " byte(s)", request->len);
1125

1126
        if (nbd_read(client->ioc, req->data, request->len, NULL) < 0) {
1127 1128 1129 1130
            LOG("reading from socket failed");
            rc = -EIO;
            goto out;
        }
1131
        req->complete = true;
1132
    }
1133 1134 1135 1136 1137 1138

    /* Sanity checks, part 2. */
    if (request->from + request->len > client->exp->size) {
        LOG("operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
            ", Size: %" PRIu64, request->from, request->len,
            (uint64_t)client->exp->size);
1139
        rc = request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
1140 1141
        goto out;
    }
1142
    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
1143
        LOG("unsupported flags (got 0x%x)", request->flags);
1144 1145
        rc = -EINVAL;
        goto out;
E
Eric Blake 已提交
1146
    }
1147 1148 1149 1150 1151 1152
    if (request->type != NBD_CMD_WRITE_ZEROES &&
        (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
        LOG("unexpected flags (got 0x%x)", request->flags);
        rc = -EINVAL;
        goto out;
    }
1153

1154 1155 1156
    rc = 0;

out:
P
Paolo Bonzini 已提交
1157
    client->recv_coroutine = NULL;
1158
    nbd_client_receive_next_request(client);
M
Max Reitz 已提交
1159

1160 1161 1162
    return rc;
}

1163 1164
/* Owns a reference to the NBDClient passed as opaque.  */
static coroutine_fn void nbd_trip(void *opaque)
1165
{
P
Paolo Bonzini 已提交
1166
    NBDClient *client = opaque;
1167
    NBDExport *exp = client->exp;
1168
    NBDRequestData *req;
1169
    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
1170
    NBDReply reply;
P
Paolo Bonzini 已提交
1171
    ssize_t ret;
1172
    int flags;
N
Nick Thomas 已提交
1173 1174

    TRACE("Reading request.");
1175
    if (client->closing) {
1176
        nbd_client_put(client);
1177 1178
        return;
    }
N
Nick Thomas 已提交
1179

1180
    req = nbd_request_get(client);
P
Paolo Bonzini 已提交
1181
    ret = nbd_co_receive_request(req, &request);
1182 1183 1184
    if (ret == -EAGAIN) {
        goto done;
    }
1185
    if (ret == -EIO) {
P
Paolo Bonzini 已提交
1186
        goto out;
1187
    }
N
Nick Thomas 已提交
1188

1189 1190 1191
    reply.handle = request.handle;
    reply.error = 0;

1192 1193 1194
    if (ret < 0) {
        reply.error = -ret;
        goto error_reply;
N
Nick Thomas 已提交
1195 1196
    }

1197 1198 1199 1200 1201 1202 1203 1204
    if (client->closing) {
        /*
         * The client may be closed when we are blocked in
         * nbd_co_receive_request()
         */
        goto done;
    }

1205
    switch (request.type) {
N
Nick Thomas 已提交
1206 1207 1208
    case NBD_CMD_READ:
        TRACE("Request type is READ");

1209 1210
        /* XXX: NBD Protocol only documents use of FUA with WRITE */
        if (request.flags & NBD_CMD_FLAG_FUA) {
M
Max Reitz 已提交
1211
            ret = blk_co_flush(exp->blk);
P
Paolo Bonzini 已提交
1212 1213 1214 1215 1216 1217 1218
            if (ret < 0) {
                LOG("flush failed");
                reply.error = -ret;
                goto error_reply;
            }
        }

1219 1220
        ret = blk_pread(exp->blk, request.from + exp->dev_offset,
                        req->data, request.len);
1221
        if (ret < 0) {
N
Nick Thomas 已提交
1222
            LOG("reading from file failed");
1223
            reply.error = -ret;
1224
            goto error_reply;
N
Nick Thomas 已提交
1225 1226
        }

1227
        TRACE("Read %" PRIu32" byte(s)", request.len);
P
Paolo Bonzini 已提交
1228
        if (nbd_co_send_reply(req, &reply, request.len) < 0)
P
Paolo Bonzini 已提交
1229
            goto out;
N
Nick Thomas 已提交
1230 1231 1232 1233
        break;
    case NBD_CMD_WRITE:
        TRACE("Request type is WRITE");

P
Paolo Bonzini 已提交
1234
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
N
Nick Thomas 已提交
1235
            TRACE("Server is read-only, return error");
1236 1237 1238 1239 1240 1241
            reply.error = EROFS;
            goto error_reply;
        }

        TRACE("Writing to device");

1242
        flags = 0;
1243
        if (request.flags & NBD_CMD_FLAG_FUA) {
1244 1245
            flags |= BDRV_REQ_FUA;
        }
1246
        ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
1247
                         req->data, request.len, flags);
1248 1249 1250 1251 1252
        if (ret < 0) {
            LOG("writing to file failed");
            reply.error = -ret;
            goto error_reply;
        }
N
Nick Thomas 已提交
1253

1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
            goto out;
        }
        break;

    case NBD_CMD_WRITE_ZEROES:
        TRACE("Request type is WRITE_ZEROES");

        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
            TRACE("Server is read-only, return error");
            reply.error = EROFS;
            goto error_reply;
        }

        TRACE("Writing to device");

        flags = 0;
        if (request.flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
                                request.len, flags);
        if (ret < 0) {
            LOG("writing to file failed");
            reply.error = -ret;
            goto error_reply;
        }

1285
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
1286
            goto out;
1287
        }
N
Nick Thomas 已提交
1288
        break;
1289

N
Nick Thomas 已提交
1290
    case NBD_CMD_DISC:
1291 1292 1293
        /* unreachable, thanks to special case in nbd_co_receive_request() */
        abort();

P
Paolo Bonzini 已提交
1294 1295 1296
    case NBD_CMD_FLUSH:
        TRACE("Request type is FLUSH");

M
Max Reitz 已提交
1297
        ret = blk_co_flush(exp->blk);
P
Paolo Bonzini 已提交
1298 1299 1300 1301
        if (ret < 0) {
            LOG("flush failed");
            reply.error = -ret;
        }
1302
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
1303
            goto out;
1304
        }
P
Paolo Bonzini 已提交
1305 1306 1307
        break;
    case NBD_CMD_TRIM:
        TRACE("Request type is TRIM");
1308 1309 1310 1311 1312
        ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
                              request.len);
        if (ret < 0) {
            LOG("discard failed");
            reply.error = -ret;
P
Paolo Bonzini 已提交
1313
        }
1314
        if (nbd_co_send_reply(req, &reply, 0) < 0) {
P
Paolo Bonzini 已提交
1315
            goto out;
1316
        }
P
Paolo Bonzini 已提交
1317
        break;
N
Nick Thomas 已提交
1318
    default:
1319
        LOG("invalid request type (%" PRIu32 ") received", request.type);
Y
Yik Fang 已提交
1320
        reply.error = EINVAL;
1321
    error_reply:
1322 1323 1324 1325
        /* We must disconnect after NBD_CMD_WRITE if we did not
         * read the payload.
         */
        if (nbd_co_send_reply(req, &reply, 0) < 0 || !req->complete) {
P
Paolo Bonzini 已提交
1326
            goto out;
1327
        }
1328
        break;
N
Nick Thomas 已提交
1329 1330 1331 1332
    }

    TRACE("Request/Reply complete");

1333
done:
P
Paolo Bonzini 已提交
1334
    nbd_request_put(req);
1335
    nbd_client_put(client);
P
Paolo Bonzini 已提交
1336 1337
    return;

P
Paolo Bonzini 已提交
1338
out:
1339
    nbd_request_put(req);
1340
    client_close(client, true);
1341
    nbd_client_put(client);
B
bellard 已提交
1342
}
P
Paolo Bonzini 已提交
1343

1344
/*
 * Start a new receive coroutine for @client if none is currently
 * recorded in client->recv_coroutine and fewer than MAX_NBD_REQUESTS
 * requests are outstanding.
 *
 * The coroutine runs nbd_trip() and is scheduled on the export's AIO
 * context.  It is handed its own reference to the client, taken here.
 */
static void nbd_client_receive_next_request(NBDClient *client)
{
    if (!client->recv_coroutine && client->nb_requests < MAX_NBD_REQUESTS) {
        nbd_client_get(client);
        client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
        aio_co_schedule(client->exp->ctx, client->recv_coroutine);
    }
}

1353 1354 1355 1356 1357 1358 1359 1360
static coroutine_fn void nbd_co_client_start(void *opaque)
{
    NBDClientNewData *data = opaque;
    NBDClient *client = data->client;
    NBDExport *exp = client->exp;

    if (exp) {
        nbd_export_get(exp);
1361
        QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1362
    }
1363 1364
    qemu_co_mutex_init(&client->send_lock);

1365
    if (nbd_negotiate(data)) {
1366
        client_close(client, false);
1367 1368
        goto out;
    }
1369 1370 1371

    nbd_client_receive_next_request(client);

1372 1373 1374 1375
out:
    g_free(data);
}

1376 1377 1378 1379 1380 1381
/*
 * Create a new client listener on the given export @exp, using the
 * given channel @sioc.  Begin servicing it in a coroutine.  When the
 * connection closes, call @close_fn with an indication of whether the
 * client completed negotiation.
 */
1382 1383
void nbd_client_new(NBDExport *exp,
                    QIOChannelSocket *sioc,
1384 1385
                    QCryptoTLSCreds *tlscreds,
                    const char *tlsaclname,
1386
                    void (*close_fn)(NBDClient *, bool))
P
Paolo Bonzini 已提交
1387
{
1388
    NBDClient *client;
1389 1390
    NBDClientNewData *data = g_new(NBDClientNewData, 1);

1391 1392 1393
    client = g_malloc0(sizeof(NBDClient));
    client->refcount = 1;
    client->exp = exp;
1394 1395 1396 1397 1398
    client->tlscreds = tlscreds;
    if (tlscreds) {
        object_ref(OBJECT(client->tlscreds));
    }
    client->tlsaclname = g_strdup(tlsaclname);
1399 1400 1401 1402
    client->sioc = sioc;
    object_ref(OBJECT(client->sioc));
    client->ioc = QIO_CHANNEL(sioc);
    object_ref(OBJECT(client->ioc));
1403
    client->close_fn = close_fn;
1404

1405
    data->client = client;
1406 1407
    data->co = qemu_coroutine_create(nbd_co_client_start, data);
    qemu_coroutine_enter(data->co);
P
Paolo Bonzini 已提交
1408
}