/*
 *  Copyright (C) 2016-2017 Red Hat, Inc.
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device Server Side
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */
B
bellard 已提交
19

P
Peter Maydell 已提交
20
#include "qemu/osdep.h"
21
#include "qapi/error.h"
22
#include "trace.h"
F
Fam Zheng 已提交
23
#include "nbd-internal.h"
24 25 26 27 28 29 30

static int system_errno_to_nbd_errno(int err)
{
    switch (err) {
    case 0:
        return NBD_SUCCESS;
    case EPERM:
31
    case EROFS:
32 33 34 35 36 37 38 39 40 41 42
        return NBD_EPERM;
    case EIO:
        return NBD_EIO;
    case ENOMEM:
        return NBD_ENOMEM;
#ifdef EDQUOT
    case EDQUOT:
#endif
    case EFBIG:
    case ENOSPC:
        return NBD_ENOSPC;
43 44
    case ESHUTDOWN:
        return NBD_ESHUTDOWN;
45 46 47 48 49 50
    case EINVAL:
    default:
        return NBD_EINVAL;
    }
}

51 52
/* Definitions for opaque data types */

53
typedef struct NBDRequestData NBDRequestData;
54

55 56
struct NBDRequestData {
    QSIMPLEQ_ENTRY(NBDRequestData) entry;
57 58
    NBDClient *client;
    uint8_t *data;
59
    bool complete;
60 61 62
};

struct NBDExport {
63
    int refcount;
64 65
    void (*close)(NBDExport *exp);

M
Max Reitz 已提交
66
    BlockBackend *blk;
P
Paolo Bonzini 已提交
67
    char *name;
68
    char *description;
69 70
    off_t dev_offset;
    off_t size;
E
Eric Blake 已提交
71
    uint16_t nbdflags;
72
    QTAILQ_HEAD(, NBDClient) clients;
P
Paolo Bonzini 已提交
73
    QTAILQ_ENTRY(NBDExport) next;
M
Max Reitz 已提交
74 75

    AioContext *ctx;
76

77
    BlockBackend *eject_notifier_blk;
78
    Notifier eject_notifier;
79 80
};

P
Paolo Bonzini 已提交
81 82
/* File-wide list of exports, linked through NBDExport.next; walked when
 * answering NBD_OPT_LIST. */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);

83 84
struct NBDClient {
    int refcount;
85
    void (*close_fn)(NBDClient *client, bool negotiated);
86 87

    NBDExport *exp;
88 89
    QCryptoTLSCreds *tlscreds;
    char *tlsaclname;
90 91
    QIOChannelSocket *sioc; /* The underlying data channel */
    QIOChannel *ioc; /* The current I/O channel which may differ (eg TLS) */
92 93 94 95 96 97

    Coroutine *recv_coroutine;

    CoMutex send_lock;
    Coroutine *send_coroutine;

98
    QTAILQ_ENTRY(NBDClient) next;
99
    int nb_requests;
100
    bool closing;
101 102
};

B
bellard 已提交
103 104
/* That's all folks */

105
/* Forward declaration so that code preceding the definition can call it. */
static void nbd_client_receive_next_request(NBDClient *client);
M
Max Reitz 已提交
106

107
/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)

*/

134 135 136
/* Send a reply header, including length, but no payload.
 *
 * The header is written as four big-endian fields in this order:
 * NBD_REP_MAGIC (64 bit), @opt, @type and @len (32 bit each).
 *
 * @ioc:  channel to write to
 * @type: reply type (NBD_REP_*)
 * @opt:  option being answered (NBD_OPT_*)
 * @len:  size of the payload the caller will send afterwards; must be
 *        below NBD_MAX_BUFFER_SIZE (asserted)
 * @errp: set on failure, with a prefix naming the field that failed
 *
 * Return -errno on error, 0 on success. */
static int nbd_negotiate_send_rep_len(QIOChannel *ioc, uint32_t type,
                                      uint32_t opt, uint32_t len, Error **errp)
{
    uint64_t magic;

    trace_nbd_negotiate_send_rep_len(opt, nbd_opt_lookup(opt),
                                     type, nbd_rep_lookup(type), len);

    assert(len < NBD_MAX_BUFFER_SIZE);
    magic = cpu_to_be64(NBD_REP_MAGIC);
    if (nbd_write(ioc, &magic, sizeof(magic), errp) < 0) {
        error_prepend(errp, "write failed (rep magic): ");
        return -EINVAL;
    }

    /* The by-value parameters are byte-swapped in place before sending */
    opt = cpu_to_be32(opt);
    if (nbd_write(ioc, &opt, sizeof(opt), errp) < 0) {
        error_prepend(errp, "write failed (rep opt): ");
        return -EINVAL;
    }

    type = cpu_to_be32(type);
    if (nbd_write(ioc, &type, sizeof(type), errp) < 0) {
        error_prepend(errp, "write failed (rep type): ");
        return -EINVAL;
    }

    len = cpu_to_be32(len);
    if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
        error_prepend(errp, "write failed (rep data length): ");
        return -EINVAL;
    }
    return 0;
}
170

171 172
/* Send a reply header with default 0 length.
 * Return -errno on error, 0 on success. */
173 174
static int nbd_negotiate_send_rep(QIOChannel *ioc, uint32_t type, uint32_t opt,
                                  Error **errp)
175
{
176
    return nbd_negotiate_send_rep_len(ioc, type, opt, 0, errp);
177 178
}

179 180
/* Send an error reply.
 * Return -errno on error, 0 on success. */
181
static int GCC_FMT_ATTR(5, 6)
182
nbd_negotiate_send_rep_err(QIOChannel *ioc, uint32_t type,
183
                           uint32_t opt, Error **errp, const char *fmt, ...)
184 185 186 187 188 189 190 191 192 193 194
{
    va_list va;
    char *msg;
    int ret;
    size_t len;

    va_start(va, fmt);
    msg = g_strdup_vprintf(fmt, va);
    va_end(va);
    len = strlen(msg);
    assert(len < 4096);
195
    trace_nbd_negotiate_send_rep_err(msg);
196
    ret = nbd_negotiate_send_rep_len(ioc, type, opt, len, errp);
197 198 199
    if (ret < 0) {
        goto out;
    }
200 201
    if (nbd_write(ioc, msg, len, errp) < 0) {
        error_prepend(errp, "write failed (error message): ");
202 203 204 205
        ret = -EIO;
    } else {
        ret = 0;
    }
206

207 208 209 210 211
out:
    g_free(msg);
    return ret;
}

212 213
/* Send a single NBD_REP_SERVER reply to NBD_OPT_LIST, including payload.
 * Return -errno on error, 0 on success. */
214 215
static int nbd_negotiate_send_rep_list(QIOChannel *ioc, NBDExport *exp,
                                       Error **errp)
216
{
217
    size_t name_len, desc_len;
218
    uint32_t len;
219 220
    const char *name = exp->name ? exp->name : "";
    const char *desc = exp->description ? exp->description : "";
221
    int ret;
222

223
    trace_nbd_negotiate_send_rep_list(name, desc);
224 225
    name_len = strlen(name);
    desc_len = strlen(desc);
226
    len = name_len + desc_len + sizeof(len);
227 228
    ret = nbd_negotiate_send_rep_len(ioc, NBD_REP_SERVER, NBD_OPT_LIST, len,
                                     errp);
229 230
    if (ret < 0) {
        return ret;
231
    }
232

233
    len = cpu_to_be32(name_len);
234 235
    if (nbd_write(ioc, &len, sizeof(len), errp) < 0) {
        error_prepend(errp, "write failed (name length): ");
236 237
        return -EINVAL;
    }
238 239 240

    if (nbd_write(ioc, name, name_len, errp) < 0) {
        error_prepend(errp, "write failed (name buffer): ");
241 242
        return -EINVAL;
    }
243 244 245

    if (nbd_write(ioc, desc, desc_len, errp) < 0) {
        error_prepend(errp, "write failed (description buffer): ");
246 247
        return -EINVAL;
    }
248

249 250 251
    return 0;
}

252 253
/* Process the NBD_OPT_LIST command, with a potential series of replies.
 * Return -errno on error, 0 on success. */
254 255
static int nbd_negotiate_handle_list(NBDClient *client, uint32_t length,
                                     Error **errp)
256 257 258 259
{
    NBDExport *exp;

    if (length) {
260
        if (nbd_drop(client->ioc, length, errp) < 0) {
261 262
            return -EIO;
        }
263 264
        return nbd_negotiate_send_rep_err(client->ioc,
                                          NBD_REP_ERR_INVALID, NBD_OPT_LIST,
265
                                          errp,
266
                                          "OPT_LIST should not have length");
267 268 269 270
    }

    /* For each export, send a NBD_REP_SERVER reply. */
    QTAILQ_FOREACH(exp, &exports, next) {
271
        if (nbd_negotiate_send_rep_list(client->ioc, exp, errp)) {
272 273 274 275
            return -EINVAL;
        }
    }
    /* Finish with a NBD_REP_ACK. */
276
    return nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, NBD_OPT_LIST, errp);
277 278
}

E
Eric Blake 已提交
279 280
/* Send a reply to NBD_OPT_EXPORT_NAME.
 * Return -errno on error, 0 on success. */
281
static int nbd_negotiate_handle_export_name(NBDClient *client, uint32_t length,
282
                                            uint16_t myflags, bool no_zeroes,
283
                                            Error **errp)
284
{
285
    char name[NBD_MAX_NAME_SIZE + 1];
286
    char buf[NBD_REPLY_EXPORT_NAME_SIZE] = "";
287 288
    size_t len;
    int ret;
289

290 291
    /* Client sends:
        [20 ..  xx]   export name (length bytes)
292 293 294 295
       Server replies:
        [ 0 ..   7]   size
        [ 8 ..   9]   export flags
        [10 .. 133]   reserved     (0) [unless no_zeroes]
296
     */
297
    trace_nbd_negotiate_handle_export_name();
298
    if (length >= sizeof(name)) {
299
        error_setg(errp, "Bad length received");
300
        return -EINVAL;
301
    }
302 303
    if (nbd_read(client->ioc, name, length, errp) < 0) {
        error_prepend(errp, "read failed: ");
304
        return -EINVAL;
305 306 307
    }
    name[length] = '\0';

308
    trace_nbd_negotiate_handle_export_name_request(name);
309

310 311
    client->exp = nbd_export_find(name);
    if (!client->exp) {
312
        error_setg(errp, "export not found");
313
        return -EINVAL;
314 315
    }

316 317 318 319 320 321 322 323 324 325 326
    trace_nbd_negotiate_new_style_size_flags(client->exp->size,
                                             client->exp->nbdflags | myflags);
    stq_be_p(buf, client->exp->size);
    stw_be_p(buf + 8, client->exp->nbdflags | myflags);
    len = no_zeroes ? 10 : sizeof(buf);
    ret = nbd_write(client->ioc, buf, len, errp);
    if (ret < 0) {
        error_prepend(errp, "write failed: ");
        return ret;
    }

327 328
    QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
    nbd_export_get(client->exp);
329 330

    return 0;
331 332
}

E
Eric Blake 已提交
333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371
/* Emit one NBD_REP_INFO reply for option @opt.
 *
 * The reply payload is the 16-bit big-endian info type @info followed by
 * @length bytes taken from @buf; @buf itself must NOT contain the info
 * type prefix.
 *
 * Return -errno on error, 0 if ready to send more. */
static int nbd_negotiate_send_info(NBDClient *client, uint32_t opt,
                                   uint16_t info, uint32_t length, void *buf,
                                   Error **errp)
{
    QIOChannel *chan = client->ioc;
    int status;

    trace_nbd_negotiate_send_info(info, nbd_info_lookup(info), length);

    /* Header first: payload size covers the type prefix plus the data */
    status = nbd_negotiate_send_rep_len(chan, NBD_REP_INFO, opt,
                                        sizeof(info) + length, errp);
    if (status < 0) {
        return status;
    }

    /* Then the info type in network byte order, then the caller's data */
    cpu_to_be16s(&info);
    if (nbd_write(chan, &info, sizeof(info), errp) < 0 ||
        nbd_write(chan, buf, length, errp) < 0) {
        return -EIO;
    }

    return 0;
}

/* Handle NBD_OPT_INFO and NBD_OPT_GO.
 *
 * Parses the client's request (export name plus a list of requested
 * information items), then answers with a series of NBD_REP_INFO replies
 * followed by NBD_REP_ACK.  Malformed requests are answered with
 * NBD_REP_ERR_INVALID after the rest of the payload has been discarded.
 *
 * @length:  payload size announced by the client for this option
 * @opt:     NBD_OPT_INFO or NBD_OPT_GO
 * @myflags: server flags OR-ed into the export's own flags
 *
 * Return -errno on error, 0 if ready for next option, and 1 to move
 * into transmission phase.  */
static int nbd_negotiate_handle_info(NBDClient *client, uint32_t length,
                                     uint32_t opt, uint16_t myflags,
                                     Error **errp)
{
    int rc;
    char name[NBD_MAX_NAME_SIZE + 1];
    NBDExport *exp;
    uint16_t requests;
    uint16_t request;
    uint32_t namelen;
    bool sendname = false;
    bool blocksize = false;
    uint32_t sizes[3];
    char buf[sizeof(uint64_t) + sizeof(uint16_t)];
    const char *msg;

    /* Client sends:
        4 bytes: L, name length (can be 0)
        L bytes: export name
        2 bytes: N, number of requests (can be 0)
        N * 2 bytes: N requests
    */
    if (length < sizeof(namelen) + sizeof(requests)) {
        msg = "overall request too short";
        goto invalid;
    }
    if (nbd_read(client->ioc, &namelen, sizeof(namelen), errp) < 0) {
        return -EIO;
    }
    be32_to_cpus(&namelen);
    length -= sizeof(namelen);
    if (namelen > length - sizeof(requests) || (length - namelen) % 2) {
        msg = "name length is incorrect";
        goto invalid;
    }
    /* The name is client-controlled: refuse anything that would not fit
     * into the local buffer together with its terminating NUL, instead
     * of overflowing the stack. */
    if (namelen >= sizeof(name)) {
        msg = "name too long for qemu";
        goto invalid;
    }
    if (nbd_read(client->ioc, name, namelen, errp) < 0) {
        return -EIO;
    }
    name[namelen] = '\0';
    length -= namelen;
    trace_nbd_negotiate_handle_export_name_request(name);

    if (nbd_read(client->ioc, &requests, sizeof(requests), errp) < 0) {
        return -EIO;
    }
    be16_to_cpus(&requests);
    length -= sizeof(requests);
    trace_nbd_negotiate_handle_info_requests(requests);
    if (requests != length / sizeof(request)) {
        msg = "incorrect number of  requests for overall length";
        goto invalid;
    }
    while (requests--) {
        if (nbd_read(client->ioc, &request, sizeof(request), errp) < 0) {
            return -EIO;
        }
        be16_to_cpus(&request);
        length -= sizeof(request);
        trace_nbd_negotiate_handle_info_request(request,
                                                nbd_info_lookup(request));
        /* We care about NBD_INFO_NAME and NBD_INFO_BLOCK_SIZE;
         * everything else is either a request we don't know or
         * something we send regardless of request */
        switch (request) {
        case NBD_INFO_NAME:
            sendname = true;
            break;
        case NBD_INFO_BLOCK_SIZE:
            blocksize = true;
            break;
        }
    }

    exp = nbd_export_find(name);
    if (!exp) {
        return nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_UNKNOWN,
                                          opt, errp, "export '%s' not present",
                                          name);
    }

    /* Don't bother sending NBD_INFO_NAME unless client requested it */
    if (sendname) {
        /* Use the name's own length: @length has been consumed down to
         * zero by the parsing above and would send an empty name. */
        rc = nbd_negotiate_send_info(client, opt, NBD_INFO_NAME, namelen, name,
                                     errp);
        if (rc < 0) {
            return rc;
        }
    }

    /* Send NBD_INFO_DESCRIPTION only if available, regardless of
     * client request */
    if (exp->description) {
        size_t len = strlen(exp->description);

        rc = nbd_negotiate_send_info(client, opt, NBD_INFO_DESCRIPTION,
                                     len, exp->description, errp);
        if (rc < 0) {
            return rc;
        }
    }

    /* Send NBD_INFO_BLOCK_SIZE always, but tweak the minimum size
     * according to whether the client requested it, and according to
     * whether this is OPT_INFO or OPT_GO. */
    /* minimum - 1 for back-compat, or 512 if client is new enough.
     * TODO: consult blk_bs(blk)->bl.request_alignment? */
    sizes[0] = (opt == NBD_OPT_INFO || blocksize) ? BDRV_SECTOR_SIZE : 1;
    /* preferred - Hard-code to 4096 for now.
     * TODO: is blk_bs(blk)->bl.opt_transfer appropriate? */
    sizes[1] = 4096;
    /* maximum - At most 32M, but smaller as appropriate. */
    sizes[2] = MIN(blk_get_max_transfer(exp->blk), NBD_MAX_BUFFER_SIZE);
    trace_nbd_negotiate_handle_info_block_size(sizes[0], sizes[1], sizes[2]);
    cpu_to_be32s(&sizes[0]);
    cpu_to_be32s(&sizes[1]);
    cpu_to_be32s(&sizes[2]);
    rc = nbd_negotiate_send_info(client, opt, NBD_INFO_BLOCK_SIZE,
                                 sizeof(sizes), sizes, errp);
    if (rc < 0) {
        return rc;
    }

    /* Send NBD_INFO_EXPORT always */
    trace_nbd_negotiate_new_style_size_flags(exp->size,
                                             exp->nbdflags | myflags);
    stq_be_p(buf, exp->size);
    stw_be_p(buf + 8, exp->nbdflags | myflags);
    rc = nbd_negotiate_send_info(client, opt, NBD_INFO_EXPORT,
                                 sizeof(buf), buf, errp);
    if (rc < 0) {
        return rc;
    }

    /* If the client is just asking for NBD_OPT_INFO, but forgot to
     * request block sizes, return an error.
     * TODO: consult blk_bs(blk)->request_align, and only error if it
     * is not 1? */
    if (opt == NBD_OPT_INFO && !blocksize) {
        return nbd_negotiate_send_rep_err(client->ioc,
                                          NBD_REP_ERR_BLOCK_SIZE_REQD, opt,
                                          errp,
                                          "request NBD_INFO_BLOCK_SIZE to "
                                          "use this export");
    }

    /* Final reply */
    rc = nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, opt, errp);
    if (rc < 0) {
        return rc;
    }

    /* Only NBD_OPT_GO binds the client to the export */
    if (opt == NBD_OPT_GO) {
        client->exp = exp;
        QTAILQ_INSERT_TAIL(&client->exp->clients, client, next);
        nbd_export_get(client->exp);
        rc = 1;
    }
    return rc;

 invalid:
    /* @length holds the number of payload bytes not yet consumed */
    if (nbd_drop(client->ioc, length, errp) < 0) {
        return -EIO;
    }
    return nbd_negotiate_send_rep_err(client->ioc, NBD_REP_ERR_INVALID, opt,
                                      errp, "%s", msg);
}


529 530
/* Handle NBD_OPT_STARTTLS. Return NULL to drop connection, or else the
 * new channel for all further (now-encrypted) communication. */
531
static QIOChannel *nbd_negotiate_handle_starttls(NBDClient *client,
532 533
                                                 uint32_t length,
                                                 Error **errp)
534 535 536 537 538
{
    QIOChannel *ioc;
    QIOChannelTLS *tioc;
    struct NBDTLSHandshakeData data = { 0 };

539
    trace_nbd_negotiate_handle_starttls();
540 541
    ioc = client->ioc;
    if (length) {
542
        if (nbd_drop(ioc, length, errp) < 0) {
543 544
            return NULL;
        }
545
        nbd_negotiate_send_rep_err(ioc, NBD_REP_ERR_INVALID, NBD_OPT_STARTTLS,
546
                                   errp,
547
                                   "OPT_STARTTLS should not have length");
548 549 550
        return NULL;
    }

551
    if (nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK,
552
                               NBD_OPT_STARTTLS, errp) < 0) {
553 554
        return NULL;
    }
555 556 557 558

    tioc = qio_channel_tls_new_server(ioc,
                                      client->tlscreds,
                                      client->tlsaclname,
559
                                      errp);
560 561 562 563
    if (!tioc) {
        return NULL;
    }

564
    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-server-tls");
565
    trace_nbd_negotiate_handle_starttls_handshake();
566 567 568 569 570 571 572 573 574 575 576 577
    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
    qio_channel_tls_handshake(tioc,
                              nbd_tls_handshake,
                              &data,
                              NULL);

    if (!data.complete) {
        g_main_loop_run(data.loop);
    }
    g_main_loop_unref(data.loop);
    if (data.error) {
        object_unref(OBJECT(tioc));
578
        error_propagate(errp, data.error);
579 580 581 582 583 584
        return NULL;
    }

    return QIO_CHANNEL(tioc);
}

585
/* nbd_negotiate_options
E
Eric Blake 已提交
586 587
 * Process all NBD_OPT_* client option commands, during fixed newstyle
 * negotiation.
588
 * Return:
589 590 591 592
 * -errno  on error, errp is set
 * 0       on successful negotiation, errp is not set
 * 1       if client sent NBD_OPT_ABORT, i.e. on valid disconnect,
 *         errp is not set
593
 */
594 595
static int nbd_negotiate_options(NBDClient *client, uint16_t myflags,
                                 Error **errp)
596
{
M
Max Reitz 已提交
597
    uint32_t flags;
598
    bool fixedNewstyle = false;
599
    bool no_zeroes = false;
M
Max Reitz 已提交
600 601 602 603

    /* Client sends:
        [ 0 ..   3]   client flags

E
Eric Blake 已提交
604
       Then we loop until NBD_OPT_EXPORT_NAME or NBD_OPT_GO:
M
Max Reitz 已提交
605 606 607 608 609 610 611 612 613 614 615
        [ 0 ..   7]   NBD_OPTS_MAGIC
        [ 8 ..  11]   NBD option
        [12 ..  15]   Data length
        ...           Rest of request

        [ 0 ..   7]   NBD_OPTS_MAGIC
        [ 8 ..  11]   Second NBD option
        [12 ..  15]   Data length
        ...           Rest of request
    */

616 617
    if (nbd_read(client->ioc, &flags, sizeof(flags), errp) < 0) {
        error_prepend(errp, "read failed: ");
M
Max Reitz 已提交
618 619 620
        return -EIO;
    }
    be32_to_cpus(&flags);
621
    trace_nbd_negotiate_options_flags(flags);
622 623 624 625
    if (flags & NBD_FLAG_C_FIXED_NEWSTYLE) {
        fixedNewstyle = true;
        flags &= ~NBD_FLAG_C_FIXED_NEWSTYLE;
    }
E
Eric Blake 已提交
626
    if (flags & NBD_FLAG_C_NO_ZEROES) {
627
        no_zeroes = true;
E
Eric Blake 已提交
628 629
        flags &= ~NBD_FLAG_C_NO_ZEROES;
    }
630
    if (flags != 0) {
631
        error_setg(errp, "Unknown client flags 0x%" PRIx32 " received", flags);
632
        return -EINVAL;
M
Max Reitz 已提交
633 634
    }

635
    while (1) {
M
Max Reitz 已提交
636
        int ret;
637
        uint32_t option, length;
638 639
        uint64_t magic;

640 641
        if (nbd_read(client->ioc, &magic, sizeof(magic), errp) < 0) {
            error_prepend(errp, "read failed: ");
642 643
            return -EINVAL;
        }
644 645 646
        magic = be64_to_cpu(magic);
        trace_nbd_negotiate_options_check_magic(magic);
        if (magic != NBD_OPTS_MAGIC) {
647
            error_setg(errp, "Bad magic received");
648 649 650
            return -EINVAL;
        }

651 652
        if (nbd_read(client->ioc, &option,
                     sizeof(option), errp) < 0) {
653
            error_prepend(errp, "read failed: ");
654 655
            return -EINVAL;
        }
656
        option = be32_to_cpu(option);
657

658 659
        if (nbd_read(client->ioc, &length, sizeof(length), errp) < 0) {
            error_prepend(errp, "read failed: ");
660 661 662 663
            return -EINVAL;
        }
        length = be32_to_cpu(length);

664 665
        trace_nbd_negotiate_options_check_option(option,
                                                 nbd_opt_lookup(option));
666 667 668 669
        if (client->tlscreds &&
            client->ioc == (QIOChannel *)client->sioc) {
            QIOChannel *tioc;
            if (!fixedNewstyle) {
670
                error_setg(errp, "Unsupported option 0x%" PRIx32, option);
671 672
                return -EINVAL;
            }
673
            switch (option) {
674
            case NBD_OPT_STARTTLS:
675
                tioc = nbd_negotiate_handle_starttls(client, length, errp);
676 677 678 679 680 681 682
                if (!tioc) {
                    return -EIO;
                }
                object_unref(OBJECT(client->ioc));
                client->ioc = QIO_CHANNEL(tioc);
                break;

683 684
            case NBD_OPT_EXPORT_NAME:
                /* No way to return an error to client, so drop connection */
685
                error_setg(errp, "Option 0x%x not permitted before TLS",
686
                           option);
687 688
                return -EINVAL;

689
            default:
690
                if (nbd_drop(client->ioc, length, errp) < 0) {
691 692
                    return -EIO;
                }
693 694
                ret = nbd_negotiate_send_rep_err(client->ioc,
                                                 NBD_REP_ERR_TLS_REQD,
695
                                                 option, errp,
696 697
                                                 "Option 0x%" PRIx32
                                                 "not permitted before TLS",
698
                                                 option);
699 700 701
                if (ret < 0) {
                    return ret;
                }
702 703 704
                /* Let the client keep trying, unless they asked to
                 * quit. In this mode, we've already sent an error, so
                 * we can't ack the abort.  */
705
                if (option == NBD_OPT_ABORT) {
706
                    return 1;
707
                }
708
                break;
709 710
            }
        } else if (fixedNewstyle) {
711
            switch (option) {
712
            case NBD_OPT_LIST:
713
                ret = nbd_negotiate_handle_list(client, length, errp);
714 715 716 717 718 719
                if (ret < 0) {
                    return ret;
                }
                break;

            case NBD_OPT_ABORT:
720 721 722
                /* NBD spec says we must try to reply before
                 * disconnecting, but that we must also tolerate
                 * guests that don't wait for our reply. */
723
                nbd_negotiate_send_rep(client->ioc, NBD_REP_ACK, option, NULL);
724
                return 1;
725 726

            case NBD_OPT_EXPORT_NAME:
727 728 729
                return nbd_negotiate_handle_export_name(client, length,
                                                        myflags, no_zeroes,
                                                        errp);
730

E
Eric Blake 已提交
731 732 733 734 735 736 737 738 739 740 741 742 743
            case NBD_OPT_INFO:
            case NBD_OPT_GO:
                ret = nbd_negotiate_handle_info(client, length, option,
                                                myflags, errp);
                if (ret == 1) {
                    assert(option == NBD_OPT_GO);
                    return 0;
                }
                if (ret) {
                    return ret;
                }
                break;

744
            case NBD_OPT_STARTTLS:
745
                if (nbd_drop(client->ioc, length, errp) < 0) {
746 747
                    return -EIO;
                }
748
                if (client->tlscreds) {
749 750
                    ret = nbd_negotiate_send_rep_err(client->ioc,
                                                     NBD_REP_ERR_INVALID,
751
                                                     option, errp,
752
                                                     "TLS already enabled");
753
                } else {
754 755
                    ret = nbd_negotiate_send_rep_err(client->ioc,
                                                     NBD_REP_ERR_POLICY,
756
                                                     option, errp,
757
                                                     "TLS not configured");
758 759 760
                }
                if (ret < 0) {
                    return ret;
761
                }
762
                break;
763
            default:
764
                if (nbd_drop(client->ioc, length, errp) < 0) {
765 766
                    return -EIO;
                }
767 768
                ret = nbd_negotiate_send_rep_err(client->ioc,
                                                 NBD_REP_ERR_UNSUP,
769
                                                 option, errp,
770
                                                 "Unsupported option 0x%"
771 772
                                                 PRIx32 " (%s)", option,
                                                 nbd_opt_lookup(option));
773 774 775
                if (ret < 0) {
                    return ret;
                }
776
                break;
777 778 779 780 781 782
            }
        } else {
            /*
             * If broken new-style we should drop the connection
             * for anything except NBD_OPT_EXPORT_NAME
             */
783
            switch (option) {
784
            case NBD_OPT_EXPORT_NAME:
785 786 787
                return nbd_negotiate_handle_export_name(client, length,
                                                        myflags, no_zeroes,
                                                        errp);
788 789

            default:
790 791
                error_setg(errp, "Unsupported option 0x%" PRIx32 " (%s)",
                           option, nbd_opt_lookup(option));
792
                return -EINVAL;
793
            }
794 795 796 797
        }
    }
}

798 799
/* nbd_negotiate
 * Return:
800 801 802 803
 * -errno  on error, errp is set
 * 0       on successful negotiation, errp is not set
 * 1       if client sent NBD_OPT_ABORT, i.e. on valid disconnect,
 *         errp is not set
804
 */
805
static coroutine_fn int nbd_negotiate(NBDClient *client, Error **errp)
B
bellard 已提交
806
{
807
    char buf[NBD_OLDSTYLE_NEGOTIATE_SIZE] = "";
808
    int ret;
E
Eric Blake 已提交
809
    const uint16_t myflags = (NBD_FLAG_HAS_FLAGS | NBD_FLAG_SEND_TRIM |
810 811
                              NBD_FLAG_SEND_FLUSH | NBD_FLAG_SEND_FUA |
                              NBD_FLAG_SEND_WRITE_ZEROES);
812
    bool oldStyle;
N
Nick Thomas 已提交
813

814
    /* Old style negotiation header, no room for options
815 816
        [ 0 ..   7]   passwd       ("NBDMAGIC")
        [ 8 ..  15]   magic        (NBD_CLIENT_MAGIC)
N
Nick Thomas 已提交
817
        [16 ..  23]   size
818
        [24 ..  27]   export flags (zero-extended)
819 820
        [28 .. 151]   reserved     (0)

821
       New style negotiation header, client can send options
822 823 824
        [ 0 ..   7]   passwd       ("NBDMAGIC")
        [ 8 ..  15]   magic        (NBD_OPTS_MAGIC)
        [16 ..  17]   server flags (0)
E
Eric Blake 已提交
825
        ....options sent, ending in NBD_OPT_EXPORT_NAME or NBD_OPT_GO....
N
Nick Thomas 已提交
826 827
     */

828
    qio_channel_set_blocking(client->ioc, false, NULL);
829

830
    trace_nbd_negotiate_begin();
N
Nick Thomas 已提交
831
    memcpy(buf, "NBDMAGIC", 8);
832 833 834

    oldStyle = client->exp != NULL && !client->tlscreds;
    if (oldStyle) {
835 836
        trace_nbd_negotiate_old_style(client->exp->size,
                                      client->exp->nbdflags | myflags);
J
John Snow 已提交
837 838
        stq_be_p(buf + 8, NBD_CLIENT_MAGIC);
        stq_be_p(buf + 16, client->exp->size);
839
        stl_be_p(buf + 24, client->exp->nbdflags | myflags);
N
Nick Thomas 已提交
840

841 842
        if (nbd_write(client->ioc, buf, sizeof(buf), errp) < 0) {
            error_prepend(errp, "write failed: ");
843
            return -EINVAL;
844 845
        }
    } else {
846 847 848
        stq_be_p(buf + 8, NBD_OPTS_MAGIC);
        stw_be_p(buf + 16, NBD_FLAG_FIXED_NEWSTYLE | NBD_FLAG_NO_ZEROES);

849 850
        if (nbd_write(client->ioc, buf, 18, errp) < 0) {
            error_prepend(errp, "write failed: ");
851
            return -EINVAL;
852
        }
853
        ret = nbd_negotiate_options(client, myflags, errp);
854
        if (ret != 0) {
855 856 857
            if (ret < 0) {
                error_prepend(errp, "option negotiation failed: ");
            }
858
            return ret;
859
        }
N
Nick Thomas 已提交
860 861
    }

862
    trace_nbd_negotiate_success();
863 864

    return 0;
B
bellard 已提交
865 866
}

867 868
/*
 * Read one fixed-size request header from @ioc and decode it into @request.
 *
 * Wire layout (all fields big-endian):
 *   [ 0 ..  3]   magic   (NBD_REQUEST_MAGIC)
 *   [ 4 ..  5]   flags   (NBD_CMD_FLAG_FUA, ...)
 *   [ 6 ..  7]   type    (NBD_CMD_READ, ...)
 *   [ 8 .. 15]   handle
 *   [16 .. 23]   from
 *   [24 .. 27]   len
 *
 * Returns 0 on success, the negative value returned by nbd_read() if the
 * read fails, or -EINVAL (with @errp set) when the magic does not match.
 * Note that @request is filled in even when the magic is wrong.
 */
static int nbd_receive_request(QIOChannel *ioc, NBDRequest *request,
                               Error **errp)
{
    uint8_t wire[NBD_REQUEST_SIZE];
    uint32_t magic;
    int rc = nbd_read(ioc, wire, sizeof(wire), errp);

    if (rc < 0) {
        return rc;
    }

    magic           = ldl_be_p(&wire[0]);
    request->flags  = lduw_be_p(&wire[4]);
    request->type   = lduw_be_p(&wire[6]);
    request->handle = ldq_be_p(&wire[8]);
    request->from   = ldq_be_p(&wire[16]);
    request->len    = ldl_be_p(&wire[24]);

    trace_nbd_receive_request(magic, request->flags, request->type,
                              request->from, request->len);

    if (magic == NBD_REQUEST_MAGIC) {
        return 0;
    }

    error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
    return -EINVAL;
}

P
Paolo Bonzini 已提交
905 906
#define MAX_NBD_REQUESTS 16

907
/* Take an additional reference on @client. */
void nbd_client_get(NBDClient *client)
{
    client->refcount += 1;
}

912
/*
 * Drop one reference on @client.  When the count reaches zero the channel
 * is detached from its AioContext, the channel/TLS references are released,
 * the client is unlinked from its export (if any) and the structure is freed.
 */
void nbd_client_put(NBDClient *client)
{
    client->refcount--;
    if (client->refcount != 0) {
        return;
    }

    /* The last reference should be dropped by client->close,
     * which is called by client_close.
     */
    assert(client->closing);

    qio_channel_detach_aio_context(client->ioc);
    object_unref(OBJECT(client->sioc));
    object_unref(OBJECT(client->ioc));
    if (client->tlscreds) {
        object_unref(OBJECT(client->tlscreds));
    }
    g_free(client->tlsaclname);

    if (client->exp) {
        /* Unlink from the export's client list, then drop our export ref. */
        QTAILQ_REMOVE(&client->exp->clients, client, next);
        nbd_export_put(client->exp);
    }

    g_free(client);
}

935
/*
 * Begin closing @client.  Only the first call has any effect: it marks the
 * client as closing, shuts down both directions of the channel and invokes
 * the close_fn callback (if set), passing @negotiated through to it.
 */
static void client_close(NBDClient *client, bool negotiated)
{
    if (!client->closing) {
        client->closing = true;

        /* Force requests to finish.  They will drop their own references,
         * then we'll close the socket and free the NBDClient.
         */
        qio_channel_shutdown(client->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);

        /* Also tell the client, so that they release their reference.  */
        if (client->close_fn != NULL) {
            client->close_fn(client, negotiated);
        }
    }
}

955
/*
 * Allocate a zeroed request record for @client.  The in-flight counter is
 * bumped (it must stay within MAX_NBD_REQUESTS) and the record holds its
 * own reference on the client.
 */
static NBDRequestData *nbd_request_get(NBDClient *client)
{
    NBDRequestData *new_req;

    assert(client->nb_requests < MAX_NBD_REQUESTS);
    client->nb_requests += 1;

    new_req = g_new0(NBDRequestData, 1);
    nbd_client_get(client);
    new_req->client = client;

    return new_req;
}

968
/*
 * Release @req: free its data buffer (if one was allocated) and the record
 * itself, decrement the owner's in-flight counter, give the client a chance
 * to start receiving another request, then drop the reference that
 * nbd_request_get() took on the client.
 */
static void nbd_request_put(NBDRequestData *req)
{
    NBDClient *owner = req->client;

    if (req->data != NULL) {
        qemu_vfree(req->data);
    }
    g_free(req);

    owner->nb_requests -= 1;
    nbd_client_receive_next_request(owner);

    nbd_client_put(owner);
}

M
Max Reitz 已提交
983
/*
 * AioContext notifier: the export passed as @opaque has been attached to
 * @ctx.  Record the context, attach every client's channel to it and
 * schedule any parked receive/send coroutines in the new context.
 */
static void blk_aio_attached(AioContext *ctx, void *opaque)
{
    NBDExport *exp = opaque;
    NBDClient *c;

    trace_nbd_blk_aio_attached(exp->name, ctx);

    exp->ctx = ctx;

    QTAILQ_FOREACH(c, &exp->clients, next) {
        qio_channel_attach_aio_context(c->ioc, ctx);

        if (c->recv_coroutine != NULL) {
            aio_co_schedule(ctx, c->recv_coroutine);
        }
        if (c->send_coroutine != NULL) {
            aio_co_schedule(ctx, c->send_coroutine);
        }
    }
}

M
Max Reitz 已提交
1003
static void blk_aio_detach(void *opaque)
M
Max Reitz 已提交
1004 1005 1006 1007
{
    NBDExport *exp = opaque;
    NBDClient *client;

1008
    trace_nbd_blk_aio_detach(exp->name, exp->ctx);
M
Max Reitz 已提交
1009 1010

    QTAILQ_FOREACH(client, &exp->clients, next) {
1011
        qio_channel_detach_aio_context(client->ioc);
M
Max Reitz 已提交
1012 1013 1014 1015 1016
    }

    exp->ctx = NULL;
}

1017 1018 1019 1020 1021 1022
/*
 * Notifier callback registered on the eject BlockBackend: closes the export
 * that embeds notifier @n.  @data is unused.
 */
static void nbd_eject_notifier(Notifier *n, void *data)
{
    nbd_export_close(container_of(n, NBDExport, eject_notifier));
}

1023
NBDExport *nbd_export_new(BlockDriverState *bs, off_t dev_offset, off_t size,
E
Eric Blake 已提交
1024
                          uint16_t nbdflags, void (*close)(NBDExport *),
1025
                          bool writethrough, BlockBackend *on_eject_blk,
M
Max Reitz 已提交
1026
                          Error **errp)
P
Paolo Bonzini 已提交
1027
{
1028
    AioContext *ctx;
1029
    BlockBackend *blk;
1030
    NBDExport *exp = g_new0(NBDExport, 1);
1031
    uint64_t perm;
1032
    int ret;
1033

1034 1035 1036 1037 1038 1039 1040 1041 1042 1043
    /*
     * NBD exports are used for non-shared storage migration.  Make sure
     * that BDRV_O_INACTIVE is cleared and the image is ready for write
     * access since the export could be available before migration handover.
     */
    ctx = bdrv_get_aio_context(bs);
    aio_context_acquire(ctx);
    bdrv_invalidate_cache(bs, NULL);
    aio_context_release(ctx);

1044 1045 1046 1047 1048 1049 1050 1051
    /* Don't allow resize while the NBD server is running, otherwise we don't
     * care what happens with the node. */
    perm = BLK_PERM_CONSISTENT_READ;
    if ((nbdflags & NBD_FLAG_READ_ONLY) == 0) {
        perm |= BLK_PERM_WRITE;
    }
    blk = blk_new(perm, BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED |
                        BLK_PERM_WRITE | BLK_PERM_GRAPH_MOD);
1052 1053 1054 1055
    ret = blk_insert_bs(blk, bs, errp);
    if (ret < 0) {
        goto fail;
    }
1056 1057
    blk_set_enable_write_cache(blk, !writethrough);

1058
    exp->refcount = 1;
1059
    QTAILQ_INIT(&exp->clients);
M
Max Reitz 已提交
1060
    exp->blk = blk;
P
Paolo Bonzini 已提交
1061 1062
    exp->dev_offset = dev_offset;
    exp->nbdflags = nbdflags;
M
Max Reitz 已提交
1063 1064 1065 1066 1067 1068 1069 1070
    exp->size = size < 0 ? blk_getlength(blk) : size;
    if (exp->size < 0) {
        error_setg_errno(errp, -exp->size,
                         "Failed to determine the NBD export's length");
        goto fail;
    }
    exp->size -= exp->size % BDRV_SECTOR_SIZE;

1071
    exp->close = close;
M
Max Reitz 已提交
1072 1073
    exp->ctx = blk_get_aio_context(blk);
    blk_add_aio_context_notifier(blk, blk_aio_attached, blk_aio_detach, exp);
1074

1075 1076 1077 1078 1079 1080
    if (on_eject_blk) {
        blk_ref(on_eject_blk);
        exp->eject_notifier_blk = on_eject_blk;
        exp->eject_notifier.notify = nbd_eject_notifier;
        blk_add_remove_bs_notifier(on_eject_blk, &exp->eject_notifier);
    }
P
Paolo Bonzini 已提交
1081
    return exp;
M
Max Reitz 已提交
1082 1083

fail:
1084
    blk_unref(blk);
M
Max Reitz 已提交
1085 1086
    g_free(exp);
    return NULL;
P
Paolo Bonzini 已提交
1087 1088
}

P
Paolo Bonzini 已提交
1089 1090 1091 1092 1093 1094 1095 1096 1097 1098 1099 1100 1101 1102 1103 1104 1105 1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121
/*
 * Look up an export by exact @name in the global export list.
 * Returns the matching export, or NULL if there is none.
 */
NBDExport *nbd_export_find(const char *name)
{
    NBDExport *candidate;

    QTAILQ_FOREACH(candidate, &exports, next) {
        if (!strcmp(name, candidate->name)) {
            return candidate;
        }
    }

    return NULL;
}

/*
 * Rename @exp to a copy of @name, or unname it when @name is NULL.
 *
 * A named export is linked in the global export list and that membership
 * holds one reference; clearing the old name unlinks it and drops that
 * reference, setting a new name takes one and links it at the tail.
 * Nothing happens if @name is the very pointer already stored.
 */
void nbd_export_set_name(NBDExport *exp, const char *name)
{
    if (exp->name == name) {
        return;
    }

    /* Temporary reference so the export survives the unlink below. */
    nbd_export_get(exp);

    if (exp->name) {
        g_free(exp->name);
        exp->name = NULL;
        QTAILQ_REMOVE(&exports, exp, next);
        nbd_export_put(exp);
    }

    if (name) {
        nbd_export_get(exp);
        exp->name = g_strdup(name);
        QTAILQ_INSERT_TAIL(&exports, exp, next);
    }

    nbd_export_put(exp);
}

1122 1123 1124 1125 1126 1127
/*
 * Replace the description of @exp with a copy of @description, freeing
 * the previously stored string.
 */
void nbd_export_set_description(NBDExport *exp, const char *description)
{
    char *previous = exp->description;

    g_free(previous);
    exp->description = g_strdup(description);
}

P
Paolo Bonzini 已提交
1128 1129
/*
 * Close @exp: close every client attached to it, then clear its name and
 * description.  A temporary reference is held for the duration so the
 * export is not freed midway.
 */
void nbd_export_close(NBDExport *exp)
{
    NBDClient *cur, *following;

    nbd_export_get(exp);

    /* SAFE variant: the successor is fetched before each client is closed. */
    QTAILQ_FOREACH_SAFE(cur, &exp->clients, next, following) {
        client_close(cur, true);
    }

    nbd_export_set_name(exp, NULL);
    nbd_export_set_description(exp, NULL);

    nbd_export_put(exp);
}

/* Take an additional reference on @exp, which must already be referenced. */
void nbd_export_get(NBDExport *exp)
{
    assert(exp->refcount > 0);
    exp->refcount += 1;
}

/*
 * Drop one reference on @exp.
 *
 * If the caller holds the only remaining reference, the export is closed
 * first.  When the count reaches zero the close callback runs (if set),
 * the notifiers are removed, the backend references are released and the
 * export is freed.
 */
void nbd_export_put(NBDExport *exp)
{
    assert(exp->refcount > 0);
    if (exp->refcount == 1) {
        nbd_export_close(exp);
    }

    exp->refcount--;
    if (exp->refcount != 0) {
        return;
    }

    assert(exp->name == NULL);
    assert(exp->description == NULL);

    if (exp->close != NULL) {
        exp->close(exp);
    }

    if (exp->blk != NULL) {
        if (exp->eject_notifier_blk != NULL) {
            notifier_remove(&exp->eject_notifier);
            blk_unref(exp->eject_notifier_blk);
        }
        blk_remove_aio_context_notifier(exp->blk, blk_aio_attached,
                                        blk_aio_detach, exp);
        blk_unref(exp->blk);
        exp->blk = NULL;
    }

    g_free(exp);
}

1177
/* Return the BlockBackend stored in @exp (no reference is taken). */
BlockBackend *nbd_export_get_blockdev(NBDExport *exp)
{
    BlockBackend *backend = exp->blk;

    return backend;
}

P
Paolo Bonzini 已提交
1182 1183 1184 1185 1186 1187 1188 1189 1190
void nbd_export_close_all(void)
{
    NBDExport *exp, *next;

    QTAILQ_FOREACH_SAFE(exp, &exports, next, next) {
        nbd_export_close(exp);
    }
}

1191 1192 1193 1194 1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207
/*
 * Write the @niov buffers of @iov to the client's channel while holding
 * the client's send lock.  Must be called from coroutine context.  The
 * running coroutine is recorded in client->send_coroutine for the duration
 * of the write.
 *
 * Returns 0 on success, -EIO (with @errp set by the channel layer) on
 * failure.
 */
static int coroutine_fn nbd_co_send_iov(NBDClient *client, struct iovec *iov,
                                        unsigned niov, Error **errp)
{
    int status = 0;

    g_assert(qemu_in_coroutine());

    qemu_co_mutex_lock(&client->send_lock);
    client->send_coroutine = qemu_coroutine_self();

    if (qio_channel_writev_all(client->ioc, iov, niov, errp) < 0) {
        status = -EIO;
    }

    client->send_coroutine = NULL;
    qemu_co_mutex_unlock(&client->send_lock);

    return status;
}

1208 1209 1210 1211 1212 1213 1214 1215
/*
 * Fill @reply with the simple-reply magic, @error and @handle, each stored
 * in big-endian byte order.  @error is written as a 32-bit field.
 */
static inline void set_be_simple_reply(NBDSimpleReply *reply, uint64_t error,
                                       uint64_t handle)
{
    stq_be_p(&reply->handle, handle);
    stl_be_p(&reply->error, error);
    stl_be_p(&reply->magic, NBD_SIMPLE_REPLY_MAGIC);
}

1216
/*
 * Send a simple reply for the request identified by @handle.
 *
 * @error is a positive system errno (0 for success); it is translated to
 * its NBD wire value before being sent.  When @len is non-zero, @len bytes
 * from @data follow the reply header in the same write.
 *
 * Returns the result of nbd_co_send_iov().
 */
static int nbd_co_send_simple_reply(NBDClient *client,
                                    uint64_t handle,
                                    uint32_t error,
                                    void *data,
                                    size_t len,
                                    Error **errp)
{
    NBDSimpleReply reply;
    struct iovec iov[2];
    unsigned niov;
    int nbd_err = system_errno_to_nbd_errno(error);

    iov[0].iov_base = &reply;
    iov[0].iov_len = sizeof(reply);
    iov[1].iov_base = data;
    iov[1].iov_len = len;

    /* Only transmit the payload element when there is a payload. */
    if (len) {
        niov = 2;
    } else {
        niov = 1;
    }

    trace_nbd_co_send_simple_reply(handle, nbd_err, len);
    set_be_simple_reply(&reply, nbd_err, handle);

    return nbd_co_send_iov(client, iov, niov, errp);
}

1236 1237 1238 1239 1240 1241
/* nbd_co_receive_request
 * Collect a client request. Return 0 if request looks valid, -EIO to drop
 * connection right away, and any other negative value to report an error to
 * the client (although the caller may still need to disconnect after reporting
 * the error).
 */
1242 1243
static int nbd_co_receive_request(NBDRequestData *req, NBDRequest *request,
                                  Error **errp)
1244
{
1245
    NBDClient *client = req->client;
1246

1247
    g_assert(qemu_in_coroutine());
1248
    assert(client->recv_coroutine == qemu_coroutine_self());
1249
    if (nbd_receive_request(client->ioc, request, errp) < 0) {
1250
        return -EIO;
1251 1252
    }

1253 1254
    trace_nbd_co_receive_request_decode_type(request->handle, request->type,
                                             nbd_cmd_lookup(request->type));
1255

1256
    if (request->type != NBD_CMD_WRITE) {
1257 1258 1259 1260
        /* No payload, we are ready to read the next request.  */
        req->complete = true;
    }

1261
    if (request->type == NBD_CMD_DISC) {
1262 1263
        /* Special case: we're going to disconnect without a reply,
         * whether or not flags, from, or len are bogus */
1264
        return -EIO;
1265 1266 1267 1268 1269
    }

    /* Check for sanity in the parameters, part 1.  Defer as many
     * checks as possible until after reading any NBD_CMD_WRITE
     * payload, so we can try and keep the connection alive.  */
1270
    if ((request->from + request->len) < request->from) {
1271 1272
        error_setg(errp,
                   "integer overflow detected, you're probably being attacked");
1273
        return -EINVAL;
1274 1275
    }

1276
    if (request->type == NBD_CMD_READ || request->type == NBD_CMD_WRITE) {
1277
        if (request->len > NBD_MAX_BUFFER_SIZE) {
1278 1279
            error_setg(errp, "len (%" PRIu32" ) is larger than max len (%u)",
                       request->len, NBD_MAX_BUFFER_SIZE);
1280
            return -EINVAL;
1281 1282
        }

1283 1284
        req->data = blk_try_blockalign(client->exp->blk, request->len);
        if (req->data == NULL) {
1285
            error_setg(errp, "No memory");
1286
            return -ENOMEM;
1287
        }
1288
    }
1289
    if (request->type == NBD_CMD_WRITE) {
1290 1291
        if (nbd_read(client->ioc, req->data, request->len, errp) < 0) {
            error_prepend(errp, "reading from socket failed: ");
1292
            return -EIO;
1293
        }
1294
        req->complete = true;
1295

1296 1297
        trace_nbd_co_receive_request_payload_received(request->handle,
                                                      request->len);
1298
    }
1299 1300 1301

    /* Sanity checks, part 2. */
    if (request->from + request->len > client->exp->size) {
1302 1303 1304
        error_setg(errp, "operation past EOF; From: %" PRIu64 ", Len: %" PRIu32
                   ", Size: %" PRIu64, request->from, request->len,
                   (uint64_t)client->exp->size);
1305
        return request->type == NBD_CMD_WRITE ? -ENOSPC : -EINVAL;
1306
    }
1307
    if (request->flags & ~(NBD_CMD_FLAG_FUA | NBD_CMD_FLAG_NO_HOLE)) {
1308
        error_setg(errp, "unsupported flags (got 0x%x)", request->flags);
1309
        return -EINVAL;
E
Eric Blake 已提交
1310
    }
1311 1312
    if (request->type != NBD_CMD_WRITE_ZEROES &&
        (request->flags & NBD_CMD_FLAG_NO_HOLE)) {
1313
        error_setg(errp, "unexpected flags (got 0x%x)", request->flags);
1314
        return -EINVAL;
1315
    }
1316

1317
    return 0;
1318 1319
}

1320 1321
/* Owns a reference to the NBDClient passed as opaque.  */
static coroutine_fn void nbd_trip(void *opaque)
1322
{
P
Paolo Bonzini 已提交
1323
    NBDClient *client = opaque;
1324
    NBDExport *exp = client->exp;
1325
    NBDRequestData *req;
1326
    NBDRequest request = { 0 };    /* GCC thinks it can be used uninitialized */
1327
    int ret;
1328
    int flags;
1329
    int reply_data_len = 0;
1330
    Error *local_err = NULL;
N
Nick Thomas 已提交
1331

1332
    trace_nbd_trip();
1333
    if (client->closing) {
1334
        nbd_client_put(client);
1335 1336
        return;
    }
N
Nick Thomas 已提交
1337

1338
    req = nbd_request_get(client);
1339
    ret = nbd_co_receive_request(req, &request, &local_err);
1340 1341
    client->recv_coroutine = NULL;
    nbd_client_receive_next_request(client);
1342
    if (ret == -EIO) {
1343
        goto disconnect;
1344
    }
N
Nick Thomas 已提交
1345

1346
    if (ret < 0) {
1347
        goto reply;
N
Nick Thomas 已提交
1348 1349
    }

1350 1351 1352 1353 1354 1355 1356 1357
    if (client->closing) {
        /*
         * The client may be closed when we are blocked in
         * nbd_co_receive_request()
         */
        goto done;
    }

1358
    switch (request.type) {
N
Nick Thomas 已提交
1359
    case NBD_CMD_READ:
1360 1361
        /* XXX: NBD Protocol only documents use of FUA with WRITE */
        if (request.flags & NBD_CMD_FLAG_FUA) {
M
Max Reitz 已提交
1362
            ret = blk_co_flush(exp->blk);
P
Paolo Bonzini 已提交
1363
            if (ret < 0) {
1364
                error_setg_errno(&local_err, -ret, "flush failed");
1365
                break;
P
Paolo Bonzini 已提交
1366 1367 1368
            }
        }

1369 1370
        ret = blk_pread(exp->blk, request.from + exp->dev_offset,
                        req->data, request.len);
1371
        if (ret < 0) {
1372
            error_setg_errno(&local_err, -ret, "reading from file failed");
1373
            break;
N
Nick Thomas 已提交
1374 1375
        }

1376 1377
        reply_data_len = request.len;

N
Nick Thomas 已提交
1378 1379
        break;
    case NBD_CMD_WRITE:
P
Paolo Bonzini 已提交
1380
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1381
            ret = -EROFS;
1382
            break;
1383 1384
        }

1385
        flags = 0;
1386
        if (request.flags & NBD_CMD_FLAG_FUA) {
1387 1388
            flags |= BDRV_REQ_FUA;
        }
1389
        ret = blk_pwrite(exp->blk, request.from + exp->dev_offset,
1390
                         req->data, request.len, flags);
1391
        if (ret < 0) {
1392
            error_setg_errno(&local_err, -ret, "writing to file failed");
1393
        }
N
Nick Thomas 已提交
1394

1395 1396 1397
        break;
    case NBD_CMD_WRITE_ZEROES:
        if (exp->nbdflags & NBD_FLAG_READ_ONLY) {
1398
            error_setg(&local_err, "Server is read-only, return error");
1399
            ret = -EROFS;
1400
            break;
1401 1402 1403 1404 1405 1406 1407 1408 1409 1410 1411 1412
        }

        flags = 0;
        if (request.flags & NBD_CMD_FLAG_FUA) {
            flags |= BDRV_REQ_FUA;
        }
        if (!(request.flags & NBD_CMD_FLAG_NO_HOLE)) {
            flags |= BDRV_REQ_MAY_UNMAP;
        }
        ret = blk_pwrite_zeroes(exp->blk, request.from + exp->dev_offset,
                                request.len, flags);
        if (ret < 0) {
1413
            error_setg_errno(&local_err, -ret, "writing to file failed");
1414 1415
        }

N
Nick Thomas 已提交
1416 1417
        break;
    case NBD_CMD_DISC:
1418 1419 1420
        /* unreachable, thanks to special case in nbd_co_receive_request() */
        abort();

P
Paolo Bonzini 已提交
1421
    case NBD_CMD_FLUSH:
M
Max Reitz 已提交
1422
        ret = blk_co_flush(exp->blk);
P
Paolo Bonzini 已提交
1423
        if (ret < 0) {
1424
            error_setg_errno(&local_err, -ret, "flush failed");
P
Paolo Bonzini 已提交
1425
        }
1426

P
Paolo Bonzini 已提交
1427 1428
        break;
    case NBD_CMD_TRIM:
1429 1430 1431
        ret = blk_co_pdiscard(exp->blk, request.from + exp->dev_offset,
                              request.len);
        if (ret < 0) {
1432
            error_setg_errno(&local_err, -ret, "discard failed");
P
Paolo Bonzini 已提交
1433
        }
1434

P
Paolo Bonzini 已提交
1435
        break;
N
Nick Thomas 已提交
1436
    default:
1437 1438
        error_setg(&local_err, "invalid request type (%" PRIu32 ") received",
                   request.type);
1439
        ret = -EINVAL;
1440 1441 1442
    }

reply:
1443
    if (local_err) {
1444 1445
        /* If we get here, local_err was not a fatal error, and should be sent
         * to the client. */
1446 1447 1448 1449
        error_report_err(local_err);
        local_err = NULL;
    }

1450
    if (nbd_co_send_simple_reply(req->client, request.handle,
1451
                                 ret < 0 ? -ret : 0,
1452
                                 req->data, reply_data_len, &local_err) < 0)
1453
    {
1454
        error_prepend(&local_err, "Failed to send reply: ");
1455 1456 1457
        goto disconnect;
    }

1458 1459 1460
    /* We must disconnect after NBD_CMD_WRITE if we did not
     * read the payload.
     */
1461 1462
    if (!req->complete) {
        error_setg(&local_err, "Request handling failed in intermediate state");
1463
        goto disconnect;
N
Nick Thomas 已提交
1464 1465
    }

1466
done:
P
Paolo Bonzini 已提交
1467
    nbd_request_put(req);
1468
    nbd_client_put(client);
P
Paolo Bonzini 已提交
1469 1470
    return;

1471
disconnect:
1472 1473 1474
    if (local_err) {
        error_reportf_err(local_err, "Disconnect client, due to: ");
    }
1475
    nbd_request_put(req);
1476
    client_close(client, true);
1477
    nbd_client_put(client);
B
bellard 已提交
1478
}
P
Paolo Bonzini 已提交
1479

1480
/*
 * Start a new nbd_trip() coroutine for @client, unless one is already
 * waiting to receive or the client already has MAX_NBD_REQUESTS requests
 * in flight.  The new coroutine is handed its own client reference and is
 * scheduled in the export's AioContext.
 */
static void nbd_client_receive_next_request(NBDClient *client)
{
    if (client->recv_coroutine || client->nb_requests >= MAX_NBD_REQUESTS) {
        return;
    }

    nbd_client_get(client);
    client->recv_coroutine = qemu_coroutine_create(nbd_trip, client);
    aio_co_schedule(client->exp->ctx, client->recv_coroutine);
}

1489 1490
static coroutine_fn void nbd_co_client_start(void *opaque)
{
1491
    NBDClient *client = opaque;
1492
    NBDExport *exp = client->exp;
1493
    Error *local_err = NULL;
1494 1495 1496

    if (exp) {
        nbd_export_get(exp);
1497
        QTAILQ_INSERT_TAIL(&exp->clients, client, next);
1498
    }
1499 1500
    qemu_co_mutex_init(&client->send_lock);

1501 1502 1503 1504
    if (nbd_negotiate(client, &local_err)) {
        if (local_err) {
            error_report_err(local_err);
        }
1505
        client_close(client, false);
1506
        return;
1507
    }
1508 1509

    nbd_client_receive_next_request(client);
1510 1511
}

1512 1513 1514 1515 1516 1517
/*
 * Create a new client listener on the given export @exp, using the
 * given channel @sioc.  Begin servicing it in a coroutine.  When the
 * connection closes, call @close_fn with an indication of whether the
 * client completed negotiation.
 */
1518 1519
void nbd_client_new(NBDExport *exp,
                    QIOChannelSocket *sioc,
1520 1521
                    QCryptoTLSCreds *tlscreds,
                    const char *tlsaclname,
1522
                    void (*close_fn)(NBDClient *, bool))
P
Paolo Bonzini 已提交
1523
{
1524
    NBDClient *client;
1525
    Coroutine *co;
1526

1527
    client = g_new0(NBDClient, 1);
1528 1529
    client->refcount = 1;
    client->exp = exp;
1530 1531 1532 1533 1534
    client->tlscreds = tlscreds;
    if (tlscreds) {
        object_ref(OBJECT(client->tlscreds));
    }
    client->tlsaclname = g_strdup(tlsaclname);
1535 1536 1537 1538
    client->sioc = sioc;
    object_ref(OBJECT(client->sioc));
    client->ioc = QIO_CHANNEL(sioc);
    object_ref(OBJECT(client->ioc));
1539
    client->close_fn = close_fn;
1540

1541 1542
    co = qemu_coroutine_create(nbd_co_client_start, client);
    qemu_coroutine_enter(co);
P
Paolo Bonzini 已提交
1543
}