/*
 *  Copyright (C) 2016-2017 Red Hat, Inc.
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device Client Side
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

P
Peter Maydell 已提交
20
#include "qemu/osdep.h"
21
#include "qapi/error.h"
22
#include "trace.h"
F
Fam Zheng 已提交
23 24 25 26
#include "nbd-internal.h"

static int nbd_errno_to_system_errno(int err)
{
27
    int ret;
F
Fam Zheng 已提交
28 29
    switch (err) {
    case NBD_SUCCESS:
30 31
        ret = 0;
        break;
F
Fam Zheng 已提交
32
    case NBD_EPERM:
33 34
        ret = EPERM;
        break;
F
Fam Zheng 已提交
35
    case NBD_EIO:
36 37
        ret = EIO;
        break;
F
Fam Zheng 已提交
38
    case NBD_ENOMEM:
39 40
        ret = ENOMEM;
        break;
F
Fam Zheng 已提交
41
    case NBD_ENOSPC:
42 43
        ret = ENOSPC;
        break;
44 45 46
    case NBD_ESHUTDOWN:
        ret = ESHUTDOWN;
        break;
F
Fam Zheng 已提交
47
    default:
48
        trace_nbd_unknown_error(err);
49 50
        /* fallthrough */
    case NBD_EINVAL:
51 52
        ret = EINVAL;
        break;
F
Fam Zheng 已提交
53
    }
54
    return ret;
F
Fam Zheng 已提交
55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89
}

/* Definitions for opaque data types */

/* File-scope tail queue head for NBDExport entries, statically
 * initialised with QTAILQ_HEAD_INITIALIZER.
 * NOTE(review): nothing in the visible part of this file references
 * `exports` - confirm whether it is still needed here. */
static QTAILQ_HEAD(, NBDExport) exports = QTAILQ_HEAD_INITIALIZER(exports);

/* That's all folks */

/* Basic flow for negotiation

   Server         Client
   Negotiate

   or

   Server         Client
   Negotiate #1
                  Option
   Negotiate #2

   ----

   followed by

   Server         Client
                  Request
   Response
                  Request
   Response
                  ...
   ...
                  Request (type == 2)

*/

90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106
/* Send an option request.
 *
 * The request is for option @opt, with @data containing @len bytes of
 * additional payload for the request (@len may be -1 to treat @data as
 * a C string; and @data may be NULL if @len is 0).
 * Return 0 if successful, -1 with errp set if it is impossible to
 * continue. */
static int nbd_send_option_request(QIOChannel *ioc, uint32_t opt,
                                   uint32_t len, const char *data,
                                   Error **errp)
{
    nbd_option req;
    QEMU_BUILD_BUG_ON(sizeof(req) != 16);

    /* @len is unsigned, so the "-1" sentinel arrives as UINT32_MAX;
     * spell the comparison out rather than relying on implicit
     * signed-to-unsigned conversion. */
    if (len == (uint32_t) -1) {
        len = strlen(data);
    }
    trace_nbd_send_option_request(opt, nbd_opt_lookup(opt), len);

    /* All header fields are stored big-endian. */
    stq_be_p(&req.magic, NBD_OPTS_MAGIC);
    stl_be_p(&req.option, opt);
    stl_be_p(&req.length, len);

    if (nbd_write(ioc, &req, sizeof(req), errp) < 0) {
        error_prepend(errp, "Failed to send option request header");
        return -1;
    }

    /* The payload is optional; skip the write entirely when empty. */
    if (len && nbd_write(ioc, (char *) data, len, errp) < 0) {
        error_prepend(errp, "Failed to send option request data");
        return -1;
    }

    return 0;
}

126 127 128 129 130 131 132 133 134 135 136 137 138
/* Courtesy notification: emit NBD_OPT_ABORT (no payload) so the server
 * learns that this client will not negotiate any further.  The send is
 * best-effort; no error is reported to the caller. */
static void nbd_send_opt_abort(QIOChannel *ioc)
{
    /* A compliant server should answer, but older ones simply drop the
     * connection.  Since we may disconnect without waiting for that
     * answer, neither delivery of the request nor the reply matters
     * here: errp is NULL and the result is discarded on purpose. */
    (void) nbd_send_option_request(ioc, NBD_OPT_ABORT, 0, NULL, NULL);
}


139 140 141 142 143 144 145 146
/* Receive the header of an option reply, which should match the given
 * opt.  Read through the length field, but NOT the length bytes of
 * payload.  The header fields of @reply are converted to host byte
 * order in place.  Return 0 if successful, -1 with errp set if it is
 * impossible to continue; every failure path also sends NBD_OPT_ABORT
 * before returning. */
static int nbd_receive_option_reply(QIOChannel *ioc, uint32_t opt,
                                    nbd_opt_reply *reply, Error **errp)
{
    QEMU_BUILD_BUG_ON(sizeof(*reply) != 20);
    if (nbd_read(ioc, reply, sizeof(*reply), errp) < 0) {
        error_prepend(errp, "failed to read option reply");
        nbd_send_opt_abort(ioc);
        return -1;
    }
    be64_to_cpus(&reply->magic);
    be32_to_cpus(&reply->option);
    be32_to_cpus(&reply->type);
    be32_to_cpus(&reply->length);

    trace_nbd_receive_option_reply(reply->option, nbd_opt_lookup(reply->option),
                                   reply->type, nbd_rep_lookup(reply->type),
                                   reply->length);

    if (reply->magic != NBD_REP_MAGIC) {
        error_setg(errp, "Unexpected option reply magic");
        nbd_send_opt_abort(ioc);
        return -1;
    }
    /* The reply must echo the option we asked about. */
    if (reply->option != opt) {
        error_setg(errp, "Unexpected option type %x expected %x",
                   reply->option, opt);
        nbd_send_opt_abort(ioc);
        return -1;
    }
    return 0;
}

/* If reply represents success, return 1 without further action.
 * If reply represents an error, consume the optional payload of
 * the packet on ioc.  Then return 0 for unsupported (so the client
 * can fall back to other approaches), or -1 with errp set for other
 * errors.
A
Alex Bligh 已提交
180
 */
181
static int nbd_handle_reply_err(QIOChannel *ioc, nbd_opt_reply *reply,
A
Alex Bligh 已提交
182
                                Error **errp)
183
{
A
Alex Bligh 已提交
184 185 186
    char *msg = NULL;
    int result = -1;

187
    if (!(reply->type & (1 << 31))) {
A
Alex Bligh 已提交
188 189 190
        return 1;
    }

191 192
    if (reply->length) {
        if (reply->length > NBD_MAX_BUFFER_SIZE) {
193 194 195
            error_setg(errp, "server error 0x%" PRIx32
                       " (%s) message is too long",
                       reply->type, nbd_rep_lookup(reply->type));
A
Alex Bligh 已提交
196 197
            goto cleanup;
        }
198
        msg = g_malloc(reply->length + 1);
199
        if (nbd_read(ioc, msg, reply->length, errp) < 0) {
200 201 202
            error_prepend(errp, "failed to read option error 0x%" PRIx32
                          " (%s) message",
                          reply->type, nbd_rep_lookup(reply->type));
A
Alex Bligh 已提交
203 204
            goto cleanup;
        }
205
        msg[reply->length] = '\0';
206 207
    }

208
    switch (reply->type) {
209
    case NBD_REP_ERR_UNSUP:
210
        trace_nbd_reply_err_unsup(reply->option, nbd_opt_lookup(reply->option));
A
Alex Bligh 已提交
211 212
        result = 0;
        goto cleanup;
213

214
    case NBD_REP_ERR_POLICY:
215 216
        error_setg(errp, "Denied by server for option %" PRIx32 " (%s)",
                   reply->option, nbd_opt_lookup(reply->option));
217 218
        break;

219
    case NBD_REP_ERR_INVALID:
220 221
        error_setg(errp, "Invalid data length for option %" PRIx32 " (%s)",
                   reply->option, nbd_opt_lookup(reply->option));
222 223
        break;

224
    case NBD_REP_ERR_PLATFORM:
225 226
        error_setg(errp, "Server lacks support for option %" PRIx32 " (%s)",
                   reply->option, nbd_opt_lookup(reply->option));
227 228
        break;

229
    case NBD_REP_ERR_TLS_REQD:
230 231 232 233 234 235 236
        error_setg(errp, "TLS negotiation required before option %" PRIx32
                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
        break;

    case NBD_REP_ERR_UNKNOWN:
        error_setg(errp, "Requested export not available for option %" PRIx32
                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
237 238
        break;

239
    case NBD_REP_ERR_SHUTDOWN:
240 241 242 243 244 245 246
        error_setg(errp, "Server shutting down before option %" PRIx32 " (%s)",
                   reply->option, nbd_opt_lookup(reply->option));
        break;

    case NBD_REP_ERR_BLOCK_SIZE_REQD:
        error_setg(errp, "Server requires INFO_BLOCK_SIZE for option %" PRIx32
                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
247 248
        break;

249
    default:
250 251
        error_setg(errp, "Unknown error code when asking for option %" PRIx32
                   " (%s)", reply->option, nbd_opt_lookup(reply->option));
252 253 254
        break;
    }

A
Alex Bligh 已提交
255 256 257 258 259 260
    if (msg) {
        error_append_hint(errp, "%s\n", msg);
    }

 cleanup:
    g_free(msg);
261 262 263
    if (result < 0) {
        nbd_send_opt_abort(ioc);
    }
A
Alex Bligh 已提交
264
    return result;
265 266
}

267 268 269 270 271 272 273
/* Process another portion of the NBD_OPT_LIST reply.  Set *@match if
 * the current reply matches @want or if the server does not support
 * NBD_OPT_LIST, otherwise leave @match alone.  Return 0 if iteration
 * is complete, positive if more replies are expected, or negative
 * with @errp set if an unrecoverable error occurred. */
static int nbd_receive_list(QIOChannel *ioc, const char *want, bool *match,
                            Error **errp)
{
    nbd_opt_reply reply;
    uint32_t len;
    uint32_t namelen;
    char name[NBD_MAX_NAME_SIZE + 1];
    int error;

    if (nbd_receive_option_reply(ioc, NBD_OPT_LIST, &reply, errp) < 0) {
        return -1;
    }
    error = nbd_handle_reply_err(ioc, &reply, errp);
    if (error <= 0) {
        /* The server did not support NBD_OPT_LIST, so set *match on
         * the assumption that any name will be accepted.  */
        *match = true;
        return error;
    }
    len = reply.length;

    if (reply.type == NBD_REP_ACK) {
        /* NBD_REP_ACK ends the listing and must carry no payload. */
        if (len != 0) {
            error_setg(errp, "length too long for option end");
            nbd_send_opt_abort(ioc);
            return -1;
        }
        return 0;
    } else if (reply.type != NBD_REP_SERVER) {
        error_setg(errp, "Unexpected reply type %" PRIx32 " expected %x",
                   reply.type, NBD_REP_SERVER);
        nbd_send_opt_abort(ioc);
        return -1;
    }

    /* Payload layout: 32-bit name length, the name, then the rest
     * (treated below as the export description and discarded). */
    if (len < sizeof(namelen) || len > NBD_MAX_BUFFER_SIZE) {
        error_setg(errp, "incorrect option length %" PRIu32, len);
        nbd_send_opt_abort(ioc);
        return -1;
    }
    if (nbd_read(ioc, &namelen, sizeof(namelen), errp) < 0) {
        error_prepend(errp, "failed to read option name length");
        nbd_send_opt_abort(ioc);
        return -1;
    }
    namelen = be32_to_cpu(namelen);
    len -= sizeof(namelen);
    if (len < namelen) {
        error_setg(errp, "incorrect option name length");
        nbd_send_opt_abort(ioc);
        return -1;
    }
    if (namelen != strlen(want)) {
        /* Cannot be a match; skip the rest of this entry unread. */
        if (nbd_drop(ioc, len, errp) < 0) {
            error_prepend(errp, "failed to skip export name with wrong length");
            nbd_send_opt_abort(ioc);
            return -1;
        }
        return 1;
    }

    /* Here namelen equals strlen(want), so this bounds @want as well. */
    assert(namelen < sizeof(name));
    if (nbd_read(ioc, name, namelen, errp) < 0) {
        error_prepend(errp, "failed to read export name");
        nbd_send_opt_abort(ioc);
        return -1;
    }
    name[namelen] = '\0';
    len -= namelen;
    if (nbd_drop(ioc, len, errp) < 0) {
        error_prepend(errp, "failed to read export description");
        nbd_send_opt_abort(ioc);
        return -1;
    }
    if (!strcmp(name, want)) {
        *match = true;
    }
    return 1;
}


E
Eric Blake 已提交
353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460
/* Returns -1 if NBD_OPT_GO proves the export @wantname cannot be
 * used, 0 if NBD_OPT_GO is unsupported (fall back to NBD_OPT_LIST and
 * NBD_OPT_EXPORT_NAME in that case), and > 0 if the export is good to
 * go (with @info populated).  On -1, @errp is set. */
static int nbd_opt_go(QIOChannel *ioc, const char *wantname,
                      NBDExportInfo *info, Error **errp)
{
    nbd_opt_reply reply;
    uint32_t len = strlen(wantname);
    uint16_t type;
    int error;
    char *buf;

    /* The protocol requires that the server send NBD_INFO_EXPORT with
     * a non-zero flags (at least NBD_FLAG_HAS_FLAGS must be set); so
     * flags still 0 is a witness of a broken server. */
    info->flags = 0;

    trace_nbd_opt_go_start(wantname);
    /* Request payload: 32-bit name length, name, 16-bit request count. */
    buf = g_malloc(4 + len + 2 + 1);
    stl_be_p(buf, len);
    memcpy(buf + 4, wantname, len);
    /* No requests, live with whatever server sends */
    stw_be_p(buf + 4 + len, 0);
    error = nbd_send_option_request(ioc, NBD_OPT_GO, len + 6, buf, errp);
    /* The request buffer is only needed for the send; release it here
     * so that none of the return paths below can leak it. */
    g_free(buf);
    if (error < 0) {
        return -1;
    }

    while (1) {
        if (nbd_receive_option_reply(ioc, NBD_OPT_GO, &reply, errp) < 0) {
            return -1;
        }
        error = nbd_handle_reply_err(ioc, &reply, errp);
        if (error <= 0) {
            return error;
        }
        len = reply.length;

        if (reply.type == NBD_REP_ACK) {
            /* Server is done sending info and moved into transmission
               phase, but make sure it sent flags */
            if (len) {
                error_setg(errp, "server sent invalid NBD_REP_ACK");
                nbd_send_opt_abort(ioc);
                return -1;
            }
            if (!info->flags) {
                error_setg(errp, "broken server omitted NBD_INFO_EXPORT");
                nbd_send_opt_abort(ioc);
                return -1;
            }
            trace_nbd_opt_go_success();
            return 1;
        }
        if (reply.type != NBD_REP_INFO) {
            error_setg(errp, "unexpected reply type %" PRIx32 ", expected %x",
                       reply.type, NBD_REP_INFO);
            nbd_send_opt_abort(ioc);
            return -1;
        }
        /* Every NBD_REP_INFO payload starts with a 16-bit info type. */
        if (len < sizeof(type)) {
            error_setg(errp, "NBD_REP_INFO length %" PRIu32 " is too short",
                       len);
            nbd_send_opt_abort(ioc);
            return -1;
        }
        if (nbd_read(ioc, &type, sizeof(type), errp) < 0) {
            error_prepend(errp, "failed to read info type");
            nbd_send_opt_abort(ioc);
            return -1;
        }
        len -= sizeof(type);
        be16_to_cpus(&type);
        switch (type) {
        case NBD_INFO_EXPORT:
            if (len != sizeof(info->size) + sizeof(info->flags)) {
                error_setg(errp, "remaining export info len %" PRIu32
                           " is unexpected size", len);
                nbd_send_opt_abort(ioc);
                return -1;
            }
            if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
                error_prepend(errp, "failed to read info size");
                nbd_send_opt_abort(ioc);
                return -1;
            }
            be64_to_cpus(&info->size);
            if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
                error_prepend(errp, "failed to read info flags");
                nbd_send_opt_abort(ioc);
                return -1;
            }
            be16_to_cpus(&info->flags);
            trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
            break;

        default:
            /* Info types we do not handle are traced and skipped. */
            trace_nbd_opt_go_info_unknown(type, nbd_info_lookup(type));
            if (nbd_drop(ioc, len, errp) < 0) {
                error_prepend(errp, "Failed to read info payload");
                nbd_send_opt_abort(ioc);
                return -1;
            }
            break;
        }
    }
}

461
/* Return -1 on failure, 0 if wantname is an available export. */
462 463 464 465 466 467
static int nbd_receive_query_exports(QIOChannel *ioc,
                                     const char *wantname,
                                     Error **errp)
{
    bool foundExport = false;

468
    trace_nbd_receive_query_exports_start(wantname);
469
    if (nbd_send_option_request(ioc, NBD_OPT_LIST, 0, NULL, errp) < 0) {
470 471 472 473
        return -1;
    }

    while (1) {
474
        int ret = nbd_receive_list(ioc, wantname, &foundExport, errp);
475 476

        if (ret < 0) {
477
            /* Server gave unexpected reply */
478
            return -1;
479 480 481 482 483 484 485 486
        } else if (ret == 0) {
            /* Done iterating. */
            if (!foundExport) {
                error_setg(errp, "No export with name '%s' available",
                           wantname);
                nbd_send_opt_abort(ioc);
                return -1;
            }
487
            trace_nbd_receive_query_exports_success(wantname);
488
            return 0;
489 490 491 492
        }
    }
}

493 494 495 496
/* Upgrade the connection on @ioc to TLS.
 *
 * Sends NBD_OPT_STARTTLS, requires an NBD_REP_ACK reply with no
 * payload, then creates a TLS client channel on top of @ioc using
 * @tlscreds and @hostname and runs the handshake.
 * Returns the new TLS channel (as a QIOChannel) on success, or NULL
 * with @errp set on failure. */
static QIOChannel *nbd_receive_starttls(QIOChannel *ioc,
                                        QCryptoTLSCreds *tlscreds,
                                        const char *hostname, Error **errp)
{
    nbd_opt_reply reply;
    QIOChannelTLS *tioc;
    struct NBDTLSHandshakeData data = { 0 };

    trace_nbd_receive_starttls_request();
    if (nbd_send_option_request(ioc, NBD_OPT_STARTTLS, 0, NULL, errp) < 0) {
        return NULL;
    }

    trace_nbd_receive_starttls_reply();
    if (nbd_receive_option_reply(ioc, NBD_OPT_STARTTLS, &reply, errp) < 0) {
        return NULL;
    }

    if (reply.type != NBD_REP_ACK) {
        error_setg(errp, "Server rejected request to start TLS %" PRIx32,
                   reply.type);
        nbd_send_opt_abort(ioc);
        return NULL;
    }

    if (reply.length != 0) {
        error_setg(errp, "Start TLS response was not zero %" PRIu32,
                   reply.length);
        nbd_send_opt_abort(ioc);
        return NULL;
    }

    trace_nbd_receive_starttls_new_client();
    tioc = qio_channel_tls_new_client(ioc, tlscreds, hostname, errp);
    if (!tioc) {
        return NULL;
    }
    qio_channel_set_name(QIO_CHANNEL(tioc), "nbd-client-tls");
    data.loop = g_main_loop_new(g_main_context_default(), FALSE);
    trace_nbd_receive_starttls_tls_handshake();
    qio_channel_tls_handshake(tioc,
                              nbd_tls_handshake,
                              &data,
                              NULL);

    /* Spin the main loop only if the handshake has not already
     * finished.  NOTE(review): presumably nbd_tls_handshake sets
     * data.complete / data.error and quits data.loop - confirm. */
    if (!data.complete) {
        g_main_loop_run(data.loop);
    }
    g_main_loop_unref(data.loop);
    if (data.error) {
        error_propagate(errp, data.error);
        object_unref(OBJECT(tioc));
        return NULL;
    }

    return QIO_CHANNEL(tioc);
}


552
int nbd_receive_negotiate(QIOChannel *ioc, const char *name,
553
                          QCryptoTLSCreds *tlscreds, const char *hostname,
554 555
                          QIOChannel **outioc, NBDExportInfo *info,
                          Error **errp)
F
Fam Zheng 已提交
556 557
{
    char buf[256];
558
    uint64_t magic;
F
Fam Zheng 已提交
559
    int rc;
E
Eric Blake 已提交
560
    bool zeroes = true;
F
Fam Zheng 已提交
561

562
    trace_nbd_receive_negotiate(tlscreds, hostname ? hostname : "<null>");
F
Fam Zheng 已提交
563 564 565

    rc = -EINVAL;

566 567 568 569 570 571 572 573
    if (outioc) {
        *outioc = NULL;
    }
    if (tlscreds && !outioc) {
        error_setg(errp, "Output I/O channel required for TLS");
        goto fail;
    }

574
    if (nbd_read(ioc, buf, 8, errp) < 0) {
575
        error_prepend(errp, "Failed to read data");
F
Fam Zheng 已提交
576 577 578 579 580 581 582 583 584
        goto fail;
    }

    buf[8] = '\0';
    if (strlen(buf) == 0) {
        error_setg(errp, "Server connection closed unexpectedly");
        goto fail;
    }

585
    magic = ldq_be_p(buf);
586
    trace_nbd_receive_negotiate_magic(magic);
F
Fam Zheng 已提交
587 588 589 590 591 592

    if (memcmp(buf, "NBDMAGIC", 8) != 0) {
        error_setg(errp, "Invalid magic received");
        goto fail;
    }

593
    if (nbd_read(ioc, &magic, sizeof(magic), errp) < 0) {
594
        error_prepend(errp, "Failed to read magic");
F
Fam Zheng 已提交
595 596 597
        goto fail;
    }
    magic = be64_to_cpu(magic);
598
    trace_nbd_receive_negotiate_magic(magic);
F
Fam Zheng 已提交
599

600
    if (magic == NBD_OPTS_MAGIC) {
601 602
        uint32_t clientflags = 0;
        uint16_t globalflags;
603
        bool fixedNewStyle = false;
F
Fam Zheng 已提交
604

605
        if (nbd_read(ioc, &globalflags, sizeof(globalflags), errp) < 0) {
606
            error_prepend(errp, "Failed to read server flags");
F
Fam Zheng 已提交
607 608
            goto fail;
        }
609
        globalflags = be16_to_cpu(globalflags);
610
        trace_nbd_receive_negotiate_server_flags(globalflags);
611
        if (globalflags & NBD_FLAG_FIXED_NEWSTYLE) {
612
            fixedNewStyle = true;
613 614
            clientflags |= NBD_FLAG_C_FIXED_NEWSTYLE;
        }
E
Eric Blake 已提交
615 616 617 618
        if (globalflags & NBD_FLAG_NO_ZEROES) {
            zeroes = false;
            clientflags |= NBD_FLAG_C_NO_ZEROES;
        }
619
        /* client requested flags */
620
        clientflags = cpu_to_be32(clientflags);
621
        if (nbd_write(ioc, &clientflags, sizeof(clientflags), errp) < 0) {
622
            error_prepend(errp, "Failed to send clientflags field");
F
Fam Zheng 已提交
623 624
            goto fail;
        }
625 626 627 628 629 630 631 632 633 634 635 636
        if (tlscreds) {
            if (fixedNewStyle) {
                *outioc = nbd_receive_starttls(ioc, tlscreds, hostname, errp);
                if (!*outioc) {
                    goto fail;
                }
                ioc = *outioc;
            } else {
                error_setg(errp, "Server does not support STARTTLS");
                goto fail;
            }
        }
637
        if (!name) {
638
            trace_nbd_receive_negotiate_default_name();
639
            name = "";
640
        }
641
        if (fixedNewStyle) {
E
Eric Blake 已提交
642 643 644 645 646 647 648 649 650 651 652 653 654 655
            int result;

            /* Try NBD_OPT_GO first - if it works, we are done (it
             * also gives us a good message if the server requires
             * TLS).  If it is not available, fall back to
             * NBD_OPT_LIST for nicer error messages about a missing
             * export, then use NBD_OPT_EXPORT_NAME.  */
            result = nbd_opt_go(ioc, name, info, errp);
            if (result < 0) {
                goto fail;
            }
            if (result > 0) {
                return 0;
            }
656 657 658
            /* Check our desired export is present in the
             * server export list. Since NBD_OPT_EXPORT_NAME
             * cannot return an error message, running this
E
Eric Blake 已提交
659 660
             * query gives us better error reporting if the
             * export name is not available.
661 662 663 664 665
             */
            if (nbd_receive_query_exports(ioc, name, errp) < 0) {
                goto fail;
            }
        }
666 667 668
        /* write the export name request */
        if (nbd_send_option_request(ioc, NBD_OPT_EXPORT_NAME, -1, name,
                                    errp) < 0) {
F
Fam Zheng 已提交
669 670
            goto fail;
        }
671

672
        /* Read the response */
673
        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
674
            error_prepend(errp, "Failed to read export length");
F
Fam Zheng 已提交
675 676
            goto fail;
        }
677
        be64_to_cpus(&info->size);
F
Fam Zheng 已提交
678

679
        if (nbd_read(ioc, &info->flags, sizeof(info->flags), errp) < 0) {
680
            error_prepend(errp, "Failed to read export flags");
681 682
            goto fail;
        }
683
        be16_to_cpus(&info->flags);
684
    } else if (magic == NBD_CLIENT_MAGIC) {
E
Eric Blake 已提交
685 686
        uint32_t oldflags;

687 688 689 690
        if (name) {
            error_setg(errp, "Server does not support export names");
            goto fail;
        }
691 692 693 694
        if (tlscreds) {
            error_setg(errp, "Server does not support STARTTLS");
            goto fail;
        }
695

696
        if (nbd_read(ioc, &info->size, sizeof(info->size), errp) < 0) {
697
            error_prepend(errp, "Failed to read export length");
698 699
            goto fail;
        }
700
        be64_to_cpus(&info->size);
F
Fam Zheng 已提交
701

702
        if (nbd_read(ioc, &oldflags, sizeof(oldflags), errp) < 0) {
703
            error_prepend(errp, "Failed to read export flags");
F
Fam Zheng 已提交
704 705
            goto fail;
        }
E
Eric Blake 已提交
706 707 708 709 710
        be32_to_cpus(&oldflags);
        if (oldflags & ~0xffff) {
            error_setg(errp, "Unexpected export flags %0x" PRIx32, oldflags);
            goto fail;
        }
711
        info->flags = oldflags;
F
Fam Zheng 已提交
712
    } else {
713 714
        error_setg(errp, "Bad magic received");
        goto fail;
F
Fam Zheng 已提交
715
    }
716

717
    trace_nbd_receive_negotiate_size_flags(info->size, info->flags);
718
    if (zeroes && nbd_drop(ioc, 124, errp) < 0) {
719
        error_prepend(errp, "Failed to read reserved block");
F
Fam Zheng 已提交
720 721 722 723 724 725 726 727 728
        goto fail;
    }
    rc = 0;

fail:
    return rc;
}

#ifdef __linux__
729
int nbd_init(int fd, QIOChannelSocket *sioc, NBDExportInfo *info,
730
             Error **errp)
F
Fam Zheng 已提交
731
{
732 733 734 735
    unsigned long sectors = info->size / BDRV_SECTOR_SIZE;
    if (info->size / BDRV_SECTOR_SIZE != sectors) {
        error_setg(errp, "Export size %" PRIu64 " too large for 32-bit kernel",
                   info->size);
736 737 738
        return -E2BIG;
    }

739
    trace_nbd_init_set_socket();
F
Fam Zheng 已提交
740

741
    if (ioctl(fd, NBD_SET_SOCK, (unsigned long) sioc->fd) < 0) {
F
Fam Zheng 已提交
742
        int serrno = errno;
743
        error_setg(errp, "Failed to set NBD socket");
F
Fam Zheng 已提交
744 745 746
        return -serrno;
    }

747
    trace_nbd_init_set_block_size(BDRV_SECTOR_SIZE);
F
Fam Zheng 已提交
748

749
    if (ioctl(fd, NBD_SET_BLKSIZE, (unsigned long)BDRV_SECTOR_SIZE) < 0) {
F
Fam Zheng 已提交
750
        int serrno = errno;
751
        error_setg(errp, "Failed setting NBD block size");
F
Fam Zheng 已提交
752 753 754
        return -serrno;
    }

755
    trace_nbd_init_set_size(sectors);
756 757
    if (info->size % BDRV_SECTOR_SIZE) {
        trace_nbd_init_trailing_bytes(info->size % BDRV_SECTOR_SIZE);
758
    }
F
Fam Zheng 已提交
759

760
    if (ioctl(fd, NBD_SET_SIZE_BLOCKS, sectors) < 0) {
F
Fam Zheng 已提交
761
        int serrno = errno;
762
        error_setg(errp, "Failed setting size (in blocks)");
F
Fam Zheng 已提交
763 764 765
        return -serrno;
    }

766
    if (ioctl(fd, NBD_SET_FLAGS, (unsigned long) info->flags) < 0) {
F
Fam Zheng 已提交
767
        if (errno == ENOTTY) {
768
            int read_only = (info->flags & NBD_FLAG_READ_ONLY) != 0;
769
            trace_nbd_init_set_readonly();
F
Fam Zheng 已提交
770 771 772

            if (ioctl(fd, BLKROSET, (unsigned long) &read_only) < 0) {
                int serrno = errno;
773
                error_setg(errp, "Failed setting read-only attribute");
F
Fam Zheng 已提交
774 775 776 777
                return -serrno;
            }
        } else {
            int serrno = errno;
778
            error_setg(errp, "Failed setting flags");
F
Fam Zheng 已提交
779 780 781 782
            return -serrno;
        }
    }

783
    trace_nbd_init_finish();
F
Fam Zheng 已提交
784 785 786 787 788 789 790 791 792

    return 0;
}

int nbd_client(int fd)
{
    int ret;
    int serrno;

793
    trace_nbd_client_loop();
F
Fam Zheng 已提交
794 795 796 797 798 799 800 801 802 803 804

    ret = ioctl(fd, NBD_DO_IT);
    if (ret < 0 && errno == EPIPE) {
        /* NBD_DO_IT normally returns EPIPE when someone has disconnected
         * the socket via NBD_DISCONNECT.  We do not want to return 1 in
         * that case.
         */
        ret = 0;
    }
    serrno = errno;

805
    trace_nbd_client_loop_ret(ret, strerror(serrno));
F
Fam Zheng 已提交
806

807
    trace_nbd_client_clear_queue();
F
Fam Zheng 已提交
808 809
    ioctl(fd, NBD_CLEAR_QUE);

810
    trace_nbd_client_clear_socket();
F
Fam Zheng 已提交
811 812 813 814 815
    ioctl(fd, NBD_CLEAR_SOCK);

    errno = serrno;
    return ret;
}
816 817 818 819 820 821 822 823 824

/* Tear down the kernel NBD device open on @fd.
 *
 * Issues NBD_CLEAR_QUE, NBD_DISCONNECT and NBD_CLEAR_SOCK in that
 * order.  The ioctl results are not checked; always returns 0. */
int nbd_disconnect(int fd)
{
    const unsigned long teardown[] = {
        NBD_CLEAR_QUE,
        NBD_DISCONNECT,
        NBD_CLEAR_SOCK,
    };
    size_t step;

    for (step = 0; step < sizeof(teardown) / sizeof(teardown[0]); step++) {
        (void) ioctl(fd, teardown[step]);
    }
    return 0;
}

F
Fam Zheng 已提交
825
#else
826
int nbd_init(int fd, QIOChannelSocket *ioc, NBDExportInfo *info,
827
	     Error **errp)
F
Fam Zheng 已提交
828
{
829
    error_setg(errp, "nbd_init is only supported on Linux");
F
Fam Zheng 已提交
830 831 832 833 834 835 836
    return -ENOTSUP;
}

/* Stub for builds without __linux__: there is no kernel NBD client
 * loop to run.  @fd is ignored; always returns -ENOTSUP. */
int nbd_client(int fd)
{
    const int unsupported = -ENOTSUP;

    (void) fd;
    return unsupported;
}
837 838 839 840
/* Stub for builds without __linux__: there is no kernel NBD device to
 * disconnect.  @fd is ignored; always returns -ENOTSUP. */
int nbd_disconnect(int fd)
{
    const int unsupported = -ENOTSUP;

    (void) fd;
    return unsupported;
}
F
Fam Zheng 已提交
841 842
#endif

843
/* Serialise @request into the wire format and write it to @ioc.
 *
 * Returns the result of nbd_write(); no Error object is collected
 * (errp is passed as NULL). */
ssize_t nbd_send_request(QIOChannel *ioc, NBDRequest *request)
{
    uint8_t buf[NBD_REQUEST_SIZE];

    trace_nbd_send_request(request->from, request->len, request->handle,
                           request->flags, request->type);

    /* Request, all fields big-endian
       [ 0 ..  3]    magic   (NBD_REQUEST_MAGIC)
       [ 4 ..  5]    flags
       [ 6 ..  7]    type
       [ 8 .. 15]    handle
       [16 .. 23]    from
       [24 .. 27]    len
     */
    stl_be_p(buf, NBD_REQUEST_MAGIC);
    stw_be_p(buf + 4, request->flags);
    stw_be_p(buf + 6, request->type);
    stq_be_p(buf + 8, request->handle);
    stq_be_p(buf + 16, request->from);
    stl_be_p(buf + 24, request->len);

    return nbd_write(ioc, buf, sizeof(buf), NULL);
}

860
/* Read one reply header from @ioc into @reply.
 *
 * The error field is converted to a host errno value before it is
 * stored in @reply.
 * Returns the nbd_read_eof() result unchanged when that is <= 0,
 * -EINVAL with @errp set on a short read, a server shutdown or a bad
 * magic, and the header size on success. */
ssize_t nbd_receive_reply(QIOChannel *ioc, NBDReply *reply, Error **errp)
{
    uint8_t buf[NBD_REPLY_SIZE];
    uint32_t magic;
    ssize_t ret;

    ret = nbd_read_eof(ioc, buf, sizeof(buf), errp);
    if (ret <= 0) {
        return ret;
    }

    if (ret != sizeof(buf)) {
        error_setg(errp, "read failed");
        return -EINVAL;
    }

    /* Reply
       [ 0 ..  3]    magic   (NBD_REPLY_MAGIC)
       [ 4 ..  7]    error   (0 == no error)
       [ 8 .. 15]    handle
     */

    magic = ldl_be_p(buf);
    reply->error  = ldl_be_p(buf + 4);
    reply->handle = ldq_be_p(buf + 8);

    reply->error = nbd_errno_to_system_errno(reply->error);

    if (reply->error == ESHUTDOWN) {
        /* This works even on mingw which lacks a native ESHUTDOWN */
        error_setg(errp, "server shutting down");
        return -EINVAL;
    }
    trace_nbd_receive_reply(magic, reply->error, reply->handle);

    if (magic != NBD_REPLY_MAGIC) {
        error_setg(errp, "invalid magic (got 0x%" PRIx32 ")", magic);
        return -EINVAL;
    }
    return sizeof(buf);
}