/*
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "qemu/cutils.h"
#include "sysemu/block-backend.h"
#include "block/block_int.h"
#include "block/nbd.h"
#include "qemu/main-loop.h"
#include "qemu/error-report.h"
#include "qemu/config-file.h"
#include "block/snapshot.h"
#include "qapi/util.h"
#include "qapi/qmp/qstring.h"
#include "qom/object_interfaces.h"
#include "io/channel-socket.h"
#include "crypto/init.h"

#include <getopt.h>
#include <libgen.h>
#include <limits.h>
#include <pthread.h>

/*
 * printf-style template for the default UNIX socket path; main() fills the
 * %s with the basename of the NBD device given to -c.
 */
#define SOCKET_PATH                "/var/lock/qemu-nbd-%s"

/*
 * getopt_long() return values for options that have no short form.  They
 * start at 256 so they cannot collide with any single-character option.
 */
#define QEMU_NBD_OPT_CACHE         256
#define QEMU_NBD_OPT_AIO           257
#define QEMU_NBD_OPT_DISCARD       258
#define QEMU_NBD_OPT_DETECT_ZEROES 259
#define QEMU_NBD_OPT_OBJECT        260
#define QEMU_NBD_OPT_TLSCREDS      261
#define QEMU_NBD_OPT_IMAGE_OPTS    262

/* Size in bytes of the boot sectors read by find_partition(). */
#define MBR_SIZE 512

static NBDExport *exp;
52
static bool newproto;
53
static int verbose;
P
Paolo Bonzini 已提交
54
static char *srcpath;
55
static SocketAddress *saddr;
56 57
static int persistent = 0;
static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state;
P
Paolo Bonzini 已提交
58 59
static int shared = 1;
static int nb_fds;
60 61
static QIOChannelSocket *server_ioc;
static int server_watch = -1;
62
static QCryptoTLSCreds *tlscreds;
B
bellard 已提交
63 64 65

static void usage(const char *name)
{
66
    (printf) (
B
bellard 已提交
67 68 69
"Usage: %s [OPTIONS] FILE\n"
"QEMU Disk Network Block Device Server\n"
"\n"
70 71
"  -h, --help                display this help and exit\n"
"  -V, --version             output version information and exit\n"
72 73
"\n"
"Connection properties:\n"
74 75 76 77 78 79 80
"  -p, --port=PORT           port to listen on (default `%d')\n"
"  -b, --bind=IFACE          interface to bind to (default `0.0.0.0')\n"
"  -k, --socket=PATH         path to the unix socket\n"
"                            (default '"SOCKET_PATH"')\n"
"  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
"  -t, --persistent          don't exit on the last connection\n"
"  -v, --verbose             display extra debugging information\n"
E
Eric Blake 已提交
81
"  -x, --export-name=NAME    expose export by name\n"
B
bellard 已提交
82
"\n"
83
"Exposing part of the image:\n"
84 85
"  -o, --offset=OFFSET       offset into the image\n"
"  -P, --partition=NUM       only expose partition NUM\n"
86
"\n"
87 88 89
"General purpose options:\n"
"  --object type,id=ID,...   define an object such as 'secret' for providing\n"
"                            passwords and/or encryption keys\n"
90 91
#ifdef __linux__
"Kernel NBD client support:\n"
92 93
"  -c, --connect=DEV         connect FILE to the local NBD device DEV\n"
"  -d, --disconnect          disconnect the specified device\n"
94 95 96 97
"\n"
#endif
"\n"
"Block device options:\n"
98 99 100 101 102
"  -f, --format=FORMAT       set image format (raw, qcow2, ...)\n"
"  -r, --read-only           export read-only\n"
"  -s, --snapshot            use FILE as an external snapshot, create a temporary\n"
"                            file with backing_file=FILE, redirect the write to\n"
"                            the temporary one\n"
103
"  -l, --load-snapshot=SNAPSHOT_PARAM\n"
104 105 106 107 108 109 110
"                            load an internal snapshot inside FILE and export it\n"
"                            as an read-only device, SNAPSHOT_PARAM format is\n"
"                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
"                            '[ID_OR_NAME]'\n"
"  -n, --nocache             disable host cache\n"
"      --cache=MODE          set cache mode (none, writeback, ...)\n"
"      --aio=MODE            set AIO mode (native or threads)\n"
111
"      --discard=MODE        set discard mode (ignore, unmap)\n"
112
"      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
113
"      --image-opts          treat FILE as a full set of image options\n"
114 115
"\n"
"Report bugs to <qemu-devel@nongnu.org>\n"
116
    , name, NBD_DEFAULT_PORT, "DEVICE");
B
bellard 已提交
117 118 119 120 121
}

/*
 * Print the version and copyright banner to stdout.
 *
 * @name: program name shown at the start of the banner.
 */
static void version(const char *name)
{
    printf(
"%s version 0.0.1\n"
"Written by Anthony Liguori.\n"
"\n"
"Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>.\n"
"This is free software; see the source for copying conditions.  There is NO\n"
"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
    , name);
}

/* Decoded form of one 16-byte partition table entry. */
struct partition_record
{
    uint8_t bootable;
    uint8_t start_head;
    uint32_t start_cylinder;    /* 10-bit value */
    uint8_t start_sector;
    uint8_t system;             /* partition type byte */
    uint8_t end_head;
    uint32_t end_cylinder;      /* 10-bit value, would be cut off in uint8_t */
    uint8_t end_sector;
    uint32_t start_sector_abs;  /* first sector, counted in 512-byte units */
    uint32_t nb_sectors_abs;    /* length in 512-byte sectors */
};

/*
 * Read a 32-bit little-endian value byte by byte, so the result does not
 * depend on host endianness or on the alignment of @p.
 */
static uint32_t partition_read_le32(const uint8_t *p)
{
    return (uint32_t)p[0] |
           ((uint32_t)p[1] << 8) |
           ((uint32_t)p[2] << 16) |
           ((uint32_t)p[3] << 24);
}

/*
 * Decode the 16-byte partition table entry at @p into @r.
 *
 * The cylinder numbers are 10 bits wide: the low eight bits come from one
 * byte and the two high bits from the top of the sector byte, whose low six
 * bits hold the sector number.
 */
static void read_partition(uint8_t *p, struct partition_record *r)
{
    r->bootable = p[0];
    r->start_head = p[1];
    r->start_cylinder = p[3] | ((p[2] << 2) & 0x0300);
    r->start_sector = p[2] & 0x3f;
    r->system = p[4];
    r->end_head = p[5];
    r->end_cylinder = p[7] | ((p[6] << 2) & 0x300);
    r->end_sector = p[6] & 0x3f;

    r->start_sector_abs = partition_read_le32(p + 8);
    r->nb_sectors_abs   = partition_read_le32(p + 12);
}

/*
 * Look up partition number @partition in the partition table of @blk.
 *
 * Entries 1-4 are the primary slots in sector 0.  For every primary slot of
 * type 0x5 or 0xF the sector it points at is read as a second table, whose
 * entries are numbered from 5 upwards in groups of four.
 *
 * On success stores the partition's byte offset and byte length in *@offset
 * and *@size and returns 0.  Returns -EINVAL when sector 0 does not end in
 * the 0x55 0xaa signature and -ENOENT when no matching entry exists.  A read
 * error is reported and terminates the process.
 */
static int find_partition(BlockBackend *blk, int partition,
                          off_t *offset, off_t *size)
{
    struct partition_record mbr[4];
    uint8_t data[MBR_SIZE];
    int i;
    int ext_partnum = 4;
    int ret;

    ret = blk_pread(blk, 0, data, sizeof(data));
    if (ret < 0) {
        error_report("error while reading: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (data[510] != 0x55 || data[511] != 0xaa) {
        return -EINVAL;
    }

    for (i = 0; i < 4; i++) {
        read_partition(&data[446 + 16 * i], &mbr[i]);

        /* Skip unused slots. */
        if (!mbr[i].system || !mbr[i].nb_sectors_abs) {
            continue;
        }

        if (mbr[i].system == 0xF || mbr[i].system == 0x5) {
            struct partition_record ext[4];
            uint8_t data1[MBR_SIZE];
            int j;

            /*
             * Widen before multiplying: the sector number is 32 bits, so
             * the byte offset would otherwise wrap at 4 GiB.
             */
            ret = blk_pread(blk, (int64_t)mbr[i].start_sector_abs * MBR_SIZE,
                            data1, sizeof(data1));
            if (ret < 0) {
                error_report("error while reading: %s", strerror(-ret));
                exit(EXIT_FAILURE);
            }

            for (j = 0; j < 4; j++) {
                read_partition(&data1[446 + 16 * j], &ext[j]);
                if (!ext[j].system || !ext[j].nb_sectors_abs) {
                    continue;
                }

                if ((ext_partnum + j + 1) == partition) {
                    *offset = (uint64_t)ext[j].start_sector_abs << 9;
                    *size = (uint64_t)ext[j].nb_sectors_abs << 9;
                    return 0;
                }
            }
            ext_partnum += 4;
        } else if ((i + 1) == partition) {
            *offset = (uint64_t)mbr[i].start_sector_abs << 9;
            *size = (uint64_t)mbr[i].nb_sectors_abs << 9;
            return 0;
        }
    }

    return -ENOENT;
}

static void termsig_handler(int signum)
{
223
    atomic_cmpxchg(&state, RUNNING, TERMINATE);
P
Paolo Bonzini 已提交
224
    qemu_notify_event();
P
Paolo Bonzini 已提交
225 226
}

227

P
Paolo Bonzini 已提交
228
/*
 * Thread body started by nbd_client_thread(): opens and immediately closes
 * the NBD device named by @arg (a char * path).  Always returns NULL; a
 * failed open() is ignored.
 */
static void *show_parts(void *arg)
{
    char *device = arg;
    int nbd;

    /* linux just needs an open() to trigger
     * the partition table update
     * but remember to load the module with max_part != 0 :
     *     modprobe nbd max_part=63
     */
    nbd = open(device, O_RDWR);
    if (nbd >= 0) {
        close(nbd);
    }
    return NULL;
}

static void *nbd_client_thread(void *arg)
{
247
    char *device = arg;
P
Paolo Bonzini 已提交
248 249
    off_t size;
    uint32_t nbdflags;
250 251
    QIOChannelSocket *sioc;
    int fd;
P
Paolo Bonzini 已提交
252 253
    int ret;
    pthread_t show_parts_thread;
M
Max Reitz 已提交
254
    Error *local_error = NULL;
P
Paolo Bonzini 已提交
255

256 257 258 259
    sioc = qio_channel_socket_new();
    if (qio_channel_socket_connect_sync(sioc,
                                        saddr,
                                        &local_error) < 0) {
260
        error_report_err(local_error);
261 262
        goto out;
    }
P
Paolo Bonzini 已提交
263

264
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, &nbdflags,
265
                                NULL, NULL, NULL,
266
                                &size, &local_error);
267
    if (ret < 0) {
M
Max Reitz 已提交
268
        if (local_error) {
269
            error_report_err(local_error);
M
Max Reitz 已提交
270
        }
P
Paolo Bonzini 已提交
271
        goto out_socket;
P
Paolo Bonzini 已提交
272 273
    }

274
    fd = open(device, O_RDWR);
275
    if (fd < 0) {
276
        /* Linux-only, we can use %m in printf.  */
277
        error_report("Failed to open %s: %m", device);
P
Paolo Bonzini 已提交
278
        goto out_socket;
279 280
    }

281
    ret = nbd_init(fd, sioc, nbdflags, size);
282
    if (ret < 0) {
P
Paolo Bonzini 已提交
283
        goto out_fd;
P
Paolo Bonzini 已提交
284 285 286
    }

    /* update partition table */
287
    pthread_create(&show_parts_thread, NULL, show_parts, device);
P
Paolo Bonzini 已提交
288

289 290 291 292 293 294 295
    if (verbose) {
        fprintf(stderr, "NBD device %s is now connected to %s\n",
                device, srcpath);
    } else {
        /* Close stderr so that the qemu-nbd process exits.  */
        dup2(STDOUT_FILENO, STDERR_FILENO);
    }
P
Paolo Bonzini 已提交
296 297 298

    ret = nbd_client(fd);
    if (ret) {
P
Paolo Bonzini 已提交
299
        goto out_fd;
300
    }
P
Paolo Bonzini 已提交
301
    close(fd);
302
    object_unref(OBJECT(sioc));
P
Paolo Bonzini 已提交
303 304 305
    kill(getpid(), SIGTERM);
    return (void *) EXIT_SUCCESS;

P
Paolo Bonzini 已提交
306 307 308
out_fd:
    close(fd);
out_socket:
309
    object_unref(OBJECT(sioc));
P
Paolo Bonzini 已提交
310 311 312
out:
    kill(getpid(), SIGTERM);
    return (void *) EXIT_FAILURE;
313 314
}

315
static int nbd_can_accept(void)
P
Paolo Bonzini 已提交
316 317 318 319
{
    return nb_fds < shared;
}

320 321 322 323 324 325
/*
 * Close callback handed to nbd_export_new() in main().  It may only run
 * while the state is TERMINATING (asserted) and moves the state on to
 * TERMINATED, which ends the main loop.  The @exp parameter is unused.
 */
static void nbd_export_closed(NBDExport *exp)
{
    assert(state == TERMINATING);
    state = TERMINATED;
}

/* Forward declaration: used by nbd_client_closed() and nbd_accept(). */
static void nbd_update_server_watch(void);

/*
 * Close callback passed to nbd_client_new(): accounts for a client that
 * went away.  When the last client disconnects, the server is not
 * persistent and it is still RUNNING, shutdown is requested.  The accept
 * watch is then re-evaluated and the reference to @client is dropped.
 */
static void nbd_client_closed(NBDClient *client)
{
    nb_fds--;
    if (nb_fds == 0 && !persistent && state == RUNNING) {
        state = TERMINATE;
    }
    nbd_update_server_watch();
    nbd_client_put(client);
}

/*
 * Watch callback for the listening channel: accepts one connection and
 * turns it into an NBD client.
 *
 * A connection that arrives once shutdown has been requested is dropped
 * immediately.  @cond and @opaque are unused.  Always returns TRUE.
 */
static gboolean nbd_accept(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
{
    QIOChannelSocket *cioc;

    cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
                                     NULL);
    if (!cioc) {
        return TRUE;
    }

    if (state >= TERMINATE) {
        object_unref(OBJECT(cioc));
        return TRUE;
    }

    nb_fds++;
    nbd_update_server_watch();
    /* With newproto set no export is passed in here. */
    nbd_client_new(newproto ? NULL : exp, cioc,
                   tlscreds, NULL, nbd_client_closed);
    object_unref(OBJECT(cioc));

    return TRUE;
}

static void nbd_update_server_watch(void)
363 364
{
    if (nbd_can_accept()) {
365 366 367 368 369 370
        if (server_watch == -1) {
            server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
                                                 G_IO_IN,
                                                 nbd_accept,
                                                 NULL, NULL);
        }
371
    } else {
372 373 374 375
        if (server_watch != -1) {
            g_source_remove(server_watch);
            server_watch = -1;
        }
376 377 378
    }
}

379 380 381 382 383 384 385 386 387

static SocketAddress *nbd_build_socket_address(const char *sockpath,
                                               const char *bindto,
                                               const char *port)
{
    SocketAddress *saddr;

    saddr = g_new0(SocketAddress, 1);
    if (sockpath) {
388
        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
389 390
        saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
        saddr->u.q_unix.data->path = g_strdup(sockpath);
391
    } else {
392
        InetSocketAddress *inet;
393
        saddr->type = SOCKET_ADDRESS_KIND_INET;
394
        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
395
        inet->host = g_strdup(bindto);
396
        if (port) {
397
            inet->port = g_strdup(port);
398
        } else  {
399
            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
400 401 402 403 404 405 406
        }
    }

    return saddr;
}


407 408 409 410 411 412 413 414 415 416
/*
 * Option list used by main() to parse the FILE argument when --image-opts
 * is given; a bare value is taken as the "file" option.
 */
static QemuOptsList file_opts = {
    .name = "file",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
    .desc = {
        /* no elements => accept any params */
        { /* end of list */ }
    },
};
/*
 * Option list used by main() to parse each --object argument; a bare first
 * value is taken as "qom-type".  The description table is empty.
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};
/*
 * Resolve @id under the objects root and return it as TLS credentials.
 *
 * The object must exist, must be of type TYPE_QCRYPTO_TLS_CREDS and must be
 * configured as a server endpoint.  On success a reference is taken on the
 * object and the credentials are returned; otherwise an error is stored in
 * @errp and NULL is returned.
 */
static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
{
    Object *candidate = object_resolve_path_component(
        object_get_objects_root(), id);
    QCryptoTLSCreds *tls;

    if (candidate == NULL) {
        error_setg(errp, "No TLS credentials with id '%s'", id);
        return NULL;
    }

    tls = (QCryptoTLSCreds *)
        object_dynamic_cast(candidate, TYPE_QCRYPTO_TLS_CREDS);
    if (tls == NULL) {
        error_setg(errp, "Object with id '%s' is not TLS credentials", id);
        return NULL;
    }

    if (tls->endpoint == QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
        object_ref(candidate);
        return tls;
    }

    error_setg(errp, "Expecting TLS credentials with a server endpoint");
    return NULL;
}
int main(int argc, char **argv)
{
M
Markus Armbruster 已提交
460
    BlockBackend *blk;
B
bellard 已提交
461 462
    BlockDriverState *bs;
    off_t dev_offset = 0;
P
Paolo Bonzini 已提交
463
    uint32_t nbdflags = 0;
464
    bool disconnect = false;
B
bellard 已提交
465
    const char *bindto = "0.0.0.0";
466 467
    const char *port = NULL;
    char *sockpath = NULL;
468
    char *device = NULL;
B
bellard 已提交
469
    off_t fd_size;
470 471
    QemuOpts *sn_opts = NULL;
    const char *sn_id_or_name = NULL;
472
    const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:";
B
bellard 已提交
473
    struct option lopt[] = {
474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499
        { "help", no_argument, NULL, 'h' },
        { "version", no_argument, NULL, 'V' },
        { "bind", required_argument, NULL, 'b' },
        { "port", required_argument, NULL, 'p' },
        { "socket", required_argument, NULL, 'k' },
        { "offset", required_argument, NULL, 'o' },
        { "read-only", no_argument, NULL, 'r' },
        { "partition", required_argument, NULL, 'P' },
        { "connect", required_argument, NULL, 'c' },
        { "disconnect", no_argument, NULL, 'd' },
        { "snapshot", no_argument, NULL, 's' },
        { "load-snapshot", required_argument, NULL, 'l' },
        { "nocache", no_argument, NULL, 'n' },
        { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
        { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
        { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
        { "detect-zeroes", required_argument, NULL,
          QEMU_NBD_OPT_DETECT_ZEROES },
        { "shared", required_argument, NULL, 'e' },
        { "format", required_argument, NULL, 'f' },
        { "persistent", no_argument, NULL, 't' },
        { "verbose", no_argument, NULL, 'v' },
        { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
        { "export-name", required_argument, NULL, 'x' },
        { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
        { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
500
        { NULL, 0, NULL, 0 }
B
bellard 已提交
501 502 503 504
    };
    int ch;
    int opt_ind = 0;
    char *end;
505
    int flags = BDRV_O_RDWR;
B
bellard 已提交
506
    int partition = -1;
507
    int ret = 0;
508
    bool seen_cache = false;
P
Paolo Bonzini 已提交
509
    bool seen_discard = false;
510
    bool seen_aio = false;
P
Paolo Bonzini 已提交
511
    pthread_t client_thread;
512
    const char *fmt = NULL;
513
    Error *local_err = NULL;
514
    BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
515
    QDict *options = NULL;
516
    const char *export_name = NULL;
517
    const char *tlscredsid = NULL;
518
    bool imageOpts = false;
519
    bool writethrough = true;
B
bellard 已提交
520

P
Paolo Bonzini 已提交
521 522 523
    /* The client thread uses SIGTERM to interrupt the server.  A signal
     * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
     */
P
Paolo Bonzini 已提交
524 525 526 527
    struct sigaction sa_sigterm;
    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
    sa_sigterm.sa_handler = termsig_handler;
    sigaction(SIGTERM, &sa_sigterm, NULL);
528 529 530 531 532 533

    if (qcrypto_init(&local_err) < 0) {
        error_reportf_err(local_err, "cannot initialize crypto: ");
        exit(1);
    }

534 535
    module_call_init(MODULE_INIT_QOM);
    qemu_add_opts(&qemu_object_opts);
536
    qemu_init_exec_dir(argv[0]);
P
Paolo Bonzini 已提交
537

B
bellard 已提交
538 539 540
    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
        switch (ch) {
        case 's':
541 542 543
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
544 545 546 547
            optarg = (char *) "none";
            /* fallthrough */
        case QEMU_NBD_OPT_CACHE:
            if (seen_cache) {
548 549
                error_report("-n and --cache can only be specified once");
                exit(EXIT_FAILURE);
550 551
            }
            seen_cache = true;
552
            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) == -1) {
553 554
                error_report("Invalid cache mode `%s'", optarg);
                exit(EXIT_FAILURE);
555
            }
B
bellard 已提交
556
            break;
557 558
        case QEMU_NBD_OPT_AIO:
            if (seen_aio) {
559 560
                error_report("--aio can only be specified once");
                exit(EXIT_FAILURE);
561 562 563 564 565 566 567
            }
            seen_aio = true;
            if (!strcmp(optarg, "native")) {
                flags |= BDRV_O_NATIVE_AIO;
            } else if (!strcmp(optarg, "threads")) {
                /* this is the default */
            } else {
568 569
               error_report("invalid aio mode `%s'", optarg);
               exit(EXIT_FAILURE);
570 571
            }
            break;
P
Paolo Bonzini 已提交
572 573
        case QEMU_NBD_OPT_DISCARD:
            if (seen_discard) {
574 575
                error_report("--discard can only be specified once");
                exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
576 577 578
            }
            seen_discard = true;
            if (bdrv_parse_discard_flags(optarg, &flags) == -1) {
579 580
                error_report("Invalid discard mode `%s'", optarg);
                exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
581 582
            }
            break;
583 584 585 586
        case QEMU_NBD_OPT_DETECT_ZEROES:
            detect_zeroes =
                qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
                                optarg,
587
                                BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
588 589 590
                                BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
                                &local_err);
            if (local_err) {
591 592
                error_reportf_err(local_err,
                                  "Failed to parse detect_zeroes mode: ");
593
                exit(EXIT_FAILURE);
594 595 596
            }
            if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
                !(flags & BDRV_O_UNMAP)) {
597 598 599
                error_report("setting detect-zeroes to unmap is not allowed "
                             "without setting discard operation to unmap");
                exit(EXIT_FAILURE);
600 601
            }
            break;
B
bellard 已提交
602 603 604 605
        case 'b':
            bindto = optarg;
            break;
        case 'p':
606
            port = optarg;
B
bellard 已提交
607 608 609 610
            break;
        case 'o':
                dev_offset = strtoll (optarg, &end, 0);
            if (*end) {
611 612
                error_report("Invalid offset `%s'", optarg);
                exit(EXIT_FAILURE);
B
bellard 已提交
613 614
            }
            if (dev_offset < 0) {
615 616
                error_report("Offset must be positive `%s'", optarg);
                exit(EXIT_FAILURE);
B
bellard 已提交
617 618
            }
            break;
619 620
        case 'l':
            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
621 622
                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
                                                  optarg, false);
623
                if (!sn_opts) {
624 625 626
                    error_report("Failed in parsing snapshot param `%s'",
                                 optarg);
                    exit(EXIT_FAILURE);
627 628 629 630 631
                }
            } else {
                sn_id_or_name = optarg;
            }
            /* fall through */
B
bellard 已提交
632
        case 'r':
P
Paolo Bonzini 已提交
633
            nbdflags |= NBD_FLAG_READ_ONLY;
N
Naphtali Sprei 已提交
634
            flags &= ~BDRV_O_RDWR;
B
bellard 已提交
635 636 637
            break;
        case 'P':
            partition = strtol(optarg, &end, 0);
638
            if (*end) {
639 640
                error_report("Invalid partition `%s'", optarg);
                exit(EXIT_FAILURE);
641 642
            }
            if (partition < 1 || partition > 8) {
643 644
                error_report("Invalid partition %d", partition);
                exit(EXIT_FAILURE);
645
            }
B
bellard 已提交
646
            break;
647
        case 'k':
P
Paolo Bonzini 已提交
648
            sockpath = optarg;
649
            if (sockpath[0] != '/') {
650
                error_report("socket path must be absolute");
651
                exit(EXIT_FAILURE);
652
            }
653 654 655 656 657 658 659
            break;
        case 'd':
            disconnect = true;
            break;
        case 'c':
            device = optarg;
            break;
660 661 662
        case 'e':
            shared = strtol(optarg, &end, 0);
            if (*end) {
663 664
                error_report("Invalid shared device number '%s'", optarg);
                exit(EXIT_FAILURE);
665 666
            }
            if (shared < 1) {
667
                error_report("Shared device number must be greater than 0");
668
                exit(EXIT_FAILURE);
669 670
            }
            break;
671 672 673
        case 'f':
            fmt = optarg;
            break;
674 675 676
        case 't':
            persistent = 1;
            break;
677 678 679
        case 'x':
            export_name = optarg;
            break;
B
bellard 已提交
680 681 682 683 684 685 686 687 688 689 690 691
        case 'v':
            verbose = 1;
            break;
        case 'V':
            version(argv[0]);
            exit(0);
            break;
        case 'h':
            usage(argv[0]);
            exit(0);
            break;
        case '?':
692 693
            error_report("Try `%s --help' for more information.", argv[0]);
            exit(EXIT_FAILURE);
694 695 696 697 698 699 700 701
        case QEMU_NBD_OPT_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                exit(EXIT_FAILURE);
            }
        }   break;
702 703 704
        case QEMU_NBD_OPT_TLSCREDS:
            tlscredsid = optarg;
            break;
705 706 707
        case QEMU_NBD_OPT_IMAGE_OPTS:
            imageOpts = true;
            break;
B
bellard 已提交
708 709 710 711
        }
    }

    if ((argc - optind) != 1) {
712 713
        error_report("Invalid number of arguments");
        error_printf("Try `%s --help' for more information.\n", argv[0]);
714
        exit(EXIT_FAILURE);
B
bellard 已提交
715 716
    }

717 718
    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
719
                          NULL, NULL)) {
720 721 722
        exit(EXIT_FAILURE);
    }

723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740 741 742 743 744
    if (tlscredsid) {
        if (sockpath) {
            error_report("TLS is only supported with IPv4/IPv6");
            exit(EXIT_FAILURE);
        }
        if (device) {
            error_report("TLS is not supported with a host device");
            exit(EXIT_FAILURE);
        }
        if (!export_name) {
            /* Set the default NBD protocol export name, since
             * we *must* use new style protocol for TLS */
            export_name = "";
        }
        tlscreds = nbd_get_tls_creds(tlscredsid, &local_err);
        if (local_err) {
            error_report("Failed to get TLS creds %s",
                         error_get_pretty(local_err));
            exit(EXIT_FAILURE);
        }
    }

745
    if (disconnect) {
746 747
        int nbdfd = open(argv[optind], O_RDWR);
        if (nbdfd < 0) {
748 749 750
            error_report("Cannot open %s: %s", argv[optind],
                         strerror(errno));
            exit(EXIT_FAILURE);
751
        }
752
        nbd_disconnect(nbdfd);
753

754
        close(nbdfd);
755 756 757

        printf("%s disconnected\n", argv[optind]);

758
        return 0;
759 760
    }

761 762 763 764 765
    if (device && !verbose) {
        int stderr_fd[2];
        pid_t pid;
        int ret;

766
        if (qemu_pipe(stderr_fd) < 0) {
767 768 769
            error_report("Error setting up communication pipe: %s",
                         strerror(errno));
            exit(EXIT_FAILURE);
770 771 772 773 774 775
        }

        /* Now daemonize, but keep a communication channel open to
         * print errors and exit with the proper status code.
         */
        pid = fork();
M
Max Reitz 已提交
776
        if (pid < 0) {
777 778
            error_report("Failed to fork: %s", strerror(errno));
            exit(EXIT_FAILURE);
M
Max Reitz 已提交
779
        } else if (pid == 0) {
780
            close(stderr_fd[0]);
781
            ret = qemu_daemon(1, 0);
782 783 784

            /* Temporarily redirect stderr to the parent's pipe...  */
            dup2(stderr_fd[1], STDERR_FILENO);
785
            if (ret < 0) {
786 787
                error_report("Failed to daemonize: %s", strerror(errno));
                exit(EXIT_FAILURE);
788 789 790 791 792 793 794 795 796 797 798 799 800 801 802 803
            }

            /* ... close the descriptor we inherited and go on.  */
            close(stderr_fd[1]);
        } else {
            bool errors = false;
            char *buf;

            /* In the parent.  Print error messages from the child until
             * it closes the pipe.
             */
            close(stderr_fd[1]);
            buf = g_malloc(1024);
            while ((ret = read(stderr_fd[0], buf, 1024)) > 0) {
                errors = true;
                ret = qemu_write_full(STDERR_FILENO, buf, ret);
804
                if (ret < 0) {
805 806 807
                    exit(EXIT_FAILURE);
                }
            }
808
            if (ret < 0) {
809 810 811
                error_report("Cannot read from daemon: %s",
                             strerror(errno));
                exit(EXIT_FAILURE);
812 813 814 815 816 817 818 819 820
            }

            /* Usually the daemon should not print any message.
             * Exit with zero status in that case.
             */
            exit(errors);
        }
    }

821 822 823
    if (device != NULL && sockpath == NULL) {
        sockpath = g_malloc(128);
        snprintf(sockpath, 128, SOCKET_PATH, basename(device));
824 825
    }

826 827
    saddr = nbd_build_socket_address(sockpath, bindto, port);

828
    if (qemu_init_main_loop(&local_err)) {
829
        error_report_err(local_err);
830 831
        exit(EXIT_FAILURE);
    }
832 833 834
    bdrv_init();
    atexit(bdrv_close_all);

835 836 837 838 839 840 841 842 843 844 845 846 847 848
    srcpath = argv[optind];
    if (imageOpts) {
        QemuOpts *opts;
        if (fmt) {
            error_report("--image-opts and -f are mutually exclusive");
            exit(EXIT_FAILURE);
        }
        opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
        if (!opts) {
            qemu_opts_reset(&file_opts);
            exit(EXIT_FAILURE);
        }
        options = qemu_opts_to_qdict(opts, NULL);
        qemu_opts_reset(&file_opts);
849
        blk = blk_new_open(NULL, NULL, options, flags, &local_err);
850 851 852 853 854
    } else {
        if (fmt) {
            options = qdict_new();
            qdict_put(options, "driver", qstring_from_str(fmt));
        }
855
        blk = blk_new_open(srcpath, NULL, options, flags, &local_err);
856 857
    }

858
    if (!blk) {
859 860
        error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
                          argv[optind]);
861
        exit(EXIT_FAILURE);
862
    }
863
    bs = blk_bs(blk);
864

865 866
    blk_set_enable_write_cache(blk, !writethrough);

867 868 869 870 871 872 873 874 875 876
    if (sn_opts) {
        ret = bdrv_snapshot_load_tmp(bs,
                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
                                     &local_err);
    } else if (sn_id_or_name) {
        ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name,
                                                   &local_err);
    }
    if (ret < 0) {
877
        error_reportf_err(local_err, "Failed to load snapshot: ");
878
        exit(EXIT_FAILURE);
879 880
    }

881
    bs->detect_zeroes = detect_zeroes;
882
    fd_size = blk_getlength(blk);
M
Max Reitz 已提交
883
    if (fd_size < 0) {
884 885 886
        error_report("Failed to determine the image length: %s",
                     strerror(-fd_size));
        exit(EXIT_FAILURE);
M
Max Reitz 已提交
887
    }
888

889
    if (partition != -1) {
890
        ret = find_partition(blk, partition, &dev_offset, &fd_size);
891
        if (ret < 0) {
892
            error_report("Could not find partition %d: %s", partition,
893
                         strerror(-ret));
894
            exit(EXIT_FAILURE);
895
        }
896 897
    }

M
Max Reitz 已提交
898 899 900
    exp = nbd_export_new(blk, dev_offset, fd_size, nbdflags, nbd_export_closed,
                         &local_err);
    if (!exp) {
901
        error_report_err(local_err);
902
        exit(EXIT_FAILURE);
M
Max Reitz 已提交
903
    }
904 905 906 907
    if (export_name) {
        nbd_export_set_name(exp, export_name);
        newproto = true;
    }
908

909 910 911
    server_ioc = qio_channel_socket_new();
    if (qio_channel_socket_listen_sync(server_ioc, saddr, &local_err) < 0) {
        object_unref(OBJECT(server_ioc));
912
        error_report_err(local_err);
B
bellard 已提交
913
        return 1;
P
Paolo Bonzini 已提交
914
    }
P
Paolo Bonzini 已提交
915 916 917 918

    if (device) {
        int ret;

919
        ret = pthread_create(&client_thread, NULL, nbd_client_thread, device);
P
Paolo Bonzini 已提交
920
        if (ret != 0) {
921 922
            error_report("Failed to create client thread: %s", strerror(ret));
            exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
923 924 925 926 927 928
        }
    } else {
        /* Shut up GCC warnings.  */
        memset(&client_thread, 0, sizeof(client_thread));
    }

929
    nbd_update_server_watch();
B
bellard 已提交
930

931 932 933
    /* now when the initialization is (almost) complete, chdir("/")
     * to free any busy filesystems */
    if (chdir("/") < 0) {
934 935 936
        error_report("Could not chdir to root directory: %s",
                     strerror(errno));
        exit(EXIT_FAILURE);
937 938
    }

939
    state = RUNNING;
940
    do {
P
Paolo Bonzini 已提交
941
        main_loop_wait(false);
942 943 944 945 946 947 948
        if (state == TERMINATE) {
            state = TERMINATING;
            nbd_export_close(exp);
            nbd_export_put(exp);
            exp = NULL;
        }
    } while (state != TERMINATED);
B
bellard 已提交
949

M
Markus Armbruster 已提交
950
    blk_unref(blk);
P
Paolo Bonzini 已提交
951 952 953
    if (sockpath) {
        unlink(sockpath);
    }
B
bellard 已提交
954

955
    qemu_opts_del(sn_opts);
956

P
Paolo Bonzini 已提交
957 958 959 960 961 962 963
    if (device) {
        void *ret;
        pthread_join(client_thread, &ret);
        exit(ret != NULL);
    } else {
        exit(EXIT_SUCCESS);
    }
B
bellard 已提交
964
}