/*
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
 */

P
Peter Maydell 已提交
19
#include "qemu/osdep.h"
20
#include "qapi/error.h"
21
#include "qemu-common.h"
22
#include "qemu/cutils.h"
M
Markus Armbruster 已提交
23
#include "sysemu/block-backend.h"
24
#include "block/block_int.h"
25
#include "block/nbd.h"
26
#include "qemu/main-loop.h"
27
#include "qemu/error-report.h"
28
#include "qemu/config-file.h"
29
#include "block/snapshot.h"
30
#include "qapi/util.h"
31
#include "qapi/qmp/qstring.h"
32
#include "qom/object_interfaces.h"
33
#include "io/channel-socket.h"
34
#include "crypto/init.h"
B
bellard 已提交
35 36

#include <getopt.h>
37
#include <libgen.h>
P
Paolo Bonzini 已提交
38
#include <pthread.h>
39

40
#define SOCKET_PATH                "/var/lock/qemu-nbd-%s"
41 42 43 44 45 46 47
#define QEMU_NBD_OPT_CACHE         256
#define QEMU_NBD_OPT_AIO           257
#define QEMU_NBD_OPT_DISCARD       258
#define QEMU_NBD_OPT_DETECT_ZEROES 259
#define QEMU_NBD_OPT_OBJECT        260
#define QEMU_NBD_OPT_TLSCREDS      261
#define QEMU_NBD_OPT_IMAGE_OPTS    262
B
bellard 已提交
48

P
Paolo Bonzini 已提交
49
static NBDExport *exp;
50
static bool newproto;
51
static int verbose;
P
Paolo Bonzini 已提交
52
static char *srcpath;
53
static SocketAddress *saddr;
54 55
static int persistent = 0;
static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state;
P
Paolo Bonzini 已提交
56 57
static int shared = 1;
static int nb_fds;
58 59
static QIOChannelSocket *server_ioc;
static int server_watch = -1;
60
static QCryptoTLSCreds *tlscreds;
B
bellard 已提交
61 62 63

static void usage(const char *name)
{
64
    (printf) (
B
bellard 已提交
65 66 67
"Usage: %s [OPTIONS] FILE\n"
"QEMU Disk Network Block Device Server\n"
"\n"
68 69
"  -h, --help                display this help and exit\n"
"  -V, --version             output version information and exit\n"
70 71
"\n"
"Connection properties:\n"
72 73 74 75 76 77 78
"  -p, --port=PORT           port to listen on (default `%d')\n"
"  -b, --bind=IFACE          interface to bind to (default `0.0.0.0')\n"
"  -k, --socket=PATH         path to the unix socket\n"
"                            (default '"SOCKET_PATH"')\n"
"  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
"  -t, --persistent          don't exit on the last connection\n"
"  -v, --verbose             display extra debugging information\n"
E
Eric Blake 已提交
79
"  -x, --export-name=NAME    expose export by name\n"
B
bellard 已提交
80
"\n"
81
"Exposing part of the image:\n"
82 83
"  -o, --offset=OFFSET       offset into the image\n"
"  -P, --partition=NUM       only expose partition NUM\n"
84
"\n"
85 86 87
"General purpose options:\n"
"  --object type,id=ID,...   define an object such as 'secret' for providing\n"
"                            passwords and/or encryption keys\n"
88 89
#ifdef __linux__
"Kernel NBD client support:\n"
90 91
"  -c, --connect=DEV         connect FILE to the local NBD device DEV\n"
"  -d, --disconnect          disconnect the specified device\n"
92 93 94 95
"\n"
#endif
"\n"
"Block device options:\n"
96 97 98 99 100
"  -f, --format=FORMAT       set image format (raw, qcow2, ...)\n"
"  -r, --read-only           export read-only\n"
"  -s, --snapshot            use FILE as an external snapshot, create a temporary\n"
"                            file with backing_file=FILE, redirect the write to\n"
"                            the temporary one\n"
101
"  -l, --load-snapshot=SNAPSHOT_PARAM\n"
102 103 104 105 106 107 108
"                            load an internal snapshot inside FILE and export it\n"
"                            as an read-only device, SNAPSHOT_PARAM format is\n"
"                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
"                            '[ID_OR_NAME]'\n"
"  -n, --nocache             disable host cache\n"
"      --cache=MODE          set cache mode (none, writeback, ...)\n"
"      --aio=MODE            set AIO mode (native or threads)\n"
109
"      --discard=MODE        set discard mode (ignore, unmap)\n"
110
"      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
111
"      --image-opts          treat FILE as a full set of image options\n"
112 113
"\n"
"Report bugs to <qemu-devel@nongnu.org>\n"
114
    , name, NBD_DEFAULT_PORT, "DEVICE");
B
bellard 已提交
115 116 117 118 119
}

static void version(const char *name)
{
    printf(
120
"%s version 0.0.1\n"
B
bellard 已提交
121 122 123 124 125
"Written by Anthony Liguori.\n"
"\n"
"Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>.\n"
"This is free software; see the source for copying conditions.  There is NO\n"
"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
126
    , name);
B
bellard 已提交
127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
}

struct partition_record
{
    uint8_t bootable;
    uint8_t start_head;
    uint32_t start_cylinder;
    uint8_t start_sector;
    uint8_t system;
    uint8_t end_head;
    uint8_t end_cylinder;
    uint8_t end_sector;
    uint32_t start_sector_abs;
    uint32_t nb_sectors_abs;
};

static void read_partition(uint8_t *p, struct partition_record *r)
{
    r->bootable = p[0];
    r->start_head = p[1];
    r->start_cylinder = p[3] | ((p[2] << 2) & 0x0300);
    r->start_sector = p[2] & 0x3f;
    r->system = p[4];
    r->end_head = p[5];
    r->end_cylinder = p[7] | ((p[6] << 2) & 0x300);
    r->end_sector = p[6] & 0x3f;
153 154 155

    r->start_sector_abs = le32_to_cpup((uint32_t *)(p +  8));
    r->nb_sectors_abs   = le32_to_cpup((uint32_t *)(p + 12));
B
bellard 已提交
156 157
}

158
static int find_partition(BlockBackend *blk, int partition,
B
bellard 已提交
159 160 161 162 163 164
                          off_t *offset, off_t *size)
{
    struct partition_record mbr[4];
    uint8_t data[512];
    int i;
    int ext_partnum = 4;
R
Ryota Ozaki 已提交
165
    int ret;
B
bellard 已提交
166

167
    if ((ret = blk_read(blk, 0, data, 1)) < 0) {
168
        error_report("error while reading: %s", strerror(-ret));
169
        exit(EXIT_FAILURE);
R
Ryota Ozaki 已提交
170
    }
B
bellard 已提交
171 172

    if (data[510] != 0x55 || data[511] != 0xaa) {
173
        return -EINVAL;
B
bellard 已提交
174 175 176 177 178
    }

    for (i = 0; i < 4; i++) {
        read_partition(&data[446 + 16 * i], &mbr[i]);

179
        if (!mbr[i].system || !mbr[i].nb_sectors_abs) {
B
bellard 已提交
180
            continue;
181
        }
B
bellard 已提交
182 183 184 185 186 187

        if (mbr[i].system == 0xF || mbr[i].system == 0x5) {
            struct partition_record ext[4];
            uint8_t data1[512];
            int j;

188
            if ((ret = blk_read(blk, mbr[i].start_sector_abs, data1, 1)) < 0) {
189
                error_report("error while reading: %s", strerror(-ret));
190
                exit(EXIT_FAILURE);
R
Ryota Ozaki 已提交
191
            }
B
bellard 已提交
192 193 194

            for (j = 0; j < 4; j++) {
                read_partition(&data1[446 + 16 * j], &ext[j]);
195
                if (!ext[j].system || !ext[j].nb_sectors_abs) {
B
bellard 已提交
196
                    continue;
197
                }
B
bellard 已提交
198 199 200 201 202 203 204 205 206 207 208 209 210 211 212

                if ((ext_partnum + j + 1) == partition) {
                    *offset = (uint64_t)ext[j].start_sector_abs << 9;
                    *size = (uint64_t)ext[j].nb_sectors_abs << 9;
                    return 0;
                }
            }
            ext_partnum += 4;
        } else if ((i + 1) == partition) {
            *offset = (uint64_t)mbr[i].start_sector_abs << 9;
            *size = (uint64_t)mbr[i].nb_sectors_abs << 9;
            return 0;
        }
    }

213
    return -ENOENT;
B
bellard 已提交
214 215
}

P
Paolo Bonzini 已提交
216 217
static void termsig_handler(int signum)
{
218
    atomic_cmpxchg(&state, RUNNING, TERMINATE);
P
Paolo Bonzini 已提交
219
    qemu_notify_event();
P
Paolo Bonzini 已提交
220 221
}

222

P
Paolo Bonzini 已提交
223
static void *show_parts(void *arg)
224
{
225
    char *device = arg;
P
Paolo Bonzini 已提交
226 227 228 229 230 231 232 233
    int nbd;

    /* linux just needs an open() to trigger
     * the partition table update
     * but remember to load the module with max_part != 0 :
     *     modprobe nbd max_part=63
     */
    nbd = open(device, O_RDWR);
234
    if (nbd >= 0) {
P
Paolo Bonzini 已提交
235 236 237 238
        close(nbd);
    }
    return NULL;
}
239

P
Paolo Bonzini 已提交
240 241
static void *nbd_client_thread(void *arg)
{
242
    char *device = arg;
P
Paolo Bonzini 已提交
243 244
    off_t size;
    uint32_t nbdflags;
245 246
    QIOChannelSocket *sioc;
    int fd;
P
Paolo Bonzini 已提交
247 248
    int ret;
    pthread_t show_parts_thread;
M
Max Reitz 已提交
249
    Error *local_error = NULL;
P
Paolo Bonzini 已提交
250

251 252 253 254
    sioc = qio_channel_socket_new();
    if (qio_channel_socket_connect_sync(sioc,
                                        saddr,
                                        &local_error) < 0) {
255
        error_report_err(local_error);
256 257
        goto out;
    }
P
Paolo Bonzini 已提交
258

259
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, &nbdflags,
260
                                NULL, NULL, NULL,
261
                                &size, &local_error);
262
    if (ret < 0) {
M
Max Reitz 已提交
263
        if (local_error) {
264
            error_report_err(local_error);
M
Max Reitz 已提交
265
        }
P
Paolo Bonzini 已提交
266
        goto out_socket;
P
Paolo Bonzini 已提交
267 268
    }

269
    fd = open(device, O_RDWR);
270
    if (fd < 0) {
271
        /* Linux-only, we can use %m in printf.  */
272
        error_report("Failed to open %s: %m", device);
P
Paolo Bonzini 已提交
273
        goto out_socket;
274 275
    }

276
    ret = nbd_init(fd, sioc, nbdflags, size);
277
    if (ret < 0) {
P
Paolo Bonzini 已提交
278
        goto out_fd;
P
Paolo Bonzini 已提交
279 280 281
    }

    /* update partition table */
282
    pthread_create(&show_parts_thread, NULL, show_parts, device);
P
Paolo Bonzini 已提交
283

284 285 286 287 288 289 290
    if (verbose) {
        fprintf(stderr, "NBD device %s is now connected to %s\n",
                device, srcpath);
    } else {
        /* Close stderr so that the qemu-nbd process exits.  */
        dup2(STDOUT_FILENO, STDERR_FILENO);
    }
P
Paolo Bonzini 已提交
291 292 293

    ret = nbd_client(fd);
    if (ret) {
P
Paolo Bonzini 已提交
294
        goto out_fd;
295
    }
P
Paolo Bonzini 已提交
296
    close(fd);
297
    object_unref(OBJECT(sioc));
P
Paolo Bonzini 已提交
298 299 300
    kill(getpid(), SIGTERM);
    return (void *) EXIT_SUCCESS;

P
Paolo Bonzini 已提交
301 302 303
out_fd:
    close(fd);
out_socket:
304
    object_unref(OBJECT(sioc));
P
Paolo Bonzini 已提交
305 306 307
out:
    kill(getpid(), SIGTERM);
    return (void *) EXIT_FAILURE;
308 309
}

310
static int nbd_can_accept(void)
P
Paolo Bonzini 已提交
311 312 313 314
{
    return nb_fds < shared;
}

315 316 317 318 319 320
static void nbd_export_closed(NBDExport *exp)
{
    assert(state == TERMINATING);
    state = TERMINATED;
}

321
static void nbd_update_server_watch(void);
322

323
static void nbd_client_closed(NBDClient *client)
P
Paolo Bonzini 已提交
324
{
325
    nb_fds--;
326 327 328
    if (nb_fds == 0 && !persistent && state == RUNNING) {
        state = TERMINATE;
    }
329
    nbd_update_server_watch();
330
    nbd_client_put(client);
P
Paolo Bonzini 已提交
331 332
}

333
static gboolean nbd_accept(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
P
Paolo Bonzini 已提交
334
{
335
    QIOChannelSocket *cioc;
P
Paolo Bonzini 已提交
336

337 338 339 340
    cioc = qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc),
                                     NULL);
    if (!cioc) {
        return TRUE;
P
Paolo Bonzini 已提交
341 342
    }

343
    if (state >= TERMINATE) {
344 345
        object_unref(OBJECT(cioc));
        return TRUE;
346 347
    }

348
    nb_fds++;
349
    nbd_update_server_watch();
350
    nbd_client_new(newproto ? NULL : exp, cioc,
351
                   tlscreds, NULL, nbd_client_closed);
352 353 354
    object_unref(OBJECT(cioc));

    return TRUE;
P
Paolo Bonzini 已提交
355 356
}

357
static void nbd_update_server_watch(void)
358 359
{
    if (nbd_can_accept()) {
360 361 362 363 364 365
        if (server_watch == -1) {
            server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
                                                 G_IO_IN,
                                                 nbd_accept,
                                                 NULL, NULL);
        }
366
    } else {
367 368 369 370
        if (server_watch != -1) {
            g_source_remove(server_watch);
            server_watch = -1;
        }
371 372 373
    }
}

374 375 376 377 378 379 380 381 382

static SocketAddress *nbd_build_socket_address(const char *sockpath,
                                               const char *bindto,
                                               const char *port)
{
    SocketAddress *saddr;

    saddr = g_new0(SocketAddress, 1);
    if (sockpath) {
383
        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
384 385
        saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
        saddr->u.q_unix.data->path = g_strdup(sockpath);
386
    } else {
387
        InetSocketAddress *inet;
388
        saddr->type = SOCKET_ADDRESS_KIND_INET;
389
        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
390
        inet->host = g_strdup(bindto);
391
        if (port) {
392
            inet->port = g_strdup(port);
393
        } else  {
394
            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
395 396 397 398 399 400 401
        }
    }

    return saddr;
}


402 403 404 405 406 407 408 409 410 411
static QemuOptsList file_opts = {
    .name = "file",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
    .desc = {
        /* no elements => accept any params */
        { /* end of list */ }
    },
};

412 413 414 415 416 417 418 419 420 421
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};


422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452

static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
{
    Object *obj;
    QCryptoTLSCreds *creds;

    obj = object_resolve_path_component(
        object_get_objects_root(), id);
    if (!obj) {
        error_setg(errp, "No TLS credentials with id '%s'",
                   id);
        return NULL;
    }
    creds = (QCryptoTLSCreds *)
        object_dynamic_cast(obj, TYPE_QCRYPTO_TLS_CREDS);
    if (!creds) {
        error_setg(errp, "Object with id '%s' is not TLS credentials",
                   id);
        return NULL;
    }

    if (creds->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
        error_setg(errp,
                   "Expecting TLS credentials with a server endpoint");
        return NULL;
    }
    object_ref(obj);
    return creds;
}


B
bellard 已提交
453 454
int main(int argc, char **argv)
{
M
Markus Armbruster 已提交
455
    BlockBackend *blk;
B
bellard 已提交
456 457
    BlockDriverState *bs;
    off_t dev_offset = 0;
P
Paolo Bonzini 已提交
458
    uint32_t nbdflags = 0;
459
    bool disconnect = false;
B
bellard 已提交
460
    const char *bindto = "0.0.0.0";
461 462
    const char *port = NULL;
    char *sockpath = NULL;
463
    char *device = NULL;
B
bellard 已提交
464
    off_t fd_size;
465 466
    QemuOpts *sn_opts = NULL;
    const char *sn_id_or_name = NULL;
467
    const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:";
B
bellard 已提交
468
    struct option lopt[] = {
469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494
        { "help", no_argument, NULL, 'h' },
        { "version", no_argument, NULL, 'V' },
        { "bind", required_argument, NULL, 'b' },
        { "port", required_argument, NULL, 'p' },
        { "socket", required_argument, NULL, 'k' },
        { "offset", required_argument, NULL, 'o' },
        { "read-only", no_argument, NULL, 'r' },
        { "partition", required_argument, NULL, 'P' },
        { "connect", required_argument, NULL, 'c' },
        { "disconnect", no_argument, NULL, 'd' },
        { "snapshot", no_argument, NULL, 's' },
        { "load-snapshot", required_argument, NULL, 'l' },
        { "nocache", no_argument, NULL, 'n' },
        { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
        { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
        { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
        { "detect-zeroes", required_argument, NULL,
          QEMU_NBD_OPT_DETECT_ZEROES },
        { "shared", required_argument, NULL, 'e' },
        { "format", required_argument, NULL, 'f' },
        { "persistent", no_argument, NULL, 't' },
        { "verbose", no_argument, NULL, 'v' },
        { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
        { "export-name", required_argument, NULL, 'x' },
        { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
        { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
495
        { NULL, 0, NULL, 0 }
B
bellard 已提交
496 497 498 499
    };
    int ch;
    int opt_ind = 0;
    char *end;
500
    int flags = BDRV_O_RDWR;
B
bellard 已提交
501
    int partition = -1;
502
    int ret = 0;
503
    bool seen_cache = false;
P
Paolo Bonzini 已提交
504
    bool seen_discard = false;
505
    bool seen_aio = false;
P
Paolo Bonzini 已提交
506
    pthread_t client_thread;
507
    const char *fmt = NULL;
508
    Error *local_err = NULL;
509
    BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
510
    QDict *options = NULL;
511
    const char *export_name = NULL;
512
    const char *tlscredsid = NULL;
513
    bool imageOpts = false;
514
    bool writethrough = true;
B
bellard 已提交
515

P
Paolo Bonzini 已提交
516 517 518
    /* The client thread uses SIGTERM to interrupt the server.  A signal
     * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
     */
P
Paolo Bonzini 已提交
519 520 521 522
    struct sigaction sa_sigterm;
    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
    sa_sigterm.sa_handler = termsig_handler;
    sigaction(SIGTERM, &sa_sigterm, NULL);
523 524 525 526 527 528

    if (qcrypto_init(&local_err) < 0) {
        error_reportf_err(local_err, "cannot initialize crypto: ");
        exit(1);
    }

529 530
    module_call_init(MODULE_INIT_QOM);
    qemu_add_opts(&qemu_object_opts);
531
    qemu_init_exec_dir(argv[0]);
P
Paolo Bonzini 已提交
532

B
bellard 已提交
533 534 535
    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
        switch (ch) {
        case 's':
536 537 538
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
539 540 541 542
            optarg = (char *) "none";
            /* fallthrough */
        case QEMU_NBD_OPT_CACHE:
            if (seen_cache) {
543 544
                error_report("-n and --cache can only be specified once");
                exit(EXIT_FAILURE);
545 546
            }
            seen_cache = true;
547
            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) == -1) {
548 549
                error_report("Invalid cache mode `%s'", optarg);
                exit(EXIT_FAILURE);
550
            }
B
bellard 已提交
551
            break;
552 553
        case QEMU_NBD_OPT_AIO:
            if (seen_aio) {
554 555
                error_report("--aio can only be specified once");
                exit(EXIT_FAILURE);
556 557 558 559 560 561 562
            }
            seen_aio = true;
            if (!strcmp(optarg, "native")) {
                flags |= BDRV_O_NATIVE_AIO;
            } else if (!strcmp(optarg, "threads")) {
                /* this is the default */
            } else {
563 564
               error_report("invalid aio mode `%s'", optarg);
               exit(EXIT_FAILURE);
565 566
            }
            break;
P
Paolo Bonzini 已提交
567 568
        case QEMU_NBD_OPT_DISCARD:
            if (seen_discard) {
569 570
                error_report("--discard can only be specified once");
                exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
571 572 573
            }
            seen_discard = true;
            if (bdrv_parse_discard_flags(optarg, &flags) == -1) {
574 575
                error_report("Invalid discard mode `%s'", optarg);
                exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
576 577
            }
            break;
578 579 580 581
        case QEMU_NBD_OPT_DETECT_ZEROES:
            detect_zeroes =
                qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
                                optarg,
582
                                BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
583 584 585
                                BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
                                &local_err);
            if (local_err) {
586 587
                error_reportf_err(local_err,
                                  "Failed to parse detect_zeroes mode: ");
588
                exit(EXIT_FAILURE);
589 590 591
            }
            if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
                !(flags & BDRV_O_UNMAP)) {
592 593 594
                error_report("setting detect-zeroes to unmap is not allowed "
                             "without setting discard operation to unmap");
                exit(EXIT_FAILURE);
595 596
            }
            break;
B
bellard 已提交
597 598 599 600
        case 'b':
            bindto = optarg;
            break;
        case 'p':
601
            port = optarg;
B
bellard 已提交
602 603 604 605
            break;
        case 'o':
                dev_offset = strtoll (optarg, &end, 0);
            if (*end) {
606 607
                error_report("Invalid offset `%s'", optarg);
                exit(EXIT_FAILURE);
B
bellard 已提交
608 609
            }
            if (dev_offset < 0) {
610 611
                error_report("Offset must be positive `%s'", optarg);
                exit(EXIT_FAILURE);
B
bellard 已提交
612 613
            }
            break;
614 615
        case 'l':
            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
616 617
                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
                                                  optarg, false);
618
                if (!sn_opts) {
619 620 621
                    error_report("Failed in parsing snapshot param `%s'",
                                 optarg);
                    exit(EXIT_FAILURE);
622 623 624 625 626
                }
            } else {
                sn_id_or_name = optarg;
            }
            /* fall through */
B
bellard 已提交
627
        case 'r':
P
Paolo Bonzini 已提交
628
            nbdflags |= NBD_FLAG_READ_ONLY;
N
Naphtali Sprei 已提交
629
            flags &= ~BDRV_O_RDWR;
B
bellard 已提交
630 631 632
            break;
        case 'P':
            partition = strtol(optarg, &end, 0);
633
            if (*end) {
634 635
                error_report("Invalid partition `%s'", optarg);
                exit(EXIT_FAILURE);
636 637
            }
            if (partition < 1 || partition > 8) {
638 639
                error_report("Invalid partition %d", partition);
                exit(EXIT_FAILURE);
640
            }
B
bellard 已提交
641
            break;
642
        case 'k':
P
Paolo Bonzini 已提交
643
            sockpath = optarg;
644
            if (sockpath[0] != '/') {
645
                error_report("socket path must be absolute");
646
                exit(EXIT_FAILURE);
647
            }
648 649 650 651 652 653 654
            break;
        case 'd':
            disconnect = true;
            break;
        case 'c':
            device = optarg;
            break;
655 656 657
        case 'e':
            shared = strtol(optarg, &end, 0);
            if (*end) {
658 659
                error_report("Invalid shared device number '%s'", optarg);
                exit(EXIT_FAILURE);
660 661
            }
            if (shared < 1) {
662
                error_report("Shared device number must be greater than 0");
663
                exit(EXIT_FAILURE);
664 665
            }
            break;
666 667 668
        case 'f':
            fmt = optarg;
            break;
669 670 671
        case 't':
            persistent = 1;
            break;
672 673 674
        case 'x':
            export_name = optarg;
            break;
B
bellard 已提交
675 676 677 678 679 680 681 682 683 684 685 686
        case 'v':
            verbose = 1;
            break;
        case 'V':
            version(argv[0]);
            exit(0);
            break;
        case 'h':
            usage(argv[0]);
            exit(0);
            break;
        case '?':
687 688
            error_report("Try `%s --help' for more information.", argv[0]);
            exit(EXIT_FAILURE);
689 690 691 692 693 694 695 696
        case QEMU_NBD_OPT_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                exit(EXIT_FAILURE);
            }
        }   break;
697 698 699
        case QEMU_NBD_OPT_TLSCREDS:
            tlscredsid = optarg;
            break;
700 701 702
        case QEMU_NBD_OPT_IMAGE_OPTS:
            imageOpts = true;
            break;
B
bellard 已提交
703 704 705 706
        }
    }

    if ((argc - optind) != 1) {
707 708
        error_report("Invalid number of arguments");
        error_printf("Try `%s --help' for more information.\n", argv[0]);
709
        exit(EXIT_FAILURE);
B
bellard 已提交
710 711
    }

712 713 714 715 716 717 718
    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
                          NULL, &local_err)) {
        error_report_err(local_err);
        exit(EXIT_FAILURE);
    }

719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739 740
    if (tlscredsid) {
        if (sockpath) {
            error_report("TLS is only supported with IPv4/IPv6");
            exit(EXIT_FAILURE);
        }
        if (device) {
            error_report("TLS is not supported with a host device");
            exit(EXIT_FAILURE);
        }
        if (!export_name) {
            /* Set the default NBD protocol export name, since
             * we *must* use new style protocol for TLS */
            export_name = "";
        }
        tlscreds = nbd_get_tls_creds(tlscredsid, &local_err);
        if (local_err) {
            error_report("Failed to get TLS creds %s",
                         error_get_pretty(local_err));
            exit(EXIT_FAILURE);
        }
    }

741
    if (disconnect) {
742 743
        int nbdfd = open(argv[optind], O_RDWR);
        if (nbdfd < 0) {
744 745 746
            error_report("Cannot open %s: %s", argv[optind],
                         strerror(errno));
            exit(EXIT_FAILURE);
747
        }
748
        nbd_disconnect(nbdfd);
749

750
        close(nbdfd);
751 752 753

        printf("%s disconnected\n", argv[optind]);

754
        return 0;
755 756
    }

757 758 759 760 761
    if (device && !verbose) {
        int stderr_fd[2];
        pid_t pid;
        int ret;

762
        if (qemu_pipe(stderr_fd) < 0) {
763 764 765
            error_report("Error setting up communication pipe: %s",
                         strerror(errno));
            exit(EXIT_FAILURE);
766 767 768 769 770 771
        }

        /* Now daemonize, but keep a communication channel open to
         * print errors and exit with the proper status code.
         */
        pid = fork();
M
Max Reitz 已提交
772
        if (pid < 0) {
773 774
            error_report("Failed to fork: %s", strerror(errno));
            exit(EXIT_FAILURE);
M
Max Reitz 已提交
775
        } else if (pid == 0) {
776
            close(stderr_fd[0]);
777
            ret = qemu_daemon(1, 0);
778 779 780

            /* Temporarily redirect stderr to the parent's pipe...  */
            dup2(stderr_fd[1], STDERR_FILENO);
781
            if (ret < 0) {
782 783
                error_report("Failed to daemonize: %s", strerror(errno));
                exit(EXIT_FAILURE);
784 785 786 787 788 789 790 791 792 793 794 795 796 797 798 799
            }

            /* ... close the descriptor we inherited and go on.  */
            close(stderr_fd[1]);
        } else {
            bool errors = false;
            char *buf;

            /* In the parent.  Print error messages from the child until
             * it closes the pipe.
             */
            close(stderr_fd[1]);
            buf = g_malloc(1024);
            while ((ret = read(stderr_fd[0], buf, 1024)) > 0) {
                errors = true;
                ret = qemu_write_full(STDERR_FILENO, buf, ret);
800
                if (ret < 0) {
801 802 803
                    exit(EXIT_FAILURE);
                }
            }
804
            if (ret < 0) {
805 806 807
                error_report("Cannot read from daemon: %s",
                             strerror(errno));
                exit(EXIT_FAILURE);
808 809 810 811 812 813 814 815 816
            }

            /* Usually the daemon should not print any message.
             * Exit with zero status in that case.
             */
            exit(errors);
        }
    }

817 818 819
    if (device != NULL && sockpath == NULL) {
        sockpath = g_malloc(128);
        snprintf(sockpath, 128, SOCKET_PATH, basename(device));
820 821
    }

822 823
    saddr = nbd_build_socket_address(sockpath, bindto, port);

824
    if (qemu_init_main_loop(&local_err)) {
825
        error_report_err(local_err);
826 827
        exit(EXIT_FAILURE);
    }
828 829 830
    bdrv_init();
    atexit(bdrv_close_all);

831 832 833 834 835 836 837 838 839 840 841 842 843 844
    srcpath = argv[optind];
    if (imageOpts) {
        QemuOpts *opts;
        if (fmt) {
            error_report("--image-opts and -f are mutually exclusive");
            exit(EXIT_FAILURE);
        }
        opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
        if (!opts) {
            qemu_opts_reset(&file_opts);
            exit(EXIT_FAILURE);
        }
        options = qemu_opts_to_qdict(opts, NULL);
        qemu_opts_reset(&file_opts);
845
        blk = blk_new_open(NULL, NULL, options, flags, &local_err);
846 847 848 849 850
    } else {
        if (fmt) {
            options = qdict_new();
            qdict_put(options, "driver", qstring_from_str(fmt));
        }
851
        blk = blk_new_open(srcpath, NULL, options, flags, &local_err);
852 853
    }

854
    if (!blk) {
855 856
        error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
                          argv[optind]);
857
        exit(EXIT_FAILURE);
858
    }
859
    bs = blk_bs(blk);
860

861 862
    blk_set_enable_write_cache(blk, !writethrough);

863 864 865 866 867 868 869 870 871 872
    if (sn_opts) {
        ret = bdrv_snapshot_load_tmp(bs,
                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
                                     &local_err);
    } else if (sn_id_or_name) {
        ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name,
                                                   &local_err);
    }
    if (ret < 0) {
873
        error_reportf_err(local_err, "Failed to load snapshot: ");
874
        exit(EXIT_FAILURE);
875 876
    }

877
    bs->detect_zeroes = detect_zeroes;
878
    fd_size = blk_getlength(blk);
M
Max Reitz 已提交
879
    if (fd_size < 0) {
880 881 882
        error_report("Failed to determine the image length: %s",
                     strerror(-fd_size));
        exit(EXIT_FAILURE);
M
Max Reitz 已提交
883
    }
884

885
    if (partition != -1) {
886
        ret = find_partition(blk, partition, &dev_offset, &fd_size);
887
        if (ret < 0) {
888
            error_report("Could not find partition %d: %s", partition,
889
                         strerror(-ret));
890
            exit(EXIT_FAILURE);
891
        }
892 893
    }

M
Max Reitz 已提交
894 895 896
    exp = nbd_export_new(blk, dev_offset, fd_size, nbdflags, nbd_export_closed,
                         &local_err);
    if (!exp) {
897
        error_report_err(local_err);
898
        exit(EXIT_FAILURE);
M
Max Reitz 已提交
899
    }
900 901 902 903
    if (export_name) {
        nbd_export_set_name(exp, export_name);
        newproto = true;
    }
904

905 906 907
    server_ioc = qio_channel_socket_new();
    if (qio_channel_socket_listen_sync(server_ioc, saddr, &local_err) < 0) {
        object_unref(OBJECT(server_ioc));
908
        error_report_err(local_err);
B
bellard 已提交
909
        return 1;
P
Paolo Bonzini 已提交
910
    }
P
Paolo Bonzini 已提交
911 912 913 914

    if (device) {
        int ret;

915
        ret = pthread_create(&client_thread, NULL, nbd_client_thread, device);
P
Paolo Bonzini 已提交
916
        if (ret != 0) {
917 918
            error_report("Failed to create client thread: %s", strerror(ret));
            exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
919 920 921 922 923 924
        }
    } else {
        /* Shut up GCC warnings.  */
        memset(&client_thread, 0, sizeof(client_thread));
    }

925
    nbd_update_server_watch();
B
bellard 已提交
926

927 928 929
    /* now when the initialization is (almost) complete, chdir("/")
     * to free any busy filesystems */
    if (chdir("/") < 0) {
930 931 932
        error_report("Could not chdir to root directory: %s",
                     strerror(errno));
        exit(EXIT_FAILURE);
933 934
    }

935
    state = RUNNING;
936
    do {
P
Paolo Bonzini 已提交
937
        main_loop_wait(false);
938 939 940 941 942 943 944
        if (state == TERMINATE) {
            state = TERMINATING;
            nbd_export_close(exp);
            nbd_export_put(exp);
            exp = NULL;
        }
    } while (state != TERMINATED);
B
bellard 已提交
945

M
Markus Armbruster 已提交
946
    blk_unref(blk);
P
Paolo Bonzini 已提交
947 948 949
    if (sockpath) {
        unlink(sockpath);
    }
B
bellard 已提交
950

951
    qemu_opts_del(sn_opts);
952

P
Paolo Bonzini 已提交
953 954 955 956 957 958 959
    if (device) {
        void *ret;
        pthread_join(client_thread, &ret);
        exit(ret != NULL);
    } else {
        exit(EXIT_SUCCESS);
    }
B
bellard 已提交
960
}