qemu-nbd.c 32.9 KB
Newer Older
1
/*
B
bellard 已提交
2 3 4 5 6 7 8 9 10 11 12 13 14 15
 *  Copyright (C) 2005  Anthony Liguori <anthony@codemonkey.ws>
 *
 *  Network Block Device
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; under version 2 of the License.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
16
 *  along with this program; if not, see <http://www.gnu.org/licenses/>.
B
bellard 已提交
17 18
 */

P
Peter Maydell 已提交
19
#include "qemu/osdep.h"
20
#include "qapi/error.h"
21
#include "qemu-common.h"
22
#include "qemu/cutils.h"
M
Markus Armbruster 已提交
23
#include "sysemu/block-backend.h"
24
#include "block/block_int.h"
25
#include "block/nbd.h"
26
#include "qemu/main-loop.h"
27
#include "qemu/error-report.h"
28
#include "qemu/config-file.h"
29
#include "qemu/bswap.h"
30
#include "qemu/log.h"
31
#include "qemu/systemd.h"
32
#include "block/snapshot.h"
33
#include "qapi/util.h"
34
#include "qapi/qmp/qstring.h"
35
#include "qom/object_interfaces.h"
36
#include "io/channel-socket.h"
37
#include "crypto/init.h"
38
#include "trace/control.h"
B
bellard 已提交
39 40

#include <getopt.h>
41
#include <libgen.h>
P
Paolo Bonzini 已提交
42
#include <pthread.h>
43

44
/*
 * printf-style template for the default unix socket path; main() fills the
 * %s with the basename of the NBD device given to --connect.
 */
#define SOCKET_PATH                "/var/lock/qemu-nbd-%s"

/*
 * getopt_long() codes for options that only have a long spelling.  They
 * start at 256, i.e. above every single-character option code.
 */
#define QEMU_NBD_OPT_CACHE         256
#define QEMU_NBD_OPT_AIO           257
#define QEMU_NBD_OPT_DISCARD       258
#define QEMU_NBD_OPT_DETECT_ZEROES 259
#define QEMU_NBD_OPT_OBJECT        260
#define QEMU_NBD_OPT_TLSCREDS      261
#define QEMU_NBD_OPT_IMAGE_OPTS    262
#define QEMU_NBD_OPT_FORK          263

/* Size in bytes of the sector buffers used when scanning partition tables. */
#define MBR_SIZE 512

P
Paolo Bonzini 已提交
56
static NBDExport *exp;
57
static bool newproto;
58
static int verbose;
P
Paolo Bonzini 已提交
59
static char *srcpath;
60
static SocketAddress *saddr;
61 62
static int persistent = 0;
static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state;
P
Paolo Bonzini 已提交
63 64
static int shared = 1;
static int nb_fds;
65 66
static QIOChannelSocket *server_ioc;
static int server_watch = -1;
67
static QCryptoTLSCreds *tlscreds;
B
bellard 已提交
68 69 70

static void usage(const char *name)
{
71
    (printf) (
B
bellard 已提交
72 73 74
"Usage: %s [OPTIONS] FILE\n"
"QEMU Disk Network Block Device Server\n"
"\n"
75 76
"  -h, --help                display this help and exit\n"
"  -V, --version             output version information and exit\n"
77 78
"\n"
"Connection properties:\n"
79 80 81 82 83 84 85
"  -p, --port=PORT           port to listen on (default `%d')\n"
"  -b, --bind=IFACE          interface to bind to (default `0.0.0.0')\n"
"  -k, --socket=PATH         path to the unix socket\n"
"                            (default '"SOCKET_PATH"')\n"
"  -e, --shared=NUM          device can be shared by NUM clients (default '1')\n"
"  -t, --persistent          don't exit on the last connection\n"
"  -v, --verbose             display extra debugging information\n"
E
Eric Blake 已提交
86
"  -x, --export-name=NAME    expose export by name\n"
87
"  -D, --description=TEXT    with -x, also export a human-readable description\n"
B
bellard 已提交
88
"\n"
89
"Exposing part of the image:\n"
90 91
"  -o, --offset=OFFSET       offset into the image\n"
"  -P, --partition=NUM       only expose partition NUM\n"
92
"\n"
93 94 95
"General purpose options:\n"
"  --object type,id=ID,...   define an object such as 'secret' for providing\n"
"                            passwords and/or encryption keys\n"
96 97
"  -T, --trace [[enable=]<pattern>][,events=<file>][,file=<file>]\n"
"                            specify tracing options\n"
M
Max Reitz 已提交
98 99
"  --fork                    fork off the server process and exit the parent\n"
"                            once the server is running\n"
100 101
#ifdef __linux__
"Kernel NBD client support:\n"
102 103
"  -c, --connect=DEV         connect FILE to the local NBD device DEV\n"
"  -d, --disconnect          disconnect the specified device\n"
104 105 106 107
"\n"
#endif
"\n"
"Block device options:\n"
108 109 110 111 112
"  -f, --format=FORMAT       set image format (raw, qcow2, ...)\n"
"  -r, --read-only           export read-only\n"
"  -s, --snapshot            use FILE as an external snapshot, create a temporary\n"
"                            file with backing_file=FILE, redirect the write to\n"
"                            the temporary one\n"
113
"  -l, --load-snapshot=SNAPSHOT_PARAM\n"
114 115 116 117 118 119 120
"                            load an internal snapshot inside FILE and export it\n"
"                            as an read-only device, SNAPSHOT_PARAM format is\n"
"                            'snapshot.id=[ID],snapshot.name=[NAME]', or\n"
"                            '[ID_OR_NAME]'\n"
"  -n, --nocache             disable host cache\n"
"      --cache=MODE          set cache mode (none, writeback, ...)\n"
"      --aio=MODE            set AIO mode (native or threads)\n"
121
"      --discard=MODE        set discard mode (ignore, unmap)\n"
122
"      --detect-zeroes=MODE  set detect-zeroes mode (off, on, unmap)\n"
123
"      --image-opts          treat FILE as a full set of image options\n"
124 125
"\n"
"Report bugs to <qemu-devel@nongnu.org>\n"
126
    , name, NBD_DEFAULT_PORT, "DEVICE");
B
bellard 已提交
127 128 129 130 131
}

/*
 * Print the version banner to stdout.
 *
 * @name: program name placed at the start of the first line.
 */
static void version(const char *name)
{
    /* Only the first line depends on @name; the rest is constant text. */
    printf("%s version 0.0.1\n", name);
    fputs(
"Written by Anthony Liguori.\n"
"\n"
"Copyright (C) 2006 Anthony Liguori <anthony@codemonkey.ws>.\n"
"This is free software; see the source for copying conditions.  There is NO\n"
"warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n"
    , stdout);
}

/*
 * Decoded form of one partition table entry, as filled in by
 * read_partition().
 *
 * Both cylinder fields receive a value assembled from one full byte plus
 * two extra high bits (up to 0x3ff), so they must be wider than 8 bits.
 */
struct partition_record
{
    uint8_t bootable;           /* entry byte 0 */
    uint8_t start_head;         /* entry byte 1 */
    uint32_t start_cylinder;    /* byte 3 plus the top two bits of byte 2 */
    uint8_t start_sector;       /* low six bits of byte 2 */
    uint8_t system;             /* entry byte 4; 0 marks an unused entry */
    uint8_t end_head;           /* entry byte 5 */
    uint32_t end_cylinder;      /* byte 7 plus the top two bits of byte 6 */
    uint8_t end_sector;         /* low six bits of byte 6 */
    uint32_t start_sector_abs;  /* little-endian 32-bit value at byte 8 */
    uint32_t nb_sectors_abs;    /* little-endian 32-bit value at byte 12 */
};

/*
 * Decode one raw partition table entry into @r.
 *
 * @p: start of the entry; bytes 0..15 are read.
 * @r: record that receives the decoded fields.
 *
 * Bytes 2 and 6 each pack a six-bit sector number in their low bits and two
 * extra cylinder bits in their top bits.  The assembled cylinder value is
 * stored in the corresponding field of @r and is therefore limited by that
 * field's declared width.
 */
static void read_partition(uint8_t *p, struct partition_record *r)
{
    r->bootable = p[0];
    r->system = p[4];

    /* Start of the partition in head/sector/cylinder form. */
    r->start_head = p[1];
    r->start_sector = p[2] & 0x3f;
    r->start_cylinder = ((p[2] << 2) & 0x0300) | p[3];

    /* End of the partition in head/sector/cylinder form. */
    r->end_head = p[5];
    r->end_sector = p[6] & 0x3f;
    r->end_cylinder = ((p[6] << 2) & 0x300) | p[7];

    /* Start sector and sector count, stored little-endian. */
    r->start_sector_abs = ldl_le_p(p + 8);
    r->nb_sectors_abs = ldl_le_p(p + 12);
}

170
/*
 * Look up partition number @partition in the partition table of @blk.
 *
 * The first MBR_SIZE bytes of @blk are read and the four entries starting
 * at byte 446 are decoded.  Entry i (0-based) is partition i + 1.  An entry
 * whose type is 0x5 or 0xF is treated as extended: the sector it points at
 * is read and its four entries are numbered ext_partnum + 1 onwards, where
 * ext_partnum starts at 4 and grows by 4 for every extended entry seen.
 * Entries with a zero type or a zero sector count are skipped.
 *
 * @offset, @size: on success receive the partition's byte offset and byte
 *                 length (sector values shifted left by 9).
 *
 * Returns 0 on success, -EINVAL if bytes 510/511 are not 0x55 0xaa, and
 * -ENOENT if no entry matches.  A read error is reported and terminates
 * the process.
 *
 * NOTE(review): entries read from the extended sector are used as absolute
 * sector numbers and that sector's own signature is not checked - confirm
 * this matches the on-disk layout you need to support.
 */
static int find_partition(BlockBackend *blk, int partition,
                          off_t *offset, off_t *size)
{
    struct partition_record mbr[4];
    uint8_t data[MBR_SIZE];
    int i;
    int ext_partnum = 4;
    int ret;

    ret = blk_pread(blk, 0, data, sizeof(data));
    if (ret < 0) {
        error_report("error while reading: %s", strerror(-ret));
        exit(EXIT_FAILURE);
    }

    if (data[510] != 0x55 || data[511] != 0xaa) {
        return -EINVAL;
    }

    for (i = 0; i < 4; i++) {
        read_partition(&data[446 + 16 * i], &mbr[i]);

        if (!mbr[i].system || !mbr[i].nb_sectors_abs) {
            continue;
        }

        if (mbr[i].system == 0xF || mbr[i].system == 0x5) {
            struct partition_record ext[4];
            uint8_t data1[MBR_SIZE];
            int j;

            /* Widen before multiplying: a 32-bit product would wrap for
             * extended partitions that start at or beyond 4 GiB. */
            ret = blk_pread(blk,
                            (uint64_t)mbr[i].start_sector_abs * MBR_SIZE,
                            data1, sizeof(data1));
            if (ret < 0) {
                error_report("error while reading: %s", strerror(-ret));
                exit(EXIT_FAILURE);
            }

            for (j = 0; j < 4; j++) {
                read_partition(&data1[446 + 16 * j], &ext[j]);
                if (!ext[j].system || !ext[j].nb_sectors_abs) {
                    continue;
                }

                if ((ext_partnum + j + 1) == partition) {
                    *offset = (uint64_t)ext[j].start_sector_abs << 9;
                    *size = (uint64_t)ext[j].nb_sectors_abs << 9;
                    return 0;
                }
            }
            ext_partnum += 4;
        } else if ((i + 1) == partition) {
            *offset = (uint64_t)mbr[i].start_sector_abs << 9;
            *size = (uint64_t)mbr[i].nb_sectors_abs << 9;
            return 0;
        }
    }

    return -ENOENT;
}

P
Paolo Bonzini 已提交
231 232
static void termsig_handler(int signum)
{
233
    atomic_cmpxchg(&state, RUNNING, TERMINATE);
P
Paolo Bonzini 已提交
234
    qemu_notify_event();
P
Paolo Bonzini 已提交
235 236
}

237

P
Paolo Bonzini 已提交
238
/*
 * Thread body: open and immediately close the device named by @arg.
 *
 * On Linux a plain open() is enough to trigger a partition table update,
 * provided the nbd module was loaded with max_part != 0, e.g.
 *     modprobe nbd max_part=63
 *
 * Always returns NULL; a failed open() is silently ignored.
 */
static void *show_parts(void *arg)
{
    const char *path = arg;
    int fd = open(path, O_RDWR);

    if (fd < 0) {
        return NULL;
    }
    close(fd);
    return NULL;
}
254

P
Paolo Bonzini 已提交
255 256
static void *nbd_client_thread(void *arg)
{
257
    char *device = arg;
P
Paolo Bonzini 已提交
258
    off_t size;
E
Eric Blake 已提交
259
    uint16_t nbdflags;
260 261
    QIOChannelSocket *sioc;
    int fd;
P
Paolo Bonzini 已提交
262 263
    int ret;
    pthread_t show_parts_thread;
M
Max Reitz 已提交
264
    Error *local_error = NULL;
P
Paolo Bonzini 已提交
265

266 267 268 269
    sioc = qio_channel_socket_new();
    if (qio_channel_socket_connect_sync(sioc,
                                        saddr,
                                        &local_error) < 0) {
270
        error_report_err(local_error);
271 272
        goto out;
    }
P
Paolo Bonzini 已提交
273

274
    ret = nbd_receive_negotiate(QIO_CHANNEL(sioc), NULL, &nbdflags,
275
                                NULL, NULL, NULL,
276
                                &size, &local_error);
277
    if (ret < 0) {
M
Max Reitz 已提交
278
        if (local_error) {
279
            error_report_err(local_error);
M
Max Reitz 已提交
280
        }
P
Paolo Bonzini 已提交
281
        goto out_socket;
P
Paolo Bonzini 已提交
282 283
    }

284
    fd = open(device, O_RDWR);
285
    if (fd < 0) {
286
        /* Linux-only, we can use %m in printf.  */
287
        error_report("Failed to open %s: %m", device);
P
Paolo Bonzini 已提交
288
        goto out_socket;
289 290
    }

291
    ret = nbd_init(fd, sioc, nbdflags, size);
292
    if (ret < 0) {
P
Paolo Bonzini 已提交
293
        goto out_fd;
P
Paolo Bonzini 已提交
294 295 296
    }

    /* update partition table */
297
    pthread_create(&show_parts_thread, NULL, show_parts, device);
P
Paolo Bonzini 已提交
298

299 300 301 302 303 304 305
    if (verbose) {
        fprintf(stderr, "NBD device %s is now connected to %s\n",
                device, srcpath);
    } else {
        /* Close stderr so that the qemu-nbd process exits.  */
        dup2(STDOUT_FILENO, STDERR_FILENO);
    }
P
Paolo Bonzini 已提交
306 307 308

    ret = nbd_client(fd);
    if (ret) {
P
Paolo Bonzini 已提交
309
        goto out_fd;
310
    }
P
Paolo Bonzini 已提交
311
    close(fd);
312
    object_unref(OBJECT(sioc));
P
Paolo Bonzini 已提交
313 314 315
    kill(getpid(), SIGTERM);
    return (void *) EXIT_SUCCESS;

P
Paolo Bonzini 已提交
316 317 318
out_fd:
    close(fd);
out_socket:
319
    object_unref(OBJECT(sioc));
P
Paolo Bonzini 已提交
320 321 322
out:
    kill(getpid(), SIGTERM);
    return (void *) EXIT_FAILURE;
323 324
}

325
static int nbd_can_accept(void)
P
Paolo Bonzini 已提交
326 327 328 329
{
    return nb_fds < shared;
}

330 331 332 333 334 335
/*
 * Record that the export has finished closing: state must already be
 * TERMINATING and becomes TERMINATED.
 *
 * @exp: the export that was closed; not inspected here (the parameter
 *       shadows the file-scope variable of the same name).
 */
static void nbd_export_closed(NBDExport *exp)
{
    (void)exp;

    assert(state == TERMINATING);
    state = TERMINATED;
}

336
static void nbd_update_server_watch(void);
337

338
/*
 * Close callback passed to nbd_client_new().
 *
 * Drops the client from the connection count.  If that was the last client,
 * the server is not persistent and it is still RUNNING, shutdown is
 * requested by setting state to TERMINATE.  The listener watch is then
 * re-evaluated and the client reference released.
 */
static void nbd_client_closed(NBDClient *client)
{
    const bool was_last = (--nb_fds == 0);

    if (was_last && !persistent && state == RUNNING) {
        state = TERMINATE;
    }

    nbd_update_server_watch();
    nbd_client_put(client);
}

348
/*
 * Watch callback for the listening channel: accept one connection.
 *
 * If accepting fails nothing else happens.  Once shutdown has begun
 * (state >= TERMINATE) the new connection is dropped straight away.
 * Otherwise the client is counted, the listener watch is re-evaluated and
 * an NBD client is created on the connection.
 *
 * @cond and @opaque are unused.  Always returns TRUE.
 */
static gboolean nbd_accept(QIOChannel *ioc, GIOCondition cond, gpointer opaque)
{
    QIOChannelSocket *client_ioc =
        qio_channel_socket_accept(QIO_CHANNEL_SOCKET(ioc), NULL);

    if (client_ioc == NULL) {
        return TRUE;
    }

    if (state < TERMINATE) {
        nb_fds++;
        nbd_update_server_watch();
        nbd_client_new(newproto ? NULL : exp, client_ioc,
                       tlscreds, NULL, nbd_client_closed);
    }

    /* Drop the local reference in both the accepted and rejected case. */
    object_unref(OBJECT(client_ioc));
    return TRUE;
}

372
static void nbd_update_server_watch(void)
373 374
{
    if (nbd_can_accept()) {
375 376 377 378 379 380
        if (server_watch == -1) {
            server_watch = qio_channel_add_watch(QIO_CHANNEL(server_ioc),
                                                 G_IO_IN,
                                                 nbd_accept,
                                                 NULL, NULL);
        }
381
    } else {
382 383 384 385
        if (server_watch != -1) {
            g_source_remove(server_watch);
            server_watch = -1;
        }
386 387 388
    }
}

389 390 391 392 393 394 395 396 397

static SocketAddress *nbd_build_socket_address(const char *sockpath,
                                               const char *bindto,
                                               const char *port)
{
    SocketAddress *saddr;

    saddr = g_new0(SocketAddress, 1);
    if (sockpath) {
398
        saddr->type = SOCKET_ADDRESS_KIND_UNIX;
399 400
        saddr->u.q_unix.data = g_new0(UnixSocketAddress, 1);
        saddr->u.q_unix.data->path = g_strdup(sockpath);
401
    } else {
402
        InetSocketAddress *inet;
403
        saddr->type = SOCKET_ADDRESS_KIND_INET;
404
        inet = saddr->u.inet.data = g_new0(InetSocketAddress, 1);
405
        inet->host = g_strdup(bindto);
406
        if (port) {
407
            inet->port = g_strdup(port);
408
        } else  {
409
            inet->port = g_strdup_printf("%d", NBD_DEFAULT_PORT);
410 411 412 413 414 415 416
        }
    }

    return saddr;
}


417 418 419 420 421 422 423 424 425 426
/*
 * Option list named "file" whose implied option name is also "file".
 * The descriptor table is empty, which (per the comment below) means any
 * parameter is accepted.
 * NOTE(review): the code that parses with this list is outside the visible
 * part of the file - presumably the --image-opts path; confirm.
 */
static QemuOptsList file_opts = {
    .name = "file",
    .implied_opt_name = "file",
    .head = QTAILQ_HEAD_INITIALIZER(file_opts.head),
    .desc = {
        /* no elements => accept any params */
        { /* end of list */ }
    },
};

427 428 429 430 431 432 433 434 435 436
/*
 * Option list for --object arguments.  main() registers it with
 * qemu_add_opts(), parses each --object value into it and later walks the
 * collected entries with user_creatable_add_opts_foreach.
 * The implied option name is "qom-type"; the descriptor table is empty.
 */
static QemuOptsList qemu_object_opts = {
    .name = "object",
    .implied_opt_name = "qom-type",
    .head = QTAILQ_HEAD_INITIALIZER(qemu_object_opts.head),
    .desc = {
        { }
    },
};


437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466

/*
 * Look up the TLS credentials object with the given @id.
 *
 * The object is resolved below the objects root, must be castable to
 * TYPE_QCRYPTO_TLS_CREDS and must be configured as a server endpoint.
 *
 * Returns the credentials with an extra reference taken on success.  On
 * failure returns NULL and sets @errp.
 */
static QCryptoTLSCreds *nbd_get_tls_creds(const char *id, Error **errp)
{
    Object *found = object_resolve_path_component(object_get_objects_root(),
                                                  id);
    QCryptoTLSCreds *tls;

    if (found == NULL) {
        error_setg(errp, "No TLS credentials with id '%s'", id);
        return NULL;
    }

    tls = (QCryptoTLSCreds *)object_dynamic_cast(found,
                                                 TYPE_QCRYPTO_TLS_CREDS);
    if (tls == NULL) {
        error_setg(errp, "Object with id '%s' is not TLS credentials", id);
        return NULL;
    }

    if (tls->endpoint != QCRYPTO_TLS_CREDS_ENDPOINT_SERVER) {
        error_setg(errp, "Expecting TLS credentials with a server endpoint");
        return NULL;
    }

    /* The caller receives its own reference. */
    object_ref(found);
    return tls;
}

467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503
/*
 * Fill in defaults for the listen address and port.
 *
 * @address: set to "0.0.0.0" if it currently points at NULL.
 * @port:    set to the stringified NBD_DEFAULT_PORT if it currently points
 *           at NULL.
 *
 * Values that are already set are left untouched.
 */
static void setup_address_and_port(const char **address, const char **port)
{
    if (!*address) {
        *address = "0.0.0.0";
    }
    if (!*port) {
        *port = stringify(NBD_DEFAULT_PORT);
    }
}

/*
 * Check that no explicit socket parameter was given while socket
 * activation is in use.
 *
 * The arguments are checked in the order device, sockpath, address, port.
 * Returns the error message for the first one that is non-NULL, or NULL if
 * all of them are unset.
 */
static const char *socket_activation_validate_opts(const char *device,
                                                   const char *sockpath,
                                                   const char *address,
                                                   const char *port)
{
    const struct {
        const char *value;
        const char *message;
    } conflicts[] = {
        { device,
          "NBD device can't be set when using socket activation" },
        { sockpath,
          "Unix socket can't be set when using socket activation" },
        { address,
          "The interface can't be set when using socket activation" },
        { port,
          "TCP port number can't be set when using socket activation" },
    };
    size_t i;

    for (i = 0; i < sizeof(conflicts) / sizeof(conflicts[0]); i++) {
        if (conflicts[i].value != NULL) {
            return conflicts[i].message;
        }
    }

    return NULL;
}
504

B
bellard 已提交
505 506
int main(int argc, char **argv)
{
M
Markus Armbruster 已提交
507
    BlockBackend *blk;
B
bellard 已提交
508 509
    BlockDriverState *bs;
    off_t dev_offset = 0;
E
Eric Blake 已提交
510
    uint16_t nbdflags = 0;
511
    bool disconnect = false;
512
    const char *bindto = NULL;
513 514
    const char *port = NULL;
    char *sockpath = NULL;
515
    char *device = NULL;
B
bellard 已提交
516
    off_t fd_size;
517 518
    QemuOpts *sn_opts = NULL;
    const char *sn_id_or_name = NULL;
519
    const char *sopt = "hVb:o:p:rsnP:c:dvk:e:f:tl:x:T:D:";
B
bellard 已提交
520
    struct option lopt[] = {
521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544
        { "help", no_argument, NULL, 'h' },
        { "version", no_argument, NULL, 'V' },
        { "bind", required_argument, NULL, 'b' },
        { "port", required_argument, NULL, 'p' },
        { "socket", required_argument, NULL, 'k' },
        { "offset", required_argument, NULL, 'o' },
        { "read-only", no_argument, NULL, 'r' },
        { "partition", required_argument, NULL, 'P' },
        { "connect", required_argument, NULL, 'c' },
        { "disconnect", no_argument, NULL, 'd' },
        { "snapshot", no_argument, NULL, 's' },
        { "load-snapshot", required_argument, NULL, 'l' },
        { "nocache", no_argument, NULL, 'n' },
        { "cache", required_argument, NULL, QEMU_NBD_OPT_CACHE },
        { "aio", required_argument, NULL, QEMU_NBD_OPT_AIO },
        { "discard", required_argument, NULL, QEMU_NBD_OPT_DISCARD },
        { "detect-zeroes", required_argument, NULL,
          QEMU_NBD_OPT_DETECT_ZEROES },
        { "shared", required_argument, NULL, 'e' },
        { "format", required_argument, NULL, 'f' },
        { "persistent", no_argument, NULL, 't' },
        { "verbose", no_argument, NULL, 'v' },
        { "object", required_argument, NULL, QEMU_NBD_OPT_OBJECT },
        { "export-name", required_argument, NULL, 'x' },
545
        { "description", required_argument, NULL, 'D' },
546 547
        { "tls-creds", required_argument, NULL, QEMU_NBD_OPT_TLSCREDS },
        { "image-opts", no_argument, NULL, QEMU_NBD_OPT_IMAGE_OPTS },
548
        { "trace", required_argument, NULL, 'T' },
M
Max Reitz 已提交
549
        { "fork", no_argument, NULL, QEMU_NBD_OPT_FORK },
550
        { NULL, 0, NULL, 0 }
B
bellard 已提交
551 552 553 554
    };
    int ch;
    int opt_ind = 0;
    char *end;
555
    int flags = BDRV_O_RDWR;
B
bellard 已提交
556
    int partition = -1;
557
    int ret = 0;
558
    bool seen_cache = false;
P
Paolo Bonzini 已提交
559
    bool seen_discard = false;
560
    bool seen_aio = false;
P
Paolo Bonzini 已提交
561
    pthread_t client_thread;
562
    const char *fmt = NULL;
563
    Error *local_err = NULL;
564
    BlockdevDetectZeroesOptions detect_zeroes = BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF;
565
    QDict *options = NULL;
566
    const char *export_name = NULL;
567
    const char *export_description = NULL;
568
    const char *tlscredsid = NULL;
569
    bool imageOpts = false;
570
    bool writethrough = true;
571
    char *trace_file = NULL;
M
Max Reitz 已提交
572 573
    bool fork_process = false;
    int old_stderr = -1;
574
    unsigned socket_activation;
B
bellard 已提交
575

P
Paolo Bonzini 已提交
576 577 578
    /* The client thread uses SIGTERM to interrupt the server.  A signal
     * handler ensures that "qemu-nbd -v -c" exits with a nice status code.
     */
P
Paolo Bonzini 已提交
579 580 581 582
    struct sigaction sa_sigterm;
    memset(&sa_sigterm, 0, sizeof(sa_sigterm));
    sa_sigterm.sa_handler = termsig_handler;
    sigaction(SIGTERM, &sa_sigterm, NULL);
583

584
    module_call_init(MODULE_INIT_TRACE);
585
    qcrypto_init(&error_fatal);
586

587 588
    module_call_init(MODULE_INIT_QOM);
    qemu_add_opts(&qemu_object_opts);
589
    qemu_add_opts(&qemu_trace_opts);
590
    qemu_init_exec_dir(argv[0]);
P
Paolo Bonzini 已提交
591

B
bellard 已提交
592 593 594
    while ((ch = getopt_long(argc, argv, sopt, lopt, &opt_ind)) != -1) {
        switch (ch) {
        case 's':
595 596 597
            flags |= BDRV_O_SNAPSHOT;
            break;
        case 'n':
598 599 600 601
            optarg = (char *) "none";
            /* fallthrough */
        case QEMU_NBD_OPT_CACHE:
            if (seen_cache) {
602 603
                error_report("-n and --cache can only be specified once");
                exit(EXIT_FAILURE);
604 605
            }
            seen_cache = true;
606
            if (bdrv_parse_cache_mode(optarg, &flags, &writethrough) == -1) {
607 608
                error_report("Invalid cache mode `%s'", optarg);
                exit(EXIT_FAILURE);
609
            }
B
bellard 已提交
610
            break;
611 612
        case QEMU_NBD_OPT_AIO:
            if (seen_aio) {
613 614
                error_report("--aio can only be specified once");
                exit(EXIT_FAILURE);
615 616 617 618 619 620 621
            }
            seen_aio = true;
            if (!strcmp(optarg, "native")) {
                flags |= BDRV_O_NATIVE_AIO;
            } else if (!strcmp(optarg, "threads")) {
                /* this is the default */
            } else {
622 623
               error_report("invalid aio mode `%s'", optarg);
               exit(EXIT_FAILURE);
624 625
            }
            break;
P
Paolo Bonzini 已提交
626 627
        case QEMU_NBD_OPT_DISCARD:
            if (seen_discard) {
628 629
                error_report("--discard can only be specified once");
                exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
630 631 632
            }
            seen_discard = true;
            if (bdrv_parse_discard_flags(optarg, &flags) == -1) {
633 634
                error_report("Invalid discard mode `%s'", optarg);
                exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
635 636
            }
            break;
637 638 639 640
        case QEMU_NBD_OPT_DETECT_ZEROES:
            detect_zeroes =
                qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
                                optarg,
641
                                BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
642 643 644
                                BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
                                &local_err);
            if (local_err) {
645 646
                error_reportf_err(local_err,
                                  "Failed to parse detect_zeroes mode: ");
647
                exit(EXIT_FAILURE);
648 649 650
            }
            if (detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
                !(flags & BDRV_O_UNMAP)) {
651 652 653
                error_report("setting detect-zeroes to unmap is not allowed "
                             "without setting discard operation to unmap");
                exit(EXIT_FAILURE);
654 655
            }
            break;
B
bellard 已提交
656 657 658 659
        case 'b':
            bindto = optarg;
            break;
        case 'p':
660
            port = optarg;
B
bellard 已提交
661 662 663 664
            break;
        case 'o':
                dev_offset = strtoll (optarg, &end, 0);
            if (*end) {
665 666
                error_report("Invalid offset `%s'", optarg);
                exit(EXIT_FAILURE);
B
bellard 已提交
667 668
            }
            if (dev_offset < 0) {
669 670
                error_report("Offset must be positive `%s'", optarg);
                exit(EXIT_FAILURE);
B
bellard 已提交
671 672
            }
            break;
673 674
        case 'l':
            if (strstart(optarg, SNAPSHOT_OPT_BASE, NULL)) {
675 676
                sn_opts = qemu_opts_parse_noisily(&internal_snapshot_opts,
                                                  optarg, false);
677
                if (!sn_opts) {
678 679 680
                    error_report("Failed in parsing snapshot param `%s'",
                                 optarg);
                    exit(EXIT_FAILURE);
681 682 683 684 685
                }
            } else {
                sn_id_or_name = optarg;
            }
            /* fall through */
B
bellard 已提交
686
        case 'r':
P
Paolo Bonzini 已提交
687
            nbdflags |= NBD_FLAG_READ_ONLY;
N
Naphtali Sprei 已提交
688
            flags &= ~BDRV_O_RDWR;
B
bellard 已提交
689 690 691
            break;
        case 'P':
            partition = strtol(optarg, &end, 0);
692
            if (*end) {
693 694
                error_report("Invalid partition `%s'", optarg);
                exit(EXIT_FAILURE);
695 696
            }
            if (partition < 1 || partition > 8) {
697 698
                error_report("Invalid partition %d", partition);
                exit(EXIT_FAILURE);
699
            }
B
bellard 已提交
700
            break;
701
        case 'k':
P
Paolo Bonzini 已提交
702
            sockpath = optarg;
703
            if (sockpath[0] != '/') {
704
                error_report("socket path must be absolute");
705
                exit(EXIT_FAILURE);
706
            }
707 708 709 710 711 712 713
            break;
        case 'd':
            disconnect = true;
            break;
        case 'c':
            device = optarg;
            break;
714 715 716
        case 'e':
            shared = strtol(optarg, &end, 0);
            if (*end) {
717 718
                error_report("Invalid shared device number '%s'", optarg);
                exit(EXIT_FAILURE);
719 720
            }
            if (shared < 1) {
721
                error_report("Shared device number must be greater than 0");
722
                exit(EXIT_FAILURE);
723 724
            }
            break;
725 726 727
        case 'f':
            fmt = optarg;
            break;
728 729 730
        case 't':
            persistent = 1;
            break;
731 732 733
        case 'x':
            export_name = optarg;
            break;
734 735 736
        case 'D':
            export_description = optarg;
            break;
B
bellard 已提交
737 738 739 740 741 742 743 744 745 746 747 748
        case 'v':
            verbose = 1;
            break;
        case 'V':
            version(argv[0]);
            exit(0);
            break;
        case 'h':
            usage(argv[0]);
            exit(0);
            break;
        case '?':
749 750
            error_report("Try `%s --help' for more information.", argv[0]);
            exit(EXIT_FAILURE);
751 752 753 754 755 756 757 758
        case QEMU_NBD_OPT_OBJECT: {
            QemuOpts *opts;
            opts = qemu_opts_parse_noisily(&qemu_object_opts,
                                           optarg, true);
            if (!opts) {
                exit(EXIT_FAILURE);
            }
        }   break;
759 760 761
        case QEMU_NBD_OPT_TLSCREDS:
            tlscredsid = optarg;
            break;
762 763 764
        case QEMU_NBD_OPT_IMAGE_OPTS:
            imageOpts = true;
            break;
765 766 767 768
        case 'T':
            g_free(trace_file);
            trace_file = trace_opt_parse(optarg);
            break;
M
Max Reitz 已提交
769 770 771
        case QEMU_NBD_OPT_FORK:
            fork_process = true;
            break;
B
bellard 已提交
772 773 774 775
        }
    }

    if ((argc - optind) != 1) {
776 777
        error_report("Invalid number of arguments");
        error_printf("Try `%s --help' for more information.\n", argv[0]);
778
        exit(EXIT_FAILURE);
B
bellard 已提交
779 780
    }

781 782
    if (qemu_opts_foreach(&qemu_object_opts,
                          user_creatable_add_opts_foreach,
783
                          NULL, NULL)) {
784 785 786
        exit(EXIT_FAILURE);
    }

787 788 789 790 791 792
    if (!trace_init_backends()) {
        exit(1);
    }
    trace_init_file(trace_file);
    qemu_set_log(LOG_TRACE);

793 794 795 796 797 798 799 800 801 802 803
    socket_activation = check_socket_activation();
    if (socket_activation == 0) {
        setup_address_and_port(&bindto, &port);
    } else {
        /* Using socket activation - check user didn't use -p etc. */
        const char *err_msg = socket_activation_validate_opts(device, sockpath,
                                                              bindto, port);
        if (err_msg != NULL) {
            error_report("%s", err_msg);
            exit(EXIT_FAILURE);
        }
804 805 806 807 808 809 810

        /* qemu-nbd can only listen on a single socket.  */
        if (socket_activation > 1) {
            error_report("qemu-nbd does not support socket activation with %s > 1",
                         "LISTEN_FDS");
            exit(EXIT_FAILURE);
        }
811 812
    }

813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834
    if (tlscredsid) {
        if (sockpath) {
            error_report("TLS is only supported with IPv4/IPv6");
            exit(EXIT_FAILURE);
        }
        if (device) {
            error_report("TLS is not supported with a host device");
            exit(EXIT_FAILURE);
        }
        if (!export_name) {
            /* Set the default NBD protocol export name, since
             * we *must* use new style protocol for TLS */
            export_name = "";
        }
        tlscreds = nbd_get_tls_creds(tlscredsid, &local_err);
        if (local_err) {
            error_report("Failed to get TLS creds %s",
                         error_get_pretty(local_err));
            exit(EXIT_FAILURE);
        }
    }

835
    if (disconnect) {
836 837
        int nbdfd = open(argv[optind], O_RDWR);
        if (nbdfd < 0) {
838 839 840
            error_report("Cannot open %s: %s", argv[optind],
                         strerror(errno));
            exit(EXIT_FAILURE);
841
        }
842
        nbd_disconnect(nbdfd);
843

844
        close(nbdfd);
845 846 847

        printf("%s disconnected\n", argv[optind]);

848
        return 0;
849 850
    }

M
Max Reitz 已提交
851
    if ((device && !verbose) || fork_process) {
852 853 854 855
        int stderr_fd[2];
        pid_t pid;
        int ret;

856
        if (qemu_pipe(stderr_fd) < 0) {
857 858 859
            error_report("Error setting up communication pipe: %s",
                         strerror(errno));
            exit(EXIT_FAILURE);
860 861 862 863 864 865
        }

        /* Now daemonize, but keep a communication channel open to
         * print errors and exit with the proper status code.
         */
        pid = fork();
M
Max Reitz 已提交
866
        if (pid < 0) {
867 868
            error_report("Failed to fork: %s", strerror(errno));
            exit(EXIT_FAILURE);
M
Max Reitz 已提交
869
        } else if (pid == 0) {
870
            close(stderr_fd[0]);
871
            ret = qemu_daemon(1, 0);
872 873

            /* Temporarily redirect stderr to the parent's pipe...  */
M
Max Reitz 已提交
874
            old_stderr = dup(STDERR_FILENO);
875
            dup2(stderr_fd[1], STDERR_FILENO);
876
            if (ret < 0) {
877 878
                error_report("Failed to daemonize: %s", strerror(errno));
                exit(EXIT_FAILURE);
879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894
            }

            /* ... close the descriptor we inherited and go on.  */
            close(stderr_fd[1]);
        } else {
            bool errors = false;
            char *buf;

            /* In the parent.  Print error messages from the child until
             * it closes the pipe.
             */
            close(stderr_fd[1]);
            buf = g_malloc(1024);
            while ((ret = read(stderr_fd[0], buf, 1024)) > 0) {
                errors = true;
                ret = qemu_write_full(STDERR_FILENO, buf, ret);
895
                if (ret < 0) {
896 897 898
                    exit(EXIT_FAILURE);
                }
            }
899
            if (ret < 0) {
900 901 902
                error_report("Cannot read from daemon: %s",
                             strerror(errno));
                exit(EXIT_FAILURE);
903 904 905 906 907 908 909 910 911
            }

            /* Usually the daemon should not print any message.
             * Exit with zero status in that case.
             */
            exit(errors);
        }
    }

912 913 914
    if (device != NULL && sockpath == NULL) {
        sockpath = g_malloc(128);
        snprintf(sockpath, 128, SOCKET_PATH, basename(device));
915 916
    }

917 918 919 920 921 922 923 924 925 926 927 928 929 930 931 932 933 934 935
    if (socket_activation == 0) {
        server_ioc = qio_channel_socket_new();
        saddr = nbd_build_socket_address(sockpath, bindto, port);
        if (qio_channel_socket_listen_sync(server_ioc, saddr, &local_err) < 0) {
            object_unref(OBJECT(server_ioc));
            error_report_err(local_err);
            return 1;
        }
    } else {
        /* See comment in check_socket_activation above. */
        assert(socket_activation == 1);
        server_ioc = qio_channel_socket_new_fd(FIRST_SOCKET_ACTIVATION_FD,
                                               &local_err);
        if (server_ioc == NULL) {
            error_report("Failed to use socket activation: %s",
                         error_get_pretty(local_err));
            exit(EXIT_FAILURE);
        }
    }
936

937
    if (qemu_init_main_loop(&local_err)) {
938
        error_report_err(local_err);
939 940
        exit(EXIT_FAILURE);
    }
941 942 943
    bdrv_init();
    atexit(bdrv_close_all);

944 945 946 947 948 949 950 951 952 953 954 955 956 957
    srcpath = argv[optind];
    if (imageOpts) {
        QemuOpts *opts;
        if (fmt) {
            error_report("--image-opts and -f are mutually exclusive");
            exit(EXIT_FAILURE);
        }
        opts = qemu_opts_parse_noisily(&file_opts, srcpath, true);
        if (!opts) {
            qemu_opts_reset(&file_opts);
            exit(EXIT_FAILURE);
        }
        options = qemu_opts_to_qdict(opts, NULL);
        qemu_opts_reset(&file_opts);
958
        blk = blk_new_open(NULL, NULL, options, flags, &local_err);
959 960 961 962 963
    } else {
        if (fmt) {
            options = qdict_new();
            qdict_put(options, "driver", qstring_from_str(fmt));
        }
964
        blk = blk_new_open(srcpath, NULL, options, flags, &local_err);
965 966
    }

967
    if (!blk) {
968 969
        error_reportf_err(local_err, "Failed to blk_new_open '%s': ",
                          argv[optind]);
970
        exit(EXIT_FAILURE);
971
    }
972
    bs = blk_bs(blk);
973

974 975
    blk_set_enable_write_cache(blk, !writethrough);

976 977 978 979 980 981 982 983 984 985
    if (sn_opts) {
        ret = bdrv_snapshot_load_tmp(bs,
                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_ID),
                                     qemu_opt_get(sn_opts, SNAPSHOT_OPT_NAME),
                                     &local_err);
    } else if (sn_id_or_name) {
        ret = bdrv_snapshot_load_tmp_by_id_or_name(bs, sn_id_or_name,
                                                   &local_err);
    }
    if (ret < 0) {
986
        error_reportf_err(local_err, "Failed to load snapshot: ");
987
        exit(EXIT_FAILURE);
988 989
    }

990
    bs->detect_zeroes = detect_zeroes;
991
    fd_size = blk_getlength(blk);
M
Max Reitz 已提交
992
    if (fd_size < 0) {
993 994 995
        error_report("Failed to determine the image length: %s",
                     strerror(-fd_size));
        exit(EXIT_FAILURE);
M
Max Reitz 已提交
996
    }
997

998 999 1000 1001 1002 1003 1004 1005
    if (dev_offset >= fd_size) {
        error_report("Offset (%lld) has to be smaller than the image size "
                     "(%lld)",
                     (long long int)dev_offset, (long long int)fd_size);
        exit(EXIT_FAILURE);
    }
    fd_size -= dev_offset;

1006
    if (partition != -1) {
1007
        ret = find_partition(blk, partition, &dev_offset, &fd_size);
1008
        if (ret < 0) {
1009
            error_report("Could not find partition %d: %s", partition,
1010
                         strerror(-ret));
1011
            exit(EXIT_FAILURE);
1012
        }
1013 1014
    }

1015 1016
    exp = nbd_export_new(bs, dev_offset, fd_size, nbdflags, nbd_export_closed,
                         writethrough, NULL, &local_err);
M
Max Reitz 已提交
1017
    if (!exp) {
1018
        error_report_err(local_err);
1019
        exit(EXIT_FAILURE);
M
Max Reitz 已提交
1020
    }
1021 1022
    if (export_name) {
        nbd_export_set_name(exp, export_name);
1023
        nbd_export_set_description(exp, export_description);
1024
        newproto = true;
1025 1026 1027
    } else if (export_description) {
        error_report("Export description requires an export name");
        exit(EXIT_FAILURE);
1028
    }
1029

P
Paolo Bonzini 已提交
1030 1031 1032
    if (device) {
        int ret;

1033
        ret = pthread_create(&client_thread, NULL, nbd_client_thread, device);
P
Paolo Bonzini 已提交
1034
        if (ret != 0) {
1035 1036
            error_report("Failed to create client thread: %s", strerror(ret));
            exit(EXIT_FAILURE);
P
Paolo Bonzini 已提交
1037 1038 1039 1040 1041 1042
        }
    } else {
        /* Shut up GCC warnings.  */
        memset(&client_thread, 0, sizeof(client_thread));
    }

1043
    nbd_update_server_watch();
B
bellard 已提交
1044

1045 1046 1047
    /* Now that the initialization is (almost) complete, chdir("/")
     * to free any busy filesystems */
    if (chdir("/") < 0) {
1048 1049 1050
        error_report("Could not chdir to root directory: %s",
                     strerror(errno));
        exit(EXIT_FAILURE);
1051 1052
    }

M
Max Reitz 已提交
1053 1054 1055 1056 1057
    if (fork_process) {
        dup2(old_stderr, STDERR_FILENO);
        close(old_stderr);
    }

1058
    state = RUNNING;
1059
    do {
P
Paolo Bonzini 已提交
1060
        main_loop_wait(false);
1061 1062 1063 1064 1065 1066 1067
        if (state == TERMINATE) {
            state = TERMINATING;
            nbd_export_close(exp);
            nbd_export_put(exp);
            exp = NULL;
        }
    } while (state != TERMINATED);
B
bellard 已提交
1068

M
Markus Armbruster 已提交
1069
    blk_unref(blk);
P
Paolo Bonzini 已提交
1070 1071 1072
    if (sockpath) {
        unlink(sockpath);
    }
B
bellard 已提交
1073

1074
    qemu_opts_del(sn_opts);
1075

P
Paolo Bonzini 已提交
1076 1077 1078 1079 1080 1081 1082
    if (device) {
        void *ret;
        pthread_join(client_thread, &ret);
        exit(ret != NULL);
    } else {
        exit(EXIT_SUCCESS);
    }
B
bellard 已提交
1083
}