nbd-client.c 12.0 KB
Newer Older
M
Marc-André Lureau 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34
/*
 * QEMU Block driver for  NBD
 *
 * Copyright (C) 2008 Bull S.A.S.
 *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 * Some parts:
 *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "nbd-client.h"
#include "qemu/sockets.h"

/*
 * Convert between a small request index and the 64-bit handle carried in
 * NBD requests/replies.  Both directions XOR the value with the address
 * passed as the first argument, so applying one macro after the other with
 * the same address gives back the original value.  In this file the first
 * argument is always the NbdClientSession pointer.
 *
 * Every macro argument is parenthesized so that an expression argument is
 * evaluated as a whole before the casts are applied.
 */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)(bs)))
#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)(bs)))

35 36 37 38 39 40 41 42 43 44 45
/*
 * Enter every coroutine currently registered in s->recv_coroutine[].
 * Each slot is re-read when its turn comes, so slots cleared while an
 * earlier coroutine was running are skipped.
 */
static void nbd_recv_coroutines_enter_all(NbdClientSession *s)
{
    int slot;

    for (slot = 0; slot < MAX_NBD_REQUESTS; slot++) {
        if (!s->recv_coroutine[slot]) {
            continue;
        }
        qemu_coroutine_enter(s->recv_coroutine[slot], NULL);
    }
}

M
Max Reitz 已提交
46
/*
 * Tear the connection down: shut the socket down, enter every coroutine
 * still registered as waiting for a reply, unregister the socket's fd
 * handlers and close the socket.  client->sock is -1 on return.
 */
static void nbd_teardown_connection(BlockDriverState *bs)
{
    NbdClientSession *client = nbd_get_client_session(bs);

    /* finish any pending coroutines */
    /* NOTE(review): 2 is presumably SHUT_RDWR (SD_BOTH on Winsock), i.e.
     * both directions are shut down -- confirm for all supported hosts. */
    shutdown(client->sock, 2);
    nbd_recv_coroutines_enter_all(client);

    nbd_client_detach_aio_context(bs);
    closesocket(client->sock);
    client->sock = -1;
}

M
Marc-André Lureau 已提交
59 60
static void nbd_reply_ready(void *opaque)
{
M
Max Reitz 已提交
61 62
    BlockDriverState *bs = opaque;
    NbdClientSession *s = nbd_get_client_session(bs);
M
Marc-André Lureau 已提交
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
    uint64_t i;
    int ret;

    if (s->reply.handle == 0) {
        /* No reply already in flight.  Fetch a header.  It is possible
         * that another thread has done the same thing in parallel, so
         * the socket is not readable anymore.
         */
        ret = nbd_receive_reply(s->sock, &s->reply);
        if (ret == -EAGAIN) {
            return;
        }
        if (ret < 0) {
            s->reply.handle = 0;
            goto fail;
        }
    }

    /* There's no need for a mutex on the receive side, because the
     * handler acts as a synchronization point and ensures that only
     * one coroutine is called until the reply finishes.  */
    i = HANDLE_TO_INDEX(s, s->reply.handle);
    if (i >= MAX_NBD_REQUESTS) {
        goto fail;
    }

    if (s->recv_coroutine[i]) {
        qemu_coroutine_enter(s->recv_coroutine[i], NULL);
        return;
    }

fail:
M
Max Reitz 已提交
95
    nbd_teardown_connection(bs);
M
Marc-André Lureau 已提交
96 97 98 99
}

static void nbd_restart_write(void *opaque)
{
M
Max Reitz 已提交
100
    BlockDriverState *bs = opaque;
M
Marc-André Lureau 已提交
101

M
Max Reitz 已提交
102
    qemu_coroutine_enter(nbd_get_client_session(bs)->send_coroutine, NULL);
M
Marc-André Lureau 已提交
103 104
}

M
Max Reitz 已提交
105 106 107
/*
 * Send @request to the server, followed by @request->len bytes of payload
 * taken from @qiov starting at byte @offset when @qiov is not NULL.
 *
 * Runs with s->send_mutex held.  The first free slot of
 * s->recv_coroutine[] is claimed for the calling coroutine and its index is
 * encoded into request->handle.  The slot is NOT released here, also not on
 * failure.
 *
 * Returns the result of nbd_send_request(), or -EIO if the header was sent
 * but the payload was not sent completely.
 */
static int nbd_co_send_request(BlockDriverState *bs,
                               struct nbd_request *request,
                               QEMUIOVector *qiov, int offset)
{
    NbdClientSession *s = nbd_get_client_session(bs);
    AioContext *aio_context;
    int rc, ret, i;

    qemu_co_mutex_lock(&s->send_mutex);

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i] == NULL) {
            s->recv_coroutine[i] = qemu_coroutine_self();
            break;
        }
    }

    /* A free slot must exist at this point. */
    assert(i < MAX_NBD_REQUESTS);
    request->handle = INDEX_TO_HANDLE(s, i);
    s->send_coroutine = qemu_coroutine_self();
    aio_context = bdrv_get_aio_context(bs);

    /* Install the write handler for the duration of the send.
     * NOTE(review): presumably so that a send that would block can be
     * resumed through nbd_restart_write() -- confirm. */
    aio_set_fd_handler(aio_context, s->sock,
                       nbd_reply_ready, nbd_restart_write, bs);
    if (qiov) {
        /* Cork non-unix sockets around header + payload. */
        if (!s->is_unix) {
            socket_set_cork(s->sock, 1);
        }
        rc = nbd_send_request(s->sock, request);
        if (rc >= 0) {
            ret = qemu_co_sendv(s->sock, qiov->iov, qiov->niov,
                                offset, request->len);
            if (ret != request->len) {
                rc = -EIO;
            }
        }
        if (!s->is_unix) {
            socket_set_cork(s->sock, 0);
        }
    } else {
        rc = nbd_send_request(s->sock, request);
    }
    /* Back to read-only handling now that the request has been sent. */
    aio_set_fd_handler(aio_context, s->sock, nbd_reply_ready, NULL, bs);
    s->send_coroutine = NULL;
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
}

/*
 * Yield until entered again, then copy the session's current reply header
 * into @reply.
 *
 * If the header's handle does not match @request, @reply->error is set to
 * EIO and the session's pending header is left untouched.  Otherwise, when
 * @qiov is given and the reply carries no error, @request->len bytes are
 * received into @qiov at byte @offset (a short read sets @reply->error to
 * EIO), and the pending header is marked as consumed.
 */
static void nbd_co_receive_reply(NbdClientSession *s,
    struct nbd_request *request, struct nbd_reply *reply,
    QEMUIOVector *qiov, int offset)
{
    int received;

    /* Wait until we're woken up by the read handler.  TODO: perhaps
     * peek at the next reply and avoid yielding if it's ours?  */
    qemu_coroutine_yield();
    *reply = s->reply;

    if (reply->handle != request->handle) {
        reply->error = EIO;
        return;
    }

    if (qiov && reply->error == 0) {
        received = qemu_co_recvv(s->sock, qiov->iov, qiov->niov,
                                 offset, request->len);
        if (received != request->len) {
            reply->error = EIO;
        }
    }

    /* Tell the read handler to read another header.  */
    s->reply.handle = 0;
}

/*
 * Account for a new in-flight request, taking s->free_sema first when
 * s->in_flight has already reached MAX_NBD_REQUESTS - 1.
 * @request is not used here.
 */
static void nbd_coroutine_start(NbdClientSession *s,
   struct nbd_request *request)
{
    /* Poor man semaphore.  The free_sema is locked when no other request
     * can be accepted, and unlocked after receiving one reply.  */
    if (s->in_flight >= MAX_NBD_REQUESTS - 1) {
        qemu_co_mutex_lock(&s->free_sema);
        assert(s->in_flight < MAX_NBD_REQUESTS);
    }
    s->in_flight++;

    /* s->recv_coroutine[i] is set as soon as we get the send_lock.  */
}

/*
 * Release the recv_coroutine[] slot encoded in @request->handle and drop
 * the in-flight count.  s->free_sema is unlocked when the count was at
 * MAX_NBD_REQUESTS before the decrement.
 */
static void nbd_coroutine_end(NbdClientSession *s,
    struct nbd_request *request)
{
    int slot = HANDLE_TO_INDEX(s, request->handle);
    int was_at_limit;

    s->recv_coroutine[slot] = NULL;

    was_at_limit = (s->in_flight == MAX_NBD_REQUESTS);
    s->in_flight--;
    if (was_at_limit) {
        qemu_co_mutex_unlock(&s->free_sema);
    }
}

M
Max Reitz 已提交
203
/*
 * Issue a single NBD_CMD_READ for @nb_sectors 512-byte sectors starting at
 * @sector_num and receive the data into @qiov at byte @offset.
 *
 * Returns 0 on success or a negative value: the negated send error, or the
 * negated error field of the reply.
 */
static int nbd_co_readv_1(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors, QEMUIOVector *qiov,
                          int offset)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_READ };
    struct nbd_reply reply;
    ssize_t ret;

    request.from = sector_num * 512;
    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
    /* A read request carries no payload; the data comes with the reply. */
    ret = nbd_co_send_request(bs, &request, NULL, 0);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, qiov, offset);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
}

M
Max Reitz 已提交
227
/*
 * Issue a single NBD_CMD_WRITE for @nb_sectors 512-byte sectors starting at
 * @sector_num, sending the data from @qiov at byte @offset.
 *
 * NBD_CMD_FLAG_FUA is added to the request when the write cache is not
 * enabled for @bs and NBD_FLAG_SEND_FUA is set in the session's nbdflags.
 *
 * Returns 0 on success or a negative value: the negated send error, or the
 * negated error field of the reply.
 */
static int nbd_co_writev_1(BlockDriverState *bs, int64_t sector_num,
                           int nb_sectors, QEMUIOVector *qiov,
                           int offset)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_WRITE };
    struct nbd_reply reply;
    ssize_t ret;

    if (!bdrv_enable_write_cache(bs) &&
        (client->nbdflags & NBD_FLAG_SEND_FUA)) {
        request.type |= NBD_CMD_FLAG_FUA;
    }

    request.from = sector_num * 512;
    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, qiov, offset);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        /* The reply to a write carries no payload. */
        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
}

/* qemu-nbd has a limit of slightly less than 1M per request.  Try to
 * remain aligned to 4K.  With the 512-byte sectors used in this file,
 * 2040 sectors is 1020 KiB, and 2040 is a multiple of 8, so every full
 * chunk is a whole number of 4K units. */
#define NBD_MAX_SECTORS 2040

M
Max Reitz 已提交
259 260
/*
 * Read @nb_sectors sectors starting at @sector_num into @qiov.
 *
 * The request is split into chunks of at most NBD_MAX_SECTORS sectors that
 * are issued one after the other; the byte offset into @qiov advances with
 * each chunk.  Returns the first negative chunk result, otherwise the
 * result of the final chunk.
 */
int nbd_client_co_readv(BlockDriverState *bs, int64_t sector_num,
                        int nb_sectors, QEMUIOVector *qiov)
{
    int offset = 0;
    int ret;
    while (nb_sectors > NBD_MAX_SECTORS) {
        ret = nbd_co_readv_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
        if (ret < 0) {
            return ret;
        }
        offset += NBD_MAX_SECTORS * 512;
        sector_num += NBD_MAX_SECTORS;
        nb_sectors -= NBD_MAX_SECTORS;
    }
    /* Remaining sectors (at most NBD_MAX_SECTORS) go out as the last chunk. */
    return nbd_co_readv_1(bs, sector_num, nb_sectors, qiov, offset);
}

M
Max Reitz 已提交
276 277
/*
 * Write @nb_sectors sectors starting at @sector_num from @qiov.
 *
 * The request is split into chunks of at most NBD_MAX_SECTORS sectors that
 * are issued one after the other; the byte offset into @qiov advances with
 * each chunk.  Returns the first negative chunk result, otherwise the
 * result of the final chunk.
 */
int nbd_client_co_writev(BlockDriverState *bs, int64_t sector_num,
                         int nb_sectors, QEMUIOVector *qiov)
{
    int offset = 0;
    int ret;
    while (nb_sectors > NBD_MAX_SECTORS) {
        ret = nbd_co_writev_1(bs, sector_num, NBD_MAX_SECTORS, qiov, offset);
        if (ret < 0) {
            return ret;
        }
        offset += NBD_MAX_SECTORS * 512;
        sector_num += NBD_MAX_SECTORS;
        nb_sectors -= NBD_MAX_SECTORS;
    }
    /* Remaining sectors (at most NBD_MAX_SECTORS) go out as the last chunk. */
    return nbd_co_writev_1(bs, sector_num, nb_sectors, qiov, offset);
}

M
Max Reitz 已提交
293
/*
 * Send an NBD_CMD_FLUSH request and wait for its reply.
 *
 * Returns 0 without sending anything when NBD_FLAG_SEND_FLUSH is not set in
 * the session's nbdflags.  NBD_CMD_FLAG_FUA is added to the request when
 * NBD_FLAG_SEND_FUA is set.  Otherwise returns 0 on success or a negative
 * value: the negated send error, or the negated error field of the reply.
 */
int nbd_client_co_flush(BlockDriverState *bs)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_FLUSH };
    struct nbd_reply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_FLUSH)) {
        return 0;
    }

    if (client->nbdflags & NBD_FLAG_SEND_FUA) {
        request.type |= NBD_CMD_FLAG_FUA;
    }

    request.from = 0;
    request.len = 0;

    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL, 0);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
}

M
Max Reitz 已提交
322 323
/*
 * Send an NBD_CMD_TRIM request for @nb_sectors 512-byte sectors starting at
 * @sector_num and wait for its reply.
 *
 * Returns 0 without sending anything when NBD_FLAG_SEND_TRIM is not set in
 * the session's nbdflags.  Otherwise returns 0 on success or a negative
 * value: the negated send error, or the negated error field of the reply.
 */
int nbd_client_co_discard(BlockDriverState *bs, int64_t sector_num,
                          int nb_sectors)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = { .type = NBD_CMD_TRIM };
    struct nbd_reply reply;
    ssize_t ret;

    if (!(client->nbdflags & NBD_FLAG_SEND_TRIM)) {
        return 0;
    }
    request.from = sector_num * 512;
    request.len = nb_sectors * 512;

    nbd_coroutine_start(client, &request);
    ret = nbd_co_send_request(bs, &request, NULL, 0);
    if (ret < 0) {
        reply.error = -ret;
    } else {
        nbd_co_receive_reply(client, &request, &reply, NULL, 0);
    }
    nbd_coroutine_end(client, &request);
    return -reply.error;
}

M
Max Reitz 已提交
348
/*
 * Unregister the NBD socket from the AioContext of @bs by clearing its
 * read handler, write handler and opaque pointer.
 */
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
    aio_set_fd_handler(bdrv_get_aio_context(bs),
                       nbd_get_client_session(bs)->sock, NULL, NULL, NULL);
}

M
Max Reitz 已提交
354 355
/*
 * Register the NBD socket with @new_context: nbd_reply_ready() becomes the
 * read handler with @bs as its opaque pointer; no write handler is set.
 */
void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
{
    aio_set_fd_handler(new_context, nbd_get_client_session(bs)->sock,
                       nbd_reply_ready, NULL, bs);
}

M
Max Reitz 已提交
361
/*
 * Close the client session of @bs: send an NBD_CMD_DISC request and tear
 * the connection down.  Does nothing when the socket is already -1.
 */
void nbd_client_close(BlockDriverState *bs)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    struct nbd_request request = {
        .type = NBD_CMD_DISC,
        .from = 0,
        .len = 0
    };

    if (client->sock == -1) {
        return;
    }

    /* The result is ignored; the connection is torn down either way. */
    nbd_send_request(client->sock, &request);

    nbd_teardown_connection(bs);
}

M
Max Reitz 已提交
379 380
/*
 * Perform the NBD handshake for @export on @sock and set up the client
 * session of @bs.
 *
 * The handshake runs with the socket in blocking mode and stores the
 * negotiated flags, size and block size in the session.  On failure the
 * socket is closed and the negative result of nbd_receive_negotiate() is
 * returned (with @errp passed through to it); the session's sock field is
 * not written in that case.  On success the session mutexes are
 * initialized, the socket is recorded and made non-blocking, the reply
 * handler is attached to the AioContext of @bs, and 0 is returned.
 */
int nbd_client_init(BlockDriverState *bs, int sock, const char *export,
                    Error **errp)
{
    NbdClientSession *client = nbd_get_client_session(bs);
    int ret;

    /* NBD handshake */
    logout("session init %s\n", export);
    qemu_set_block(sock);
    ret = nbd_receive_negotiate(sock, export,
                                &client->nbdflags, &client->size,
                                &client->blocksize, errp);
    if (ret < 0) {
        logout("Failed to negotiate with the NBD server\n");
        closesocket(sock);
        return ret;
    }

    qemu_co_mutex_init(&client->send_mutex);
    qemu_co_mutex_init(&client->free_sema);
    client->sock = sock;

    /* Now that we're connected, set the socket to be non-blocking and
     * kick the reply mechanism.  */
    qemu_set_nonblock(sock);
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
    return 0;
}