nbd-client.c 12.1 KB
Newer Older
M
Marc-André Lureau 已提交
1 2 3
/*
 * QEMU Block driver for NBD
 *
 * Copyright (C) 2016 Red Hat, Inc.
 * Copyright (C) 2008 Bull S.A.S.
 *     Author: Laurent Vivier <Laurent.Vivier@bull.net>
 *
 * Some parts:
 *    Copyright (C) 2007 Anthony Liguori <anthony@codemonkey.ws>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

P
Peter Maydell 已提交
30
#include "qemu/osdep.h"
31
#include "qapi/error.h"
M
Marc-André Lureau 已提交
32 33 34 35 36
#include "nbd-client.h"

/*
 * Convert between a request handle and an index by XORing the value with
 * the address passed as @bs.  Both macros perform the same XOR, so applying
 * one after the other with the same @bs yields the original value.
 *
 * @bs is parenthesised so that an expression argument (e.g. "p + 1") is
 * evaluated as a whole before being cast to an integer.
 */
#define HANDLE_TO_INDEX(bs, handle) ((handle) ^ ((uint64_t)(intptr_t)(bs)))
#define INDEX_TO_HANDLE(bs, index)  ((index)  ^ ((uint64_t)(intptr_t)(bs)))

37
/* Wake every coroutine that currently occupies a slot in s->recv_coroutine. */
static void nbd_recv_coroutines_enter_all(NBDClientSession *s)
{
    int slot = 0;

    while (slot < MAX_NBD_REQUESTS) {
        if (s->recv_coroutine[slot] != NULL) {
            aio_co_wake(s->recv_coroutine[slot]);
        }
        slot++;
    }
}

M
Max Reitz 已提交
48
/*
 * Shut the channel down in both directions, poll until the reply-reading
 * coroutine has cleared read_reply_co, detach from the AioContext and drop
 * the session's references to both channels.  Does nothing when the session
 * has no I/O channel.
 */
static void nbd_teardown_connection(BlockDriverState *bs)
{
    NBDClientSession *s = nbd_get_client_session(bs);

    if (s->ioc == NULL) {
        /* Already closed */
        return;
    }

    /* Shutting the channel down lets any pending coroutines finish */
    qio_channel_shutdown(s->ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL);
    BDRV_POLL_WHILE(bs, s->read_reply_co);

    nbd_client_detach_aio_context(bs);

    object_unref(OBJECT(s->sioc));
    s->sioc = NULL;

    object_unref(OBJECT(s->ioc));
    s->ioc = NULL;
}

69
static coroutine_fn void nbd_read_reply_entry(void *opaque)
M
Marc-André Lureau 已提交
70
{
71
    NBDClientSession *s = opaque;
M
Marc-André Lureau 已提交
72 73
    uint64_t i;
    int ret;
74
    Error *local_err = NULL;
M
Marc-André Lureau 已提交
75

76 77
    for (;;) {
        assert(s->reply.handle == 0);
78 79 80 81
        ret = nbd_receive_reply(s->ioc, &s->reply, &local_err);
        if (ret < 0) {
            error_report_err(local_err);
        }
82
        if (ret <= 0) {
83
            break;
M
Marc-André Lureau 已提交
84 85
        }

86 87 88 89 90 91 92 93
        /* There's no need for a mutex on the receive side, because the
         * handler acts as a synchronization point and ensures that only
         * one coroutine is called until the reply finishes.
         */
        i = HANDLE_TO_INDEX(s, s->reply.handle);
        if (i >= MAX_NBD_REQUESTS || !s->recv_coroutine[i]) {
            break;
        }
M
Marc-André Lureau 已提交
94

95 96 97 98 99 100 101 102 103 104 105 106 107
        /* We're woken up by the recv_coroutine itself.  Note that there
         * is no race between yielding and reentering read_reply_co.  This
         * is because:
         *
         * - if recv_coroutine[i] runs on the same AioContext, it is only
         *   entered after we yield
         *
         * - if recv_coroutine[i] runs on a different AioContext, reentering
         *   read_reply_co happens through a bottom half, which can only
         *   run after we yield.
         */
        aio_co_wake(s->recv_coroutine[i]);
        qemu_coroutine_yield();
M
Marc-André Lureau 已提交
108
    }
109 110

    nbd_recv_coroutines_enter_all(s);
111
    s->read_reply_co = NULL;
M
Marc-André Lureau 已提交
112 113
}

M
Max Reitz 已提交
114
static int nbd_co_send_request(BlockDriverState *bs,
115
                               NBDRequest *request,
E
Eric Blake 已提交
116
                               QEMUIOVector *qiov)
M
Marc-André Lureau 已提交
117
{
118
    NBDClientSession *s = nbd_get_client_session(bs);
B
Bin Wu 已提交
119
    int rc, ret, i;
M
Marc-André Lureau 已提交
120 121

    qemu_co_mutex_lock(&s->send_mutex);
122 123 124 125
    while (s->in_flight == MAX_NBD_REQUESTS) {
        qemu_co_queue_wait(&s->free_sema, &s->send_mutex);
    }
    s->in_flight++;
B
Bin Wu 已提交
126 127 128 129 130 131 132 133

    for (i = 0; i < MAX_NBD_REQUESTS; i++) {
        if (s->recv_coroutine[i] == NULL) {
            s->recv_coroutine[i] = qemu_coroutine_self();
            break;
        }
    }

134
    g_assert(qemu_in_coroutine());
B
Bin Wu 已提交
135 136
    assert(i < MAX_NBD_REQUESTS);
    request->handle = INDEX_TO_HANDLE(s, i);
137 138 139 140 141 142

    if (!s->ioc) {
        qemu_co_mutex_unlock(&s->send_mutex);
        return -EPIPE;
    }

M
Marc-André Lureau 已提交
143
    if (qiov) {
144
        qio_channel_set_cork(s->ioc, true);
145
        rc = nbd_send_request(s->ioc, request);
M
Marc-André Lureau 已提交
146
        if (rc >= 0) {
147 148
            ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, false,
                          NULL);
M
Marc-André Lureau 已提交
149 150 151 152
            if (ret != request->len) {
                rc = -EIO;
            }
        }
153
        qio_channel_set_cork(s->ioc, false);
M
Marc-André Lureau 已提交
154
    } else {
155
        rc = nbd_send_request(s->ioc, request);
M
Marc-André Lureau 已提交
156 157 158 159 160
    }
    qemu_co_mutex_unlock(&s->send_mutex);
    return rc;
}

161
static void nbd_co_receive_reply(NBDClientSession *s,
162 163
                                 NBDRequest *request,
                                 NBDReply *reply,
E
Eric Blake 已提交
164
                                 QEMUIOVector *qiov)
M
Marc-André Lureau 已提交
165 166 167
{
    int ret;

168
    /* Wait until we're woken up by nbd_read_reply_entry.  */
M
Marc-André Lureau 已提交
169 170
    qemu_coroutine_yield();
    *reply = s->reply;
171 172
    if (reply->handle != request->handle ||
        !s->ioc) {
M
Marc-André Lureau 已提交
173 174 175
        reply->error = EIO;
    } else {
        if (qiov && reply->error == 0) {
176 177
            ret = nbd_rwv(s->ioc, qiov->iov, qiov->niov, request->len, true,
                          NULL);
M
Marc-André Lureau 已提交
178 179 180 181 182 183 184 185 186 187
            if (ret != request->len) {
                reply->error = EIO;
            }
        }

        /* Tell the read handler to read another header.  */
        s->reply.handle = 0;
    }
}

188
static void nbd_coroutine_end(BlockDriverState *bs,
189
                              NBDRequest *request)
M
Marc-André Lureau 已提交
190
{
191
    NBDClientSession *s = nbd_get_client_session(bs);
M
Marc-André Lureau 已提交
192
    int i = HANDLE_TO_INDEX(s, request->handle);
193

M
Marc-André Lureau 已提交
194
    s->recv_coroutine[i] = NULL;
195 196 197 198

    /* Kick the read_reply_co to get the next reply.  */
    if (s->read_reply_co) {
        aio_co_wake(s->read_reply_co);
M
Marc-André Lureau 已提交
199
    }
200 201 202 203 204

    qemu_co_mutex_lock(&s->send_mutex);
    s->in_flight--;
    qemu_co_queue_next(&s->free_sema);
    qemu_co_mutex_unlock(&s->send_mutex);
M
Marc-André Lureau 已提交
205 206
}

207 208
/*
 * Issue an NBD_CMD_READ for @bytes bytes at @offset and read the payload
 * into @qiov.  @bytes must not exceed NBD_MAX_BUFFER_SIZE and @flags must
 * be zero.
 *
 * Returns the negated reply error: 0 on success, a negative value otherwise.
 */
int nbd_client_co_preadv(BlockDriverState *bs, uint64_t offset,
                         uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_READ,
        .from = offset,
        .len = bytes,
    };
    NBDReply rep;
    ssize_t rc;

    assert(bytes <= NBD_MAX_BUFFER_SIZE);
    assert(!flags);

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(s, &req, &rep, qiov);
    } else {
        /* Sending failed: report that error without waiting for a reply */
        rep.error = -rc;
    }

    nbd_coroutine_end(bs, &req);
    return -rep.error;
}

232 233
/*
 * Issue an NBD_CMD_WRITE of @bytes bytes at @offset with the payload taken
 * from @qiov.  BDRV_REQ_FUA in @flags is forwarded as NBD_CMD_FLAG_FUA and
 * requires that NBD_FLAG_SEND_FUA is set in the session's nbdflags.
 * @bytes must not exceed NBD_MAX_BUFFER_SIZE.
 *
 * Returns the negated reply error: 0 on success, a negative value otherwise.
 */
int nbd_client_co_pwritev(BlockDriverState *bs, uint64_t offset,
                          uint64_t bytes, QEMUIOVector *qiov, int flags)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_WRITE,
        .from = offset,
        .len = bytes,
    };
    NBDReply rep;
    ssize_t rc;

    if (flags & BDRV_REQ_FUA) {
        assert(s->nbdflags & NBD_FLAG_SEND_FUA);
        req.flags |= NBD_CMD_FLAG_FUA;
    }

    assert(bytes <= NBD_MAX_BUFFER_SIZE);

    rc = nbd_co_send_request(bs, &req, qiov);
    if (rc >= 0) {
        nbd_co_receive_reply(s, &req, &rep, NULL);
    } else {
        /* Sending failed: report that error without waiting for a reply */
        rep.error = -rc;
    }

    nbd_coroutine_end(bs, &req);
    return -rep.error;
}

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290
/*
 * Issue an NBD_CMD_WRITE_ZEROES for @count bytes at @offset.
 *
 * Returns -ENOTSUP when NBD_FLAG_SEND_WRITE_ZEROES is not set in the
 * session's nbdflags.  BDRV_REQ_FUA is forwarded as NBD_CMD_FLAG_FUA
 * (NBD_FLAG_SEND_FUA must be set), and NBD_CMD_FLAG_NO_HOLE is added unless
 * BDRV_REQ_MAY_UNMAP is given.
 *
 * Otherwise returns the negated reply error: 0 on success, a negative value
 * on failure.
 */
int nbd_client_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset,
                                int count, BdrvRequestFlags flags)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_WRITE_ZEROES,
        .from = offset,
        .len = count,
    };
    NBDReply rep;
    ssize_t rc;

    if ((s->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) == 0) {
        return -ENOTSUP;
    }

    if (flags & BDRV_REQ_FUA) {
        assert(s->nbdflags & NBD_FLAG_SEND_FUA);
        req.flags |= NBD_CMD_FLAG_FUA;
    }
    if ((flags & BDRV_REQ_MAY_UNMAP) == 0) {
        req.flags |= NBD_CMD_FLAG_NO_HOLE;
    }

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(s, &req, &rep, NULL);
    } else {
        /* Sending failed: report that error without waiting for a reply */
        rep.error = -rc;
    }

    nbd_coroutine_end(bs, &req);
    return -rep.error;
}

M
Max Reitz 已提交
295
/*
 * Issue an NBD_CMD_FLUSH with offset and length zero.
 *
 * Returns 0 without sending anything when NBD_FLAG_SEND_FLUSH is not set in
 * the session's nbdflags; otherwise returns the negated reply error.
 */
int nbd_client_co_flush(BlockDriverState *bs)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_FLUSH,
        .from = 0,
        .len = 0,
    };
    NBDReply rep;
    ssize_t rc;

    if ((s->nbdflags & NBD_FLAG_SEND_FLUSH) == 0) {
        return 0;
    }

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(s, &req, &rep, NULL);
    } else {
        /* Sending failed: report that error without waiting for a reply */
        rep.error = -rc;
    }

    nbd_coroutine_end(bs, &req);
    return -rep.error;
}

319
/*
 * Issue an NBD_CMD_TRIM for @count bytes at @offset.
 *
 * Returns 0 without sending anything when NBD_FLAG_SEND_TRIM is not set in
 * the session's nbdflags; otherwise returns the negated reply error.
 */
int nbd_client_co_pdiscard(BlockDriverState *bs, int64_t offset, int count)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    NBDRequest req = {
        .type = NBD_CMD_TRIM,
        .from = offset,
        .len = count,
    };
    NBDReply rep;
    ssize_t rc;

    if ((s->nbdflags & NBD_FLAG_SEND_TRIM) == 0) {
        return 0;
    }

    rc = nbd_co_send_request(bs, &req, NULL);
    if (rc >= 0) {
        nbd_co_receive_reply(s, &req, &rep, NULL);
    } else {
        /* Sending failed: report that error without waiting for a reply */
        rep.error = -rc;
    }

    nbd_coroutine_end(bs, &req);
    return -rep.error;
}

M
Max Reitz 已提交
345
/* Detach the session's socket channel (sioc) from its AioContext. */
void nbd_client_detach_aio_context(BlockDriverState *bs)
{
    qio_channel_detach_aio_context(
        QIO_CHANNEL(nbd_get_client_session(bs)->sioc));
}

M
Max Reitz 已提交
351 352
/*
 * Attach the session's socket channel (sioc) to @new_context and schedule
 * the reply-reading coroutine to run there.
 */
void nbd_client_attach_aio_context(BlockDriverState *bs,
                                   AioContext *new_context)
{
    NBDClientSession *s = nbd_get_client_session(bs);

    qio_channel_attach_aio_context(QIO_CHANNEL(s->sioc), new_context);
    aio_co_schedule(new_context, s->read_reply_co);
}

M
Max Reitz 已提交
359
/*
 * Send NBD_CMD_DISC on the session's channel and tear the connection down.
 * The result of sending the request is not checked.  Does nothing when the
 * session has no I/O channel.
 */
void nbd_client_close(BlockDriverState *bs)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    NBDRequest disc = { .type = NBD_CMD_DISC };

    if (s->ioc != NULL) {
        nbd_send_request(s->ioc, &disc);
        nbd_teardown_connection(bs);
    }
}

373 374 375 376 377 378
/*
 * Negotiate with the NBD server over @sioc and set up the client session
 * attached to @bs.
 *
 * The handshake runs with @sioc in blocking mode and fills in the session's
 * nbdflags, ioc and size.  On success the function advertises FUA and
 * MAY_UNMAP support on @bs according to the negotiated flags, initializes
 * send_mutex and free_sema, takes a reference on @sioc, switches @sioc to
 * non-blocking mode and creates and schedules the reply-reading coroutine.
 *
 * Returns 0 on success, or the negative value returned by
 * nbd_receive_negotiate() (with @errp passed through to it) on failure.
 */
int nbd_client_init(BlockDriverState *bs,
                    QIOChannelSocket *sioc,
                    const char *export,
                    QCryptoTLSCreds *tlscreds,
                    const char *hostname,
                    Error **errp)
{
    NBDClientSession *s = nbd_get_client_session(bs);
    int rc;

    /* NBD handshake, done with a blocking socket */
    logout("session init %s\n", export);
    qio_channel_set_blocking(QIO_CHANNEL(sioc), true, NULL);

    rc = nbd_receive_negotiate(QIO_CHANNEL(sioc), export,
                               &s->nbdflags,
                               tlscreds, hostname,
                               &s->ioc,
                               &s->size, errp);
    if (rc < 0) {
        logout("Failed to negotiate with the NBD server\n");
        return rc;
    }

    /* Expose the request flags that the negotiated flags allow */
    if (s->nbdflags & NBD_FLAG_SEND_FUA) {
        bs->supported_write_flags = BDRV_REQ_FUA;
        bs->supported_zero_flags |= BDRV_REQ_FUA;
    }
    if (s->nbdflags & NBD_FLAG_SEND_WRITE_ZEROES) {
        bs->supported_zero_flags |= BDRV_REQ_MAY_UNMAP;
    }

    qemu_co_mutex_init(&s->send_mutex);
    qemu_co_queue_init(&s->free_sema);

    s->sioc = sioc;
    object_ref(OBJECT(s->sioc));

    /* Negotiation left ioc unset: use the socket itself as the I/O channel */
    if (s->ioc == NULL) {
        s->ioc = QIO_CHANNEL(sioc);
        object_ref(OBJECT(s->ioc));
    }

    /* The connection is up: make the socket non-blocking and start the
     * reply mechanism.  */
    qio_channel_set_blocking(QIO_CHANNEL(sioc), false, NULL);
    s->read_reply_co = qemu_coroutine_create(nbd_read_reply_entry, s);
    nbd_client_attach_aio_context(bs, bdrv_get_aio_context(bs));

    logout("Established connection with NBD server\n");
    return 0;
}