/*
 * Linux native AIO support.
 *
 * Copyright (C) 2009 IBM, Corp.
 * Copyright (C) 2009 Red Hat, Inc.
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */
#include "qemu-common.h"
#include "block/aio.h"
#include "qemu/queue.h"
#include "block/raw-aio.h"
#include "qemu/event_notifier.h"

#include <libaio.h>

/*
 * Queue size (per-device).
 *
 * XXX: eventually we need to communicate this to the guest and/or make it
 *      tunable by the guest.  If we get more outstanding requests at a time
 *      than this we will get EAGAIN from io_submit which is communicated to
 *      the guest as an I/O error.
 */
#define MAX_EVENTS 128

struct qemu_laiocb {
    BlockDriverAIOCB common;
    struct qemu_laio_state *ctx;
    struct iocb iocb;
    ssize_t ret;
    size_t nbytes;
34 35
    QEMUIOVector *qiov;
    bool is_read;
K
Kevin Wolf 已提交
36
    QLIST_ENTRY(qemu_laiocb) node;
37 38 39 40
};

struct qemu_laio_state {
    io_context_t ctx;
P
Paolo Bonzini 已提交
41
    EventNotifier e;
42 43 44 45 46 47 48
};

/*
 * Build the signed result of a completed request from an io_event:
 * res2 shifted left by 32 bits, OR-ed with res.
 */
static inline ssize_t io_event_ret(struct io_event *ev)
{
    uint64_t upper = (uint64_t)ev->res2 << 32;
    uint64_t merged = upper | ev->res;

    return (ssize_t)merged;
}

K
Kevin Wolf 已提交
49 50 51 52 53 54 55 56 57 58
/*
 * Completes an AIO request (calls the callback and frees the ACB).
 */
static void qemu_laio_process_completion(struct qemu_laio_state *s,
    struct qemu_laiocb *laiocb)
{
    int ret;

    ret = laiocb->ret;
    if (ret != -ECANCELED) {
59
        if (ret == laiocb->nbytes) {
K
Kevin Wolf 已提交
60
            ret = 0;
61 62 63
        } else if (ret >= 0) {
            /* Short reads mean EOF, pad with zeros. */
            if (laiocb->is_read) {
64 65
                qemu_iovec_memset(laiocb->qiov, ret, 0,
                    laiocb->qiov->size - ret);
66 67 68 69
            } else {
                ret = -EINVAL;
            }
        }
K
Kevin Wolf 已提交
70 71 72 73 74 75 76

        laiocb->common.cb(laiocb->common.opaque, ret);
    }

    qemu_aio_release(laiocb);
}

P
Paolo Bonzini 已提交
77
static void qemu_laio_completion_cb(EventNotifier *e)
78
{
P
Paolo Bonzini 已提交
79
    struct qemu_laio_state *s = container_of(e, struct qemu_laio_state, e);
80

P
Paolo Bonzini 已提交
81
    while (event_notifier_test_and_clear(&s->e)) {
82 83 84 85 86
        struct io_event events[MAX_EVENTS];
        struct timespec ts = { 0 };
        int nevents, i;

        do {
P
Paolo Bonzini 已提交
87
            nevents = io_getevents(s->ctx, MAX_EVENTS, MAX_EVENTS, events, &ts);
88 89 90 91 92 93 94
        } while (nevents == -EINTR);

        for (i = 0; i < nevents; i++) {
            struct iocb *iocb = events[i].obj;
            struct qemu_laiocb *laiocb =
                    container_of(iocb, struct qemu_laiocb, iocb);

K
Kevin Wolf 已提交
95
            laiocb->ret = io_event_ret(&events[i]);
K
Kevin Wolf 已提交
96
            qemu_laio_process_completion(s, laiocb);
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127
        }
    }
}

/*
 * Cancel callback for Linux AIO requests.
 *
 * @blockacb: the request to cancel; it is cast back to struct qemu_laiocb.
 *
 * Returns immediately if the request is no longer in progress.  Otherwise
 * io_cancel() is attempted; on success the request is marked -ECANCELED.
 * If io_cancel() fails, the function polls the completion handler until
 * the request's status changes.
 */
static void laio_cancel(BlockDriverAIOCB *blockacb)
{
    struct qemu_laiocb *laiocb = (struct qemu_laiocb *)blockacb;
    struct io_event event;
    int ret;

    if (laiocb->ret != -EINPROGRESS) {
        return;
    }

    /*
     * Note that as of Linux 2.6.31 neither the block device code nor any
     * filesystem implements cancellation of AIO request.
     * Thus the polling loop below is the normal code path.
     */
    ret = io_cancel(laiocb->ctx->ctx, &laiocb->iocb, &event);
    if (ret == 0) {
        laiocb->ret = -ECANCELED;
        return;
    }

    /*
     * We have to wait for the iocb to finish.
     *
     * The only way to get the iocb status update is by polling the io context.
     * We might be able to do this slightly more optimal by removing the
     * O_NONBLOCK flag.
     *
     * NOTE(review): the completion path calls qemu_aio_release() on the
     * request before this loop re-reads laiocb->ret; confirm that the ACB
     * remains readable after release.
     */
    while (laiocb->ret == -EINPROGRESS) {
        qemu_laio_completion_cb(&laiocb->ctx->e);
    }
}

S
Stefan Hajnoczi 已提交
133
static const AIOCBInfo laio_aiocb_info = {
134 135 136 137 138 139 140 141 142 143 144 145 146
    .aiocb_size         = sizeof(struct qemu_laiocb),
    .cancel             = laio_cancel,
};

/*
 * Submit one vectored read or write through Linux AIO.
 *
 * @bs:         block driver state passed to qemu_aio_get().
 * @aio_ctx:    the struct qemu_laio_state to submit on.
 * @fd:         file descriptor to operate on.
 * @sector_num: starting sector; the byte offset is sector_num * 512.
 * @qiov:       I/O vector describing the buffers.
 * @nb_sectors: length in sectors; the expected byte count is nb_sectors * 512.
 * @cb:         completion callback.
 * @opaque:     argument passed to @cb.
 * @type:       QEMU_AIO_READ or QEMU_AIO_WRITE.
 *
 * Returns the new AIOCB, or NULL if @type is not supported or io_submit()
 * fails (the request object is released in both cases).
 */
BlockDriverAIOCB *laio_submit(BlockDriverState *bs, void *aio_ctx, int fd,
        int64_t sector_num, QEMUIOVector *qiov, int nb_sectors,
        BlockDriverCompletionFunc *cb, void *opaque, int type)
{
    struct qemu_laio_state *s = aio_ctx;
    struct qemu_laiocb *laiocb;
    struct iocb *iocbs;
    off_t offset = sector_num * 512;

    laiocb = qemu_aio_get(&laio_aiocb_info, bs, cb, opaque);
    /* Widen before multiplying so the product cannot overflow int. */
    laiocb->nbytes = (size_t)nb_sectors * 512;
    laiocb->ctx = s;
    laiocb->ret = -EINPROGRESS;
    laiocb->is_read = (type == QEMU_AIO_READ);
    laiocb->qiov = qiov;

    iocbs = &laiocb->iocb;

    switch (type) {
    case QEMU_AIO_WRITE:
        io_prep_pwritev(iocbs, fd, qiov->iov, qiov->niov, offset);
        break;
    case QEMU_AIO_READ:
        io_prep_preadv(iocbs, fd, qiov->iov, qiov->niov, offset);
        break;
    /* Currently Linux kernel does not support other operations */
    default:
        fprintf(stderr, "%s: invalid AIO request type 0x%x.\n",
                        __func__, type);
        goto out_free_aiocb;
    }
    /* Completion of this iocb is signalled through the state's notifier fd. */
    io_set_eventfd(&laiocb->iocb, event_notifier_get_fd(&s->e));

    if (io_submit(s->ctx, 1, &iocbs) < 0) {
        goto out_free_aiocb;
    }
    return &laiocb->common;

out_free_aiocb:
    qemu_aio_release(laiocb);
    return NULL;
}

180 181 182 183 184 185 186 187 188 189 190 191 192 193
void laio_detach_aio_context(void *s_, AioContext *old_context)
{
    struct qemu_laio_state *s = s_;

    aio_set_event_notifier(old_context, &s->e, NULL);
}

/*
 * Register the completion notifier of @s_ with @new_context, using
 * qemu_laio_completion_cb as its handler.
 *
 * @s_:          the struct qemu_laio_state, passed as an opaque pointer.
 * @new_context: the AioContext the notifier is added to.
 */
void laio_attach_aio_context(void *s_, AioContext *new_context)
{
    struct qemu_laio_state *state = s_;
    EventNotifier *notifier = &state->e;

    aio_set_event_notifier(new_context, notifier, qemu_laio_completion_cb);
}

194 195 196 197
void *laio_init(void)
{
    struct qemu_laio_state *s;

198
    s = g_malloc0(sizeof(*s));
P
Paolo Bonzini 已提交
199
    if (event_notifier_init(&s->e, false) < 0) {
200
        goto out_free_state;
P
Paolo Bonzini 已提交
201
    }
202

P
Paolo Bonzini 已提交
203
    if (io_setup(MAX_EVENTS, &s->ctx) != 0) {
204
        goto out_close_efd;
P
Paolo Bonzini 已提交
205
    }
206 207 208 209

    return s;

out_close_efd:
P
Paolo Bonzini 已提交
210
    event_notifier_cleanup(&s->e);
211
out_free_state:
212
    g_free(s);
213 214
    return NULL;
}