/*
 * Data plane event loop
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 * Copyright (c) 2009-2017 QEMU contributors
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"
#include "qapi/error.h"
#include "qemu-common.h"
#include "block/aio.h"
#include "block/thread-pool.h"
#include "qemu/main-loop.h"
#include "qemu/atomic.h"
#include "block/raw-aio.h"

/***********************************************************/
/* bottom halves (can be seen as timers which expire ASAP) */

struct QEMUBH {
    AioContext *ctx;
    QEMUBHFunc *cb;
    void *opaque;
    QEMUBH *next;
    bool scheduled;
    bool idle;
    bool deleted;
};

void aio_bh_schedule_oneshot(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    bh->scheduled = 1;
    bh->deleted = 1;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
    qemu_lockcnt_unlock(&ctx->list_lock);
    aio_notify(ctx);
}
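
/*
 * Example (illustrative sketch, not part of this file): a caller that wants a
 * callback to run exactly once in the AioContext's home thread.  The names
 * my_oneshot_cb and my_state are hypothetical.
 *
 *     static void my_oneshot_cb(void *opaque)
 *     {
 *         ...
 *     }
 *
 *     aio_bh_schedule_oneshot(ctx, my_oneshot_cb, my_state);
 *
 * Because the BH is created with both scheduled and deleted set, aio_bh_poll
 * runs the callback once and then frees the QEMUBH; the caller never sees it.
 */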

QEMUBH *aio_bh_new(AioContext *ctx, QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh;
    bh = g_new(QEMUBH, 1);
    *bh = (QEMUBH){
        .ctx = ctx,
        .cb = cb,
        .opaque = opaque,
    };
    qemu_lockcnt_lock(&ctx->list_lock);
    bh->next = ctx->first_bh;
    /* Make sure that the members are ready before putting bh into list */
    smp_wmb();
    ctx->first_bh = bh;
    qemu_lockcnt_unlock(&ctx->list_lock);
    return bh;
}
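
/*
 * Example (illustrative sketch, not part of this file): the usual lifecycle
 * of a long-lived bottom half.  The names my_cb and my_state are
 * hypothetical.
 *
 *     QEMUBH *bh = aio_bh_new(ctx, my_cb, my_state);
 *     ...
 *     qemu_bh_schedule(bh);   // my_cb runs on the next aio_bh_poll
 *     ...
 *     qemu_bh_cancel(bh);     // drops a pending run, if any
 *     qemu_bh_delete(bh);     // memory is reclaimed later by aio_bh_poll
 *
 * The caller never frees the QEMUBH directly; qemu_bh_delete() only marks it
 * and aio_bh_poll() removes it from the list and frees it.
 */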

void aio_bh_call(QEMUBH *bh)
{
    bh->cb(bh->opaque);
}

/* Multiple occurrences of aio_bh_poll cannot be called concurrently */
int aio_bh_poll(AioContext *ctx)
{
    QEMUBH *bh, **bhp, *next;
    int ret;
    bool deleted = false;

    qemu_lockcnt_inc(&ctx->list_lock);

    ret = 0;
    for (bh = atomic_rcu_read(&ctx->first_bh); bh; bh = next) {
        next = atomic_rcu_read(&bh->next);
        /* The atomic_xchg is paired with the one in qemu_bh_schedule.  The
         * implicit memory barrier ensures that the callback sees all writes
         * done by the scheduling thread.  It also ensures that the scheduling
         * thread sees the zero before bh->cb has run, and thus will call
         * aio_notify again if necessary.
         */
        if (atomic_xchg(&bh->scheduled, 0)) {
            /* Idle BHs don't count as progress */
            if (!bh->idle) {
                ret = 1;
            }
            bh->idle = 0;
            aio_bh_call(bh);
        }
        if (bh->deleted) {
            deleted = true;
        }
    }

    /* remove deleted bhs */
    if (!deleted) {
        qemu_lockcnt_dec(&ctx->list_lock);
        return ret;
    }

    if (qemu_lockcnt_dec_and_lock(&ctx->list_lock)) {
        bhp = &ctx->first_bh;
        while (*bhp) {
            bh = *bhp;
            if (bh->deleted && !bh->scheduled) {
                *bhp = bh->next;
                g_free(bh);
            } else {
                bhp = &bh->next;
            }
        }
        qemu_lockcnt_unlock(&ctx->list_lock);
    }
    return ret;
}

void qemu_bh_schedule_idle(QEMUBH *bh)
{
    bh->idle = 1;
    /* Make sure that idle & any writes needed by the callback are done
     * before the locations are read in the aio_bh_poll.
     */
    atomic_mb_set(&bh->scheduled, 1);
}

void qemu_bh_schedule(QEMUBH *bh)
{
    AioContext *ctx;

    ctx = bh->ctx;
    bh->idle = 0;
    /* The memory barrier implicit in atomic_xchg makes sure that:
     * 1. idle & any writes needed by the callback are done before the
     *    locations are read in the aio_bh_poll.
     * 2. ctx is loaded before scheduled is set and the callback has a chance
     *    to execute.
     */
    if (atomic_xchg(&bh->scheduled, 1) == 0) {
        aio_notify(ctx);
    }
}

/* This function is async: it only clears the scheduled flag, so a callback
 * that is already running is not waited for.
 */
void qemu_bh_cancel(QEMUBH *bh)
{
    bh->scheduled = 0;
}

/* This function is async.  The bottom half is only freed later, by
 * aio_bh_poll, once it is no longer scheduled.
 */
void qemu_bh_delete(QEMUBH *bh)
{
    bh->scheduled = 0;
    bh->deleted = 1;
}

int64_t
aio_compute_timeout(AioContext *ctx)
{
    int64_t deadline;
    int timeout = -1;
    QEMUBH *bh;

    for (bh = atomic_rcu_read(&ctx->first_bh); bh;
         bh = atomic_rcu_read(&bh->next)) {
        if (bh->scheduled) {
            if (bh->idle) {
                /* idle bottom halves will be polled at least
                 * every 10ms */
                timeout = 10000000;
            } else {
                /* non-idle bottom halves will be executed
                 * immediately */
                return 0;
            }
        }
    }

    deadline = timerlistgroup_deadline_ns(&ctx->tlg);
    if (deadline == 0) {
        return 0;
    } else {
        return qemu_soonest_timeout(timeout, deadline);
    }
}
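
/*
 * Worked example (illustrative): with one scheduled idle BH and the nearest
 * timer 3 ms away, the loop above leaves timeout at 10000000 ns while
 * timerlistgroup_deadline_ns() returns 3000000 ns, so the function returns
 * qemu_soonest_timeout(10000000, 3000000) == 3000000.  Any non-idle
 * scheduled BH short-circuits this and forces a timeout of 0.
 */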

static gboolean
aio_ctx_prepare(GSource *source, gint    *timeout)
{
    AioContext *ctx = (AioContext *) source;

    atomic_or(&ctx->notify_me, 1);

    /* We assume there is no timeout already supplied */
    *timeout = qemu_timeout_ns_to_ms(aio_compute_timeout(ctx));

    if (aio_prepare(ctx)) {
        *timeout = 0;
    }

    return *timeout == 0;
}

static gboolean
aio_ctx_check(GSource *source)
{
    AioContext *ctx = (AioContext *) source;
    QEMUBH *bh;

    atomic_and(&ctx->notify_me, ~1);
    aio_notify_accept(ctx);

    for (bh = ctx->first_bh; bh; bh = bh->next) {
        if (bh->scheduled) {
            return true;
        }
    }
    return aio_pending(ctx) || (timerlistgroup_deadline_ns(&ctx->tlg) == 0);
}

static gboolean
aio_ctx_dispatch(GSource     *source,
                 GSourceFunc  callback,
                 gpointer     user_data)
{
    AioContext *ctx = (AioContext *) source;

    assert(callback == NULL);
    aio_dispatch(ctx, true);
    return true;
}

static void
aio_ctx_finalize(GSource     *source)
{
    AioContext *ctx = (AioContext *) source;

    thread_pool_free(ctx->thread_pool);

#ifdef CONFIG_LINUX_AIO
    if (ctx->linux_aio) {
        laio_detach_aio_context(ctx->linux_aio, ctx);
        laio_cleanup(ctx->linux_aio);
        ctx->linux_aio = NULL;
    }
#endif

    qemu_lockcnt_lock(&ctx->list_lock);
    assert(!qemu_lockcnt_count(&ctx->list_lock));
    while (ctx->first_bh) {
        QEMUBH *next = ctx->first_bh->next;

        /* qemu_bh_delete() must have been called on BHs in this AioContext */
        assert(ctx->first_bh->deleted);

        g_free(ctx->first_bh);
        ctx->first_bh = next;
    }
    qemu_lockcnt_unlock(&ctx->list_lock);

    aio_set_event_notifier(ctx, &ctx->notifier, false, NULL, NULL);
    event_notifier_cleanup(&ctx->notifier);
    qemu_rec_mutex_destroy(&ctx->lock);
    qemu_lockcnt_destroy(&ctx->list_lock);
    timerlistgroup_deinit(&ctx->tlg);
}

static GSourceFuncs aio_source_funcs = {
    aio_ctx_prepare,
    aio_ctx_check,
    aio_ctx_dispatch,
    aio_ctx_finalize
};

GSource *aio_get_g_source(AioContext *ctx)
{
    g_source_ref(&ctx->source);
    return &ctx->source;
}
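
/*
 * Example (illustrative sketch, not part of this file): embedding an
 * AioContext in a GLib main loop.  Error handling is omitted and the use of
 * error_abort is only for brevity.
 *
 *     AioContext *ctx = aio_context_new(&error_abort);
 *     GSource *source = aio_get_g_source(ctx);
 *
 *     g_source_attach(source, g_main_context_default());
 *     g_source_unref(source);
 *
 * aio_ctx_prepare/check/dispatch above then run as part of the normal GLib
 * main loop iteration.
 */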

ThreadPool *aio_get_thread_pool(AioContext *ctx)
{
    if (!ctx->thread_pool) {
        ctx->thread_pool = thread_pool_new(ctx);
    }
    return ctx->thread_pool;
}

#ifdef CONFIG_LINUX_AIO
LinuxAioState *aio_get_linux_aio(AioContext *ctx)
{
    if (!ctx->linux_aio) {
        ctx->linux_aio = laio_init();
        laio_attach_aio_context(ctx->linux_aio, ctx);
    }
    return ctx->linux_aio;
}
#endif

void aio_notify(AioContext *ctx)
{
    /* Write e.g. bh->scheduled before reading ctx->notify_me.  Pairs
     * with atomic_or in aio_ctx_prepare or atomic_add in aio_poll.
     */
    smp_mb();
    if (ctx->notify_me) {
        event_notifier_set(&ctx->notifier);
        atomic_mb_set(&ctx->notified, true);
    }
}

void aio_notify_accept(AioContext *ctx)
{
    if (atomic_xchg(&ctx->notified, false)) {
        event_notifier_test_and_clear(&ctx->notifier);
    }
}
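
/*
 * Sketch (illustrative; the real callers live in aio-posix.c and
 * aio-win32.c) of how a blocking poll is expected to pair with aio_notify():
 *
 *     atomic_add(&ctx->notify_me, 2);
 *     ... block, using aio_compute_timeout(ctx) as the timeout ...
 *     atomic_sub(&ctx->notify_me, 2);
 *     aio_notify_accept(ctx);
 *
 * aio_notify() only kicks the event notifier while notify_me is non-zero,
 * and aio_notify_accept() consumes the wakeup afterwards, so wakeups are
 * limited to the window in which someone is actually about to block.
 */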

static void aio_timerlist_notify(void *opaque)
{
    aio_notify(opaque);
}

static void event_notifier_dummy_cb(EventNotifier *e)
{
}

/* Returns true if aio_notify() was called (e.g. a BH was scheduled) */
static bool event_notifier_poll(void *opaque)
{
    EventNotifier *e = opaque;
    AioContext *ctx = container_of(e, AioContext, notifier);

    return atomic_read(&ctx->notified);
}

AioContext *aio_context_new(Error **errp)
{
    int ret;
    AioContext *ctx;

    ctx = (AioContext *) g_source_new(&aio_source_funcs, sizeof(AioContext));
    aio_context_setup(ctx);

    ret = event_notifier_init(&ctx->notifier, false);
    if (ret < 0) {
        error_setg_errno(errp, -ret, "Failed to initialize event notifier");
        goto fail;
    }
    g_source_set_can_recurse(&ctx->source, true);
    qemu_lockcnt_init(&ctx->list_lock);
    aio_set_event_notifier(ctx, &ctx->notifier,
                           false,
                           (EventNotifierHandler *)
                           event_notifier_dummy_cb,
                           event_notifier_poll);
#ifdef CONFIG_LINUX_AIO
    ctx->linux_aio = NULL;
#endif
    ctx->thread_pool = NULL;
    qemu_rec_mutex_init(&ctx->lock);
    timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);

    ctx->poll_ns = 0;
    ctx->poll_max_ns = 0;
    ctx->poll_grow = 0;
    ctx->poll_shrink = 0;

    return ctx;
fail:
    g_source_destroy(&ctx->source);
    return NULL;
}

void aio_context_ref(AioContext *ctx)
{
    g_source_ref(&ctx->source);
}

void aio_context_unref(AioContext *ctx)
{
    g_source_unref(&ctx->source);
}

void aio_context_acquire(AioContext *ctx)
{
    qemu_rec_mutex_lock(&ctx->lock);
}

void aio_context_release(AioContext *ctx)
{
    qemu_rec_mutex_unlock(&ctx->lock);
}
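
/*
 * Example (illustrative sketch, not part of this file): a thread other than
 * the one running the AioContext serializing against it before touching
 * state owned by that context.
 *
 *     aio_context_acquire(ctx);
 *     ... operate on objects owned by ctx ...
 *     aio_context_release(ctx);
 *
 * Scheduling bottom halves does not need this lock: qemu_bh_schedule() and
 * aio_bh_schedule_oneshot() rely on list_lock and atomics and are safe to
 * call from any thread.
 */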