main-loop.c 13.9 KB
Newer Older
P
Paolo Bonzini 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

P
Peter Maydell 已提交
25
#include "qemu/osdep.h"
26
#include "qapi/error.h"
27
#include "qemu/cutils.h"
28
#include "qemu/timer.h"
29
#include "qemu/sockets.h"	// struct in_addr needed for libslirp.h
30
#include "sysemu/qtest.h"
31
#include "sysemu/cpus.h"
32
#include "slirp/libslirp.h"
33
#include "qemu/main-loop.h"
34
#include "block/aio.h"
P
Paolo Bonzini 已提交
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64

#ifndef _WIN32

/*
 * If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them.
 *
 * sigfd_handler() is the fd handler for the signalfd.  @opaque carries the
 * descriptor itself, cast to a pointer.  Every pending siginfo record is
 * read from the descriptor and forwarded to the handler currently installed
 * for that signal.
 */
static void sigfd_handler(void *opaque)
{
    int fd = (intptr_t)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        /* Retry reads that were merely interrupted by a signal. */
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        /* The descriptor has been drained. */
        if (len == -1 && errno == EAGAIN) {
            break;
        }

        if (len != sizeof(info)) {
            printf("read from sigfd returned %zd: %m\n", len);
            return;
        }

        /*
         * Query the installed disposition.  If the query fails, 'action'
         * is left uninitialized, so skip this record instead of calling
         * through an indeterminate function pointer.
         */
        if (sigaction(info.ssi_signo, NULL, &action) != 0) {
            continue;
        }
        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
            sigaction_invoke(&action, &info);
        } else if (action.sa_handler) {
            action.sa_handler(info.ssi_signo);
        }
    }
}

static int qemu_signal_init(void)
{
    int sigfd;
    sigset_t set;

    /*
     * SIG_IPI must be blocked in the main thread and must not be caught
     * by sigwait() in the signal thread. Otherwise, the cpu thread will
     * not catch it reliably.
     */
    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigaddset(&set, SIGBUS);
87 88 89 90 91
    /* SIGINT cannot be handled via signalfd, so that ^C can be used
     * to interrupt QEMU when it is being run under gdb.  SIGHUP and
     * SIGTERM are also handled asynchronously, even though it is not
     * strictly necessary, because they use the same handler as SIGINT.
     */
P
Paolo Bonzini 已提交
92 93
    pthread_sigmask(SIG_BLOCK, &set, NULL);

L
Lai Jiangshan 已提交
94
    sigdelset(&set, SIG_IPI);
P
Paolo Bonzini 已提交
95 96 97 98 99 100 101 102
    sigfd = qemu_signalfd(&set);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl_setfl(sigfd, O_NONBLOCK);

103
    qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);
P
Paolo Bonzini 已提交
104 105 106 107 108 109

    return 0;
}

#else /* _WIN32 */

110
/*
 * Win32 variant: no signal setup is performed here, so initialization
 * always succeeds and returns 0.
 */
static int qemu_signal_init(void)
{
    return 0;
}
114 115 116
#endif

/* Main-loop AioContext; assigned in qemu_init_main_loop(), NULL before that. */
static AioContext *qemu_aio_context;
117 118 119 120 121 122 123 124
/* Bottom half scheduled by qemu_notify_event(); created in qemu_init_main_loop(). */
static QEMUBH *qemu_notify_bh;

static void notify_event_cb(void *opaque)
{
    /*
     * Intentionally empty: this bottom half exists only so that scheduling
     * it kicks the kernel out of ppoll/poll/WaitForMultipleObjects.
     */
    (void)opaque;
}
P
Paolo Bonzini 已提交
125

126 127 128 129 130
/*
 * Return the main loop's AioContext, i.e. the current value of
 * qemu_aio_context (NULL until qemu_init_main_loop() has assigned it).
 */
AioContext *qemu_get_aio_context(void)
{
    AioContext *ctx = qemu_aio_context;

    return ctx;
}

P
Paolo Bonzini 已提交
131 132
void qemu_notify_event(void)
{
133
    if (!qemu_aio_context) {
134 135
        return;
    }
136
    qemu_bh_schedule(qemu_notify_bh);
P
Paolo Bonzini 已提交
137 138
}

139 140
/*
 * Array of GPollFD entries to poll; allocated in qemu_init_main_loop() and
 * emptied at the start of every main_loop_wait() iteration.
 */
static GArray *gpollfds;

141
/*
 * Set up the main loop: clocks, signal handling, the main AioContext with
 * its notify bottom half, the shared pollfd array, and the GLib sources of
 * the AioContext and of the iohandler (both attached to the default GLib
 * main context).
 *
 * Returns 0 on success.  On failure returns the non-zero result of
 * qemu_signal_init(), or -EMFILE with the error propagated to @errp when
 * the AioContext cannot be created.
 */
int qemu_init_main_loop(Error **errp)
{
    int ret;
    GSource *src;
    Error *local_error = NULL;

    init_clocks(qemu_timer_notify_cb);

    ret = qemu_signal_init();
    if (ret) {
        return ret;
    }

    qemu_aio_context = aio_context_new(&local_error);
    if (!qemu_aio_context) {
        error_propagate(errp, local_error);
        return -EMFILE;
    }
    qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));

    /* Our reference is dropped once the source has been attached. */
    src = aio_get_g_source(qemu_aio_context);
    g_source_set_name(src, "aio-context");
    g_source_attach(src, NULL);
    g_source_unref(src);

    src = iohandler_get_g_source();
    g_source_set_name(src, "io-handler");
    g_source_attach(src, NULL);
    g_source_unref(src);
    return 0;
}

/* Priority filled in by g_main_context_prepare() and reused for query/check. */
static int max_priority;

174
#ifndef _WIN32
175 176 177
/* Index within gpollfds at which the GLib poll descriptors start. */
static int glib_pollfds_idx;
/* Number of GLib poll descriptors stored from that index onwards. */
static int glib_n_poll_fds;

178
static void glib_pollfds_fill(int64_t *cur_timeout)
P
Paolo Bonzini 已提交
179 180
{
    GMainContext *context = g_main_context_default();
181
    int timeout = 0;
182
    int64_t timeout_ns;
183
    int n;
P
Paolo Bonzini 已提交
184 185 186

    g_main_context_prepare(context, &max_priority);

187 188 189 190 191 192 193 194 195 196
    glib_pollfds_idx = gpollfds->len;
    n = glib_n_poll_fds;
    do {
        GPollFD *pfds;
        glib_n_poll_fds = n;
        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
        n = g_main_context_query(context, max_priority, &timeout, pfds,
                                 glib_n_poll_fds);
    } while (n != glib_n_poll_fds);
P
Paolo Bonzini 已提交
197

198 199 200 201
    if (timeout < 0) {
        timeout_ns = -1;
    } else {
        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
P
Paolo Bonzini 已提交
202
    }
203 204

    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
P
Paolo Bonzini 已提交
205 206
}

207
static void glib_pollfds_poll(void)
P
Paolo Bonzini 已提交
208 209
{
    GMainContext *context = g_main_context_default();
210
    GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
P
Paolo Bonzini 已提交
211

212
    if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
P
Paolo Bonzini 已提交
213 214 215 216
        g_main_context_dispatch(context);
    }
}

217 218
#define MAX_MAIN_LOOP_SPIN (1000)

219
static int os_host_main_loop_wait(int64_t timeout)
220 221
{
    int ret;
222
    static int spin_counter;
223

224
    glib_pollfds_fill(&timeout);
225

226 227 228 229 230 231
    /* If the I/O thread is very busy or we are incorrectly busy waiting in
     * the I/O thread, this can lead to starvation of the BQL such that the
     * VCPU threads never run.  To make sure we can detect the later case,
     * print a message to the screen.  If we run into this condition, create
     * a fake timeout in order to give the VCPU threads a chance to run.
     */
232
    if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
233 234
        static bool notified;

235
        if (!notified && !qtest_enabled() && !qtest_driver()) {
236 237 238 239 240 241
            fprintf(stderr,
                    "main-loop: WARNING: I/O thread spun for %d iterations\n",
                    MAX_MAIN_LOOP_SPIN);
            notified = true;
        }

242
        timeout = SCALE_MS;
243 244
    }

245
    if (timeout) {
246
        spin_counter = 0;
247
        qemu_mutex_unlock_iothread();
248 249
    } else {
        spin_counter++;
250 251
    }

252
    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
253

254
    if (timeout) {
255 256 257
        qemu_mutex_lock_iothread();
    }

258
    glib_pollfds_poll();
259 260 261
    return ret;
}
#else
P
Paolo Bonzini 已提交
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
/***********************************************************/
/* Polling handling */

/* One registered polling callback; entries form a singly linked list. */
typedef struct PollingEntry {
    PollingFunc *func;          /* callback invoked on each main-loop wait */
    void *opaque;               /* argument passed to func */
    struct PollingEntry *next;  /* next entry, NULL at the end of the list */
} PollingEntry;

/* Head of the polling callback list (NULL when empty). */
static PollingEntry *first_polling_entry;

/*
 * Register @func (called with @opaque) as a polling callback.  The new
 * entry is appended to the end of the list.  Always returns 0.
 */
int qemu_add_polling_cb(PollingFunc *func, void *opaque)
{
    PollingEntry *entry = g_malloc0(sizeof(*entry));
    PollingEntry **tail = &first_polling_entry;

    entry->func = func;
    entry->opaque = opaque;

    /* Walk to the terminating NULL link and hook the new entry in there. */
    while (*tail != NULL) {
        tail = &(*tail)->next;
    }
    *tail = entry;

    return 0;
}

/*
 * Unregister the first polling callback whose function and opaque pointer
 * both match, and free its entry.  Does nothing if no entry matches.
 */
void qemu_del_polling_cb(PollingFunc *func, void *opaque)
{
    PollingEntry **link = &first_polling_entry;

    while (*link != NULL) {
        PollingEntry *entry = *link;

        if (entry->func == func && entry->opaque == opaque) {
            /* Unlink before freeing so the list stays consistent. */
            *link = entry->next;
            g_free(entry);
            return;
        }
        link = &entry->next;
    }
}

/***********************************************************/
/* Wait objects support */
typedef struct WaitObjects {
    int num;
301
    int revents[MAXIMUM_WAIT_OBJECTS + 1];
P
Paolo Bonzini 已提交
302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
} WaitObjects;

static WaitObjects wait_objects = {0};

/*
 * Register @handle so that @func(@opaque) is called from the main loop
 * when polling reports the handle as ready.
 *
 * Returns 0 on success, -1 if MAXIMUM_WAIT_OBJECTS entries are already
 * registered.
 */
int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    WaitObjects *w = &wait_objects;
    if (w->num >= MAXIMUM_WAIT_OBJECTS) {
        return -1;
    }
    w->events[w->num] = handle;
    w->func[w->num] = func;
    w->opaque[w->num] = opaque;
    w->revents[w->num] = 0;
    w->num++;
    return 0;
}

/*
 * Unregister the wait object for @handle.
 *
 * Entries are matched by @handle only; @func and @opaque are accepted for
 * symmetry with qemu_add_wait_object() but are not compared.  From the
 * first match onwards every entry is overwritten by its successor, which
 * closes the gap, and the count drops by one.  Nothing changes if the
 * handle is not registered.
 */
void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    int i, found;
    WaitObjects *w = &wait_objects;

    found = 0;
    for (i = 0; i < w->num; i++) {
        if (w->events[i] == handle) {
            found = 1;
        }
        if (found) {
            /* i + 1 is at most MAXIMUM_WAIT_OBJECTS, still inside the arrays. */
            w->events[i] = w->events[i + 1];
            w->func[i] = w->func[i + 1];
            w->opaque[i] = w->opaque[i + 1];
            w->revents[i] = w->revents[i + 1];
        }
    }
    if (found) {
        w->num--;
    }
}

345 346
void qemu_fd_register(int fd)
{
347 348
    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
                   FD_READ | FD_ACCEPT | FD_CLOSE |
349 350 351
                   FD_CONNECT | FD_WRITE | FD_OOB);
}

352 353 354 355 356 357 358 359 360 361
/*
 * Translate the GPollFD entries in @pollfds into select()-style fd sets:
 * G_IO_IN goes into @rfds, G_IO_OUT into @wfds and G_IO_PRI into @xfds.
 * The sets are only added to; the caller clears them beforehand.
 *
 * Returns the highest descriptor placed in any set, or -1 if none was.
 */
static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
                        fd_set *xfds)
{
    int nfds = -1;
    int i;

    for (i = 0; i < pollfds->len; i++) {
        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
        int fd = pfd->fd;
        int events = pfd->events;
        if (events & G_IO_IN) {
            FD_SET(fd, rfds);
            nfds = MAX(nfds, fd);
        }
        if (events & G_IO_OUT) {
            FD_SET(fd, wfds);
            nfds = MAX(nfds, fd);
        }
        if (events & G_IO_PRI) {
            FD_SET(fd, xfds);
            nfds = MAX(nfds, fd);
        }
    }
    return nfds;
}

/*
 * Convert select() results back into GPollFD form: for every entry of
 * @pollfds, set revents from its membership in @rfds/@wfds/@xfds, masked
 * by the events the entry asked for.  @nfds is not used here.
 */
static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
                         fd_set *wfds, fd_set *xfds)
{
    int i;

    for (i = 0; i < pollfds->len; i++) {
        GPollFD *pfd = &g_array_index(pollfds, GPollFD, i);
        int fd = pfd->fd;
        int revents = 0;

        if (FD_ISSET(fd, rfds)) {
            revents |= G_IO_IN;
        }
        if (FD_ISSET(fd, wfds)) {
            revents |= G_IO_OUT;
        }
        if (FD_ISSET(fd, xfds)) {
            revents |= G_IO_PRI;
        }
        pfd->revents = revents & pfd->events;
    }
}

401
static int os_host_main_loop_wait(int64_t timeout)
P
Paolo Bonzini 已提交
402
{
403
    GMainContext *context = g_main_context_default();
404
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
405
    int select_ret = 0;
406
    int g_poll_ret, ret, i, n_poll_fds;
P
Paolo Bonzini 已提交
407
    PollingEntry *pe;
408
    WaitObjects *w = &wait_objects;
409
    gint poll_timeout;
410
    int64_t poll_timeout_ns;
411
    static struct timeval tv0;
412 413
    fd_set rfds, wfds, xfds;
    int nfds;
P
Paolo Bonzini 已提交
414 415 416 417 418 419

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
420 421 422
    if (ret != 0) {
        return ret;
    }
P
Paolo Bonzini 已提交
423

424 425 426 427 428 429 430 431 432 433 434 435 436 437
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (select_ret != 0) {
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

438
    g_main_context_prepare(context, &max_priority);
439
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
440 441 442
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));

443
    for (i = 0; i < w->num; i++) {
444
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
445
        poll_fds[n_poll_fds + i].events = G_IO_IN;
446 447
    }

448 449 450 451
    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
452 453
    }

454 455
    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

456
    qemu_mutex_unlock_iothread();
457 458
    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

459
    qemu_mutex_lock_iothread();
460
    if (g_poll_ret > 0) {
461
        for (i = 0; i < w->num; i++) {
462
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
463
        }
464 465 466
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
P
Paolo Bonzini 已提交
467 468 469 470
            }
        }
    }

471 472 473 474
    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

475
    return select_ret || g_poll_ret;
P
Paolo Bonzini 已提交
476 477 478 479 480
}
#endif

int main_loop_wait(int nonblocking)
{
481 482
    int ret;
    uint32_t timeout = UINT32_MAX;
483
    int64_t timeout_ns;
P
Paolo Bonzini 已提交
484 485 486 487 488 489

    if (nonblocking) {
        timeout = 0;
    }

    /* poll any events */
490
    g_array_set_size(gpollfds, 0); /* reset for new iteration */
P
Paolo Bonzini 已提交
491 492
    /* XXX: separate device handlers from system ones */
#ifdef CONFIG_SLIRP
493
    slirp_pollfds_fill(gpollfds, &timeout);
P
Paolo Bonzini 已提交
494
#endif
495 496 497 498 499 500 501 502 503 504 505 506

    if (timeout == UINT32_MAX) {
        timeout_ns = -1;
    } else {
        timeout_ns = (uint64_t)timeout * (int64_t)(SCALE_MS);
    }

    timeout_ns = qemu_soonest_timeout(timeout_ns,
                                      timerlistgroup_deadline_ns(
                                          &main_loop_tlg));

    ret = os_host_main_loop_wait(timeout_ns);
P
Paolo Bonzini 已提交
507
#ifdef CONFIG_SLIRP
S
Stefan Hajnoczi 已提交
508
    slirp_pollfds_poll(gpollfds, (ret < 0));
P
Paolo Bonzini 已提交
509 510
#endif

511 512
    /* CPU thread can infinitely wait for event after
       missing the warp */
P
Pavel Dovgalyuk 已提交
513
    qemu_start_warp_timer();
514
    qemu_clock_run_all_timers();
P
Paolo Bonzini 已提交
515 516 517

    return ret;
}
518 519 520 521 522 523 524

/* Functions to operate on the main QEMU AioContext.  */

/*
 * Create a bottom half on the main AioContext that calls @cb with @opaque,
 * and return whatever aio_bh_new() returns.
 */
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
{
    QEMUBH *bh = aio_bh_new(qemu_aio_context, cb, opaque);

    return bh;
}