/*
 * QEMU System Emulator
 *
 * Copyright (c) 2003-2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

P
Peter Maydell 已提交
25
#include "qemu/osdep.h"
26
#include "qapi/error.h"
27
#include "qemu/cutils.h"
28
#include "qemu/timer.h"
29
#include "qemu/sockets.h"	// struct in_addr needed for libslirp.h
30
#include "sysemu/qtest.h"
31
#include "slirp/libslirp.h"
32
#include "qemu/main-loop.h"
33
#include "block/aio.h"
P
Paolo Bonzini 已提交
34 35 36

#ifndef _WIN32

37
#include "qemu/compatfd.h"
S
Stefan Weil 已提交
38

P
Paolo Bonzini 已提交
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88
/*
 * fd handler for the signalfd created in qemu_signal_init().
 *
 * @opaque: the signalfd file descriptor, passed through an intptr_t cast.
 *
 * If we have signalfd, we mask out the signals we want to handle and then
 * use signalfd to listen for them.  We rely on whatever the current signal
 * handler is to dispatch the signals when we receive them: every queued
 * record is read from the fd and forwarded to the handler currently
 * installed with sigaction().
 */
static void sigfd_handler(void *opaque)
{
    int fd = (intptr_t)opaque;
    struct qemu_signalfd_siginfo info;
    struct sigaction action;
    ssize_t len;

    while (1) {
        /* Restart reads that were interrupted by a signal. */
        do {
            len = read(fd, &info, sizeof(info));
        } while (len == -1 && errno == EINTR);

        /* Nothing left to read; qemu_signal_init() made the fd non-blocking. */
        if (len == -1 && errno == EAGAIN) {
            break;
        }

        if (len != sizeof(info)) {
            if (len == -1) {
                fprintf(stderr, "read from sigfd returned %zd: %s\n", len,
                        g_strerror(errno));
            } else {
                /* errno is not meaningful for a short read. */
                fprintf(stderr,
                        "read from sigfd returned %zd (expected %zu)\n",
                        len, sizeof(info));
            }
            return;
        }

        /* Without a valid disposition there is nothing to dispatch to. */
        if (sigaction(info.ssi_signo, NULL, &action) == -1) {
            continue;
        }

        if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) {
            action.sa_sigaction(info.ssi_signo,
                                (siginfo_t *)&info, NULL);
        } else if (action.sa_handler != SIG_DFL &&
                   action.sa_handler != SIG_IGN) {
            /* SIG_DFL and SIG_IGN are sentinels, not callable handlers. */
            action.sa_handler(info.ssi_signo);
        }
    }
}

static int qemu_signal_init(void)
{
    int sigfd;
    sigset_t set;

    /*
     * SIG_IPI must be blocked in the main thread and must not be caught
     * by sigwait() in the signal thread. Otherwise, the cpu thread will
     * not catch it reliably.
     */
    sigemptyset(&set);
    sigaddset(&set, SIG_IPI);
    sigaddset(&set, SIGIO);
    sigaddset(&set, SIGALRM);
    sigaddset(&set, SIGBUS);
89 90 91 92 93
    /* SIGINT cannot be handled via signalfd, so that ^C can be used
     * to interrupt QEMU when it is being run under gdb.  SIGHUP and
     * SIGTERM are also handled asynchronously, even though it is not
     * strictly necessary, because they use the same handler as SIGINT.
     */
P
Paolo Bonzini 已提交
94 95
    pthread_sigmask(SIG_BLOCK, &set, NULL);

L
Lai Jiangshan 已提交
96
    sigdelset(&set, SIG_IPI);
P
Paolo Bonzini 已提交
97 98 99 100 101 102 103 104
    sigfd = qemu_signalfd(&set);
    if (sigfd == -1) {
        fprintf(stderr, "failed to create signalfd\n");
        return -errno;
    }

    fcntl_setfl(sigfd, O_NONBLOCK);

105
    qemu_set_fd_handler(sigfd, sigfd_handler, NULL, (void *)(intptr_t)sigfd);
P
Paolo Bonzini 已提交
106 107 108 109 110 111

    return 0;
}

#else /* _WIN32 */

112
/*
 * Windows build (_WIN32 branch): no signal setup is performed here, so
 * initialization trivially succeeds.
 *
 * Returns 0.
 */
static int qemu_signal_init(void)
{
    const int status = 0;

    return status;
}
116 117 118
#endif

/* AioContext of the main loop; created by qemu_init_main_loop(). */
static AioContext *qemu_aio_context;
119 120 121 122 123 124 125 126
/*
 * Bottom half scheduled by qemu_notify_event(); created in
 * qemu_init_main_loop() with notify_event_cb() as its callback.
 */
static QEMUBH *qemu_notify_bh;

/*
 * Callback of the notification bottom half.  It deliberately does no work:
 * the bottom half is only used to kick the kernel out of
 * ppoll/poll/WaitForMultipleObjects.
 */
static void notify_event_cb(void *opaque)
{
    (void)opaque;
}
P
Paolo Bonzini 已提交
127

128 129 130 131 132
/* Return the pointer stored in qemu_aio_context (NULL until it is set). */
AioContext *qemu_get_aio_context(void)
{
    AioContext *ctx = qemu_aio_context;

    return ctx;
}

P
Paolo Bonzini 已提交
133 134
void qemu_notify_event(void)
{
135
    if (!qemu_aio_context) {
136 137
        return;
    }
138
    qemu_bh_schedule(qemu_notify_bh);
P
Paolo Bonzini 已提交
139 140
}

141 142
/*
 * Array of GPollFD used by the main loop.  Allocated in
 * qemu_init_main_loop() and truncated to zero length at the start of every
 * main_loop_wait() call.
 */
static GArray *gpollfds;

143
/*
 * Set up the main loop: clocks, signal handling, the main AioContext, the
 * notification bottom half, the gpollfds array, and the "aio-context" and
 * "io-handler" GSources (attached with a NULL context argument).
 *
 * @errp: receives the error on every failure path.
 *
 * Returns 0 on success, the non-zero value returned by qemu_signal_init()
 * if signal setup fails, or -EMFILE if the AioContext cannot be created.
 */
int qemu_init_main_loop(Error **errp)
{
    int ret;
    GSource *src;
    Error *local_error = NULL;

    init_clocks();

    ret = qemu_signal_init();
    if (ret) {
        /*
         * qemu_signal_init() only reports a negative errno; turn it into
         * an Error so callers that rely on @errp see a real object.
         */
        error_setg_errno(errp, -ret, "failed to initialize signal handling");
        return ret;
    }

    qemu_aio_context = aio_context_new(&local_error);
    if (!qemu_aio_context) {
        error_propagate(errp, local_error);
        return -EMFILE;
    }
    qemu_notify_bh = qemu_bh_new(notify_event_cb, NULL);
    gpollfds = g_array_new(FALSE, FALSE, sizeof(GPollFD));

    /* Each source is unreferenced right after it has been attached. */
    src = aio_get_g_source(qemu_aio_context);
    g_source_set_name(src, "aio-context");
    g_source_attach(src, NULL);
    g_source_unref(src);

    src = iohandler_get_g_source();
    g_source_set_name(src, "io-handler");
    g_source_attach(src, NULL);
    g_source_unref(src);
    return 0;
}

/*
 * Priority written by g_main_context_prepare() and passed on to the
 * following g_main_context_query() / g_main_context_check() calls.
 */
static int max_priority;

176
#ifndef _WIN32
177 178 179
/* Index in gpollfds of the first entry appended by glib_pollfds_fill(). */
static int glib_pollfds_idx;
/* Number of gpollfds entries glib_pollfds_fill() set aside for glib. */
static int glib_n_poll_fds;

180
static void glib_pollfds_fill(int64_t *cur_timeout)
P
Paolo Bonzini 已提交
181 182
{
    GMainContext *context = g_main_context_default();
183
    int timeout = 0;
184
    int64_t timeout_ns;
185
    int n;
P
Paolo Bonzini 已提交
186 187 188

    g_main_context_prepare(context, &max_priority);

189 190 191 192 193 194 195 196 197 198
    glib_pollfds_idx = gpollfds->len;
    n = glib_n_poll_fds;
    do {
        GPollFD *pfds;
        glib_n_poll_fds = n;
        g_array_set_size(gpollfds, glib_pollfds_idx + glib_n_poll_fds);
        pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
        n = g_main_context_query(context, max_priority, &timeout, pfds,
                                 glib_n_poll_fds);
    } while (n != glib_n_poll_fds);
P
Paolo Bonzini 已提交
199

200 201 202 203
    if (timeout < 0) {
        timeout_ns = -1;
    } else {
        timeout_ns = (int64_t)timeout * (int64_t)SCALE_MS;
P
Paolo Bonzini 已提交
204
    }
205 206

    *cur_timeout = qemu_soonest_timeout(timeout_ns, *cur_timeout);
P
Paolo Bonzini 已提交
207 208
}

209
static void glib_pollfds_poll(void)
P
Paolo Bonzini 已提交
210 211
{
    GMainContext *context = g_main_context_default();
212
    GPollFD *pfds = &g_array_index(gpollfds, GPollFD, glib_pollfds_idx);
P
Paolo Bonzini 已提交
213

214
    if (g_main_context_check(context, max_priority, pfds, glib_n_poll_fds)) {
P
Paolo Bonzini 已提交
215 216 217 218
        g_main_context_dispatch(context);
    }
}

219 220
/*
 * Once more than this many consecutive zero-timeout polls have happened,
 * os_host_main_loop_wait() forces a non-zero timeout.
 */
#define MAX_MAIN_LOOP_SPIN (1000)

221
static int os_host_main_loop_wait(int64_t timeout)
222 223
{
    int ret;
224
    static int spin_counter;
225

226
    glib_pollfds_fill(&timeout);
227

228 229 230 231 232 233
    /* If the I/O thread is very busy or we are incorrectly busy waiting in
     * the I/O thread, this can lead to starvation of the BQL such that the
     * VCPU threads never run.  To make sure we can detect the later case,
     * print a message to the screen.  If we run into this condition, create
     * a fake timeout in order to give the VCPU threads a chance to run.
     */
234
    if (!timeout && (spin_counter > MAX_MAIN_LOOP_SPIN)) {
235 236
        static bool notified;

237
        if (!notified && !qtest_driver()) {
238 239 240 241 242 243
            fprintf(stderr,
                    "main-loop: WARNING: I/O thread spun for %d iterations\n",
                    MAX_MAIN_LOOP_SPIN);
            notified = true;
        }

244
        timeout = SCALE_MS;
245 246
    }

247
    if (timeout) {
248
        spin_counter = 0;
249
        qemu_mutex_unlock_iothread();
250 251
    } else {
        spin_counter++;
252 253
    }

254
    ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout);
255

256
    if (timeout) {
257 258 259
        qemu_mutex_lock_iothread();
    }

260
    glib_pollfds_poll();
261 262 263
    return ret;
}
#else
P
Paolo Bonzini 已提交
264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302
/***********************************************************/
/* Polling handling */

/* One node of the singly linked list of registered polling callbacks. */
typedef struct PollingEntry {
    /* Callback to invoke. */
    PollingFunc *func;
    /* Argument passed to func. */
    void *opaque;
    /* Next node in the list, NULL at the tail. */
    struct PollingEntry *next;
} PollingEntry;

/* Head of the polling callback list; NULL while the list is empty. */
static PollingEntry *first_polling_entry;

/*
 * Register a polling callback by appending a new entry to the end of the
 * list headed by first_polling_entry.
 *
 * @func:   callback to register
 * @opaque: argument stored alongside @func
 *
 * Always returns 0.
 */
int qemu_add_polling_cb(PollingFunc *func, void *opaque)
{
    PollingEntry *entry = g_malloc0(sizeof(PollingEntry));
    PollingEntry **link = &first_polling_entry;

    entry->func = func;
    entry->opaque = opaque;

    /* Walk to the NULL link at the tail so the new entry goes last. */
    while (*link != NULL) {
        link = &(*link)->next;
    }
    *link = entry;

    return 0;
}

/*
 * Unregister a polling callback: unlink and free the first list entry whose
 * func and opaque both match.  Does nothing if no entry matches.
 *
 * @func:   callback that was registered
 * @opaque: argument that was registered with @func
 */
void qemu_del_polling_cb(PollingFunc *func, void *opaque)
{
    PollingEntry **link = &first_polling_entry;

    while (*link != NULL) {
        PollingEntry *entry = *link;

        if (entry->func == func && entry->opaque == opaque) {
            *link = entry->next;
            g_free(entry);
            return;
        }
        link = &entry->next;
    }
}

/***********************************************************/
/* Wait objects support */
typedef struct WaitObjects {
    int num;
303
    int revents[MAXIMUM_WAIT_OBJECTS + 1];
P
Paolo Bonzini 已提交
304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319
    HANDLE events[MAXIMUM_WAIT_OBJECTS + 1];
    WaitObjectFunc *func[MAXIMUM_WAIT_OBJECTS + 1];
    void *opaque[MAXIMUM_WAIT_OBJECTS + 1];
} WaitObjects;

static WaitObjects wait_objects = {0};

/*
 * Register a wait object at the end of the wait_objects table.
 *
 * @handle: handle to wait on
 * @func:   callback stored for the entry
 * @opaque: argument stored for @func
 *
 * Returns 0 on success, or -1 if the table already holds
 * MAXIMUM_WAIT_OBJECTS entries.
 */
int qemu_add_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    WaitObjects *table = &wait_objects;
    int slot = table->num;

    if (slot >= MAXIMUM_WAIT_OBJECTS) {
        return -1;
    }

    table->events[slot] = handle;
    table->func[slot] = func;
    table->opaque[slot] = opaque;
    table->revents[slot] = 0;
    table->num = slot + 1;

    return 0;
}

/*
 * Remove the first wait object registered for @handle and close the gap by
 * shifting every later entry down by one.  Does nothing if @handle is not
 * in the table.
 *
 * Only @handle is compared; @func and @opaque are not consulted.
 */
void qemu_del_wait_object(HANDLE handle, WaitObjectFunc *func, void *opaque)
{
    WaitObjects *table = &wait_objects;
    int pos = 0;

    /* Locate the first entry that uses this handle. */
    while (pos < table->num && table->events[pos] != handle) {
        pos++;
    }
    if (pos == table->num) {
        return;
    }

    /* Entry num is the spare slot, so reading pos + 1 stays in bounds. */
    for (; pos < table->num; pos++) {
        table->events[pos] = table->events[pos + 1];
        table->func[pos] = table->func[pos + 1];
        table->opaque[pos] = table->opaque[pos + 1];
        table->revents[pos] = table->revents[pos + 1];
    }
    table->num--;
}

347 348
void qemu_fd_register(int fd)
{
349 350
    WSAEventSelect(fd, event_notifier_get_handle(&qemu_aio_context->notifier),
                   FD_READ | FD_ACCEPT | FD_CLOSE |
351 352 353
                   FD_CONNECT | FD_WRITE | FD_OOB);
}

354 355 356 357 358 359 360 361 362 363
/*
 * Translate an array of GPollFD into select() fd sets.
 *
 * @pollfds: array of GPollFD to translate
 * @rfds:    receives the descriptors that request G_IO_IN
 * @wfds:    receives the descriptors that request G_IO_OUT
 * @xfds:    receives the descriptors that request G_IO_PRI
 *
 * Returns the highest descriptor added to any set, or -1 if none was added.
 */
static int pollfds_fill(GArray *pollfds, fd_set *rfds, fd_set *wfds,
                        fd_set *xfds)
{
    int highest = -1;
    int idx;

    for (idx = 0; idx < pollfds->len; idx++) {
        GPollFD *entry = &g_array_index(pollfds, GPollFD, idx);
        int fd = entry->fd;
        int wanted = entry->events;
        int watched = 0;

        if (wanted & G_IO_IN) {
            FD_SET(fd, rfds);
            watched = 1;
        }
        if (wanted & G_IO_OUT) {
            FD_SET(fd, wfds);
            watched = 1;
        }
        if (wanted & G_IO_PRI) {
            FD_SET(fd, xfds);
            watched = 1;
        }

        /* Only descriptors placed in at least one set count. */
        if (watched) {
            highest = MAX(highest, fd);
        }
    }

    return highest;
}

/*
 * Translate select() results back into the revents field of every GPollFD.
 *
 * @pollfds: array of GPollFD to update
 * @nfds:    not used by this function
 * @rfds:    descriptors reported readable (mapped to G_IO_IN)
 * @wfds:    descriptors reported writable (mapped to G_IO_OUT)
 * @xfds:    descriptors with exceptional conditions (mapped to G_IO_PRI)
 *
 * Only conditions that the entry asked for in its events field are kept.
 */
static void pollfds_poll(GArray *pollfds, int nfds, fd_set *rfds,
                         fd_set *wfds, fd_set *xfds)
{
    int idx;

    for (idx = 0; idx < pollfds->len; idx++) {
        GPollFD *entry = &g_array_index(pollfds, GPollFD, idx);
        int fd = entry->fd;
        int ready = 0;

        ready |= FD_ISSET(fd, rfds) ? G_IO_IN : 0;
        ready |= FD_ISSET(fd, wfds) ? G_IO_OUT : 0;
        ready |= FD_ISSET(fd, xfds) ? G_IO_PRI : 0;

        entry->revents = ready & entry->events;
    }
}

403
static int os_host_main_loop_wait(int64_t timeout)
P
Paolo Bonzini 已提交
404
{
405
    GMainContext *context = g_main_context_default();
406
    GPollFD poll_fds[1024 * 2]; /* this is probably overkill */
407
    int select_ret = 0;
408
    int g_poll_ret, ret, i, n_poll_fds;
P
Paolo Bonzini 已提交
409
    PollingEntry *pe;
410
    WaitObjects *w = &wait_objects;
411
    gint poll_timeout;
412
    int64_t poll_timeout_ns;
413
    static struct timeval tv0;
414 415
    fd_set rfds, wfds, xfds;
    int nfds;
P
Paolo Bonzini 已提交
416 417 418 419 420 421

    /* XXX: need to suppress polling by better using win32 events */
    ret = 0;
    for (pe = first_polling_entry; pe != NULL; pe = pe->next) {
        ret |= pe->func(pe->opaque);
    }
422 423 424
    if (ret != 0) {
        return ret;
    }
P
Paolo Bonzini 已提交
425

426 427 428 429 430 431 432 433 434 435 436 437 438 439
    FD_ZERO(&rfds);
    FD_ZERO(&wfds);
    FD_ZERO(&xfds);
    nfds = pollfds_fill(gpollfds, &rfds, &wfds, &xfds);
    if (nfds >= 0) {
        select_ret = select(nfds + 1, &rfds, &wfds, &xfds, &tv0);
        if (select_ret != 0) {
            timeout = 0;
        }
        if (select_ret > 0) {
            pollfds_poll(gpollfds, nfds, &rfds, &wfds, &xfds);
        }
    }

440
    g_main_context_prepare(context, &max_priority);
441
    n_poll_fds = g_main_context_query(context, max_priority, &poll_timeout,
442 443 444
                                      poll_fds, ARRAY_SIZE(poll_fds));
    g_assert(n_poll_fds <= ARRAY_SIZE(poll_fds));

445
    for (i = 0; i < w->num; i++) {
446
        poll_fds[n_poll_fds + i].fd = (DWORD_PTR)w->events[i];
447
        poll_fds[n_poll_fds + i].events = G_IO_IN;
448 449
    }

450 451 452 453
    if (poll_timeout < 0) {
        poll_timeout_ns = -1;
    } else {
        poll_timeout_ns = (int64_t)poll_timeout * (int64_t)SCALE_MS;
454 455
    }

456 457
    poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout);

458
    qemu_mutex_unlock_iothread();
459 460
    g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns);

461
    qemu_mutex_lock_iothread();
462
    if (g_poll_ret > 0) {
463
        for (i = 0; i < w->num; i++) {
464
            w->revents[i] = poll_fds[n_poll_fds + i].revents;
465
        }
466 467 468
        for (i = 0; i < w->num; i++) {
            if (w->revents[i] && w->func[i]) {
                w->func[i](w->opaque[i]);
P
Paolo Bonzini 已提交
469 470 471 472
            }
        }
    }

473 474 475 476
    if (g_main_context_check(context, max_priority, poll_fds, n_poll_fds)) {
        g_main_context_dispatch(context);
    }

477
    return select_ret || g_poll_ret;
P
Paolo Bonzini 已提交
478 479 480 481 482
}
#endif

int main_loop_wait(int nonblocking)
{
483 484
    int ret;
    uint32_t timeout = UINT32_MAX;
485
    int64_t timeout_ns;
P
Paolo Bonzini 已提交
486 487 488 489 490 491

    if (nonblocking) {
        timeout = 0;
    }

    /* poll any events */
492
    g_array_set_size(gpollfds, 0); /* reset for new iteration */
P
Paolo Bonzini 已提交
493 494
    /* XXX: separate device handlers from system ones */
#ifdef CONFIG_SLIRP
495
    slirp_pollfds_fill(gpollfds, &timeout);
P
Paolo Bonzini 已提交
496
#endif
497 498 499 500 501 502 503 504 505 506 507 508

    if (timeout == UINT32_MAX) {
        timeout_ns = -1;
    } else {
        timeout_ns = (uint64_t)timeout * (int64_t)(SCALE_MS);
    }

    timeout_ns = qemu_soonest_timeout(timeout_ns,
                                      timerlistgroup_deadline_ns(
                                          &main_loop_tlg));

    ret = os_host_main_loop_wait(timeout_ns);
P
Paolo Bonzini 已提交
509
#ifdef CONFIG_SLIRP
S
Stefan Hajnoczi 已提交
510
    slirp_pollfds_poll(gpollfds, (ret < 0));
P
Paolo Bonzini 已提交
511 512
#endif

513 514
    /* CPU thread can infinitely wait for event after
       missing the warp */
P
Pavel Dovgalyuk 已提交
515
    qemu_start_warp_timer();
516
    qemu_clock_run_all_timers();
P
Paolo Bonzini 已提交
517 518 519

    return ret;
}
520 521 522 523 524 525 526

/* Functions to operate on the main QEMU AioContext.  */

/*
 * Create a bottom half on the main AioContext (qemu_aio_context).
 *
 * @cb:     callback passed to aio_bh_new()
 * @opaque: argument passed to aio_bh_new() along with @cb
 *
 * Returns whatever aio_bh_new() returns.
 */
QEMUBH *qemu_bh_new(QEMUBHFunc *cb, void *opaque)
{
    AioContext *ctx = qemu_aio_context;

    return aio_bh_new(ctx, cb, opaque);
}