From 82a41186941c419afde977f477f19c545b40c1c5 Mon Sep 17 00:00:00 2001
From: Stefan Hajnoczi <stefanha@redhat.com>
Date: Thu, 1 Dec 2016 19:26:51 +0000
Subject: [PATCH] aio: self-tune polling time

This patch is based on the algorithm for the kvm.ko halt_poll_ns
parameter in Linux.  The initial polling time is zero.

If the event loop is woken up within the maximum polling time it means
polling could be effective, so grow polling time.

If the event loop is woken up beyond the maximum polling time it means
polling is not effective, so shrink polling time.

If the event loop makes progress within the current polling time then
the sweet spot has been reached.

This algorithm adjusts the polling time so it can adapt to variations in
workloads.  The goal is to reach the sweet spot while also recognizing
when polling would hurt more than help.

Two new trace events, poll_grow and poll_shrink, are added for observing
polling time adjustment.

Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
Reviewed-by: Paolo Bonzini <pbonzini@redhat.com>
Message-id: 20161201192652.9509-13-stefanha@redhat.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
---
 aio-posix.c         | 58 +++++++++++++++++++++++++++++++++++++++++----
 aio-win32.c         |  3 ++-
 async.c             |  3 +++
 include/block/aio.h | 10 ++++++--
 iothread.c          |  4 ++--
 trace-events        |  2 ++
 6 files changed, 71 insertions(+), 9 deletions(-)

diff --git a/aio-posix.c b/aio-posix.c
index 5216d82290..15855715d4 100644
--- a/aio-posix.c
+++ b/aio-posix.c
@@ -550,7 +550,7 @@ static bool try_poll_mode(AioContext *ctx, bool blocking)
     if (blocking && ctx->poll_max_ns && ctx->poll_disable_cnt == 0) {
         /* See qemu_soonest_timeout() uint64_t hack */
         int64_t max_ns = MIN((uint64_t)aio_compute_timeout(ctx),
-                             (uint64_t)ctx->poll_max_ns);
+                             (uint64_t)ctx->poll_ns);
 
         if (max_ns) {
             poll_set_started(ctx, true);
@@ -576,6 +576,7 @@ bool aio_poll(AioContext *ctx, bool blocking)
     int ret = 0;
     bool progress;
     int64_t timeout;
+    int64_t start = 0;
 
     aio_context_acquire(ctx);
     progress = false;
@@ -593,6 +594,10 @@ bool aio_poll(AioContext *ctx, bool blocking)
 
     ctx->walking_handlers++;
 
+    if (ctx->poll_max_ns) {
+        start = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
+    }
+
     if (try_poll_mode(ctx, blocking)) {
         progress = true;
     } else {
@@ -635,6 +640,47 @@ bool aio_poll(AioContext *ctx, bool blocking)
         atomic_sub(&ctx->notify_me, 2);
     }
 
+    /* Adjust polling time */
+    if (ctx->poll_max_ns) {
+        int64_t block_ns = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) - start;
+
+        if (block_ns <= ctx->poll_ns) {
+            /* This is the sweet spot, no adjustment needed */
+        } else if (block_ns > ctx->poll_max_ns) {
+            /* We'd have to poll for too long, poll less */
+            int64_t old = ctx->poll_ns;
+
+            if (ctx->poll_shrink) {
+                ctx->poll_ns /= ctx->poll_shrink;
+            } else {
+                ctx->poll_ns = 0;
+            }
+
+            trace_poll_shrink(ctx, old, ctx->poll_ns);
+        } else if (ctx->poll_ns < ctx->poll_max_ns &&
+                   block_ns < ctx->poll_max_ns) {
+            /* There is room to grow, poll longer */
+            int64_t old = ctx->poll_ns;
+            int64_t grow = ctx->poll_grow;
+
+            if (grow == 0) {
+                grow = 2;
+            }
+
+            if (ctx->poll_ns) {
+                ctx->poll_ns *= grow;
+            } else {
+                ctx->poll_ns = 4000; /* start polling at 4 microseconds */
+            }
+
+            if (ctx->poll_ns > ctx->poll_max_ns) {
+                ctx->poll_ns = ctx->poll_max_ns;
+            }
+
+            trace_poll_grow(ctx, old, ctx->poll_ns);
+        }
+    }
+
     aio_notify_accept(ctx);
 
     /* if we have any readable fds, dispatch event */
@@ -678,12 +724,16 @@ void aio_context_setup(AioContext *ctx)
 #endif
 }
 
-void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp)
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink, Error **errp)
 {
-    /* No thread synchronization here, it doesn't matter if an incorrect poll
-     * timeout is used once.
+    /* No thread synchronization here, it doesn't matter if an incorrect value
+     * is used once.
      */
     ctx->poll_max_ns = max_ns;
+    ctx->poll_ns = 0;
+    ctx->poll_grow = grow;
+    ctx->poll_shrink = shrink;
 
     aio_notify(ctx);
 }
diff --git a/aio-win32.c b/aio-win32.c
index d0e40a854c..d19dc429d8 100644
--- a/aio-win32.c
+++ b/aio-win32.c
@@ -395,7 +395,8 @@ void aio_context_setup(AioContext *ctx)
 {
 }
 
-void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns, Error **errp)
+void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink, Error **errp)
 {
     error_setg(errp, "AioContext polling is not implemented on Windows");
 }
diff --git a/async.c b/async.c
index 29abf40ca7..2960171834 100644
--- a/async.c
+++ b/async.c
@@ -385,7 +385,10 @@ AioContext *aio_context_new(Error **errp)
     qemu_rec_mutex_init(&ctx->lock);
     timerlistgroup_init(&ctx->tlg, aio_timerlist_notify, ctx);
 
+    ctx->poll_ns = 0;
     ctx->poll_max_ns = 0;
+    ctx->poll_grow = 0;
+    ctx->poll_shrink = 0;
 
     return ctx;
 fail:
diff --git a/include/block/aio.h b/include/block/aio.h
index 3817d179fd..4dca54d9c7 100644
--- a/include/block/aio.h
+++ b/include/block/aio.h
@@ -134,8 +134,11 @@ struct AioContext {
     /* Number of AioHandlers without .io_poll() */
     int poll_disable_cnt;
 
-    /* Maximum polling time in nanoseconds */
-    int64_t poll_max_ns;
+    /* Polling mode parameters */
+    int64_t poll_ns;        /* current polling time in nanoseconds */
+    int64_t poll_max_ns;    /* maximum polling time in nanoseconds */
+    int64_t poll_grow;      /* polling time growth factor */
+    int64_t poll_shrink;    /* polling time shrink factor */
 
     /* Are we in polling mode or monitoring file descriptors? */
     bool poll_started;
@@ -511,10 +514,13 @@ void aio_context_setup(AioContext *ctx);
  * aio_context_set_poll_params:
  * @ctx: the aio context
  * @max_ns: how long to busy poll for, in nanoseconds
+ * @grow: polling time growth factor
+ * @shrink: polling time shrink factor
  *
  * Poll mode can be disabled by setting poll_max_ns to 0.
  */
 void aio_context_set_poll_params(AioContext *ctx, int64_t max_ns,
+                                 int64_t grow, int64_t shrink,
                                  Error **errp);
 
 #endif
diff --git a/iothread.c b/iothread.c
index 8dfd10dee6..28598b5463 100644
--- a/iothread.c
+++ b/iothread.c
@@ -98,7 +98,7 @@ static void iothread_complete(UserCreatable *obj, Error **errp)
         return;
     }
 
-    aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns,
+    aio_context_set_poll_params(iothread->ctx, iothread->poll_max_ns, 0, 0,
                                 &local_error);
     if (local_error) {
         error_propagate(errp, local_error);
@@ -158,7 +158,7 @@ static void iothread_set_poll_max_ns(Object *obj, Visitor *v,
     iothread->poll_max_ns = value;
 
     if (iothread->ctx) {
-        aio_context_set_poll_params(iothread->ctx, value, &local_err);
+        aio_context_set_poll_params(iothread->ctx, value, 0, 0, &local_err);
     }
 
 out:
diff --git a/trace-events b/trace-events
index 7fe3a1b0e8..1181486454 100644
--- a/trace-events
+++ b/trace-events
@@ -28,6 +28,8 @@
 # aio-posix.c
 run_poll_handlers_begin(void *ctx, int64_t max_ns) "ctx %p max_ns %"PRId64
 run_poll_handlers_end(void *ctx, bool progress) "ctx %p progress %d"
+poll_shrink(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
+poll_grow(void *ctx, int64_t old, int64_t new) "ctx %p old %"PRId64" new %"PRId64
 
 # thread-pool.c
 thread_pool_submit(void *pool, void *req, void *opaque) "pool %p req %p opaque %p"
-- 
GitLab