runner: Increase test postprocess timeout

Currently there is a hardcoded test-postprocess timeout consisting of two cycle-timeouts, which is something between 1 and 2 seconds. This is not sufficient on heavily loaded machines, so this patch increases the timeouts to:

    1s: when the test was interrupted (ctrl+c/timeout)
    10s: when the process died but the status was not yet delivered
    60s: when the test reported status but the process did not finish

taking into account the currently available test/job timeout.

As those deadlines are significantly longer, users could have noticed a frozen throbber, which is why I propagate the results_dispatcher and handle the throbber also during the postprocess, which makes this patch a bit more complicated than one would expect. I used "progress = False" to distinguish between running the test and postprocessing it.

Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>

runner: Increase test postprocess timeout
Currently there is a hardcoded test-postprocess timeout consisting of two cycle-timeouts, which is something between 1 and 2 seconds. This is not sufficient on heavily loaded machines, so this patch increases the timeouts to:

    1s: when the test was interrupted (ctrl+c/timeout)
    10s: when the process died but the status was not yet delivered
    60s: when the test reported status but the process did not finish

taking into account the currently available test/job timeout.

As those deadlines are significantly longer, users could have noticed a frozen throbber, which is why I propagate the results_dispatcher and handle the throbber also during the postprocess, which makes this patch a bit more complicated than one would expect. I used "progress = False" to distinguish between running the test and postprocessing it.

Signed-off-by: Lukáš Doktor <ldoktor@redhat.com>
76d6efa5 · Lukáš Doktor · cd646022 · 76d6efa5 · 76d6efa5
显示空白变更内容
内联并排

Showing 2 changed files with 40 additions and 18 deletions

avocado/core/runner.py avocado/core/runner.py +33 -15

selftests/functional/test_basic.py selftests/functional/test_basic.py +7 -3

未找到文件。
--- a/avocado/core/runner.py
+++ b/avocado/core/runner.py
@@ -39,6 +39,13 @@ from ..utils import stacktrace
 TEST_LOG = logging.getLogger("avocado.test")
 APP_LOG = logging.getLogger("avocado.app")

+#: when test was interrupted (ctrl+c/timeout)
+TIMEOUT_TEST_INTERRUPTED = 1
+#: when the process died but the status was not yet delivered
+TIMEOUT_PROCESS_DIED = 10
+#: when test reported status but the process did not finish
+TIMEOUT_PROCESS_ALIVE = 60
+

 def add_runner_failure(test_state, new_status, message):
    """
@@ -194,32 +201,38 @@ class TestStatus(object):
                                      " see overall job.log for details.")
        return test_state

-    def finish(self, proc, started, timeout, step):
+    def finish(self, proc, started, step, deadline, result_dispatcher):
        """
        Wait for the test process to finish and report status or error status
        if unable to obtain the status till deadline.

        :param proc: The test's process
        :param started: Time when the test started
-        :param timeout: Timeout for waiting on status
        :param first: Delay before first check
        :param step: Step between checks for the status
+        :param deadline: Test execution deadline
+        :param result_dispatcher: Result dispatcher (for test_progress
+               notifications)
        """
        # Wait for either process termination or test status
-        wait.wait_for(lambda: not proc.is_alive() or self.status, timeout, 0,
+        wait.wait_for(lambda: not proc.is_alive() or self.status, 1, 0,
                      step)
        if self.status:     # status exists, wait for process to finish
-            if not wait.wait_for(lambda: not proc.is_alive(), timeout, 0,
+            deadline = min(deadline, time.time() + TIMEOUT_PROCESS_ALIVE)
+            while time.time() < deadline:
+                result_dispatcher.map_method('test_progress', False)
+                if wait.wait_for(lambda: not proc.is_alive(), 1, 0,
                                 step):
-                err = "Test reported status but did not finish"
-            else:   # Test finished and reported status, pass
                    return self._add_status_failures(self.status)
+            err = "Test reported status but did not finish"
        else:   # proc finished, wait for late status delivery
-            if not wait.wait_for(lambda: self.status, timeout, 0, step):
-                err = "Test died without reporting the status."
-            else:
+            deadline = min(deadline, time.time() + TIMEOUT_PROCESS_DIED)
+            while time.time() < deadline:
+                result_dispatcher.map_method('test_progress', False)
+                if wait.wait_for(lambda: self.status, 1, 0, step):
                    # Status delivered after the test process finished, pass
                    return self._add_status_failures(self.status)
+            err = "Test died without reporting the status."
        # At this point there were failures, fill the new test status
        TEST_LOG.debug("Original status: %s", str(self.status))
        test_state = self.early_status
@@ -432,9 +445,14 @@ class TestRunner(object):
                        stage_2_msg_displayed = True
                    os.kill(proc.pid, signal.SIGKILL)

-        # Get/update the test status
-        test_state = test_status.finish(proc, time_started, cycle_timeout,
-                                        step)
+        # Get/update the test status (decrease timeout on abort)
+        if abort_reason:
+            finish_deadline = TIMEOUT_TEST_INTERRUPTED
+        else:
+            finish_deadline = deadline
+        test_state = test_status.finish(proc, time_started, step,
+                                        finish_deadline,
+                                        result_dispatcher)

        # Try to log the timeout reason to test's results and update test_state
        if abort_reason:

--- a/selftests/functional/test_basic.py
+++ b/selftests/functional/test_basic.py
@@ -87,7 +87,7 @@ import time
 class MyTest(Test):
    def test(self):
         self.runner_queue.put({"running": False})
-         time.sleep(60)
+         time.sleep(70)
 '''


@@ -257,7 +257,7 @@ class RunnerOperationTest(unittest.TestCase):
                                    REPORTS_STATUS_AND_HANG,
                                    "hanged_test_with_status") as tst:
            res = process.run("%s run --sysinfo=off --job-results-dir %s %s "
-                              "--json -" % (AVOCADO, self.tmpdir, tst),
+                              "--json - --job-timeout 1" % (AVOCADO, self.tmpdir, tst),
                              ignore_status=True)
            self.assertEqual(res.exit_status, exit_codes.AVOCADO_TESTS_FAIL)
            results = json.loads(res.stdout)
@@ -266,7 +266,11 @@ class RunnerOperationTest(unittest.TestCase):
                                               "ERROR", res))
            self.assertIn("Test reported status but did not finish",
                          results["tests"][0]["fail_reason"])
-            self.assertLess(res.duration, 40, "Test execution took too long, "
+            # Currently it should finish up to 1s after the job-timeout
+            # but the prep and postprocess could take a bit longer on
+            # some environments, so let's just check it does not take
+            # > 60s, which is the deadline for force-finishing the test.
+            self.assertLess(res.duration, 55, "Test execution took too long, "
                            "which is likely because the hanged test was not "
                            "interrupted. Results:\n%s" % res)