From 76d6efa58e3b24ccea70c080af6818ffdcf8ba5f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Tue, 25 Apr 2017 16:04:03 +0200 Subject: [PATCH] runner: Increase test postprocess timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently there is a hardcoded test-postprocess timeout consisting of two cycle-timeouts which is something between 1 and 2 seconds. This is not sufficient on heavily loaded machines so this patch increases the timeouts to: 1s: when test was interrupted (ctrl+c/timeout) 10s: when the process died but the status was not yet delivered 60s: when test reported status but the process did not finish taking into account the current available test/job timeout. As those deadlines are significantly longer, users could have noticed a frozen throbber, which is why I propagate the results_dispatcher and handle the throbber also during the postprocess, which makes this patch a bit more complicated than one would expect. I used "progress = False" to distinguish between running test and postprocessing it. Signed-off-by: Lukáš Doktor --- avocado/core/runner.py | 48 ++++++++++++++++++++---------- selftests/functional/test_basic.py | 10 +++++-- 2 files changed, 40 insertions(+), 18 deletions(-) diff --git a/avocado/core/runner.py b/avocado/core/runner.py index 97028dcc..c9910366 100644 --- a/avocado/core/runner.py +++ b/avocado/core/runner.py @@ -39,6 +39,13 @@ from ..utils import stacktrace TEST_LOG = logging.getLogger("avocado.test") APP_LOG = logging.getLogger("avocado.app") +#: when test was interrupted (ctrl+c/timeout) +TIMEOUT_TEST_INTERRUPTED = 1 +#: when the process died but the status was not yet delivered +TIMEOUT_PROCESS_DIED = 10 +#: when test reported status but the process did not finish +TIMEOUT_PROCESS_ALIVE = 60 + def add_runner_failure(test_state, new_status, message): """ @@ -194,32 +201,38 @@ class TestStatus(object): " see overall job.log for details.") return test_state - def finish(self, proc, started, timeout, step): + def finish(self, proc, started, step, deadline, result_dispatcher): """ Wait for the test process to finish and report status or error status if unable to obtain the status till deadline. :param proc: The test's process :param started: Time when the test started - :param timeout: Timeout for waiting on status :param first: Delay before first check :param step: Step between checks for the status + :param deadline: Test execution deadline + :param result_dispatcher: Result dispatcher (for test_progress + notifications) """ # Wait for either process termination or test status - wait.wait_for(lambda: not proc.is_alive() or self.status, timeout, 0, + wait.wait_for(lambda: not proc.is_alive() or self.status, 1, 0, step) if self.status: # status exists, wait for process to finish - if not wait.wait_for(lambda: not proc.is_alive(), timeout, 0, + deadline = min(deadline, time.time() + TIMEOUT_PROCESS_ALIVE) + while time.time() < deadline: + result_dispatcher.map_method('test_progress', False) + if wait.wait_for(lambda: not proc.is_alive(), 1, 0, step): - err = "Test reported status but did not finish" - else: # Test finished and reported status, pass - return self._add_status_failures(self.status) + return self._add_status_failures(self.status) + err = "Test reported status but did not finish" else: # proc finished, wait for late status delivery - if not wait.wait_for(lambda: self.status, timeout, 0, step): - err = "Test died without reporting the status." - else: - # Status delivered after the test process finished, pass - return self._add_status_failures(self.status) + deadline = min(deadline, time.time() + TIMEOUT_PROCESS_DIED) + while time.time() < deadline: + result_dispatcher.map_method('test_progress', False) + if wait.wait_for(lambda: self.status, 1, 0, step): + # Status delivered after the test process finished, pass + return self._add_status_failures(self.status) + err = "Test died without reporting the status." # At this point there were failures, fill the new test status TEST_LOG.debug("Original status: %s", str(self.status)) test_state = self.early_status @@ -432,9 +445,14 @@ class TestRunner(object): stage_2_msg_displayed = True os.kill(proc.pid, signal.SIGKILL) - # Get/update the test status - test_state = test_status.finish(proc, time_started, cycle_timeout, - step) + # Get/update the test status (decrease timeout on abort) + if abort_reason: + finish_deadline = TIMEOUT_TEST_INTERRUPTED + else: + finish_deadline = deadline + test_state = test_status.finish(proc, time_started, step, + finish_deadline, + result_dispatcher) # Try to log the timeout reason to test's results and update test_state if abort_reason: diff --git a/selftests/functional/test_basic.py b/selftests/functional/test_basic.py index 10c1cb1f..3e693db9 100644 --- a/selftests/functional/test_basic.py +++ b/selftests/functional/test_basic.py @@ -87,7 +87,7 @@ import time class MyTest(Test): def test(self): self.runner_queue.put({"running": False}) - time.sleep(60) + time.sleep(70) ''' @@ -257,7 +257,7 @@ class RunnerOperationTest(unittest.TestCase): REPORTS_STATUS_AND_HANG, "hanged_test_with_status") as tst: res = process.run("%s run --sysinfo=off --job-results-dir %s %s " - "--json -" % (AVOCADO, self.tmpdir, tst), + "--json - --job-timeout 1" % (AVOCADO, self.tmpdir, tst), ignore_status=True) self.assertEqual(res.exit_status, exit_codes.AVOCADO_TESTS_FAIL) results = json.loads(res.stdout) @@ -266,7 +266,11 @@ class RunnerOperationTest(unittest.TestCase): "ERROR", res)) self.assertIn("Test reported status but did not finish", results["tests"][0]["fail_reason"]) - self.assertLess(res.duration, 40, "Test execution took too long, " + # Currently it should finish up to 1s after the job-timeout + # but the prep and postprocess could take a bit longer on + # some environments, so let's just check it does not take + # > 60s, which is the deadline for force-finishing the test. + self.assertLess(res.duration, 55, "Test execution took too long, " "which is likely because the hanged test was not " "interrupted. Results:\n%s" % res) -- GitLab