提交 76d6efa5 编写于 作者: L Lukáš Doktor

runner: Increase test postprocess timeout

Currently there is a hardcoded test-postprocess timeout consisting of
two cycle-timeouts which is something between 1 and 2 seconds. This is
not sufficient on heavily loaded machines so this patch increases the
timeouts to:

 1s: when test was interrupted (ctrl+c/timeout)
10s: when the process died but the status was not yet delivered
60s: when test reported status but the process did not finish

taking into account the current available test/job timeout.

As those deadlines are significantly longer, users could have noticed a
frozen throbber, which is why I propagate the results_dispatcher and
handle the throbber also during the postprocess, which makes this patch
a bit more complicated than one would expect. I used "progress = False"
to distinguish between running test and postprocessing it.
Signed-off-by: NLukáš Doktor <ldoktor@redhat.com>
上级 cd646022
......@@ -39,6 +39,13 @@ from ..utils import stacktrace
TEST_LOG = logging.getLogger("avocado.test")
APP_LOG = logging.getLogger("avocado.app")
#: when test was interrupted (ctrl+c/timeout)
TIMEOUT_TEST_INTERRUPTED = 1
#: when the process died but the status was not yet delivered
TIMEOUT_PROCESS_DIED = 10
#: when test reported status but the process did not finish
TIMEOUT_PROCESS_ALIVE = 60
def add_runner_failure(test_state, new_status, message):
"""
......@@ -194,32 +201,38 @@ class TestStatus(object):
" see overall job.log for details.")
return test_state
def finish(self, proc, started, timeout, step):
def finish(self, proc, started, step, deadline, result_dispatcher):
"""
Wait for the test process to finish and report status or error status
if unable to obtain the status till deadline.
:param proc: The test's process
:param started: Time when the test started
:param timeout: Timeout for waiting on status
:param first: Delay before first check
:param step: Step between checks for the status
:param deadline: Test execution deadline
:param result_dispatcher: Result dispatcher (for test_progress
notifications)
"""
# Wait for either process termination or test status
wait.wait_for(lambda: not proc.is_alive() or self.status, timeout, 0,
wait.wait_for(lambda: not proc.is_alive() or self.status, 1, 0,
step)
if self.status: # status exists, wait for process to finish
if not wait.wait_for(lambda: not proc.is_alive(), timeout, 0,
deadline = min(deadline, time.time() + TIMEOUT_PROCESS_ALIVE)
while time.time() < deadline:
result_dispatcher.map_method('test_progress', False)
if wait.wait_for(lambda: not proc.is_alive(), 1, 0,
step):
err = "Test reported status but did not finish"
else: # Test finished and reported status, pass
return self._add_status_failures(self.status)
return self._add_status_failures(self.status)
err = "Test reported status but did not finish"
else: # proc finished, wait for late status delivery
if not wait.wait_for(lambda: self.status, timeout, 0, step):
err = "Test died without reporting the status."
else:
# Status delivered after the test process finished, pass
return self._add_status_failures(self.status)
deadline = min(deadline, time.time() + TIMEOUT_PROCESS_DIED)
while time.time() < deadline:
result_dispatcher.map_method('test_progress', False)
if wait.wait_for(lambda: self.status, 1, 0, step):
# Status delivered after the test process finished, pass
return self._add_status_failures(self.status)
err = "Test died without reporting the status."
# At this point there were failures, fill the new test status
TEST_LOG.debug("Original status: %s", str(self.status))
test_state = self.early_status
......@@ -432,9 +445,14 @@ class TestRunner(object):
stage_2_msg_displayed = True
os.kill(proc.pid, signal.SIGKILL)
# Get/update the test status
test_state = test_status.finish(proc, time_started, cycle_timeout,
step)
# Get/update the test status (decrease timeout on abort)
if abort_reason:
finish_deadline = TIMEOUT_TEST_INTERRUPTED
else:
finish_deadline = deadline
test_state = test_status.finish(proc, time_started, step,
finish_deadline,
result_dispatcher)
# Try to log the timeout reason to test's results and update test_state
if abort_reason:
......
......@@ -87,7 +87,7 @@ import time
class MyTest(Test):
def test(self):
self.runner_queue.put({"running": False})
time.sleep(60)
time.sleep(70)
'''
......@@ -257,7 +257,7 @@ class RunnerOperationTest(unittest.TestCase):
REPORTS_STATUS_AND_HANG,
"hanged_test_with_status") as tst:
res = process.run("%s run --sysinfo=off --job-results-dir %s %s "
"--json -" % (AVOCADO, self.tmpdir, tst),
"--json - --job-timeout 1" % (AVOCADO, self.tmpdir, tst),
ignore_status=True)
self.assertEqual(res.exit_status, exit_codes.AVOCADO_TESTS_FAIL)
results = json.loads(res.stdout)
......@@ -266,7 +266,11 @@ class RunnerOperationTest(unittest.TestCase):
"ERROR", res))
self.assertIn("Test reported status but did not finish",
results["tests"][0]["fail_reason"])
self.assertLess(res.duration, 40, "Test execution took too long, "
# Currently it should finish up to 1s after the job-timeout
# but the prep and postprocess could take a bit longer on
# some environments, so let's just check it does not take
# > 60s, which is the deadline for force-finishing the test.
self.assertLess(res.duration, 55, "Test execution took too long, "
"which is likely because the hanged test was not "
"interrupted. Results:\n%s" % res)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册