From bede34f81b13a6d210e6d433ff4897d922711391 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Luk=C3=A1=C5=A1=20Doktor?= Date: Thu, 24 Jan 2019 15:59:24 +0100 Subject: [PATCH] utils.process: Improve self-destruction on timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When we execute process with a timeout, we allow to specify signal to be used after the timeout is reached defaulting to SIGTERM. This signal is not always enough to really finish the process, on the other hand using SIGKILL all the time might do even more damage. Instead this version attempts to use the user-specified signal and only if that fails it sends SIGKILL to it. The same behavior is now shared on negative-timeout behavior. Signed-off-by: Lukáš Doktor --- avocado/utils/process.py | 35 +++++++++++++------ selftests/unit/test_utils_process.py | 50 ++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 10 deletions(-) diff --git a/avocado/utils/process.py b/avocado/utils/process.py index 6868a5f9..e0ec8a26 100644 --- a/avocado/utils/process.py +++ b/avocado/utils/process.py @@ -814,15 +814,29 @@ class SubProcess(object): :param timeout: Time (seconds) we'll wait until the process is finished. If it's not, we'll try to terminate it - and get a status. When the process refuses to die - within 1s we send SIGKILL to it and report the - status (be it exit_code or zombie) + and it's children using ``sig`` and get a + status. When the process refuses to die + within 1s we use SIGKILL and report the status + (be it exit_code or zombie) :param sig: Signal to send to the process in case it did not end after the specified timeout. """ - def timeout_handler(): - self.send_signal(sig) + def nuke_myself(): self.result.interrupted = "timeout after %ss" % timeout + try: + kill_process_tree(self.get_pid(), sig, timeout=1) + except Exception: + try: + kill_process_tree(self.get_pid(), signal.SIGKILL, + timeout=1) + log.warning("Process '%s' refused to die in 1s after " + "sending %s to, destroyed it successfully " + "using SIGKILL.", self.cmd, sig) + except Exception: + log.error("Process '%s' refused to die in 1s after " + "sending %s, followed by SIGKILL, probably " + "dealing with a zombie process.", self.cmd, + sig) self._init_subprocess() rc = None @@ -830,7 +844,7 @@ class SubProcess(object): if timeout is None: rc = self._popen.wait() elif timeout > 0.0: - timer = threading.Timer(timeout, timeout_handler) + timer = threading.Timer(timeout, nuke_myself) try: timer.start() rc = self._popen.wait() @@ -844,7 +858,7 @@ class SubProcess(object): if rc is not None: break else: - self.kill() + nuke_myself() rc = self._popen.poll() if rc is None: @@ -895,9 +909,10 @@ class SubProcess(object): :param timeout: Time (seconds) we'll wait until the process is finished. If it's not, we'll try to terminate it - and get a status. When the process refuses to die - within 1s we send SIGKILL to it and report the - status (be it exit_code or zombie) + and it's children using ``sig`` and get a + status. When the process refuses to die + within 1s we use SIGKILL and report the status + (be it exit_code or zombie) :type timeout: float :param sig: Signal to send to the process in case it did not end after the specified timeout. diff --git a/selftests/unit/test_utils_process.py b/selftests/unit/test_utils_process.py index c7052071..e8f93fe8 100644 --- a/selftests/unit/test_utils_process.py +++ b/selftests/unit/test_utils_process.py @@ -4,6 +4,7 @@ import os import shlex import unittest import sys +import time try: from unittest import mock @@ -13,6 +14,7 @@ except ImportError: from .. import recent_mock from avocado.utils import astring +from avocado.utils import script from avocado.utils import gdb from avocado.utils import process from avocado.utils import path @@ -35,6 +37,20 @@ def probe_binary(binary): ECHO_CMD = probe_binary('echo') FICTIONAL_CMD = '/usr/bin/fictional_cmd' +REFUSE_TO_DIE = """import signal +import time + +for sig in range(64): + try: + signal.signal(sig, signal.SIG_IGN) + except: + pass + +end = time.time() + 120 + +while time.time() < end: + time.sleep(1)""" + class TestSubProcess(unittest.TestCase): @@ -321,6 +337,40 @@ class TestProcessRun(unittest.TestCase): self.assertEqual(result.stdout, encoded_text) self.assertEqual(result.stdout_text, text) + @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 1)) < 2, + "Skipping test that take a long time to run, are " + "resource intensive or time sensitve") + def test_run_with_timeout_ugly_cmd(self): + with script.TemporaryScript("refuse_to_die", REFUSE_TO_DIE) as exe: + cmd = "%s '%s'" % (sys.executable, exe.path) + # Wait 1s to set the traps + res = process.run(cmd, timeout=1, ignore_status=True) + self.assertLess(res.duration, 100, "Took longer than expected, " + "process probably not interrupted by Avocado.\n%s" + % res) + self.assertNotEqual(res.exit_status, 0, "Command finished without " + "reporting failure but should be killed.\n%s" + % res) + + @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 2, + "Skipping test that take a long time to run, are " + "resource intensive or time sensitve") + def test_run_with_negative_timeout_ugly_cmd(self): + with script.TemporaryScript("refuse_to_die", REFUSE_TO_DIE) as exe: + cmd = "%s '%s'" % (sys.executable, exe.path) + # Wait 1s to set the traps + proc = process.SubProcess(cmd) + proc.start() + time.sleep(1) + proc.wait(-1) + res = proc.result + self.assertLess(res.duration, 100, "Took longer than expected, " + "process probably not interrupted by Avocado.\n%s" + % res) + self.assertNotEqual(res.exit_status, 0, "Command finished without " + "reporting failure but should be killed.\n%s" + % res) + class MiscProcessTests(unittest.TestCase): -- GitLab