From 204ad7073859342a8cc08f2835346a2a68b456c6 Mon Sep 17 00:00:00 2001 From: Amador Pahim Date: Fri, 11 Aug 2017 14:04:01 +0200 Subject: [PATCH] Fix SIGINT/SIGTERM The SIGINT/SIGTERM handlers are currently broken. When we execute a SIMPLE TEST, which ignores SIGINT/SIGTERM, the test process will not finish on SIGINT/SIGTERM, becoming orphan. Reproducer:: $ cat /tmp/test.py #!/usr/bin/env python import signal import time if __name__ == "__main__": signal.signal(signal.SIGINT, signal.SIG_IGN) signal.signal(signal.SIGTERM, signal.SIG_IGN) signal.signal(signal.SIGQUIT, signal.SIG_IGN) while True: time.sleep(0.1) $ avocado run /tmp/test.py Then play with `Ctrl+C` (once and twice), with `kill -2 ` and `kill `. Check if Avocado hangs or whether any process is left behind. Also, the only test we have for those cases is supposed to catch the issue with SIGINT (test_interrupt.InterruptTest.test_badly_behaved), but it's not really working. This patch fixes the SIGINT/SIGTERM actions, as follows: - When a SIGINT is sent to the main main process, we send the same SIGINT to all the children hope for the best. This reproduces the behaviour on Ctrl+C. - On a second SIGINT (after the 2s ignore period), meaning process is still running, we send a SIGKILL to all the children. - When a SIGTERM is sent to the main process, we now send a SIGKILL to all the children (instead of a SIGTERM), so any subprocess ignoring SIGTERM will be terminated anyway. Reference: https://trello.com/c/5RPZPH4F Signed-off-by: Amador Pahim --- avocado/core/app.py | 2 +- avocado/core/runner.py | 3 +- docs/source/ReferenceGuide.rst | 30 +++ selftests/functional/test_interrupt.py | 285 ++++++++++++++++--------- 4 files changed, 222 insertions(+), 98 deletions(-) diff --git a/avocado/core/app.py b/avocado/core/app.py index 75cb8123..47c38584 100644 --- a/avocado/core/app.py +++ b/avocado/core/app.py @@ -42,7 +42,7 @@ class AvocadoApp(object): def sigterm_handler(signum, frame): # pylint: disable=W0613 children = process.get_children_pids(os.getpid()) for child in children: - process.kill_process_tree(int(child), sig=signal.SIGTERM) + process.kill_process_tree(int(child), sig=signal.SIGKILL) raise SystemExit('Terminated') signal.signal(signal.SIGTERM, sigterm_handler) diff --git a/avocado/core/runner.py b/avocado/core/runner.py index 0def7388..e6e06621 100644 --- a/avocado/core/runner.py +++ b/avocado/core/runner.py @@ -443,13 +443,14 @@ class TestRunner(object): ignore_window) stage_1_msg_displayed = True ignore_time_started = time.time() + process.kill_process_tree(proc.pid, signal.SIGINT) if (ctrl_c_count > 1) and (time_elapsed > ignore_window): if not stage_2_msg_displayed: abort_reason = "Interrupted by ctrl+c (multiple-times)" self.job.log.debug("Killing test subprocess %s", proc.pid) stage_2_msg_displayed = True - os.kill(proc.pid, signal.SIGKILL) + process.kill_process_tree(proc.pid, signal.SIGKILL) # Get/update the test status (decrease timeout on abort) if abort_reason: diff --git a/docs/source/ReferenceGuide.rst b/docs/source/ReferenceGuide.rst index e25ed1e1..7bb69b8a 100644 --- a/docs/source/ReferenceGuide.rst +++ b/docs/source/ReferenceGuide.rst @@ -653,4 +653,34 @@ be a valid one: * An end of string (or end of line) must immediately follow the content. +.. _signal_handlers: + +Signal Handlers +=============== + +Avocado normal operation is related to run code written by +users/test-writers. It means the test code can carry its own handlers +for different signals or even ignore then. Still, as the code is being +executed by Avocado, we have to make sure we will finish all the +subprocesses we create before ending our execution. + +Signals sent to the Avocado main process will be handled as follows: + +- SIGSTP/Ctrl+Z: On SIGSTP, Avocado will pause the execution of the + subprocesses, while the main process will still be running, + respecting the timeout timer and waiting for the subprocesses to + finish. A new SIGSTP will make the subprocesses to resume the + execution. +- SIGINT/Ctrl+C: This signal will be forwarded to the test process and + Avocado will wait until it's finished. If the test process does not + finish after receiving a SIGINT, user can send a second SIGINT (after + the 2 seconds ignore period). The second SIGINT will make Avocado + to send a SIGKILL to the whole subprocess tree and then complete the + main process execution. +- SIGTERM: This signal will make Avocado to terminate immediately. A + SIGKILL will be sent to the whole subprocess tree and the main process + will exit without completing the execution. Notice that it's a + best-effort attempt, meaning that in case of fork-bomb, newly created + processes might still be left behind. + .. [#f1] Avocado plugins can introduce additional test types. diff --git a/selftests/functional/test_interrupt.py b/selftests/functional/test_interrupt.py index 9e108648..84b729b5 100644 --- a/selftests/functional/test_interrupt.py +++ b/selftests/functional/test_interrupt.py @@ -1,13 +1,15 @@ import os import tempfile import time +import signal import shutil import stat +import subprocess import unittest -import aexpect import psutil +from avocado.utils import process from avocado.utils import wait from avocado.utils import script from avocado.utils import data_factory @@ -25,13 +27,20 @@ DEFAULT_MODE = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR | stat.S_IROTH | stat.S_IXOTH) BAD_TEST = """#!/usr/bin/env python +import multiprocessing import signal import time +def foo(): + while True: + time.sleep(0.1) + if __name__ == "__main__": signal.signal(signal.SIGINT, signal.SIG_IGN) signal.signal(signal.SIGTERM, signal.SIG_IGN) signal.signal(signal.SIGQUIT, signal.SIG_IGN) + proc = multiprocessing.Process(target=foo) + proc.start() while True: time.sleep(0.1) """ @@ -52,13 +61,54 @@ if __name__ == "__main__": class InterruptTest(unittest.TestCase): + def _has_children(self): + return len(psutil.Process(self.proc.pid).children()) > 0 + + def _is_finished(self): + return self.proc.poll() is not None + + def _no_test_in_process_table(self): + """ + Make sure the test will be really gone from the + process table. + """ + test_processes = [] + + old_psutil = False + try: + process_list = psutil.pids() + except AttributeError: + process_list = psutil.get_pid_list() + old_psutil = True + + for p in process_list: + try: + p_obj = psutil.Process(p) + if p_obj is not None: + if old_psutil: + cmdline_list = psutil.Process(p).cmdline + else: + try: + cmdline_list = psutil.Process(p).cmdline() + except psutil.AccessDenied: + cmdline_list = [] + if self.test_module in " ".join(cmdline_list): + test_processes.append(p_obj) + # psutil.NoSuchProcess happens when the original + # process already ended and left the process table + except psutil.NoSuchProcess: + pass + + return len(test_processes) == 0 + def setUp(self): self.tmpdir = tempfile.mkdtemp(prefix='avocado_' + __name__) + self.test_module = None @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 2, "Skipping test that take a long time to run, are " "resource intensive or time sensitve") - def test_badly_behaved(self): + def test_badly_behaved_sigint(self): """ Make sure avocado can cleanly get out of a loop of badly behaved tests. """ @@ -67,64 +117,86 @@ class InterruptTest(unittest.TestCase): bad_test = script.TemporaryScript(bad_test_basename, BAD_TEST, 'avocado_interrupt_test', mode=DEFAULT_MODE) - bad_test.save() - + self.test_module = bad_test.path os.chdir(basedir) - cmd_line = ('%s run --sysinfo=off --job-results-dir %s ' - '%s %s %s' % (AVOCADO, self.tmpdir, bad_test.path, - bad_test.path, bad_test.path)) - proc = aexpect.Expect(command=cmd_line, linesep='') - proc.read_until_last_line_matches(os.path.basename(bad_test.path)) - proc.sendline('\x03') - proc.read_until_last_line_matches('Interrupt requested. Waiting 2 ' - 'seconds for test to finish ' - '(ignoring new Ctrl+C until then)') - # We have to actually wait 2 seconds until the ignore window is over + cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' % + (AVOCADO, self.test_module, self.tmpdir)) + self.proc = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + if not wait.wait_for(self._has_children, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado did not start the test process.') + + # This test will ignore SIGINT, so it should terminate + # when we send the second SIGINT. + os.kill(self.proc.pid, signal.SIGINT) + # We have to actually wait 2+ seconds until + # the ignore window is over time.sleep(2.5) - proc.sendline('\x03') - proc.read_until_last_line_matches('JOB TIME : %d s') - wait.wait_for(lambda: not proc.is_alive(), timeout=1) + os.kill(self.proc.pid, signal.SIGINT) - # Make sure the bad test will be really gone from the process table - def wait_until_no_badtest(): - bad_test_processes = [] + if not wait.wait_for(self._is_finished, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado was still running after receiving SIGINT ' + 'twice.') - old_psutil = False - try: - process_list = psutil.pids() - except AttributeError: - process_list = psutil.get_pid_list() - old_psutil = True - - for p in process_list: - try: - p_obj = psutil.Process(p) - if p_obj is not None: - if old_psutil: - cmdline_list = psutil.Process(p).cmdline - else: - try: - cmdline_list = psutil.Process(p).cmdline() - except psutil.AccessDenied: - cmdline_list = [] - if bad_test.path in " ".join(cmdline_list): - bad_test_processes.append(p_obj) - # psutil.NoSuchProcess happens when the original - # process already ended and left the process table - except psutil.NoSuchProcess: - pass - - return len(bad_test_processes) == 0 - - wait.wait_for(wait_until_no_badtest, timeout=2) + self.assertTrue(wait.wait_for(self._no_test_in_process_table, + timeout=2), 'Avocado left processes behind.') + + output = self.proc.stdout.read() + # Make sure the Interrupted requested sentence is there + self.assertIn('Interrupt requested. Waiting 2 seconds for test to ' + 'finish (ignoring new Ctrl+C until then)', output) # Make sure the Killing test subprocess message did appear - self.assertIn('Killing test subprocess', proc.get_output()) + self.assertIn('Killing test subprocess', output) + + @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 2, + "Skipping test that take a long time to run, are " + "resource intensive or time sensitve") + def test_badly_behaved_sigterm(self): + """ + Make sure avocado can cleanly get out of a loop of badly behaved tests. + """ + bad_test_basename = ('wontquit-%s' % + data_factory.generate_random_string(5)) + bad_test = script.TemporaryScript(bad_test_basename, BAD_TEST, + 'avocado_interrupt_test', + mode=DEFAULT_MODE) + bad_test.save() + self.test_module = bad_test.path + os.chdir(basedir) + cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' % + (AVOCADO, self.test_module, self.tmpdir)) + self.proc = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + if not wait.wait_for(self._has_children, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado did not start the test process.') + + # This test should be terminated when the main process + # receives a SIGTERM, even if the test process ignores SIGTERM. + os.kill(self.proc.pid, signal.SIGTERM) + + if not wait.wait_for(self._is_finished, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado was still running after receiving SIGINT ' + 'twice.') + + self.assertTrue(wait.wait_for(self._no_test_in_process_table, + timeout=2), 'Avocado left processes behind.') + + # Make sure the Interrupted test sentence is there + self.assertIn('Terminated\n', self.proc.stdout.read()) @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 1, "Skipping test that take a long time to run, are " "resource intensive or time sensitve") - def test_well_behaved(self): + def test_well_behaved_sigint(self): """ Make sure avocado can cleanly get out of a loop of well behaved tests. """ @@ -134,55 +206,76 @@ class InterruptTest(unittest.TestCase): 'avocado_interrupt_test', mode=DEFAULT_MODE) good_test.save() - + self.test_module = good_test.path os.chdir(basedir) - cmd_line = ('%s run --sysinfo=off --job-results-dir %s ' - '%s %s %s' % (AVOCADO, self.tmpdir, good_test.path, - good_test.path, good_test.path)) - proc = aexpect.Expect(command=cmd_line, linesep='') - proc.read_until_last_line_matches(os.path.basename(good_test.path)) - proc.sendline('\x03') - proc.read_until_last_line_matches('JOB TIME : %d s') - wait.wait_for(lambda: not proc.is_alive(), timeout=1) - - # Make sure the good test will be really gone from the process table - def wait_until_no_goodtest(): - good_test_processes = [] - - old_psutil = False - try: - process_list = psutil.pids() - except AttributeError: - process_list = psutil.get_pid_list() - old_psutil = True - - for p in process_list: - try: - p_obj = psutil.Process(p) - if p_obj is not None: - if old_psutil: - cmdline_list = psutil.Process(p).cmdline - else: - try: - cmdline_list = psutil.Process(p).cmdline() - except psutil.AccessDenied: - cmdline_list = [] - if good_test.path in " ".join(cmdline_list): - good_test_processes.append(p_obj) - # psutil.NoSuchProcess happens when the original - # process already ended and left the process table - except psutil.NoSuchProcess: - pass - - return len(good_test_processes) == 0 - - wait.wait_for(wait_until_no_goodtest, timeout=2) - # Make sure the Killing test subprocess message is not there - self.assertNotIn('Killing test subprocess', proc.get_output()) + cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' % + (AVOCADO, self.test_module, self.tmpdir)) + self.proc = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + if not wait.wait_for(self._has_children, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado did not start the test process.') + + # This test will not ignore SIGINT, so it should + # terminate right away. + os.kill(self.proc.pid, signal.SIGINT) + + if not wait.wait_for(self._is_finished, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado was still running after receiving SIGINT ' + 'twice.') + + self.assertTrue(wait.wait_for(self._no_test_in_process_table, + timeout=2), 'Avocado left processes behind.') + + output = self.proc.stdout.read() # Make sure the Interrupted requested sentence is there self.assertIn('Interrupt requested. Waiting 2 seconds for test to ' - 'finish (ignoring new Ctrl+C until then)', - proc.get_output()) + 'finish (ignoring new Ctrl+C until then)', output) + # Make sure the Killing test subprocess message is not there + self.assertNotIn('Killing test subprocess', output) + + @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 1, + "Skipping test that take a long time to run, are " + "resource intensive or time sensitve") + def test_well_behaved_sigterm(self): + """ + Make sure avocado can cleanly get out of a loop of well behaved tests. + """ + good_test_basename = ('goodtest-%s.py' % + data_factory.generate_random_string(5)) + good_test = script.TemporaryScript(good_test_basename, GOOD_TEST, + 'avocado_interrupt_test', + mode=DEFAULT_MODE) + good_test.save() + self.test_module = good_test.path + os.chdir(basedir) + cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' % + (AVOCADO, self.test_module, self.tmpdir)) + self.proc = subprocess.Popen(cmd.split(), + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT) + + if not wait.wait_for(self._has_children, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado did not start the test process.') + + # This test should be terminated when the main process + # receives a SIGTERM. + os.kill(self.proc.pid, signal.SIGTERM) + + if not wait.wait_for(self._is_finished, timeout=2): + process.kill_process_tree(self.proc.pid) + self.fail('Avocado was still running after receiving SIGINT ' + 'twice.') + + self.assertTrue(wait.wait_for(self._no_test_in_process_table, + timeout=2), 'Avocado left processes behind.') + + # Make sure the Interrupted test sentence is there + self.assertIn('Terminated\n', self.proc.stdout.read()) def tearDown(self): shutil.rmtree(self.tmpdir) -- GitLab