From 204ad7073859342a8cc08f2835346a2a68b456c6 Mon Sep 17 00:00:00 2001
From: Amador Pahim <apahim@redhat.com>
Date: Fri, 11 Aug 2017 14:04:01 +0200
Subject: [PATCH] Fix SIGINT/SIGTERM

The SIGINT/SIGTERM handlers are currently broken. When we execute a
SIMPLE TEST, which ignores SIGINT/SIGTERM, the test process will not
finish on SIGINT/SIGTERM, becoming orphan.

Reproducer::

    $ cat /tmp/test.py
    #!/usr/bin/env python
    import signal
    import time

    if __name__ == "__main__":
        signal.signal(signal.SIGINT, signal.SIG_IGN)
        signal.signal(signal.SIGTERM, signal.SIG_IGN)
        signal.signal(signal.SIGQUIT, signal.SIG_IGN)
        while True:
            time.sleep(0.1)

    $ avocado run /tmp/test.py

Then play with `Ctrl+C` (once and twice), with `kill -2 <pid>` and
`kill <pid>`. Check if Avocado hangs or whether any process is left
behind.

Also, the only test we have for those cases is supposed to catch the
issue with SIGINT (test_interrupt.InterruptTest.test_badly_behaved), but
it's not really working.

This patch fixes the SIGINT/SIGTERM actions, as follows:

- When a SIGINT is sent to the main main process, we send the same SIGINT
  to all the children hope for the best. This reproduces the behaviour
  on Ctrl+C.
- On a second SIGINT (after the 2s ignore period), meaning process is
  still running, we send a SIGKILL to all the children.
- When a SIGTERM is sent to the main process, we now send a SIGKILL to
  all the children (instead of a SIGTERM), so any subprocess ignoring
  SIGTERM will be terminated anyway.

Reference: https://trello.com/c/5RPZPH4F
Signed-off-by: Amador Pahim <apahim@redhat.com>
---
 avocado/core/app.py                    |   2 +-
 avocado/core/runner.py                 |   3 +-
 docs/source/ReferenceGuide.rst         |  30 +++
 selftests/functional/test_interrupt.py | 285 ++++++++++++++++---------
 4 files changed, 222 insertions(+), 98 deletions(-)
diff --git a/avocado/core/app.py b/avocado/core/app.py
index 75cb8123..47c38584 100644
--- a/avocado/core/app.py
+++ b/avocado/core/app.py
@@ -42,7 +42,7 @@ class AvocadoApp(object):
         def sigterm_handler(signum, frame):     # pylint: disable=W0613
             children = process.get_children_pids(os.getpid())
             for child in children:
-                process.kill_process_tree(int(child), sig=signal.SIGTERM)
+                process.kill_process_tree(int(child), sig=signal.SIGKILL)
             raise SystemExit('Terminated')
 
         signal.signal(signal.SIGTERM, sigterm_handler)
diff --git a/avocado/core/runner.py b/avocado/core/runner.py
index 0def7388..e6e06621 100644
--- a/avocado/core/runner.py
+++ b/avocado/core/runner.py
@@ -443,13 +443,14 @@ class TestRunner(object):
                                            ignore_window)
                         stage_1_msg_displayed = True
                     ignore_time_started = time.time()
+                    process.kill_process_tree(proc.pid, signal.SIGINT)
                 if (ctrl_c_count > 1) and (time_elapsed > ignore_window):
                     if not stage_2_msg_displayed:
                         abort_reason = "Interrupted by ctrl+c (multiple-times)"
                         self.job.log.debug("Killing test subprocess %s",
                                            proc.pid)
                         stage_2_msg_displayed = True
-                    os.kill(proc.pid, signal.SIGKILL)
+                    process.kill_process_tree(proc.pid, signal.SIGKILL)
 
         # Get/update the test status (decrease timeout on abort)
         if abort_reason:
diff --git a/docs/source/ReferenceGuide.rst b/docs/source/ReferenceGuide.rst
index e25ed1e1..7bb69b8a 100644
--- a/docs/source/ReferenceGuide.rst
+++ b/docs/source/ReferenceGuide.rst
@@ -653,4 +653,34 @@ be a valid one:
  * An end of string (or end of line) must immediately follow the
    content.
 
+.. _signal_handlers:
+
+Signal Handlers
+===============
+
+Avocado normal operation is related to run code written by
+users/test-writers. It means the test code can carry its own handlers
+for different signals or even ignore then. Still, as the code is being
+executed by Avocado, we have to make sure we will finish all the
+subprocesses we create before ending our execution.
+
+Signals sent to the Avocado main process will be handled as follows:
+
+- SIGSTP/Ctrl+Z: On SIGSTP, Avocado will pause the execution of the
+  subprocesses, while the main process will still be running,
+  respecting the timeout timer and waiting for the subprocesses to
+  finish. A new SIGSTP will make the subprocesses to resume the
+  execution.
+- SIGINT/Ctrl+C: This signal will be forwarded to the test process and
+  Avocado will wait until it's finished. If the test process does not
+  finish after receiving a SIGINT, user can send a second SIGINT (after
+  the 2 seconds ignore period). The second SIGINT will make Avocado
+  to send a SIGKILL to the whole subprocess tree and then complete the
+  main process execution.
+- SIGTERM: This signal will make Avocado to terminate immediately. A
+  SIGKILL will be sent to the whole subprocess tree and the main process
+  will exit without completing the execution. Notice that it's a
+  best-effort attempt, meaning that in case of fork-bomb, newly created
+  processes might still be left behind.
+
 .. [#f1] Avocado plugins can introduce additional test types.
diff --git a/selftests/functional/test_interrupt.py b/selftests/functional/test_interrupt.py
index 9e108648..84b729b5 100644
--- a/selftests/functional/test_interrupt.py
+++ b/selftests/functional/test_interrupt.py
@@ -1,13 +1,15 @@
 import os
 import tempfile
 import time
+import signal
 import shutil
 import stat
+import subprocess
 import unittest
 
-import aexpect
 import psutil
 
+from avocado.utils import process
 from avocado.utils import wait
 from avocado.utils import script
 from avocado.utils import data_factory
@@ -25,13 +27,20 @@ DEFAULT_MODE = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
                 stat.S_IROTH | stat.S_IXOTH)
 
 BAD_TEST = """#!/usr/bin/env python
+import multiprocessing
 import signal
 import time
 
+def foo():
+    while True:
+        time.sleep(0.1)
+
 if __name__ == "__main__":
     signal.signal(signal.SIGINT, signal.SIG_IGN)
     signal.signal(signal.SIGTERM, signal.SIG_IGN)
     signal.signal(signal.SIGQUIT, signal.SIG_IGN)
+    proc = multiprocessing.Process(target=foo)
+    proc.start()
     while True:
         time.sleep(0.1)
 """
@@ -52,13 +61,54 @@ if __name__ == "__main__":
 
 class InterruptTest(unittest.TestCase):
 
+    def _has_children(self):
+        return len(psutil.Process(self.proc.pid).children()) > 0
+
+    def _is_finished(self):
+        return self.proc.poll() is not None
+
+    def _no_test_in_process_table(self):
+        """
+        Make sure the test will be really gone from the
+        process table.
+        """
+        test_processes = []
+
+        old_psutil = False
+        try:
+            process_list = psutil.pids()
+        except AttributeError:
+            process_list = psutil.get_pid_list()
+            old_psutil = True
+
+        for p in process_list:
+            try:
+                p_obj = psutil.Process(p)
+                if p_obj is not None:
+                    if old_psutil:
+                        cmdline_list = psutil.Process(p).cmdline
+                    else:
+                        try:
+                            cmdline_list = psutil.Process(p).cmdline()
+                        except psutil.AccessDenied:
+                            cmdline_list = []
+                    if self.test_module in " ".join(cmdline_list):
+                        test_processes.append(p_obj)
+            # psutil.NoSuchProcess happens when the original
+            # process already ended and left the process table
+            except psutil.NoSuchProcess:
+                pass
+
+        return len(test_processes) == 0
+
     def setUp(self):
         self.tmpdir = tempfile.mkdtemp(prefix='avocado_' + __name__)
+        self.test_module = None
 
     @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 2,
                      "Skipping test that take a long time to run, are "
                      "resource intensive or time sensitve")
-    def test_badly_behaved(self):
+    def test_badly_behaved_sigint(self):
         """
         Make sure avocado can cleanly get out of a loop of badly behaved tests.
         """
@@ -67,64 +117,86 @@ class InterruptTest(unittest.TestCase):
         bad_test = script.TemporaryScript(bad_test_basename, BAD_TEST,
                                           'avocado_interrupt_test',
                                           mode=DEFAULT_MODE)
-
         bad_test.save()
-
+        self.test_module = bad_test.path
         os.chdir(basedir)
-        cmd_line = ('%s run --sysinfo=off --job-results-dir %s '
-                    '%s %s %s' % (AVOCADO, self.tmpdir, bad_test.path,
-                                  bad_test.path, bad_test.path))
-        proc = aexpect.Expect(command=cmd_line, linesep='')
-        proc.read_until_last_line_matches(os.path.basename(bad_test.path))
-        proc.sendline('\x03')
-        proc.read_until_last_line_matches('Interrupt requested. Waiting 2 '
-                                          'seconds for test to finish '
-                                          '(ignoring new Ctrl+C until then)')
-        # We have to actually wait 2 seconds until the ignore window is over
+        cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' %
+               (AVOCADO, self.test_module, self.tmpdir))
+        self.proc = subprocess.Popen(cmd.split(),
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.STDOUT)
+
+        if not wait.wait_for(self._has_children, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado did not start the test process.')
+
+        # This test will ignore SIGINT, so it should terminate
+        # when we send the second SIGINT.
+        os.kill(self.proc.pid, signal.SIGINT)
+        # We have to actually wait 2+ seconds until
+        # the ignore window is over
         time.sleep(2.5)
-        proc.sendline('\x03')
-        proc.read_until_last_line_matches('JOB TIME   : %d s')
-        wait.wait_for(lambda: not proc.is_alive(), timeout=1)
+        os.kill(self.proc.pid, signal.SIGINT)
 
-        # Make sure the bad test will be really gone from the process table
-        def wait_until_no_badtest():
-            bad_test_processes = []
+        if not wait.wait_for(self._is_finished, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado was still running after receiving SIGINT '
+                      'twice.')
 
-            old_psutil = False
-            try:
-                process_list = psutil.pids()
-            except AttributeError:
-                process_list = psutil.get_pid_list()
-                old_psutil = True
-
-            for p in process_list:
-                try:
-                    p_obj = psutil.Process(p)
-                    if p_obj is not None:
-                        if old_psutil:
-                            cmdline_list = psutil.Process(p).cmdline
-                        else:
-                            try:
-                                cmdline_list = psutil.Process(p).cmdline()
-                            except psutil.AccessDenied:
-                                cmdline_list = []
-                        if bad_test.path in " ".join(cmdline_list):
-                            bad_test_processes.append(p_obj)
-                # psutil.NoSuchProcess happens when the original
-                # process already ended and left the process table
-                except psutil.NoSuchProcess:
-                    pass
-
-            return len(bad_test_processes) == 0
-
-        wait.wait_for(wait_until_no_badtest, timeout=2)
+        self.assertTrue(wait.wait_for(self._no_test_in_process_table,
+                        timeout=2), 'Avocado left processes behind.')
+
+        output = self.proc.stdout.read()
+        # Make sure the Interrupted requested sentence is there
+        self.assertIn('Interrupt requested. Waiting 2 seconds for test to '
+                      'finish (ignoring new Ctrl+C until then)', output)
         # Make sure the Killing test subprocess message did appear
-        self.assertIn('Killing test subprocess', proc.get_output())
+        self.assertIn('Killing test subprocess', output)
+
+    @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 2,
+                     "Skipping test that take a long time to run, are "
+                     "resource intensive or time sensitve")
+    def test_badly_behaved_sigterm(self):
+        """
+        Make sure avocado can cleanly get out of a loop of badly behaved tests.
+        """
+        bad_test_basename = ('wontquit-%s' %
+                             data_factory.generate_random_string(5))
+        bad_test = script.TemporaryScript(bad_test_basename, BAD_TEST,
+                                          'avocado_interrupt_test',
+                                          mode=DEFAULT_MODE)
+        bad_test.save()
+        self.test_module = bad_test.path
+        os.chdir(basedir)
+        cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' %
+               (AVOCADO, self.test_module, self.tmpdir))
+        self.proc = subprocess.Popen(cmd.split(),
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.STDOUT)
+
+        if not wait.wait_for(self._has_children, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado did not start the test process.')
+
+        # This test should be terminated when the main process
+        # receives a SIGTERM, even if the test process ignores SIGTERM.
+        os.kill(self.proc.pid, signal.SIGTERM)
+
+        if not wait.wait_for(self._is_finished, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado was still running after receiving SIGINT '
+                      'twice.')
+
+        self.assertTrue(wait.wait_for(self._no_test_in_process_table,
+                        timeout=2), 'Avocado left processes behind.')
+
+        # Make sure the Interrupted test sentence is there
+        self.assertIn('Terminated\n', self.proc.stdout.read())
 
     @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 1,
                      "Skipping test that take a long time to run, are "
                      "resource intensive or time sensitve")
-    def test_well_behaved(self):
+    def test_well_behaved_sigint(self):
         """
         Make sure avocado can cleanly get out of a loop of well behaved tests.
         """
@@ -134,55 +206,76 @@ class InterruptTest(unittest.TestCase):
                                            'avocado_interrupt_test',
                                            mode=DEFAULT_MODE)
         good_test.save()
-
+        self.test_module = good_test.path
         os.chdir(basedir)
-        cmd_line = ('%s run --sysinfo=off --job-results-dir %s '
-                    '%s %s %s' % (AVOCADO, self.tmpdir, good_test.path,
-                                  good_test.path, good_test.path))
-        proc = aexpect.Expect(command=cmd_line, linesep='')
-        proc.read_until_last_line_matches(os.path.basename(good_test.path))
-        proc.sendline('\x03')
-        proc.read_until_last_line_matches('JOB TIME   : %d s')
-        wait.wait_for(lambda: not proc.is_alive(), timeout=1)
-
-        # Make sure the good test will be really gone from the process table
-        def wait_until_no_goodtest():
-            good_test_processes = []
-
-            old_psutil = False
-            try:
-                process_list = psutil.pids()
-            except AttributeError:
-                process_list = psutil.get_pid_list()
-                old_psutil = True
-
-            for p in process_list:
-                try:
-                    p_obj = psutil.Process(p)
-                    if p_obj is not None:
-                        if old_psutil:
-                            cmdline_list = psutil.Process(p).cmdline
-                        else:
-                            try:
-                                cmdline_list = psutil.Process(p).cmdline()
-                            except psutil.AccessDenied:
-                                cmdline_list = []
-                        if good_test.path in " ".join(cmdline_list):
-                            good_test_processes.append(p_obj)
-                # psutil.NoSuchProcess happens when the original
-                # process already ended and left the process table
-                except psutil.NoSuchProcess:
-                    pass
-
-            return len(good_test_processes) == 0
-
-        wait.wait_for(wait_until_no_goodtest, timeout=2)
-        # Make sure the Killing test subprocess message is not there
-        self.assertNotIn('Killing test subprocess', proc.get_output())
+        cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' %
+               (AVOCADO, self.test_module, self.tmpdir))
+        self.proc = subprocess.Popen(cmd.split(),
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.STDOUT)
+
+        if not wait.wait_for(self._has_children, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado did not start the test process.')
+
+        # This test will not ignore SIGINT, so it should
+        # terminate right away.
+        os.kill(self.proc.pid, signal.SIGINT)
+
+        if not wait.wait_for(self._is_finished, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado was still running after receiving SIGINT '
+                      'twice.')
+
+        self.assertTrue(wait.wait_for(self._no_test_in_process_table,
+                        timeout=2), 'Avocado left processes behind.')
+
+        output = self.proc.stdout.read()
         # Make sure the Interrupted requested sentence is there
         self.assertIn('Interrupt requested. Waiting 2 seconds for test to '
-                      'finish (ignoring new Ctrl+C until then)',
-                      proc.get_output())
+                      'finish (ignoring new Ctrl+C until then)', output)
+        # Make sure the Killing test subprocess message is not there
+        self.assertNotIn('Killing test subprocess', output)
+
+    @unittest.skipIf(int(os.environ.get("AVOCADO_CHECK_LEVEL", 0)) < 1,
+                     "Skipping test that take a long time to run, are "
+                     "resource intensive or time sensitve")
+    def test_well_behaved_sigterm(self):
+        """
+        Make sure avocado can cleanly get out of a loop of well behaved tests.
+        """
+        good_test_basename = ('goodtest-%s.py' %
+                              data_factory.generate_random_string(5))
+        good_test = script.TemporaryScript(good_test_basename, GOOD_TEST,
+                                           'avocado_interrupt_test',
+                                           mode=DEFAULT_MODE)
+        good_test.save()
+        self.test_module = good_test.path
+        os.chdir(basedir)
+        cmd = ('%s run %s --sysinfo=off --job-results-dir %s ' %
+               (AVOCADO, self.test_module, self.tmpdir))
+        self.proc = subprocess.Popen(cmd.split(),
+                                     stdout=subprocess.PIPE,
+                                     stderr=subprocess.STDOUT)
+
+        if not wait.wait_for(self._has_children, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado did not start the test process.')
+
+        # This test should be terminated when the main process
+        # receives a SIGTERM.
+        os.kill(self.proc.pid, signal.SIGTERM)
+
+        if not wait.wait_for(self._is_finished, timeout=2):
+            process.kill_process_tree(self.proc.pid)
+            self.fail('Avocado was still running after receiving SIGINT '
+                      'twice.')
+
+        self.assertTrue(wait.wait_for(self._no_test_in_process_table,
+                        timeout=2), 'Avocado left processes behind.')
+
+        # Make sure the Interrupted test sentence is there
+        self.assertIn('Terminated\n', self.proc.stdout.read())
 
     def tearDown(self):
         shutil.rmtree(self.tmpdir)
-- 
GitLab