# test_imperative_signal_handler.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import signal
import unittest
import multiprocessing
import time

import paddle.compat as cpt
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard


def set_child_signal_handler(self, child_pid):
    """Register ``child_pid`` with ``core`` and chain a SIGCHLD handler.

    The pid is handed to ``core._set_process_pids`` under the key
    ``id(self)``. A new SIGCHLD handler is then installed which first calls
    ``core._throw_error_if_process_failed()`` and afterwards forwards the
    signal to whatever callable handler was installed before this call.

    Args:
        self: Object whose ``id()`` is used as the registration key. This is
            a plain module-level function, so the argument is only used for
            that key.
        child_pid: Process id of the child to monitor.
    """
    core._set_process_pids(id(self), (child_pid,))

    # signal.getsignal() may return a non-callable marker (SIG_DFL, SIG_IGN
    # or None); only a real Python callable can be chained.
    previous = signal.getsignal(signal.SIGCHLD)
    chained = previous if callable(previous) else None

    def _on_sigchld(signum, frame):
        # Presumably raises when a registered child died abnormally (the
        # tests in this file catch SystemError) -- implemented in core.
        core._throw_error_if_process_failed()
        if chained is None:
            return
        chained(signum, frame)

    signal.signal(signal.SIGCHLD, _on_sigchld)


class DygraphDataLoaderSingalHandler(unittest.TestCase):
    """Check that a failing child process is reported to the parent.

    Each scenario starts a child process, registers its pid through
    ``set_child_signal_handler`` and then sleeps in the parent. The failure
    scenarios expect a ``SystemError`` carrying a specific message to be
    raised in the parent while it sleeps.

    Every ``test_*`` method runs its ``func_*`` scenario twice: once inside
    ``_test_eager_guard()`` and once outside of it.

    The spelling of the class name ("Singal") is kept unchanged so existing
    test identifiers keep working.
    """

    # How many times a failure scenario is attempted before the test fails.
    _MAX_ATTEMPTS = 10
    # Seconds the parent sleeps while waiting for the SIGCHLD handler to raise.
    _WAIT_SECONDS = 5

    def _spawn_and_catch(self, target, expected_message):
        """Run ``target`` in one child process and capture the reported error.

        Args:
            target: Callable executed in the child process.
            expected_message: Substring that must appear in the message of
                the ``SystemError`` raised in the parent.

        Returns:
            The caught ``SystemError``, or ``None`` when the sleep finished
            without one being raised.
        """
        try:
            child = multiprocessing.Process(target=target)
            child.start()

            set_child_signal_handler(id(self), child.pid)
            # The SIGCHLD handler runs in this (main) thread, so an error it
            # raises interrupts the sleep and lands in the except below.
            time.sleep(self._WAIT_SECONDS)
        except SystemError as ex:
            self.assertIn(expected_message, cpt.get_exception_message(ex))
            return ex
        return None

    def _assert_child_failure_reported(self, target, expected_message):
        """Retry ``_spawn_and_catch`` until an error is seen, else fail.

        Args:
            target: Callable executed in each child process.
            expected_message: Substring expected in the error message.
        """
        caught = None
        for _ in range(self._MAX_ATTEMPTS):
            caught = self._spawn_and_catch(target, expected_message)
            if caught is not None:
                break

        self.assertIsNotNone(caught)

    def func_child_process_exit_with_error(self):
        """Child exits with status 1; parent must see a "Fatal" error."""

        def __test_process__():
            core._set_process_signal_handler()
            sys.exit(1)

        self._assert_child_failure_reported(__test_process__, "Fatal")

    def test_child_process_exit_with_error(self):
        with _test_eager_guard():
            self.func_child_process_exit_with_error()
        self.func_child_process_exit_with_error()

    def func_child_process_killed_by_sigsegv(self):
        """Child sends itself SIGSEGV; parent must see "Segmentation fault"."""

        def __test_process__():
            core._set_process_signal_handler()
            os.kill(os.getpid(), signal.SIGSEGV)

        self._assert_child_failure_reported(__test_process__,
                                            "Segmentation fault")

    def test_child_process_killed_by_sigsegv(self):
        with _test_eager_guard():
            self.func_child_process_killed_by_sigsegv()
        self.func_child_process_killed_by_sigsegv()

    def func_child_process_killed_by_sigbus(self):
        """Child sends itself SIGBUS; parent must see "Bus error"."""

        def __test_process__():
            core._set_process_signal_handler()
            os.kill(os.getpid(), signal.SIGBUS)

        self._assert_child_failure_reported(__test_process__, "Bus error")

    def test_child_process_killed_by_sigbus(self):
        with _test_eager_guard():
            self.func_child_process_killed_by_sigbus()
        self.func_child_process_killed_by_sigbus()

    def func_child_process_killed_by_sigterm(self):
        """Start a long-sleeping daemon child and wait briefly in the parent.

        No assertion is made: the scenario passes as long as nothing is
        raised in the parent during the one-second wait. The child is never
        joined here.
        NOTE(review): as a daemon process it is presumably terminated when
        the parent exits -- confirm that this is the intended SIGTERM path.
        """

        def __test_process__():
            core._set_process_signal_handler()
            time.sleep(10)

        test_process = multiprocessing.Process(target=__test_process__)
        test_process.daemon = True
        test_process.start()

        set_child_signal_handler(id(self), test_process.pid)
        time.sleep(1)

    def test_child_process_killed_by_sigterm(self):
        with _test_eager_guard():
            self.func_child_process_killed_by_sigterm()
        self.func_child_process_killed_by_sigterm()


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
    unittest.main()