diff --git a/paddle/fluid/platform/init.cc b/paddle/fluid/platform/init.cc index ac6988d350f4f38c6e8da2a655c29069b8d0eda6..3cb727c1e0eae1bfe541af9a84a9ba8b7642ee06 100644 --- a/paddle/fluid/platform/init.cc +++ b/paddle/fluid/platform/init.cc @@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ +#include #include #include @@ -245,15 +246,16 @@ void InitDevices(const std::vector devices) { // Description Quoted from // https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html const struct { + int signal_number; const char *name; const char *error_string; } SignalErrorStrings[] = { - {"SIGSEGV", "Segmentation fault"}, - {"SIGILL", "Illegal instruction"}, - {"SIGFPE", "Erroneous arithmetic operation"}, - {"SIGABRT", "Process abort signal"}, - {"SIGBUS", "Access to an undefined portion of a memory object"}, - {"SIGTERM", "Termination signal"}, + {SIGSEGV, "SIGSEGV", "Segmentation fault"}, + {SIGILL, "SIGILL", "Illegal instruction"}, + {SIGFPE, "SIGFPE", "Erroneous arithmetic operation"}, + {SIGABRT, "SIGABRT", "Process abort signal"}, + {SIGBUS, "SIGBUS", "Access to an undefined portion of a memory object"}, + {SIGTERM, "SIGTERM", "Termination signal"}, }; bool StartsWith(const char *str, const char *prefix) { @@ -309,7 +311,21 @@ void SignalHandle(const char *data, int size) { // will Kill program by the default signal handler } } +#endif // _WIN32 + +void DisableSignalHandler() { +#ifndef _WIN32 + for (size_t i = 0; + i < (sizeof(SignalErrorStrings) / sizeof(*(SignalErrorStrings))); ++i) { + int signal_number = SignalErrorStrings[i].signal_number; + struct sigaction sig_action; + memset(&sig_action, 0, sizeof(sig_action)); + sigemptyset(&sig_action.sa_mask); + sig_action.sa_handler = SIG_DFL; + sigaction(signal_number, &sig_action, NULL); + } #endif 
+} #ifdef WITH_WIN_DUMP_DBG typedef BOOL(WINAPI *MINIDUMP_WRITE_DUMP)( diff --git a/paddle/fluid/platform/init.h b/paddle/fluid/platform/init.h index cd5ef843fa8f7d436b9ca10367e46cee6d2822cb..b52456b19ac662c0573d0cca384fe42057062742 100644 --- a/paddle/fluid/platform/init.h +++ b/paddle/fluid/platform/init.h @@ -61,5 +61,7 @@ class SignalMessageDumper { void SignalHandle(const char* data, int size); #endif +void DisableSignalHandler(); + } // namespace framework } // namespace paddle diff --git a/paddle/fluid/pybind/pybind.cc b/paddle/fluid/pybind/pybind.cc index fd4ae63265366a27a090fed4ab694bae6ef261d4..bb3ad6bbb16455959c13a14a71fe95cf68b3715a 100644 --- a/paddle/fluid/pybind/pybind.cc +++ b/paddle/fluid/pybind/pybind.cc @@ -477,6 +477,8 @@ PYBIND11_MODULE(core_noavx, m) { m.def("set_num_threads", &platform::SetNumThreads); + m.def("disable_signal_handler", &DisableSignalHandler); + #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) m.def("cudnn_version", &platform::CudnnVersion); #endif diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 44982eb3be036ac462194396b5d58207223d6455..f00d6170fb5607e85d06f79e5399177a4406ac9b 100755 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -255,6 +255,7 @@ from .device import set_device # noqa: F401 from .device import get_device # noqa: F401 from .fluid.framework import is_compiled_with_cuda # noqa: F401 from .fluid.framework import is_compiled_with_rocm # noqa: F401 +from .fluid.framework import disable_signal_handler # noqa: F401 from .device import is_compiled_with_xpu # noqa: F401 from .device import is_compiled_with_npu # noqa: F401 from .device import XPUPlace # noqa: F401 @@ -457,6 +458,7 @@ __all__ = [ # noqa 'enable_static', 'scatter_nd', 'set_default_dtype', + 'disable_signal_handler', 'expand_as', 'stack', 'sqrt', diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index 
5e431eaa4e7e6f0153591edef9f602641c4e4f98..cbc963f108fca81c28c421b30ab628988e745523 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -384,6 +384,31 @@ def is_compiled_with_xpu(): return core.is_compiled_with_xpu() +def disable_signal_handler(): + """ + Reset the signal handlers registered by Paddle. + + Paddle installs signal handlers at C++ level to log debug information upon failing. + However, conflicts can happen if another Python module makes use of the same signals. + In that case, one may disable the Paddle signal handlers via this interface. + + Known frameworks that require disabling the signal handlers include: + 1. TVM + 2. ADLIK + + Make sure you call paddle.disable_signal_handler() before using the above-mentioned frameworks. + + Returns: None + + Examples: + .. code-block:: python + + import paddle + paddle.disable_signal_handler() + """ + core.disable_signal_handler() + + def is_compiled_with_cuda(): """ Whether this whl package can be used to run the model on GPU. 
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 8341e9b93e67c7ebdc9b8c5e1a5a0df89d2c086b..e304b29185fbe8b7b7f87d315eeaab749ac387c3 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -103,6 +103,8 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) LIST(REMOVE_ITEM TEST_OPS test_collective_wait) LIST(REMOVE_ITEM TEST_OPS test_memcpy_op) LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer) + LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale) + LIST(REMOVE_ITEM TEST_OPS test_disable_signal_handler) endif() if(WIN32) diff --git a/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py new file mode 100644 index 0000000000000000000000000000000000000000..dbe9dcb7f823d76c6b89bb27dac047fed331f41f --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_disable_signal_handler.py @@ -0,0 +1,48 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+from __future__ import print_function
+
+import unittest
+import numpy as np
+import signal, os, sys
+import paddle
+import subprocess
+
+SignalsToTest = {
+    signal.SIGTERM, signal.SIGBUS, signal.SIGABRT, signal.SIGSEGV,
+    signal.SIGILL, signal.SIGFPE
+}
+
+
+class TestSignOpError(unittest.TestCase):
+    def test_errors(self):
+        """No signal may still reach Paddle's handler after disabling it."""
+        # Each child kills itself with `sig`, so every signal needs its own
+        # run; one try-block around the whole loop only checks the first.
+        for sig in SignalsToTest:
+            code = ("import paddle; import signal,os; "
+                    "paddle.disable_signal_handler(); "
+                    f"os.kill(os.getpid(), {int(sig)})")
+            # sys.executable: the child must use the interpreter under test.
+            proc = subprocess.run([sys.executable, "-c", code],
+                                  stdout=subprocess.PIPE,
+                                  stderr=subprocess.STDOUT)
+            # If paddle signal handler is enabled
+            # One would expect "paddle::framework::SignalHandle" in STDERR
+            if b"paddle::framework::SignalHandle" in proc.stdout:
+                raise Exception("Paddle signal handler not disabled")
+
+
+if __name__ == "__main__":
+    unittest.main()