未验证 提交 dd533dd3 编写于 作者: Z Zhanlue Yang 提交者: GitHub

Add function to disable paddle signal handler (#34577)

* Add function to disable paddle signal handler

Paddle uses google::InstallFaultSignalHandler to handle selected system signals,
mainly for debugging and bug-reporting purposes.

However, this can conflict with other Python packages that capture the same signals.
Such packages include TVM, among others.

To resolve this issue, we provide a function to disable the signal handler.

* Remove signal test from WIN32 platform

* Remove redundant return from disable_signal_handler() function

* Add detailed messages to en_doc
上级 2e9a31eb
...@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS, ...@@ -11,6 +11,7 @@ distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and See the License for the specific language governing permissions and
limitations under the License. */ limitations under the License. */
#include <csignal>
#include <fstream> #include <fstream>
#include <string> #include <string>
...@@ -245,15 +246,16 @@ void InitDevices(const std::vector<int> devices) { ...@@ -245,15 +246,16 @@ void InitDevices(const std::vector<int> devices) {
// Table of signals with their symbolic names and descriptions.
// DisableSignalHandler() iterates this table and uses signal_number to reset
// each entry's disposition.
// Descriptions are quoted from
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html
const struct {
  int signal_number;         // numeric signal value, e.g. SIGSEGV
  const char *name;          // symbolic name of the signal, e.g. "SIGSEGV"
  const char *error_string;  // human-readable description of the signal
} SignalErrorStrings[] = {
    {SIGSEGV, "SIGSEGV", "Segmentation fault"},
    {SIGILL, "SIGILL", "Illegal instruction"},
    {SIGFPE, "SIGFPE", "Erroneous arithmetic operation"},
    {SIGABRT, "SIGABRT", "Process abort signal"},
    {SIGBUS, "SIGBUS", "Access to an undefined portion of a memory object"},
    {SIGTERM, "SIGTERM", "Termination signal"},
};
bool StartsWith(const char *str, const char *prefix) { bool StartsWith(const char *str, const char *prefix) {
...@@ -319,7 +321,21 @@ void SignalHandle(const char *data, int size) { ...@@ -319,7 +321,21 @@ void SignalHandle(const char *data, int size) {
// will Kill program by the default signal handler // will Kill program by the default signal handler
} }
} }
#endif // _WIN32
// Restores the default disposition (SIG_DFL) for every signal listed in
// SignalErrorStrings. On Windows the body is compiled out, leaving a no-op.
void DisableSignalHandler() {
#ifndef _WIN32
  // The same action applies to every signal, so it is built once up front:
  // zero-initialized, empty signal mask, default handler.
  struct sigaction default_action = {};
  sigemptyset(&default_action.sa_mask);
  default_action.sa_handler = SIG_DFL;

  for (const auto &entry : SignalErrorStrings) {
    // The previous action is not needed, hence the null third argument.
    sigaction(entry.signal_number, &default_action, nullptr);
  }
#endif
}
#ifdef WITH_WIN_DUMP_DBG #ifdef WITH_WIN_DUMP_DBG
typedef BOOL(WINAPI *MINIDUMP_WRITE_DUMP)( typedef BOOL(WINAPI *MINIDUMP_WRITE_DUMP)(
......
...@@ -61,5 +61,7 @@ class SignalMessageDumper { ...@@ -61,5 +61,7 @@ class SignalMessageDumper {
void SignalHandle(const char* data, int size); void SignalHandle(const char* data, int size);
#endif #endif
void DisableSignalHandler();
} // namespace framework } // namespace framework
} // namespace paddle } // namespace paddle
...@@ -507,6 +507,8 @@ PYBIND11_MODULE(core_noavx, m) { ...@@ -507,6 +507,8 @@ PYBIND11_MODULE(core_noavx, m) {
m.def("set_num_threads", &platform::SetNumThreads); m.def("set_num_threads", &platform::SetNumThreads);
m.def("disable_signal_handler", &DisableSignalHandler);
#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP)
m.def("cudnn_version", &platform::CudnnVersion); m.def("cudnn_version", &platform::CudnnVersion);
#endif #endif
......
...@@ -272,6 +272,7 @@ from .device import set_device # noqa: F401 ...@@ -272,6 +272,7 @@ from .device import set_device # noqa: F401
from .device import get_device # noqa: F401 from .device import get_device # noqa: F401
from .fluid.framework import is_compiled_with_cuda # noqa: F401 from .fluid.framework import is_compiled_with_cuda # noqa: F401
from .fluid.framework import is_compiled_with_rocm # noqa: F401 from .fluid.framework import is_compiled_with_rocm # noqa: F401
from .fluid.framework import disable_signal_handler # noqa: F401
from .device import is_compiled_with_xpu # noqa: F401 from .device import is_compiled_with_xpu # noqa: F401
from .device import is_compiled_with_npu # noqa: F401 from .device import is_compiled_with_npu # noqa: F401
from .device import XPUPlace # noqa: F401 from .device import XPUPlace # noqa: F401
...@@ -485,6 +486,7 @@ __all__ = [ # noqa ...@@ -485,6 +486,7 @@ __all__ = [ # noqa
'enable_static', 'enable_static',
'scatter_nd', 'scatter_nd',
'set_default_dtype', 'set_default_dtype',
'disable_signal_handler',
'expand_as', 'expand_as',
'stack', 'stack',
'sqrt', 'sqrt',
......
...@@ -395,6 +395,31 @@ def is_compiled_with_xpu(): ...@@ -395,6 +395,31 @@ def is_compiled_with_xpu():
return core.is_compiled_with_xpu() return core.is_compiled_with_xpu()
def disable_signal_handler():
    """
    Reset signal handler registered by Paddle.

    Paddle installs signal handlers at C++ level to log debug information upon failing.
    However, conflicts can happen if another python module is making use of such signal.
    Such being the case, one may disable paddle signal handler via this interface.

    Known frameworks that require disabling signal handler include:
    1. TVM
    2. ADLIK

    Make sure you called paddle.disable_signal_handler() before using above mentioned frameworks.

    Returns: None

    Examples:
        .. code-block:: python

            import paddle
            paddle.disable_signal_handler()
    """
    core.disable_signal_handler()
def is_compiled_with_cuda(): def is_compiled_with_cuda():
""" """
Whether this whl package can be used to run the model on GPU. Whether this whl package can be used to run the model on GPU.
......
...@@ -113,6 +113,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32) ...@@ -113,6 +113,7 @@ if(((NOT WITH_ROCM) AND (NOT WITH_GPU)) OR WIN32)
LIST(REMOVE_ITEM TEST_OPS test_memcpy_op) LIST(REMOVE_ITEM TEST_OPS test_memcpy_op)
LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer) LIST(REMOVE_ITEM TEST_OPS test_raw_program_optimizer)
LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale) LIST(REMOVE_ITEM TEST_OPS test_fleet_gradient_scale)
LIST(REMOVE_ITEM TEST_OPS test_disable_signal_handler)
endif() endif()
if(WIN32) if(WIN32)
......
# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import signal, os
import paddle
import subprocess
# Signals delivered to the child interpreter by the test below. They are the
# same six signals listed in the C++ SignalErrorStrings table.
SignalsToTest = {
signal.SIGTERM, signal.SIGBUS, signal.SIGABRT, signal.SIGSEGV,
signal.SIGILL, signal.SIGFPE
}
class TestSignOpError(unittest.TestCase):
    """Checks that paddle.disable_signal_handler() removes Paddle's handler."""

    def test_errors(self):
        """Send each signal in SignalsToTest to a fresh interpreter.

        Each child imports paddle, calls paddle.disable_signal_handler() and
        then signals itself. If Paddle's handler were still installed, the
        text "paddle::framework::SignalHandle" would be expected in the
        child's combined stdout/stderr.
        """
        # Imported locally so the block does not rely on the file header.
        import sys

        for sig in SignalsToTest:
            # Each signal is checked on its own: the child normally dies from
            # the signal, which must not stop the remaining signals from
            # being exercised.
            with self.subTest(signal=sig):
                child_code = (
                    "import paddle; import signal,os; "
                    "paddle.disable_signal_handler(); "
                    f"os.kill(os.getpid(), {int(sig)})")
                # Use the interpreter running this test rather than whatever
                # "python" resolves to on PATH. A non-zero exit status is
                # expected, so it is deliberately not treated as an error.
                result = subprocess.run(
                    [sys.executable, "-c", child_code],
                    stdout=subprocess.PIPE,
                    stderr=subprocess.STDOUT)
                child_output = result.stdout.decode("utf-8", errors="replace")
                self.assertNotIn("paddle::framework::SignalHandle",
                                 child_output,
                                 "Paddle signal handler not disabled")
# Run the test case when this file is executed directly.
if __name__ == "__main__":
    unittest.main()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册