#ifndef _WIN32
// Table mapping a signal name, as it may appear in a failure-signal message,
// to a human-readable description. Descriptions are quoted from
// https://pubs.opengroup.org/onlinepubs/9699919799/basedefs/signal.h.html
const struct {
  const char *name;
  const char *error_string;
} SignalErrorStrings[] = {
    {"SIGSEGV", "Segmentation fault"},
    {"SIGILL", "Illegal instruction"},
    {"SIGFPE", "Erroneous arithmetic operation"},
    {"SIGABRT", "Process abort signal"},
    {"SIGBUS", "Access to an undefined portion of a memory object"},
    {"SIGTERM", "Termination signal"},
};

// Returns true when the NUL-terminated string `str` begins with the
// NUL-terminated string `prefix`. An empty prefix matches every string.
// A null `str` or `prefix` never matches.
bool StartsWith(const char *str, const char *prefix) {
  if (str == nullptr || prefix == nullptr) {
    return false;
  }
  // strncmp stops at the first NUL of either argument, so a `str` shorter
  // than `prefix` is rejected without reading past its terminator.
  return strncmp(str, prefix, strlen(prefix)) == 0;
}

// Returns the description of the first entry of SignalErrorStrings whose
// signal name occurs anywhere in `str`, or "Unknown signal" when no entry
// matches. The returned pointer refers to a string literal.
const char *ParseSignalErrorString(const std::string &str) {
  for (const auto &entry : SignalErrorStrings) {
    if (str.find(entry.name) != std::string::npos) {
      return entry.error_string;
    }
  }
  return "Unknown signal";
}
#endif

// Handle SIGSEGV, SIGILL, SIGFPE, SIGABRT, SIGBUS, and SIGTERM.
+std::ostringstream signal_msg_dumper; void SignalHandle(const char *data, int size) { - auto file_path = string::Sprintf("/tmp/paddle.%d.dump_info", ::getpid()); try { - // The signal is coming line by line but we print general guide just once - std::call_once(glog_warning_once_flag, [&]() { - LOG(WARNING) << "Warning: PaddlePaddle catches a failure signal, it may " - "not work properly\n"; - LOG(WARNING) << "You could check whether you killed PaddlePaddle " - "thread/process accidentally or report the case to " - "PaddlePaddle\n"; - LOG(WARNING) << "The detail failure signal is:\n\n"; - }); - - LOG(WARNING) << std::string(data, size); - std::ofstream dump_info; - dump_info.open(file_path, std::ios::app); - dump_info << std::string(data, size); - dump_info.close(); + // NOTE1: The glog FailureSignalHandler dumped messages + // are deal with line by line + // NOTE2: we only deal with the time info ane signal info, + // the stack trace will generated by paddle self + if (StartsWith(data, "*** Aborted at")) { + signal_msg_dumper << " [TimeInfo: " << std::string(data, size - 1) + << "]\n"; + } else if (StartsWith(data, "***")) { + std::string signal_info(data, size - 1); + std::string useless_substr("; stack trace:"); + size_t start_pos = signal_info.rfind(useless_substr); + signal_info.replace(start_pos, useless_substr.length(), ""); + signal_msg_dumper << " [SignalInfo: " << signal_info << "]\n"; + // NOTE3: Here does not throw an exception, + // otherwise it will casue "terminate called recursively" + auto exp = platform::EnforceNotMet( + platform::errors::Fatal( + "A serious error (%s) is detected by the operating system.", + ParseSignalErrorString(signal_info)), + __FILE__, __LINE__); + std::cout << exp.what() << signal_msg_dumper.str() << std::endl; + } } catch (...) { + // Since the program has already triggered a system error, + // no further processing is required here, glog FailureSignalHandler + // will Kill program by the default signal handler } } #endif