未验证 提交 55baeced 编写于 作者: C chengduo 提交者: GitHub

Enhance execution error info (#18482)

* enhance execution error info
test=develop
上级 047bba85
......@@ -13,6 +13,8 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include <errno.h>
#include <string.h> // for strdup
#ifndef _WIN32
#include <fcntl.h>   // for open
#include <unistd.h>  // for getpid, write, close
#endif
#include <algorithm>
#include <fstream>
#include <iostream>
#include <memory>
#include <set>
#include <stdexcept>
......@@ -30,7 +32,6 @@ limitations under the License. */
#include "paddle/fluid/platform/init.h"
#include "paddle/fluid/platform/place.h"
#include "paddle/fluid/string/piece.h"
#if defined(PADDLE_WITH_DGC)
#include "dgc/dgc.h"
#endif
......@@ -202,10 +203,27 @@ void InitDevices(bool init_p2p, const std::vector<int> devices) {
#endif
}
#ifndef _WIN32
// Failure writer handed to google::InstallFailureWriter: appends the `size`
// bytes starting at `data` to /tmp/paddle.<pid>.dump_info.
//
// glog calls the failure writer from inside its failure signal handler, so
// this function deliberately uses only stack storage and raw POSIX calls
// (getpid / open / write / close). Heap allocation or iostreams here could
// deadlock or crash again if the fault happened inside the allocator.
//
// Best effort: every error is ignored, because the process is already going
// down and there is no caller to report to.
static void SignalHandle(const char *data, int size) {
  if (data == nullptr || size <= 0) return;

  static const char kPrefix[] = "/tmp/paddle.";
  static const char kSuffix[] = ".dump_info";

  // Decimal digits of the pid, least-significant digit first.
  char digits[24];
  int num_digits = 0;
  unsigned long long pid = static_cast<unsigned long long>(::getpid());
  do {
    digits[num_digits++] = static_cast<char>('0' + pid % 10);
    pid /= 10;
  } while (pid != 0);

  // Assemble "<prefix><pid><suffix>" without any formatting library.
  char file_path[sizeof(kPrefix) + sizeof(digits) + sizeof(kSuffix)];
  char *cursor = file_path;
  for (const char *p = kPrefix; *p != '\0'; ++p) *cursor++ = *p;
  while (num_digits > 0) *cursor++ = digits[--num_digits];
  for (const char *p = kSuffix; *p != '\0'; ++p) *cursor++ = *p;
  *cursor = '\0';

  // Same semantics as std::ios::app: create if missing, always append.
  const int fd = ::open(file_path, O_WRONLY | O_CREAT | O_APPEND, 0666);
  if (fd < 0) return;

  // write() may be interrupted or write fewer bytes than requested.
  size_t remaining = static_cast<size_t>(size);
  while (remaining > 0) {
    const ssize_t written = ::write(fd, data, remaining);
    if (written < 0) {
      if (errno == EINTR) continue;
      break;
    }
    data += written;
    remaining -= static_cast<size_t>(written);
  }
  ::close(fd);
}
#endif
// Initializes glog under the program name `prog_name`. On non-Windows builds
// it also installs glog's failure signal handler and routes the failure dump
// through SignalHandle.
void InitGLOG(const std::string &prog_name) {
  // glog does not keep its own copy of ARGV[0], so give it a strdup'ed string
  // that stays valid independently of `prog_name`. This function does not
  // free it.
  char *persistent_name = strdup(prog_name.c_str());
  google::InitGoogleLogging(persistent_name);

#ifndef _WIN32
  google::InstallFailureSignalHandler();
  google::InstallFailureWriter(&SignalHandle);
#endif
}
#if defined(PADDLE_WITH_DGC)
......
......@@ -17,6 +17,7 @@ from __future__ import print_function
import logging
import os
import multiprocessing
import sys
import numpy as np
from .wrapped_decorator import signature_safe_contextmanager
import six
......@@ -627,6 +628,23 @@ class Executor(object):
list(numpy.array): fetch result according to fetch_list.
"""
try:
return self._run_impl(
program=program,
feed=feed,
fetch_list=fetch_list,
feed_var_name=feed_var_name,
fetch_var_name=fetch_var_name,
scope=scope,
return_numpy=return_numpy,
use_program_cache=use_program_cache)
except Exception as e:
if not isinstance(e, core.EOFException):
print("An exception was thrown!\n {}".format(str(e)))
raise e
def _run_impl(self, program, feed, fetch_list, feed_var_name,
fetch_var_name, scope, return_numpy, use_program_cache):
if self._closed:
raise RuntimeError("Attempted to use a closed Executor")
......@@ -639,7 +657,7 @@ class Executor(object):
compiled = isinstance(program, compiler.CompiledProgram)
# For backward compatibility, run directly.
if not compiled:
return self._run(
return self._run_program(
program,
self._default_executor,
feed=feed,
......@@ -672,7 +690,7 @@ class Executor(object):
# TODO(panyx0718): executor should be able to run graph.
assert program._program, "CompiledProgram is compiled from graph, can only run with_data_parallel."
# use_program_cache is not valid with CompiledProgram
return self._run(
return self._run_program(
program._program,
self._default_executor,
feed=feed,
......@@ -683,7 +701,7 @@ class Executor(object):
return_numpy=return_numpy,
use_program_cache=False)
def _run(self, program, exe, feed, fetch_list, feed_var_name,
def _run_program(self, program, exe, feed, fetch_list, feed_var_name,
fetch_var_name, scope, return_numpy, use_program_cache):
if feed is None:
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册