未验证 提交 0fbb76f6 编写于 作者: W wopeizl 提交者: GitHub

Merge pull request #15204 from wopeizl/debug/support

add the python callstack for debug support test=develop
...@@ -82,6 +82,10 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, ...@@ -82,6 +82,10 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto,
AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.") AddAttr<std::string>(OpNamescopeAttrName(), "Operator name with namesope.")
.SetDefault(""); .SetDefault("");
AddAttr<std::vector<std::string>>(OpCreationCallstackAttrName(),
"Callstack for Op Creatation.")
.SetDefault({});
Validate(); Validate();
} }
......
...@@ -47,6 +47,7 @@ class OpProtoAndCheckerMaker { ...@@ -47,6 +47,7 @@ class OpProtoAndCheckerMaker {
static const char *OpRoleAttrName() { return "op_role"; } static const char *OpRoleAttrName() { return "op_role"; }
static const char *OpRoleVarAttrName() { return "op_role_var"; } static const char *OpRoleVarAttrName() { return "op_role_var"; }
static const char *OpNamescopeAttrName() { return "op_namescope"; } static const char *OpNamescopeAttrName() { return "op_namescope"; }
static const char *OpCreationCallstackAttrName() { return "op_callstack"; }
void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker); void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker);
......
...@@ -16,9 +16,15 @@ limitations under the License. */ ...@@ -16,9 +16,15 @@ limitations under the License. */
#include <glog/logging.h> #include <glog/logging.h>
#include <algorithm> #include <algorithm>
#include <sstream>
#include <string>
#include <vector>
#include "gflags/gflags.h"
#include "glog/logging.h"
#include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/data_transform.h"
#include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/lod_tensor.h" #include "paddle/fluid/framework/lod_tensor.h"
#include "paddle/fluid/framework/op_proto_maker.h"
#include "paddle/fluid/framework/operator.h" #include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/shape_inference.h"
#include "paddle/fluid/framework/transfer_scope_cache.h" #include "paddle/fluid/framework/transfer_scope_cache.h"
...@@ -156,27 +162,55 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames, ...@@ -156,27 +162,55 @@ RuntimeContext::RuntimeContext(const VariableNameMap& innames,
} }
void OperatorBase::Run(const Scope& scope, const platform::Place& place) { void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
VLOG(4) << place << " " << DebugStringEx(&scope); try {
if (platform::is_gpu_place(place)) { VLOG(4) << place << " " << DebugStringEx(&scope);
if (platform::is_gpu_place(place)) {
#ifndef PADDLE_WITH_CUDA #ifndef PADDLE_WITH_CUDA
PADDLE_THROW("Cannot run operator on place %s", place); PADDLE_THROW("Cannot run operator on place %s", place);
#else #else
auto dev_id = boost::get<platform::CUDAPlace>(place).device; auto dev_id = boost::get<platform::CUDAPlace>(place).device;
platform::SetDeviceId(dev_id); platform::SetDeviceId(dev_id);
#endif #endif
} }
// The profile has a process-wide mutex, results in serious performance issue // The profile has a process-wide mutex, results in serious performance
// in concurrency scenerio. Here use an `if` to fix this issue. // issue
// Please not remove the `if`, ask @Superjomn if there are any concern. // in concurrency scenerio. Here use an `if` to fix this issue.
if (platform::IsProfileEnabled()) { // Please not remove the `if`, ask @Superjomn if there are any concern.
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); if (platform::IsProfileEnabled()) {
platform::RecordEvent record_event(Type(), pool.Get(place)); platform::DeviceContextPool& pool =
RunImpl(scope, place); platform::DeviceContextPool::Instance();
} else { platform::RecordEvent record_event(Type(), pool.Get(place));
RunImpl(scope, place); RunImpl(scope, place);
} else {
RunImpl(scope, place);
}
VLOG(3) << place << " " << DebugStringEx(&scope);
} catch (platform::EnforceNotMet exception) {
if (Attrs().count("sub_block") != 0) {
throw exception;
}
auto& callstack = Attr<std::vector<std::string>>(
OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
if (callstack.empty()) {
throw exception;
}
std::ostringstream sout;
sout << "Invoke operator " << Type() << " error.\n";
sout << "Python Callstacks: \n";
for (auto& line : callstack) {
sout << line;
}
sout << "C++ Callstacks: \n";
sout << exception.err_str_;
exception.err_str_ = sout.str();
throw exception;
} catch (...) {
std::rethrow_exception(std::current_exception());
} }
VLOG(3) << place << " " << DebugStringEx(&scope);
} }
bool OperatorBase::HasInputs(const std::string& name) const { bool OperatorBase::HasInputs(const std::string& name) const {
......
...@@ -263,6 +263,10 @@ inline void throw_on_error(T e) { ...@@ -263,6 +263,10 @@ inline void throw_on_error(T e) {
#define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \ #define __THROW_ON_ERROR_ONE_ARG(COND, ARG) \
::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG)); ::paddle::platform::throw_on_error(COND, ::paddle::string::Sprintf(ARG));
#ifdef _WIN32
#define __PADDLE_THROW_ON_ERROR(COND, ...) \
__THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__)
#else // _WIN32
#define __PADDLE_THROW_ON_ERROR(COND, ...) \ #define __PADDLE_THROW_ON_ERROR(COND, ...) \
__PADDLE_THROW_ERROR_I( \ __PADDLE_THROW_ERROR_I( \
__VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ __VA_ARGS__, ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
...@@ -274,6 +278,7 @@ inline void throw_on_error(T e) { ...@@ -274,6 +278,7 @@ inline void throw_on_error(T e) {
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
::paddle::platform::throw_on_error(COND, __VA_ARGS__), \ ::paddle::platform::throw_on_error(COND, __VA_ARGS__), \
__THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__)) __THROW_ON_ERROR_ONE_ARG(COND, __VA_ARGS__))
#endif // _WIN32
#define __PADDLE_UNARY_COMPARE(COND, ...) \ #define __PADDLE_UNARY_COMPARE(COND, ...) \
do { \ do { \
......
...@@ -49,6 +49,9 @@ void BindConstValue(pybind11::module* m) { ...@@ -49,6 +49,9 @@ void BindConstValue(pybind11::module* m) {
op_proto_and_checker_maker.def( op_proto_and_checker_maker.def(
"kOpNameScopeAttrName", "kOpNameScopeAttrName",
framework::OpProtoAndCheckerMaker::OpNamescopeAttrName); framework::OpProtoAndCheckerMaker::OpNamescopeAttrName);
op_proto_and_checker_maker.def(
"kOpCreationCallstackAttrName",
framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName);
} }
} // namespace pybind } // namespace pybind
......
...@@ -19,6 +19,7 @@ from collections import defaultdict ...@@ -19,6 +19,7 @@ from collections import defaultdict
import contextlib import contextlib
import os import os
import re import re
import traceback
import six import six
import numpy as np import numpy as np
...@@ -625,6 +626,11 @@ class Operator(object): ...@@ -625,6 +626,11 @@ class Operator(object):
if type is None: if type is None:
raise ValueError( raise ValueError(
"`type` to initilized an Operator can not be None.") "`type` to initilized an Operator can not be None.")
else:
callstack_var_name = op_maker.kOpCreationCallstackAttrName()
op_attrs[callstack_var_name] = list(
reversed(traceback.format_stack()))[1:]
self.desc.set_type(type) self.desc.set_type(type)
proto = OpProtoHolder.instance().get_op_proto(type) proto = OpProtoHolder.instance().get_op_proto(type)
......
...@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase): ...@@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase):
set(mul_op.attr_names), set(mul_op.attr_names),
set([ set([
"x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var", "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var",
"op_namescope" "op_namescope", "op_callstack"
])) ]))
self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) self.assertEqual(mul_op.has_attr("x_num_col_dims"), True)
self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册