From cda7842e262e857905f74415040a46f09319f40e Mon Sep 17 00:00:00 2001 From: Yu Yang Date: Tue, 4 Sep 2018 04:12:22 +0000 Subject: [PATCH] Revert "Revert "Add Python Callstacks when Op::Run error (#12759)"" This reverts commit 1f270275a6d3b9c2a279609aa781e1cd30018523. --- paddle/fluid/framework/op_proto_maker.cc | 4 +- paddle/fluid/framework/op_proto_maker.h | 1 + paddle/fluid/framework/operator.cc | 61 ++++++++++++++----- paddle/fluid/operators/top_k_op.cc | 2 + paddle/fluid/pybind/const_value.cc | 3 + python/paddle/fluid/framework.py | 5 ++ .../tests/unittests/test_operator_desc.py | 2 +- 7 files changed, 61 insertions(+), 17 deletions(-) diff --git a/paddle/fluid/framework/op_proto_maker.cc b/paddle/fluid/framework/op_proto_maker.cc index 4fa047bf3ee..f6a9a71095c 100644 --- a/paddle/fluid/framework/op_proto_maker.cc +++ b/paddle/fluid/framework/op_proto_maker.cc @@ -131,7 +131,9 @@ void OpProtoAndCheckerMaker::operator()(proto::OpProto* proto, AddAttr(OpNamescopeAttrName(), "Operator name with namesope.") .SetDefault(""); - + AddAttr>(OpCreationCallstackAttrName(), + "Callstack for Op Creatation.") + .SetDefault({}); Validate(); } diff --git a/paddle/fluid/framework/op_proto_maker.h b/paddle/fluid/framework/op_proto_maker.h index 18827385ad6..c8386263bdb 100644 --- a/paddle/fluid/framework/op_proto_maker.h +++ b/paddle/fluid/framework/op_proto_maker.h @@ -40,6 +40,7 @@ class OpProtoAndCheckerMaker { static const char *OpRoleAttrName() { return "op_role"; } static const char *OpRoleVarAttrName() { return "op_role_var"; } static const char *OpNamescopeAttrName() { return "op_namescope"; } + static const char *OpCreationCallstackAttrName() { return "op_callstack"; } void operator()(proto::OpProto *proto, OpAttrChecker *attr_checker); diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc index d58d6e4f3e6..ee119aa3629 100644 --- a/paddle/fluid/framework/operator.cc +++ b/paddle/fluid/framework/operator.cc @@ -11,15 +11,17 @@ distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include -#include - +#include "paddle/fluid/framework/operator.h" #include - +#include +#include +#include +#include "gflags/gflags.h" +#include "glog/logging.h" #include "paddle/fluid/framework/data_transform.h" #include "paddle/fluid/framework/executor.h" #include "paddle/fluid/framework/lod_tensor.h" -#include "paddle/fluid/framework/operator.h" +#include "paddle/fluid/framework/op_proto_maker.h" #include "paddle/fluid/framework/shape_inference.h" #include "paddle/fluid/framework/var_type.h" #include "paddle/fluid/platform/profiler.h" @@ -137,19 +139,48 @@ static LoD GetLoD(const Scope& scope, const std::string& name) { } void OperatorBase::Run(const Scope& scope, const platform::Place& place) { - VLOG(4) << place << " " << DebugStringEx(&scope); - if (platform::is_gpu_place(place)) { + try { + if (VLOG_IS_ON(4)) { + VLOG(4) << place << " " << DebugStringEx(&scope); + } + if (platform::is_gpu_place(place)) { #ifndef PADDLE_WITH_CUDA - PADDLE_THROW("Cannot run operator on place %s", place); + PADDLE_THROW("Cannot run operator on place %s", place); #else - auto dev_id = boost::get(place).device; - platform::SetDeviceId(dev_id); + auto dev_id = boost::get(place).device; + platform::SetDeviceId(dev_id); #endif + } + platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); + platform::RecordEvent record_event(Type(), pool.Get(place)); + RunImpl(scope, place); + if (VLOG_IS_ON(3)) { + VLOG(3) << place << " " << DebugStringEx(&scope); + } + } catch (platform::EnforceNotMet exception) { + if (Attrs().count("sub_block") != 0) { + throw exception; + } + + auto& callstack = Attr>( + OpProtoAndCheckerMaker::OpCreationCallstackAttrName()); + + if (callstack.empty()) { + throw exception; + } + std::ostringstream sout; + sout << "Invoke operator " << Type() << " error.\n"; + sout << "Python Callstacks: \n"; + for (auto& line : callstack) { + sout << line; + } + sout << "C++ Callstacks: \n"; + sout << exception.err_str_; + exception.err_str_ = sout.str(); + throw exception; + } catch (...) { + std::rethrow_exception(std::current_exception()); } - platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance(); - platform::RecordEvent record_event(Type(), pool.Get(place)); - RunImpl(scope, place); - VLOG(3) << place << " " << DebugStringEx(&scope); } bool OperatorBase::HasInputs(const std::string& name) const { @@ -177,7 +208,7 @@ const std::vector& OperatorBase::Inputs( } bool OperatorBase::HasOutputs(const std::string& name) const { - if (outputs_.find(name) != outputs_.end()) { + if (outputs_.end() != outputs_.find(name)) { return true; } else { return false; diff --git a/paddle/fluid/operators/top_k_op.cc b/paddle/fluid/operators/top_k_op.cc index 4a8ac441cfa..92a0697e27b 100644 --- a/paddle/fluid/operators/top_k_op.cc +++ b/paddle/fluid/operators/top_k_op.cc @@ -30,6 +30,8 @@ class TopkOp : public framework::OperatorWithKernel { "Output(Indices) of TopkOp should not be null."); auto input_dims = ctx->GetInputDim("X"); + PADDLE_ENFORCE_EQ(input_dims.size(), 2, + "Rank of TopK op's input must be 2."); const int k = static_cast(ctx->Attrs().Get("k")); PADDLE_ENFORCE_GE(k, 1, "k must >= 1"); diff --git a/paddle/fluid/pybind/const_value.cc b/paddle/fluid/pybind/const_value.cc index f577068d1f3..b4af7ed8264 100644 --- a/paddle/fluid/pybind/const_value.cc +++ b/paddle/fluid/pybind/const_value.cc @@ -46,6 +46,9 @@ void BindConstValue(pybind11::module* m) { op_proto_and_checker_maker.def( "kOpNameScopeAttrName", framework::OpProtoAndCheckerMaker::OpNamescopeAttrName); + op_proto_and_checker_maker.def( + "kOpCreationCallstackAttrName", + framework::OpProtoAndCheckerMaker::OpCreationCallstackAttrName); } } // namespace pybind diff --git a/python/paddle/fluid/framework.py b/python/paddle/fluid/framework.py index b0e0d27ff7a..633d2334cf7 100644 --- a/python/paddle/fluid/framework.py +++ b/python/paddle/fluid/framework.py @@ -18,6 +18,7 @@ import collections import contextlib import re import six +import traceback import numpy as np @@ -572,6 +573,10 @@ class Operator(object): if role_var_name in op_attrs and len(op_attrs[role_var_name]) == 0: del op_attrs[role_var_name] + callstack_var_name = op_maker.kOpCreationCallstackAttrName() + op_attrs[callstack_var_name] = list( + reversed(traceback.format_stack()))[1:] + if len(self.desc.type()) != 0: return if type is None: diff --git a/python/paddle/fluid/tests/unittests/test_operator_desc.py b/python/paddle/fluid/tests/unittests/test_operator_desc.py index cac132e6e08..e0c9b28b3aa 100644 --- a/python/paddle/fluid/tests/unittests/test_operator_desc.py +++ b/python/paddle/fluid/tests/unittests/test_operator_desc.py @@ -69,7 +69,7 @@ class TestOperator(unittest.TestCase): set(mul_op.attr_names), set([ "x_num_col_dims", "y_num_col_dims", "op_role", "op_role_var", - "op_namescope" + "op_namescope", "op_callstack" ])) self.assertEqual(mul_op.has_attr("x_num_col_dims"), True) self.assertEqual(mul_op.attr_type("x_num_col_dims"), core.AttrType.INT) -- GitLab