Unverified commit d2fa26f6 authored by niuliling123, committed by GitHub

Print Python traceback when debug mode = CHECK_NAN_INF_AND_ABORT and backward has nan/inf (#52808)
Parent e5021ee9
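
For context: the check this change extends is switched on by the FLAGS_check_nan_inf / FLAGS_check_nan_inf_level flags that the new test below sets. A minimal sketch, not part of this commit and assuming a working Paddle install, of tripping the check from a dynamic-graph backward pass with the same operands the test uses:

# Illustrative sketch only (not part of this commit); uses the same flags
# and operands as the test added below.
import paddle

paddle.set_flags({"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 0})

x = paddle.to_tensor([1.0, 0.0, 3.0], stop_gradient=False)
y = paddle.to_tensor([0.0, 0.0, 0.0])
z = paddle.pow(x, y)   # forward stays finite: 0 ** 0 == 1
z.sum().backward()     # dz/dx = y * x ** (y - 1) hits 0 * inf = nan at x == 0,
                       # so the nan/inf check stops the run here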
@@ -773,10 +773,6 @@ void BuildOpFuncList(const platform::Place& place,
}
// for debug nan/inf
if (FLAGS_check_nan_inf) {
  VLOG(4) << "Check nan/inf";
  framework::details::CheckOpHasNanOrInf(*op, *runtime_scope, place);
}
vec_func_list->emplace_back(op_func_node);
@@ -848,6 +844,35 @@ void BuildOpFuncList(const platform::Place& place,
  std::rethrow_exception(std::current_exception());
}
if (FLAGS_check_nan_inf) {
  VLOG(4) << "Check nan/inf";
  try {
    framework::details::CheckOpHasNanOrInf(*op, *local_scope, place);
  } catch (...) {
    const std::vector<std::string>* callstack = nullptr;
    auto attrs = op->Attrs();
    auto iter =
        attrs.find(OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
    if (iter != attrs.end()) {
      callstack = &PADDLE_GET_CONST(std::vector<std::string>, iter->second);
      if (callstack->empty()) callstack = nullptr;
    }
    std::ostringstream sout;
    if (callstack) {
      if (FLAGS_call_stack_level > 1) {
        sout << "\n\n Compile Traceback (most recent call last):";
      } else {
        sout << "In user code:\n";
      }
      for (auto& line : *callstack) {
        sout << "\n " << line;
      }
    }
    std::cout << sout.str() << std::endl;
    std::rethrow_exception(std::current_exception());
  }
}
VLOG(4) << "End run " << place << " "
        << op_func_node.operator_base_->DebugStringEx(local_scope);
......
@@ -983,10 +983,34 @@ void InterpreterCore::RunOperator(const Instruction& instr_node) {
// for debug nan/inf
if (op_with_kernel != nullptr && FLAGS_check_nan_inf) {
  VLOG(4) << "Check nan/inf";
  try {
    framework::details::CheckOpHasNanOrInf(
        *op,
        *local_scope,
        place); // TODO(xiongkun03) change it to inner scope.
  } catch (...) {
    const std::vector<std::string>* callstack = nullptr;
    auto attrs = op->Attrs();
    auto iter =
        attrs.find(OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
    if (iter != attrs.end()) {
      callstack = &PADDLE_GET_CONST(std::vector<std::string>, iter->second);
      if (callstack->empty()) callstack = nullptr;
    }
    std::ostringstream sout;
    if (callstack) {
      if (FLAGS_call_stack_level > 1) {
        sout << "\n\n Compile Traceback (most recent call last):";
      } else {
        sout << "In user code:\n";
      }
      for (auto& line : *callstack) {
        sout << "\n " << line;
      }
    }
    std::cout << sout.str() << std::endl;
    std::rethrow_exception(std::current_exception());
  }
}
}
......
@@ -2044,7 +2044,31 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
}
if (FLAGS_check_nan_inf) {
  try {
    framework::details::CheckOpHasNanOrInf(*this, exec_scope, place);
  } catch (...) {
    const std::vector<std::string>* callstack = nullptr;
    auto attrs = Attrs();
    auto iter =
        attrs.find(OpProtoAndCheckerMaker::OpCreationCallstackAttrName());
    if (iter != attrs.end()) {
      callstack = &PADDLE_GET_CONST(std::vector<std::string>, iter->second);
      if (callstack->empty()) callstack = nullptr;
    }
    std::ostringstream sout;
    if (callstack) {
      if (FLAGS_call_stack_level > 1) {
        sout << "\n\n Compile Traceback (most recent call last):";
      } else {
        sout << "In user code:\n";
      }
      for (auto& line : *callstack) {
        sout << "\n " << line;
      }
    }
    std::cout << sout.str() << std::endl;
    std::rethrow_exception(std::current_exception());
  }
}
// To solve issue #15032, have a discussion with @Luotao for cpu inference,
......
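
All three execution paths patched above share the same recovery logic: when CheckOpHasNanOrInf throws, the op's OpCreationCallstackAttrName attribute (the Python lines recorded when the op was appended to the program) is looked up, printed under "Compile Traceback (most recent call last):" when FLAGS_call_stack_level > 1 and under "In user code:" otherwise, and the original exception is rethrown. A sketch, not part of this commit, of asking for the longer header; it assumes FLAGS_call_stack_level can be set through paddle.set_flags like the nan/inf flags (if not, it can be exported as an environment variable before the process starts):

# Illustrative sketch only (not part of this commit). Assumes
# FLAGS_call_stack_level is accepted by paddle.set_flags; otherwise set it
# as an environment variable.
import paddle

paddle.set_flags(
    {
        "FLAGS_check_nan_inf": 1,        # enable the nan/inf check
        "FLAGS_check_nan_inf_level": 0,  # abort on the first nan/inf found
        "FLAGS_call_stack_level": 2,     # > 1 picks the "Compile Traceback" header
    }
)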
# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle


# Define the static graph model
def static_model(x, y):
    z = paddle.pow(x, y)
    return z


def main():
    # Enable static graph mode
    paddle.enable_static()
    paddle.set_flags({"FLAGS_check_nan_inf": 1, "FLAGS_check_nan_inf_level": 0})
    # Define the input variables and the model output variable
    x_static = paddle.static.data(name='x_static', shape=[3], dtype='float32')
    y_static = paddle.static.data(name='y_static', shape=[3], dtype='float32')
    x_static.stop_gradient = False
    z_static = static_model(x_static, y_static)
    # Compute the gradients
    grads_static = paddle.static.gradients(z_static, x_static, y_static)
    # Create the Executor object
    exe_static = paddle.static.Executor(paddle.CPUPlace())
    # Compile the computation graph (run the startup program)
    exe_static.run(paddle.static.default_startup_program())
    # Run the forward computation and backpropagation
    grads_val_static = exe_static.run(
        paddle.static.default_main_program(),
        feed={'x_static': [1, 0, 3], 'y_static': [0, 0, 0]},
        fetch_list=[grads_static],
    )


if __name__ == "__main__":
    main()
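
The operands are chosen so the forward pass stays finite while the backward pass does not: for z = x ** y the gradient with respect to x is y * x ** (y - 1), which at x = 0, y = 0 evaluates to 0 * 0 ** (-1) = 0 * inf = nan. A small NumPy check of that arithmetic, illustrative only and not part of the commit:

# Illustrative check of why the backward pass produces nan (not part of the commit).
import numpy as np

with np.errstate(divide="ignore", invalid="ignore"):
    x = np.array([1.0, 0.0, 3.0])
    y = np.array([0.0, 0.0, 0.0])
    grad_x = y * x ** (y - 1.0)  # d(x ** y)/dx
print(grad_x)  # grad_x is [0., nan, 0.]: 0 * inf at x == 0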
@@ -148,8 +148,8 @@ class TestNanInfCheckResult(unittest.TestCase):
        if paddle.fluid.core.is_compiled_with_cuda():
            _check_num_nan_inf(use_cuda=True)

    def test_check_stack(self):
        self._python_interp += " check_nan_inf_backward_stack.py"
    def check_stack(self, file_name):
        self._python_interp += file_name
        cmd = self._python_interp
        proc = subprocess.Popen(
            cmd.split(" "),
@@ -167,6 +167,12 @@ class TestNanInfCheckResult(unittest.TestCase):
        # in python3, type(out+err) is 'bytes', need use encode
        assert (out + err).find(b' z = paddle.pow(x, y)') != -1

    def test_check_stack(self):
        self.check_stack(" check_nan_inf_backward_stack.py")

    def test_statck_check_stack(self):
        self.check_stack(" check_nan_inf_backward_static_stack.py")

    def check_nan_inf_level(self, use_cuda, dtype):
        shape = [8, 8]
        x_np, y_np = self.generate_inputs(shape, dtype)
......