Unverified · Commit 513641e1 authored by tianshuo78520a, committed by GitHub

Delete fast_check_nan_inf (#31788)

* Delete fast_check_nan_inf

* Delete run_fast_nan_inf_debug
Parent 9d04ef73
......@@ -47,9 +47,6 @@ DECLARE_bool(benchmark);
DECLARE_bool(check_nan_inf);
DECLARE_bool(enable_unused_var_check);
DEFINE_int32(inner_op_parallelism, 0, "number of threads for inner op");
DEFINE_bool(fast_check_nan_inf, false,
            "Fast checking NAN/INF after each operation. It will be a little"
            "bit slow, much faster than check_nan_inf");
namespace paddle {
namespace framework {
......@@ -1173,25 +1170,6 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
#endif
}
  if (FLAGS_fast_check_nan_inf) {
    for (auto& vname : OutputVars(true)) {
      // only check inserted vars,
      // please see executor.py for details of fast_check_nan_inf
      if (vname.rfind("debug_var") == 0) {
        VLOG(3) << "debugging nan/inf in var " << vname;
        auto* var = exec_scope.FindVar(vname);
        if (var == nullptr) continue;
        if (var->IsType<framework::LoDTensor>()) {
          CheckTensorNANOrInf(type_, vname, var->Get<framework::LoDTensor>());
        } else if (var->IsType<framework::SelectedRows>()) {
          CheckTensorNANOrInf(type_, vname,
                              var->Get<framework::SelectedRows>().value());
        }
      }
    }
  }

  if (FLAGS_check_nan_inf) {
    framework::details::CheckOpHasNanOrInf(*this, exec_scope, place);
  }
......
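With the fast path removed, the retained `FLAGS_check_nan_inf` branch above is the remaining built-in way to detect NaN/Inf in operator outputs. Below is a minimal sketch of enabling it from Python; it assumes the flag is picked up from the `FLAGS_check_nan_inf` environment variable before `paddle.fluid` is imported (the flag stays listed in `read_env_flags`, see the next hunk), and the exact flag-setting API may differ between Paddle versions.

```python
# Minimal sketch: enable the full (non-fast) NaN/Inf check that this commit keeps.
# Assumption: FLAGS_check_nan_inf is read from the environment at import time
# (it is listed in read_env_flags); flag-setting APIs may differ by version.
import os

os.environ["FLAGS_check_nan_inf"] = "1"  # must be set before importing paddle

import paddle.fluid as fluid  # the flag is consumed during fluid's bootstrap

# Build and run the program as usual; if any op output contains NaN/Inf,
# the C++ side (framework::details::CheckOpHasNanOrInf) raises an error.
```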
......@@ -175,7 +175,6 @@ def __bootstrap__():
    sysstr = platform.system()
    read_env_flags = [
        'check_nan_inf',
        'fast_check_nan_inf',
        'benchmark',
        'eager_delete_scope',
        'fraction_of_cpu_memory_to_use',
......
......@@ -280,88 +280,3 @@ def draw_block_graphviz(block, highlights=None, path="./temp.dot"):
            add_op_link_var(opn, var, True)

    graph(path, show=False)
def prepare_fast_nan_inf_debug(_program):
    """
    Given a program to run, insert a (reduce) sum op for every var in that program.
    Instead of checking all vars originally defined in the program,
    only those inserted ops will be checked in the c++ end, to detect if it contains NAN or INF.
    Therefore, the speed of nan/inf checking could be improved.

    Please set ``FLAGS_fast_check_nan_inf`` to enable the fast nan/inf feature.
    """
    helper = LayerHelper('reduce_sum', **locals())

    if _program is None:
        _program = default_main_program()

    for _block in _program.blocks:
        # fetch vars in the current block
        _vars_in_prog = []
        for _var_name in _block.vars:
            _vars_in_prog.append((_var_name, _block.vars[_var_name]))

        # append sum_op in the current block
        for _var_name, _var in _vars_in_prog:
            try:
                if _var.dtype == -1:
                    continue

                ## create a var for holding sum output
                _output_var = _block.create_var(
                    name=unique_name.generate("debug_var_" + _var_name),
                    dtype=_var.dtype,
                    type=core.VarDesc.VarType.LOD_TENSOR,
                    persistable=False,
                    stop_gradient=True)

                ## create a sum op, input each existing var in the block
                _block.append_op(
                    type='sum',
                    outputs={'Out': _output_var},
                    inputs={'X': [_var]})
            except Exception as e:
                pass


def run_fast_nan_inf_debug(executor,
                           program=None,
                           feed=None,
                           fetch_list=None,
                           feed_var_name='feed',
                           fetch_var_name='fetch',
                           scope=None,
                           return_numpy=True,
                           use_program_cache=False,
                           dump_core=True):
    """
    Run a program with the given executor. Catch NAN and INF exceptions, and save persistables into a dumped core file.
    """
    assert (executor is not None)

    try:
        output = executor.run(program=program,
                              feed=feed,
                              fetch_list=fetch_list,
                              feed_var_name=feed_var_name,
                              fetch_var_name=fetch_var_name,
                              scope=scope,
                              return_numpy=return_numpy,
                              use_program_cache=use_program_cache)
        return output
    except Exception as e:
        print("catch an exception:")
        print(e)

        core_filename = "core" + str(int(random.random() * 10000)) + ".pdckpt"
        io.save_persistables(
            executor, "./", main_program=program, filename=core_filename)

        print("dumping a core into ./%s" % core_filename)