未验证 提交 af602673 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #7093 from reyoung/feature/check_nan_executor

Feature/check nan executor
...@@ -14,18 +14,17 @@ limitations under the License. */ ...@@ -14,18 +14,17 @@ limitations under the License. */
#include "paddle/framework/executor.h" #include "paddle/framework/executor.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <set> #include <set>
#include <vector>
#include "gflags/gflags.h"
#include "paddle/framework/feed_fetch_type.h" #include "paddle/framework/feed_fetch_type.h"
#include "paddle/framework/lod_rank_table.h" #include "paddle/framework/lod_rank_table.h"
#include "paddle/framework/lod_tensor.h"
#include "paddle/framework/lod_tensor_array.h" #include "paddle/framework/lod_tensor_array.h"
#include "paddle/framework/op_registry.h" #include "paddle/framework/op_registry.h"
#include "paddle/framework/scope.h"
DEFINE_bool(check_nan_inf, false,
"Checking whether operator produce NAN/INF or not. It will be "
"extremely slow so please use this flag wisely.");
namespace paddle { namespace paddle {
namespace framework { namespace framework {
...@@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) { ...@@ -58,6 +57,19 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
} }
} }
static void CheckTensorNANOrInf(const std::string& name,
const framework::Tensor& tensor) {
if (tensor.memory_size() == 0) {
return;
}
if (tensor.type().hash_code() != typeid(float).hash_code() &&
tensor.type().hash_code() != typeid(double).hash_code()) {
return;
}
PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name);
PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name);
}
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
bool create_local_scope, bool create_vars) { bool create_local_scope, bool create_vars) {
// TODO(tonyyang-svail): // TODO(tonyyang-svail):
...@@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id, ...@@ -101,6 +113,15 @@ void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
auto op = paddle::framework::OpRegistry::CreateOp(*op_desc); auto op = paddle::framework::OpRegistry::CreateOp(*op_desc);
VLOG(3) << op->DebugString(); VLOG(3) << op->DebugString();
op->Run(*local_scope, place_); op->Run(*local_scope, place_);
if (FLAGS_check_nan_inf) {
for (auto& vname : op->OutputVars(true)) {
auto* var = local_scope->FindVar(vname);
if (var == nullptr) continue;
if (var->IsType<framework::LoDTensor>()) {
CheckTensorNANOrInf(vname, var->Get<framework::LoDTensor>());
}
}
}
} }
if (create_vars && create_local_scope) { if (create_vars && create_local_scope) {
scope->DeleteScope(local_scope); scope->DeleteScope(local_scope);
......
...@@ -210,10 +210,10 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) { ...@@ -210,10 +210,10 @@ inline void CopyToVector(const Tensor& src, std::vector<T>* dst) {
} }
// Returns true if a tensor contains NAN, i.e., Not A Number. // Returns true if a tensor contains NAN, i.e., Not A Number.
extern bool HasNAN(const framework::Tensor& tensor); bool HasNAN(const framework::Tensor& tensor);
// Returns true if a tensor contains Inf, i.e., Infinity. // Returns true if a tensor contains Inf, i.e., Infinity.
extern bool HasInf(const framework::Tensor& tensor); bool HasInf(const framework::Tensor& tensor);
inline void SerializeToStream(std::ostream& os, const Tensor& tensor, inline void SerializeToStream(std::ostream& os, const Tensor& tensor,
const platform::DeviceContext& dev_ctx) { const platform::DeviceContext& dev_ctx) {
......
...@@ -231,7 +231,7 @@ TEST(CopyToVector, Tensor) { ...@@ -231,7 +231,7 @@ TEST(CopyToVector, Tensor) {
#endif #endif
} }
TEST(IsNAN, CPU) { TEST(HasNAN, CPU) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
Tensor src; Tensor src;
...@@ -243,7 +243,7 @@ TEST(IsNAN, CPU) { ...@@ -243,7 +243,7 @@ TEST(IsNAN, CPU) {
ASSERT_TRUE(HasNAN(src)); ASSERT_TRUE(HasNAN(src));
} }
TEST(IsInf, CPU) { TEST(HasInf, CPU) {
using namespace paddle::framework; using namespace paddle::framework;
using namespace paddle::platform; using namespace paddle::platform;
Tensor src; Tensor src;
......
...@@ -36,7 +36,7 @@ def __read_gflags_from_env__(): ...@@ -36,7 +36,7 @@ def __read_gflags_from_env__():
""" """
import sys import sys
import core import core
read_env_flags = ['use_pinned_memory'] read_env_flags = ['use_pinned_memory', 'check_nan_inf']
if core.is_compile_gpu(): if core.is_compile_gpu():
read_env_flags.append('fraction_of_gpu_memory_to_use') read_env_flags.append('fraction_of_gpu_memory_to_use')
core.init_gflags([sys.argv[0]] + core.init_gflags([sys.argv[0]] +
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册