diff --git a/paddle/framework/executor.cc b/paddle/framework/executor.cc
index 9ee2ddb7c3e8d69a12fa97c67eb525a6c65915d5..fe9a42ace01a3844c90c17d6b275883a68cd8769 100644
--- a/paddle/framework/executor.cc
+++ b/paddle/framework/executor.cc
@@ -59,15 +59,16 @@ static void CreateTensor(Variable* var, proto::VarDesc::VarType var_type) {
 
 static void CheckTensorNANOrInf(const std::string& name,
                                 const framework::Tensor& tensor) {
-  if (tensor.type().hash_code() != typeid(float).hash_code() &&
-      tensor.type().hash_code() != typeid(double).hash_code()) {
+  if (tensor.memory_size() == 0) {
     return;
   }
-  if (tensor.memory_size() == 0) {
+  if (tensor.type().hash_code() != typeid(float).hash_code() &&
+      tensor.type().hash_code() != typeid(double).hash_code()) {
     return;
   }
   PADDLE_ENFORCE(!framework::HasInf(tensor), "Tensor %s has Inf", name);
-  PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN", name);
+  PADDLE_ENFORCE(!framework::HasNAN(tensor), "Tensor %s has NAN, %p", name,
+                 &tensor);
 }
 
 void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
diff --git a/paddle/framework/tensor_impl.h b/paddle/framework/tensor_impl.h
index 6c6f298edc187a87677089e54c4c9046821282df..0161ed8c475e2ae77d921a7c182533063fb93cbd 100644
--- a/paddle/framework/tensor_impl.h
+++ b/paddle/framework/tensor_impl.h
@@ -134,8 +134,17 @@ inline void* Tensor::mutable_data(platform::Place place, std::type_index type) {
 #endif
     offset_ = 0;
   }
-  return reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(holder_->ptr()) +
-                                 offset_);
+  void* buf = reinterpret_cast<void*>(
+      reinterpret_cast<uintptr_t>(holder_->ptr()) + offset_);
+  if (type.hash_code() == typeid(float).hash_code() ||
+      type.hash_code() == typeid(double).hash_code()) {
+    float* tmp = (float*)(buf);
+    for (int64_t i = 0; i < numel(); ++i) {
+      tmp[i] = NAN;
+    }
+  }
+
+  return buf;
 }
 
 inline void* Tensor::mutable_data(platform::Place place) {
diff --git a/paddle/framework/variable.h b/paddle/framework/variable.h
index e5a94759f9230ab4ce9d2cc24849a2debb8a5e2f..3720393601a5458fb43217d05257c4acb8be28cd 100644
--- a/paddle/framework/variable.h
+++ b/paddle/framework/variable.h
@@ -35,6 +35,7 @@ class Variable {
   template <typename T>
   T* GetMutable() {
     if (!IsType<T>()) {
+      VLOG(10) << "Resetting " << *this->name_;
       holder_.reset(new PlaceholderImpl<T>(new T()));
     }
     return static_cast<T*>(holder_->Ptr());
diff --git a/paddle/operators/fill_constant_op.cc b/paddle/operators/fill_constant_op.cc
index dcd43a30c86b62d79f52ac640f14b295a062146c..196c380c73ea69b90293706085b32ede0d391f8d 100644
--- a/paddle/operators/fill_constant_op.cc
+++ b/paddle/operators/fill_constant_op.cc
@@ -51,6 +51,7 @@ class FillConstantOp : public framework::OperatorBase {
 
     platform::DeviceContextPool &pool = platform::DeviceContextPool::Instance();
     auto &dev_ctx = *pool.Get(dev_place);
+    VLOG(10) << "FillConstant to " << &out;
     math::set_constant(dev_ctx, &out, value);
   }
 };
diff --git a/paddle/operators/shrink_rnn_memory_op.cc b/paddle/operators/shrink_rnn_memory_op.cc
index e5ef0740b6f385de7f17a3a419000cb8c897d986..9ef473e7264561877854254dc690636235fb91c0 100644
--- a/paddle/operators/shrink_rnn_memory_op.cc
+++ b/paddle/operators/shrink_rnn_memory_op.cc
@@ -116,9 +116,10 @@ class ShrinkRNNMemoryGradOp : public ArrayOp {
       auto height = dout_tensor.dims()[0];
       auto slice = dx_tensor.Slice(0, static_cast<int>(height));
       framework::CopyFrom(dout_tensor, dout_tensor.place(), dev_ctx, &slice);
-      if (dx_tensor.dims()[0] < height) {
+      VLOG(10) << dx_tensor.dims()[0] << ", " << height;
+      if (dx_tensor.dims()[0] > height) {
         auto rest_tensor = dx_tensor.Slice(
-            static_cast<int>(height), static_cast<int>(dout_tensor.dims()[0]));
+            static_cast<int>(height), static_cast<int>(dx_tensor.dims()[0]));
         math::set_constant(dev_ctx, &rest_tensor, 0.0f);
       }
     }
diff --git a/paddle/operators/sum_op.h b/paddle/operators/sum_op.h
index eaa36aa1aea53e0b37ef6c578d8bb1cda230ded0..d1277d3edd897b951fe20ee2a0294bd9c615a059 100644
--- a/paddle/operators/sum_op.h
+++ b/paddle/operators/sum_op.h
@@ -38,11 +38,9 @@ class SumKernel : public framework::OpKernel<T> {
 
     if (out_var->IsType<framework::LoDTensor>()) {
       auto *out = context.Output<Tensor>("Out");
-      out->mutable_data<T>(context.GetPlace());
-      auto result = EigenVector<T>::Flatten(*out);
-
       if (!in_place) {
+        out->mutable_data<T>(context.GetPlace());
         math::SetConstant<DeviceContext, T> constant_functor;
         constant_functor(context.template device_context<DeviceContext>(),
                          out, 0.0);
       }
diff --git a/paddle/operators/tensor_array_read_write_op.cc b/paddle/operators/tensor_array_read_write_op.cc
index 53e38ec70336ca7f2d7c142e5fb1bbe427ab2957..d5ff3e3fce29b1a888b2cd4d307c2655669e3e4c 100644
--- a/paddle/operators/tensor_array_read_write_op.cc
+++ b/paddle/operators/tensor_array_read_write_op.cc
@@ -130,9 +130,9 @@ class ReadFromArrayOp : public ArrayOp {
     auto &x_array = x->Get<framework::LoDTensorArray>();
     auto *out = scope.FindVar(Output("Out"));
     PADDLE_ENFORCE(out != nullptr, "Out must be set");
-    auto *out_tensor = out->GetMutable<framework::LoDTensor>();
     size_t offset = GetOffset(scope, place);
     if (offset < x_array.size()) {
+      auto *out_tensor = out->GetMutable<framework::LoDTensor>();
       platform::DeviceContextPool &pool =
           platform::DeviceContextPool::Instance();
       auto &dev_ctx = *pool.Get(place);
diff --git a/paddle/operators/while_op.cc b/paddle/operators/while_op.cc
index 728ef6079465d57f54dab383aac5e2bb750fe113..322270c8296d86348d9501dd79ccf094f10ac0e3 100644
--- a/paddle/operators/while_op.cc
+++ b/paddle/operators/while_op.cc
@@ -194,14 +194,27 @@ class WhileGradOp : public framework::OperatorBase {
           }
         }
 
+        auto check_var_no_nan = [](const framework::Scope &scope,
+                                   const std::string &var_name) {
+          auto *var = scope.FindVar(var_name);
+          if (var->IsType<framework::LoDTensor>()) {
+            VLOG(10) << "Checking " << var_name;
+            PADDLE_ENFORCE(!framework::HasNAN(var->Get<framework::LoDTensor>()),
+                           "%s has NAN", var_name);
+          }
+        };
+        check_var_no_nan(cur_scope, inside_grad_name);
         auto new_inside_name = cur_scope.Rename(inside_grad_name);
+        check_var_no_nan(cur_scope, new_inside_name);
         auto sum_op = framework::OpRegistry::CreateOp(
             "sum", {{"X", {pg_names[param_id], new_inside_name}}},
             {{"Out", {pg_names[param_id]}}}, framework::AttributeMap{});
         sum_op->Run(cur_scope, dev_place);
+        check_var_no_nan(cur_scope, pg_names[param_id]);
         cur_scope.Rename(new_inside_name, inside_grad_name);
       }
     }
+    VLOG(1) << "Complete WhileOpGrad";
   }
 };
 
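The core trick in this patch is the `tensor_impl.h` hook: every buffer handed out by `Tensor::mutable_data` for float/double data is first filled with NaN. An operator that writes its whole output before anyone reads it behaves as before, but an operator that leaks uninitialized memory now produces NaNs that the `CheckTensorNANOrInf` and `check_var_no_nan` probes can catch and localize. Below is a minimal standalone sketch of the technique, assuming nothing from Paddle; `Buffer`, `PoisonWithNAN`, and the `HasNAN` stand-in are illustrative names, not the framework's API.

```cpp
#include <cassert>
#include <cmath>
#include <cstddef>
#include <limits>
#include <typeindex>
#include <vector>

// Minimal stand-in for a tensor allocation: raw bytes plus element type.
struct Buffer {
  std::vector<unsigned char> data;
  std::type_index type{typeid(void)};
  std::size_t numel = 0;
};

// Poison a freshly allocated buffer with quiet NaNs, per element type.
// Any consumer that reads an element before writing it will then fail a
// HasNAN-style check, which is how the mutable_data hook localizes bugs.
void PoisonWithNAN(Buffer* buf) {
  if (buf->type == std::type_index(typeid(float))) {
    float* p = reinterpret_cast<float*>(buf->data.data());
    for (std::size_t i = 0; i < buf->numel; ++i)
      p[i] = std::numeric_limits<float>::quiet_NaN();
  } else if (buf->type == std::type_index(typeid(double))) {
    double* p = reinterpret_cast<double*>(buf->data.data());
    for (std::size_t i = 0; i < buf->numel; ++i)
      p[i] = std::numeric_limits<double>::quiet_NaN();
  }
}

// HasNAN analogue for the float case: true if any element is NaN.
bool HasNAN(const Buffer& buf) {
  const float* p = reinterpret_cast<const float*>(buf.data.data());
  for (std::size_t i = 0; i < buf.numel; ++i)
    if (std::isnan(p[i])) return true;
  return false;
}

int main() {
  Buffer b;
  b.type = std::type_index(typeid(float));
  b.numel = 4;
  b.data.resize(b.numel * sizeof(float));
  PoisonWithNAN(&b);
  assert(HasNAN(b));  // an unwritten output is now loudly NaN

  float* p = reinterpret_cast<float*>(b.data.data());
  for (std::size_t i = 0; i < b.numel; ++i) p[i] = 0.0f;  // op writes output
  assert(!HasNAN(b));
  return 0;
}
```

Two details are worth noting. The sketch poisons per element type, whereas the patch reuses a `float*` loop even for double tensors, which only reliably yields NaN bit patterns for float data and covers just half of a double buffer. The poisoning also explains the operator-side edits: `sum_op.h` now calls `out->mutable_data<T>(...)` only when the sum is not in place, presumably because with the hook that call would NaN-fill a buffer the output shares with its first input, and `ReadFromArrayOp` similarly defers `GetMutable` until the read is known to be in bounds.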
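The reordering in `CheckTensorNANOrInf` is a prerequisite for running these checks everywhere: `framework::Tensor::type()` enforces a non-null holder, so querying the element type of a tensor that was never allocated would fail before the old size check was even reached. Testing `memory_size() == 0` first makes the probe safe on empty tensors. A sketch of the hazard, with a hypothetical `MiniTensor` standing in for `framework::Tensor`:

```cpp
#include <cassert>
#include <cstddef>
#include <typeinfo>

// Hypothetical stand-in: type() is only valid once memory exists,
// mirroring the enforce on holder_ in framework::Tensor.
struct MiniTensor {
  const std::type_info* elem = nullptr;  // set when memory is allocated
  std::size_t bytes = 0;

  std::size_t memory_size() const { return bytes; }
  const std::type_info& type() const {
    assert(elem != nullptr && "type() called on an unallocated tensor");
    return *elem;
  }
};

// Mirrors the fixed ordering: bail out on empty tensors *before*
// touching type(), then skip non-floating-point element types.
bool ShouldCheck(const MiniTensor& t) {
  if (t.memory_size() == 0) return false;  // safe: type() not called yet
  if (t.type() != typeid(float) && t.type() != typeid(double)) return false;
  return true;  // run the HasInf / HasNAN scans here
}

int main() {
  MiniTensor empty;             // never allocated
  assert(!ShouldCheck(empty));  // the old order would assert inside type()

  MiniTensor f;
  f.elem = &typeid(float);
  f.bytes = 4 * sizeof(float);
  assert(ShouldCheck(f));
  return 0;
}
```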