diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc
index 85ff6687e0dbea38b5661e04a7e3efc9caf4ef6f..7aa1917240f73739b2de9ccb0a3251923af22cc7 100644
--- a/paddle/fluid/eager/api/utils/hook_utils.cc
+++ b/paddle/fluid/eager/api/utils/hook_utils.cc
@@ -43,33 +43,37 @@ void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
 void RetainGradForTensor(const egr::EagerTensor& tensor) {
   // TODO(jiabin): Support More Tensor type here
   AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
-  egr::EagerTensor* grad_tensor = meta->MutableGrad();
+  std::weak_ptr<egr::EagerTensor> weak_grad_tensor = meta->WeakGrad();

   // Define Hook
   std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
-      [grad_tensor](const egr::EagerTensor& t) {
-        if (!grad_tensor) {
-          PADDLE_THROW(paddle::platform::errors::Fatal(
-              "Detected null grad_tensor."
-              "Grad tensor in AutogradMeta of should not be nullptr"));
-        }
-        if (t.defined()) {
-          // Simply Copy impl() to grad_tensor
-          grad_tensor->set_impl(t.impl());
-          return *grad_tensor;
+      [weak_grad_tensor](const egr::EagerTensor& t) {
+        if (!weak_grad_tensor.expired()) {
+          auto grad_tensor = weak_grad_tensor.lock();
+          if (t.defined()) {
+            VLOG(7) << "Set impl for RetainGrad Hook for tensor: " << t.name();
+            // Simply Copy impl() to grad_tensor
+            grad_tensor->set_impl(t.impl());
+            return *grad_tensor.get();
+          } else {
+            VLOG(7) << "Set Var for RetainGrad Hook for tensor: " << t.name();
+            PADDLE_ENFORCE_EQ(
+                t.Var().IsInitialized(), true,
+                paddle::platform::errors::Fatal(
+                    "Detected uninitialized variable, causing segmentation "
+                    "fault "
+                    "inside the hook."
+                    "Variable %s has to be initialized while we need to set it."
+                    "please check tensor initialization status.",
+                    t.name()));
+            grad_tensor->MutableVar()
+                ->GetMutable<paddle::framework::LoDTensor>()
+                ->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
+            return *grad_tensor.get();
+          }
         } else {
-          PADDLE_ENFORCE_EQ(
-              t.Var().IsInitialized(), true,
-              paddle::platform::errors::Fatal(
-                  "Detected uninitialized variable, causing segmentation fault "
-                  "inside the hook."
-                  "Variable %s has to be initialized while we need to set it."
- "please check tensor initialization status.", - t.name())); - grad_tensor->MutableVar() - ->GetMutable() - ->ShareDataWith(t.Var().Get()); - return *grad_tensor; + VLOG(7) << "Retain NULL EagerTensor in Grad Hook"; + return EagerTensor(); } }; diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index dfdd0f1e5ce1b932bfa16d19a3d4489a5538451b..448fadd4b46447587dcb401f9ef57497960df70e 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -972,11 +972,16 @@ static std::string GenerateGradNodeCreationContent( iter.GetGradInsFwdSlotnameMap(); for (auto& kv : grad_ins_fwd_slotname_map) { const std::string& tensor_wrapper_name = kv.second; + std::string full_reserved = "false"; + if (fwd_outputs_name_pos_map.find(tensor_wrapper_name) == + fwd_outputs_name_pos_map.end()) { + full_reserved = "true"; + } const char* SET_TENSOR_WRAPPER_TEMPLATE = - " grad_node->SetTensorWrapper%s(%s);\n"; - grad_node_creation_str += - paddle::string::Sprintf(SET_TENSOR_WRAPPER_TEMPLATE, - tensor_wrapper_name, tensor_wrapper_name); + " grad_node->SetTensorWrapper%s(%s, %s);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, tensor_wrapper_name, + full_reserved); } } grad_node_creation_str += "\n"; @@ -1017,11 +1022,6 @@ static std::string GenerateGradNodeCreationContent( grad_node_creation_str += paddle::string::Sprintf( ADD_EDGES_TEMPLATE, input_autograd_name, input_position); } - VLOG(6) << "Generated Call RetainGradForTensor"; - const char* RETAIN_GRAD_TEMPLATE = - " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; - grad_node_creation_str += - paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, input_name); } // [GradOpNode] SetGradInMeta @@ -1048,6 +1048,12 @@ static std::string GenerateGradNodeCreationContent( " egr::EagerUtils::SetHistory(&%s, grad_node);\n"; grad_node_creation_str += paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name); + + VLOG(6) << "Generated Call RetainGradForTensor"; + const char* RETAIN_GRAD_TEMPLATE = + " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; + grad_node_creation_str += + paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name); } VLOG(6) << "Generated SetGradIn/OutMeta"; @@ -1771,6 +1777,7 @@ static std::string GenerateGradNodeHeaderContents( std::string tensor_wrapper_arg_str; std::string tensor_wrapper_body_str; + std::string full_reserved_str = "full_reserved"; if (duplicable_tensors.count(tensor_wrapper_name)) { const char* ATTR_TENSOR_WRAPPER_ARG_TEMPLATE = "const std::vector& %s"; @@ -1803,17 +1810,18 @@ static std::string GenerateGradNodeHeaderContents( TENSOR_WRAPPER_MEMBER_TEMPLATE, struct_tensor_wrapper_name); const char* SET_TENSOR_WRAPPER_BODY_TEMPLATE = - "%s = egr::TensorWrapper(%s, true /*full_reserved*/);"; + "%s = egr::TensorWrapper(%s, %s /*full_reserved*/);"; tensor_wrapper_body_str = paddle::string::Sprintf( SET_TENSOR_WRAPPER_BODY_TEMPLATE, struct_tensor_wrapper_name, - tensor_wrapper_name); + tensor_wrapper_name, full_reserved_str); } - + std::string full_reserved_signature_str = "bool full_reserved"; const char* SET_TENSOR_WRAPPER_TEMPLATE = - " void SetTensorWrapper%s(%s) {\n %s\n }\n"; + " void SetTensorWrapper%s(%s, %s) {\n %s\n }\n"; set_tensor_wrappers_str += paddle::string::Sprintf( SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, - tensor_wrapper_arg_str, tensor_wrapper_body_str); + tensor_wrapper_arg_str, 
+          tensor_wrapper_body_str);
    }
  }
  VLOG(6) << "Generated TensorWrapper";
diff --git a/paddle/fluid/eager/autograd_meta.h b/paddle/fluid/eager/autograd_meta.h
index 51937fc4815937c8220c2cb22f034ead3f2f091c..18156f913de785671ad95f80eae1d92e8cc257fe 100644
--- a/paddle/fluid/eager/autograd_meta.h
+++ b/paddle/fluid/eager/autograd_meta.h
@@ -14,8 +14,8 @@
 #pragma once

+#include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/eager/grad_node_info.h"
-
 namespace egr {

 using AbstractAutogradMeta = paddle::experimental::AbstractAutogradMeta;
@@ -75,9 +75,20 @@ class AutogradMeta : public AbstractAutogradMeta {
   ~AutogradMeta() override = default;

-  const egr::EagerTensor& Grad() const { return grad_; }
+  const egr::EagerTensor& Grad() const {
+    PADDLE_ENFORCE_NOT_NULL(
+        grad_.get(),
+        paddle::platform::errors::InvalidArgument(
+            "Should Not get NULL from Grad pointer, since "
+            "we should have default EagerTensor once we init AutoGradMeta. "
+            "if you got this error may indicates framework error in "
+            "PaddlePaddle"));
+    return *(grad_.get());
+  }
+
+  egr::EagerTensor* MutableGrad() { return grad_.get(); }

-  egr::EagerTensor* MutableGrad() { return &grad_; }
+  std::weak_ptr<egr::EagerTensor> WeakGrad() { return grad_; }

   void SetGradNode(const std::shared_ptr<GradNodeBase>& grad_node) {
     PADDLE_ENFORCE_NOT_NULL(
@@ -126,12 +137,13 @@ class AutogradMeta : public AbstractAutogradMeta {
  private:
   // TODO(jiabin) :Should we use pointer instead of object?
-  egr::EagerTensor grad_;
+  std::shared_ptr<egr::EagerTensor> grad_{std::make_shared<egr::EagerTensor>(
+      egr::Controller::Instance().GenerateUniqueName("@grad"))};

   // GradNodeBase is base class of all grad op which is a
   // wrapper for grad op. This class will make grad op easy
   // to be traced.
-  std::shared_ptr<GradNodeBase> grad_node_;
+  std::shared_ptr<GradNodeBase> grad_node_ = nullptr;

   /**
    * Why we need slot id here?
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index bee7124b55cd9d1226d4ead9a44acb927a743848..9a760c03728cdd47f6a3a8ba9d20bf196832166d 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -71,6 +71,14 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
   return node_in_degree_map;
 }

+void RunBackwardHooks(
+    const std::vector<std::vector<egr::EagerTensor>>& grad_tensors,
+    egr::GradNodeBase* grad_node) {
+  grad_node->ApplyGradientHooks(grad_tensors);
+  VLOG(6) << "Apply Reduce Hooks for node";
+  grad_node->ApplyReduceHooks();
+}
+
 void RunBackward(const std::vector<egr::EagerTensor>& tensors,
                  const std::vector<egr::EagerTensor>& grad_tensors,
                  bool retain_graph) {
@@ -157,7 +165,11 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
     std::unique_ptr<GradTensorHolder> node_input_buffer =
         std::move(node_input_buffers_dict[node]);
     VLOG(6) << "Run Backward Kernel with input_buffer";
-    // Run Backward Node and get outputs
+
+    RunBackwardHooks(node_input_buffer->Buffers(), node);
+    // TODO(jiabin): Support post hook here and make hook run in seperate
+    // operator
+    // Run Pre Backward Node and get outputs
     std::vector<std::vector<egr::EagerTensor>> grad_output_tensors =
         (*node)(node_input_buffer->Buffers());
     // TODO(jiabin): Should we erase it or find a more efficient way.
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 6760499fdc7d5dee28590e80002b6393ba34ff9a..0e6f6aa63dd0f32db73bd5f8417749956449aba8 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -47,14 +47,18 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    auto node = meta->GetMutableGradNode();
-    if (node) {
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
-    } else {
-      meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
+    if (meta) {
+      auto node = meta->GetMutableGradNode();
+      if (node) {
+        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                         meta->OutRankInfo());
+      } else {
+        if (!meta->StopGradient()) {
+          meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
+          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                           meta->OutRankInfo());
+        }
+      }
     }
   }
 }
@@ -71,14 +75,18 @@ void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    auto node = meta->GetMutableGradNode();
-    if (node) {
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
-    } else {
-      meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
+    if (meta) {
+      auto node = meta->GetMutableGradNode();
+      if (node) {
+        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                         meta->OutRankInfo());
+      } else {
+        if (!meta->StopGradient()) {
+          meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
+          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                           meta->OutRankInfo());
+        }
+      }
     }
   }
 }
@@ -90,14 +98,18 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
                         "Given slot id is out of range of adj_edges outter size, "
                         "adj_edges is designed to has the same size of grad "
                         "inputs's slot num."));
-  auto node = meta->GetMutableGradNode();
-  if (node) {
-    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                     meta->OutRankInfo());
-  } else {
-    meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
-    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                     meta->OutRankInfo());
+  if (meta) {
+    auto node = meta->GetMutableGradNode();
+    if (node) {
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
+    } else {
+      if (!meta->StopGradient()) {
+        meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
+        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                         meta->OutRankInfo());
+      }
+    }
   }
 }

@@ -127,6 +139,11 @@ void GradNodeBase::SetGradInMeta(const std::vector<AutogradMeta*>& fwd_out,
   // Init stop gradient vector before use to avoid push back
   meta.Init(slot_size);
   for (size_t i = 0; i < slot_size; i++) {
+    PADDLE_ENFORCE_NOT_NULL(fwd_out[i],
+                            paddle::platform::errors::PreconditionNotMet(
+                                "Bwd_in_meta should only be called while "
+                                "autograd_meta is not null. If you got this "
+                                "error, it indicates bugs in framework."));
     if (fwd_out[i]->StopGradient()) {
       // Set Stop Gradient only when its true or non-initialized autograd_meta,
       // since all default value is false.
@@ -173,6 +190,10 @@ void GradNodeBase::SetGradOutMeta(const std::vector<AutogradMeta*>& fwd_in,
   // Init stop gradient vector before use to avoid push back
   meta.Init(slot_size);
   for (size_t i = 0; i < slot_size; i++) {
+    if (!fwd_in[i]) {
+      meta.SetStopGradient(i, true);
+      continue;
+    }
     if (fwd_in[i]->StopGradient()) {
       // Set Stop Gradient only when its true or non-initialized autograd_meta,
       // since all default value is false.
@@ -249,6 +270,7 @@ std::vector<std::vector<egr::EagerTensor>> GradNodeBase::ApplyGradientHooks(
       slot_out.resize(tensors[slot_id].size());
       egr::EagerTensor& out = slot_out[rank];
       if (!out.defined() || !out.initialized()) {
+        VLOG(8) << "Run Hook for tensor: " << tensors[slot_id][rank].name();
         out = hook(tensors[slot_id][rank]);
       } else {
         // TODO(jiabin): Why this?
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index 6459614330ab4a98d2d36746c670a566af196e5d..e73dfa2ec8b6e45e2f39082a8896ec197a0cdf8d 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -266,6 +266,7 @@ std::vector<egr::EagerTensor> EagerUtils::RecoverTensorWrapper(
 void EagerUtils::CheckAndRetainGrad(const egr::EagerTensor& tensor) {
   VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
   if (FLAGS_retain_grad_for_all_tensor) {
+    VLOG(6) << "RetainGradForTensor: " << tensor.name();
     egr::egr_utils_api::RetainGradForTensor(tensor);
   }
 }
@@ -274,7 +275,7 @@ void EagerUtils::CheckAndRetainGrad(
     const std::vector<egr::EagerTensor>& tensors) {
   if (FLAGS_retain_grad_for_all_tensor) {
     for (auto& tensor : tensors) {
-      VLOG(6) << "Check RetainGradForTensor: " << tensor.name();
+      VLOG(6) << "RetainGradForTensor: " << tensor.name();
       egr::egr_utils_api::RetainGradForTensor(tensor);
     }
   }
diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc
index 34ab707fe2910e4da84364c2a4339afe0fdd96d6..274d829dfcfc6cf94cc1ee1b893dc0180fbf1deb 100644
--- a/paddle/fluid/pybind/eager.cc
+++ b/paddle/fluid/pybind/eager.cc
@@ -62,6 +62,13 @@ void EmptyEagerTensorInitializer(
     const std::vector<int>& dims = {},
     framework::proto::VarType::Type var_type =
         paddle::framework::proto::VarType::LOD_TENSOR) {
+  auto ddims = paddle::framework::make_ddim(dims);
+  PADDLE_ENFORCE_GE(
+      paddle::framework::product(ddims), 0,
+      paddle::platform::errors::InvalidArgument(
+          "Create Eager Tensor with dims contain minus num is ilegal"
+          "Please check your code and make sure you new a "
+          "eager tensor with fixed shape instead of using -1."));
   self->eager_tensor.set_name(name);
   auto autograd_meta = egr::EagerUtils::autograd_meta(&(self->eager_tensor));
   autograd_meta->SetPersistable(persistable);
@@ -71,8 +78,7 @@ void EmptyEagerTensorInitializer(
     std::shared_ptr<pten::DenseTensor> dense_tensor =
         std::make_shared<pten::DenseTensor>(
            pten::make_intrusive<paddle::experimental::SharedStorage>(place),
-            pten::DenseTensorMeta(pten::TransToPtenDataType(dtype),
-                                  paddle::framework::make_ddim(dims)));
+            pten::DenseTensorMeta(pten::TransToPtenDataType(dtype), ddims));
     self->eager_tensor.set_impl(dense_tensor);
   } else {
     PADDLE_THROW(platform::errors::InvalidArgument(
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index f5a48a279743293835ddfb993f4258eca4dff9d1..f47bd3350e30f46acfc05b5fd1b6f58a7b87828c 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -39,10 +39,12 @@ extern PyTypeObject* pEagerTensorType;
 static PyObject* eager_tensor_method_numpy(EagerTensorObject* self,
                                            PyObject* args, PyObject* kwargs) {
   EAGER_SYNC_TRY
-  if (!self->eager_tensor.initialized()) {
-    Py_INCREF(Py_None);
-    return Py_None;
-  }
+  PADDLE_ENFORCE_EQ(
+      self->eager_tensor.initialized(), true,
+      platform::errors::InvalidArgument(
+          "Tensor data of %s is Empty that indicates we have null tensor for "
+          "now, please check if it has no data and initialize it first.",
+          self->eager_tensor.name()));
   auto tensor_dims = self->eager_tensor.shape();
   auto numpy_dtype = TensorDtype2NumpyDtype(self->eager_tensor.type());
   auto sizeof_dtype = pten::DataTypeSize(self->eager_tensor.type());
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index 4025a33b561497431f70f839a78aad740fa0af59..b147d5fbad0ed80cdb84ce37240819e4e5f8ee56 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -75,6 +75,7 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
         std::dynamic_pointer_cast<egr::GradNodeAccumulation>(grad_node);
     return ToPyObject(accumulation_grad_node->Grad());
   } else {
+    VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name();
     auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
     return ToPyObject(meta->Grad());
   }
diff --git a/python/paddle/fluid/eager/eager_tensor_patch_methods.py b/python/paddle/fluid/eager/eager_tensor_patch_methods.py
index f820d02d1abfb259518fba691a47acc352acd1a5..2586685ec1adad9b16d99702a509ba4ece12bbaa 100644
--- a/python/paddle/fluid/eager/eager_tensor_patch_methods.py
+++ b/python/paddle/fluid/eager/eager_tensor_patch_methods.py
@@ -123,10 +123,11 @@ def monkey_patch_eagertensor():
        # [500.]

        """
-        if self.grad is None:
+        if self.grad._is_initialized():
+            return self.grad.numpy()
+        else:
             return None
         # TODO(wanghuancoder) support SELECTED_ROWS
-        return self.grad.numpy()

     if hasattr(core, "eager"):
         setattr(core.eager.EagerTensor, "__str__", __str__)
diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
index 64c563ce721512541a21965722d78bc80f949676..f7260982ff15ee479d978bf2768b0d3f1e8c015c 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -452,6 +452,22 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
         self.assertTrue(np.array_equal(res1, res2))
         self.assertTrue(np.array_equal(res3, res4))
+    def test_backward_with_single_tensor(self):
+        arr4 = np.random.rand(4, 16, 16, 32).astype('float32')
+        egr_tensor12 = core.eager.EagerTensor(arr4, core.CPUPlace())
+        egr_tensor12.retain_grads()
+        arr = np.ones([4, 16, 16, 32]).astype('float32')
+        self.assertEqual(egr_tensor12.persistable, False)
+        self.assertTrue("generated_tensor" in egr_tensor12.name)
+        self.assertEqual(egr_tensor12.shape, [4, 16, 16, 32])
+        self.assertEqual(egr_tensor12.dtype, core.VarDesc.VarType.FP32)
+        self.assertEqual(egr_tensor12.stop_gradient, True)
+        self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
+        self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
+        self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
+        egr_tensor12.backward()
+        self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
+

 class EagerGuardTestCase(unittest.TestCase):
     def test__test_eager_guard(self):
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_basic.py b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
index d523e746b939c44d53b1586e6338eb85397d4876..5b9e9ab8373abd4943ccc4a23ea92dfe5ff79de7 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_basic.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_basic.py
@@ -24,7 +24,7 @@ from test_imperative_base import new_program_scope
 import paddle.fluid.dygraph_utils as dygraph_utils
 from paddle.fluid.dygraph.layer_object_helper import LayerObjectHelper
 import paddle
-from paddle.fluid.framework import _test_eager_guard, _in_eager_mode
+from paddle.fluid.framework import _test_eager_guard, _in_eager_mode, in_dygraph_mode


 class MyLayer(fluid.Layer):
@@ -94,58 +94,13 @@ class SimpleRNNCell(fluid.Layer):
             is_bias=False)

     def forward(self, input, pre_hidden):
-        tmp_i2h = self.create_variable(dtype=self._dtype)
-        tmp_h2h = self.create_variable(dtype=self._dtype)
-        hidden = self.create_variable(dtype=self._dtype)
-        out = self.create_variable(dtype=self._dtype)
-        softmax_out = self.create_variable(dtype=self._dtype)
-        reduce_out = self.create_variable(dtype=self._dtype)
-        self._helper.append_op(
-            type="mul",
-            inputs={"X": input,
-                    "Y": self._i2h_w},
-            outputs={"Out": tmp_i2h},
-            attrs={"x_num_col_dims": 1,
-                   "y_num_col_dims": 1})
-
-        self._helper.append_op(
-            type="mul",
-            inputs={"X": pre_hidden,
-                    "Y": self._h2h_w},
-            outputs={"Out": tmp_h2h},
-            attrs={"x_num_col_dims": 1,
-                   "y_num_col_dims": 1})
-
-        self._helper.append_op(
-            type="elementwise_add",
-            inputs={'X': tmp_h2h,
-                    'Y': tmp_i2h},
-            outputs={'Out': hidden},
-            attrs={'axis': -1,
-                   'use_mkldnn': False})
+        tmp_i2h = paddle.fluid.layers.nn.mul(input, self._i2h_w)
+        tmp_h2h = paddle.fluid.layers.nn.mul(pre_hidden, self._h2h_w)
+        hidden = paddle.add(tmp_h2h, tmp_i2h)
         hidden = self._helper.append_activation(hidden, act='tanh')
-
-        self._helper.append_op(
-            type="mul",
-            inputs={"X": hidden,
-                    "Y": self._h2o_w},
-            outputs={"Out": out},
-            attrs={"x_num_col_dims": 1,
-                   "y_num_col_dims": 1})
-
-        self._helper.append_op(
-            type="softmax",
-            inputs={"X": out},
-            outputs={"Out": softmax_out},
-            attrs={"use_cudnn": False})
-
-        self._helper.append_op(
-            type='reduce_sum',
-            inputs={'X': softmax_out},
-            outputs={'Out': reduce_out},
-            attrs={'keep_dim': False,
-                   'reduce_all': True})
-
+        out = paddle.fluid.layers.nn.mul(hidden, self._h2o_w)
+        softmax_out = paddle.nn.functional.softmax(out)
+        reduce_out = paddle.fluid.layers.nn.reduce_sum(softmax_out)
         return reduce_out, hidden

@@ -394,12 +349,17 @@ class TestImperative(unittest.TestCase):
         a = inputs2[0].gradient()
         self.assertTrue(np.allclose(inputs2[0].gradient(), x))

-    def test_empty_var(self):
+    def func_empty_var(self):
         with fluid.dygraph.guard():
             cur_program = fluid.Program()
             cur_block = cur_program.current_block()
-            new_variable = cur_block.create_var(
-                name="X", shape=[-1, 23, 48], dtype='float32')
+            # Normally, we don't allow tensor with -1 shape being created in dygraph mode, this test is not good.
+            if not _in_eager_mode():
+                new_variable = cur_block.create_var(
+                    name="X", shape=[-1, 23, 48], dtype='float32')
+            else:
+                new_variable = cur_block.create_var(
+                    name="X", shape=[1, 23, 48], dtype='float32')
             try:
                 new_variable.numpy()
             except Exception as e:
@@ -409,37 +369,51 @@ class TestImperative(unittest.TestCase):
                 new_variable.backward()
             except Exception as e:
                 assert type(e) == core.EnforceNotMet
+            # TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
+            if not _in_eager_mode():
+                try:
+                    new_variable.clear_gradient()
+                except Exception as e:
+                    assert type(e) == core.EnforceNotMet

-            try:
-                new_variable.clear_gradient()
-            except Exception as e:
-                assert type(e) == core.EnforceNotMet
+    def test_empty_var(self):
+        with _test_eager_guard():
+            self.func_empty_var()
+        self.func_empty_var()

-    def test_empty_grad(self):
+    def func_empty_grad(self):
         with fluid.dygraph.guard():
             x = np.ones([2, 2], np.float32)
             new_var = paddle.to_tensor(x)
-            try:
-                new_var.gradient()
-            except Exception as e:
-                assert type(e) == ValueError
-
-            try:
-                new_var.clear_gradient()
-            except Exception as e:
-                assert type(e) == core.EnforceNotMet
+            self.assertIsNone(new_var.gradient())
+            # TODO(jiabin): Support clear_gradient in eager mode later and remove this if statement
+            if not _in_eager_mode():
+                try:
+                    new_var.clear_gradient()
+                except Exception as e:
+                    assert type(e) == core.EnforceNotMet

         with fluid.dygraph.guard():
             cur_program = fluid.Program()
             cur_block = cur_program.current_block()
-            new_variable = cur_block.create_var(
-                name="X", shape=[-1, 23, 48], dtype='float32')
+            # Normally, we don't allow tensor with -1 shape being created in dygraph mode, this test is not good.
+            if not _in_eager_mode():
+                new_variable = cur_block.create_var(
+                    name="X", shape=[-1, 23, 48], dtype='float32')
+            else:
+                new_variable = cur_block.create_var(
+                    name="X", shape=[1, 23, 48], dtype='float32')
             try:
                 new_variable.gradient()
             except Exception as e:
                 assert type(e) == ValueError

-    def test_set_persistable(self):
+    def test_empty_grad(self):
+        with _test_eager_guard():
+            self.func_empty_grad()
+        self.func_empty_grad()
+
+    def func_set_persistable(self):
         with fluid.dygraph.guard():
             x = np.ones([2, 2], np.float32)
             new_var = paddle.to_tensor(x)
@@ -447,12 +421,22 @@ class TestImperative(unittest.TestCase):
             new_var.persistable = True
             self.assertTrue(new_var.persistable)

-    def test_layer(self):
+    def test_set_persistable(self):
+        with _test_eager_guard():
+            self.func_set_persistable()
+        self.func_set_persistable()
+
+    def func_layer(self):
         with fluid.dygraph.guard():
             l = fluid.Layer("l")
             self.assertRaises(NotImplementedError, l.forward, [])

-    def test_layer_in_out(self):
+    def test_layer(self):
+        with _test_eager_guard():
+            self.func_layer()
+        self.func_layer()
+
+    def func_layer_in_out(self):
         np_inp = np.array([1.0, 2.0, -1.0], dtype=np.float32)
         with fluid.dygraph.guard():
             var_inp = paddle.to_tensor(np_inp)
@@ -489,12 +473,17 @@ class TestImperative(unittest.TestCase):
                 feed={inp.name: np_inp},
                 fetch_list=[x.name, param_grads[1].name])

-        self.assertTrue(np.allclose(dy_out, static_out))
-        self.assertTrue(np.allclose(dy_grad, static_grad))
-        self.assertTrue(np.allclose(dy_out2, static_out))
-        self.assertTrue(np.allclose(dy_grad2, static_grad))
+        self.assertTrue(np.array_equal(dy_out, static_out))
+        self.assertTrue(np.array_equal(dy_grad, static_grad))
+        self.assertTrue(np.array_equal(dy_out2, static_out))
+        self.assertTrue(np.array_equal(dy_grad2, static_grad))

-    def test_mlp(self):
+    def test_layer_in_out(self):
+        with _test_eager_guard():
+            self.func_layer_in_out()
+        self.func_layer_in_out()
+
+    def func_mlp(self):
         np_inp = np.array([[1.0, 2.0], [3.0, 4.0]], dtype=np.float32)
         with fluid.dygraph.guard():
             var_inp = paddle.to_tensor(np_inp)
@@ -545,6 +534,11 @@ class TestImperative(unittest.TestCase):
         self.assertEqual(mlp._linear2, sublayers[1])
         self.assertEqual(len(sublayers), 2)

+    def test_mlp(self):
+        with _test_eager_guard():
+            self.func_mlp()
+        self.func_mlp()
+
     def test_gradient_accumulation(self):
         def test_single_api(sort_sum_gradient):
             fluid.set_flags({'FLAGS_sort_sum_gradient': sort_sum_gradient})
@@ -677,7 +671,7 @@ class TestImperative(unittest.TestCase):
         test_mlp(False)
         test_mlp(True)

-    def test_dygraph_vs_static(self):
+    def func_dygraph_vs_static(self):
         np_inp1 = np.random.rand(4, 3, 3)
         np_inp2 = np.random.rand(4, 3, 3)
@@ -728,7 +722,12 @@ class TestImperative(unittest.TestCase):
                 fetch_list=out)[0]
         self.assertTrue(np.allclose(dygraph_result, static_result))

-    def test_rnn(self):
+    def test_dygraph_vs_static(self):
+        with _test_eager_guard():
+            self.func_dygraph_vs_static()
+        self.func_dygraph_vs_static()
+
+    def func_rnn(self):
         np_inp = np.array([[1.0, 2.0, 3.0], [4.0, 5.0, 6.0], [7.0, 8.0, 9.0],
                            [10.0, 11.0, 12.0]])
         np_inp = np_inp.reshape((1, 4, 3))
@@ -771,14 +770,19 @@ class TestImperative(unittest.TestCase):
                 param_grads[1][1].name, param_grads[2][1].name
             ])

-        self.assertTrue(np.allclose(dy_out, static_out))
-        self.assertTrue(np.allclose(dy_grad_h2o, static_grad_h2o))
-        self.assertTrue(np.allclose(dy_grad_h2h, static_grad_h2h))
-        self.assertTrue(np.allclose(dy_grad_i2h, static_grad_i2h))
-        self.assertTrue(np.allclose(dy_out2, static_out))
-        self.assertTrue(np.allclose(dy_grad_h2o2, static_grad_h2o))
-        self.assertTrue(np.allclose(dy_grad_h2h2, static_grad_h2h))
-        self.assertTrue(np.allclose(dy_grad_i2h2, static_grad_i2h))
+        self.assertTrue(np.array_equal(dy_out, static_out))
+        self.assertTrue(np.array_equal(dy_grad_h2o, static_grad_h2o))
+        self.assertTrue(np.array_equal(dy_grad_h2h, static_grad_h2h))
+        self.assertTrue(np.array_equal(dy_grad_i2h, static_grad_i2h))
+        self.assertTrue(np.array_equal(dy_out2, static_out))
+        self.assertTrue(np.array_equal(dy_grad_h2o2, static_grad_h2o))
+        self.assertTrue(np.array_equal(dy_grad_h2h2, static_grad_h2h))
+        self.assertTrue(np.array_equal(dy_grad_i2h2, static_grad_i2h))
+
+    def test_rnn(self):
+        with _test_eager_guard():
+            self.func_rnn()
+        self.func_rnn()

     def func_layer_attrs(self):
         layer = fluid.dygraph.Layer("test")