diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
index 2c3207b116e2941e7dd49e7dd56d15995c56ff9f..11e033e1e5978f2099c7cf3bab85f0e4e3502de7 100644
--- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc
+++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc
@@ -1852,7 +1852,7 @@ static std::string GenerateGradNodeCCContents(
       "  %s\n"
       "  return outputs;\n";
   generated_grad_function_body = paddle::string::Sprintf(
-      BWD_RETURN_TEMPLATE, outs_size, generated_grad_function_body);
+      BWD_RETURN_TEMPLATE, in_vars.size(), generated_grad_function_body);
 
   // [Generation] Get Full Grad Function
   const char* GRAD_FUNCTION_TEMPLATE =
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index 01cb1b81e341e95faf6d4c63819bb7db789a245b..b0e3d81df3a647151dd44d0462e80c4690c751bd 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -103,7 +103,17 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
     VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first
             << ", rank: " << input_info.second;
     // Get target GradNodeBase from target tensors
-    GradNodeBase* grad_node = auto_grad_meta->GetMutableGradNode().get();
+    auto shared_grad_node = auto_grad_meta->GetMutableGradNode();
+
+    if (shared_grad_node == nullptr || shared_grad_node.get() == nullptr ||
+        auto_grad_meta->StopGradient()) {
+      VLOG(3) << "Skip auto grad since there is no grad op for var or loss is "
+                 "stop_gradient=True: "
+              << tensor.name();
+      continue;
+    }
+
+    GradNodeBase* grad_node = shared_grad_node.get();
 
     // Prepare GradTensorHolder
     if (!node_input_buffers_dict.count(grad_node)) {
@@ -192,19 +202,38 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
       // Since we make edge has as same rank as bwd outputs, we indexing them
       // with
       // the same rank(i, j)
-      VLOG(6) << "Get Edge with slot: " << i << ", rank: " << j;
-      egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j];
-      if (!grad_output_tensor.defined() ||
-          !grad_output_tensor.initialized()) {
-        VLOG(6) << "We get grad_output_tensor with slot: " << i
-                << ", rank: " << j << " as uninitialized or undefined tensor";
-      }
-      GradNodeBase* next_node = edge.GetMutableGradNode().get();
+      auto next_node_shared = edge.GetMutableGradNode();
 
       // Next node could be nullptr if it is leaf tensor with no
       // AccumulationNode attached
       // Or it could also originated from dispensable inputs
-      if (!next_node) continue;
+      if (!next_node_shared || !next_node_shared.get() ||
+          grad_output_tensors[i].empty()) {
+        continue;
+      }
+      PADDLE_ENFORCE_LT(
+          j, grad_output_tensors[i].size(),
+          paddle::platform::errors::Fatal(
+              "Rank of grad_output_tensors should be less than "
+              "grad_output_tensors[i].size(), which is: %d. This error may "
+              "indicate autoprune or autograd api error. ",
+              grad_output_tensors.size()));
+      egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j];
+
+      if ((!grad_output_tensor.defined() ||
+           !grad_output_tensor.initialized())) {
+        if (!grad_output_tensor.Var().IsInitialized()) {
+          VLOG(6)
+              << "We get grad_output_tensor with slot: " << i
+              << ", rank: " << j
+              << " as uninitialized or undefined in both tensor and variable";
+        }
+      }
+      VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i
+              << ", rank: " << j
+              << " 's name is: " << grad_output_tensor.name();
+
+      auto* next_node = next_node_shared.get();
 
       if (!node_input_buffers_dict.count(next_node)) {
         node_input_buffers_dict[next_node] =
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index c58c0b9e66e7aee43e311e54074f192ec1bcfae6..8b8423c6173fbf29d4e916741feff3a0302d5a06 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -164,6 +164,14 @@ class EagerTensor final {
    */
   void reset() { tensor_->reset(); }
 
+  /**
+   * @brief Determine whether tensor is DenseTensor
+   *
+   * @return true
+   * @return false
+   */
+  bool is_dense_tensor() const { return tensor_->is_dense_tensor(); }
+
   /**
    * @brief Transfer the current Tensor to the specified device and return.
    *
diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc
index 3737fd95ad64d96d48e6f503308dede0d995d81c..8f0e6cc5e41c9f040aa841dd52f0a39f2d15af74 100644
--- a/paddle/fluid/eager/tests/task_tests/backward_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc
@@ -56,6 +56,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
   auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+  auto_grad_meta->SetStopGradient(false);
 
   // Connect Tensor and AccumulationNode via AutoGradMeta
   auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
@@ -119,7 +120,7 @@ TEST(Backward, SingleNodeCustomGrad) {
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
   auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
-
+  auto_grad_meta->SetStopGradient(false);
   // Connect Tensor and AccumulationNode via AutoGradMeta
   auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
 
@@ -189,7 +190,7 @@ TEST(Backward, LinearNodes) {
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
   auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
-
+  auto_grad_meta->SetStopGradient(false);
   // Connect Node0 -> Node1 via Edge
   auto meta0 = egr::AutogradMeta();
   meta0.SetStopGradient(false);
@@ -281,13 +282,14 @@ TEST(Backward, WithAccumulation) {
   auto_grad_meta0->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
   auto_grad_meta0->SetSingleOutRankWithSlot(0, 0);
-
+  auto_grad_meta0->SetStopGradient(false);
   // Connect Inp1 and Node1 via AutoGradMeta
   AutogradMeta* auto_grad_meta1 =
       EagerUtils::autograd_meta(&(target_tensors[1]));
   auto_grad_meta1->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(node1_ptr));
   auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+  auto_grad_meta1->SetStopGradient(false);
 
   // Connect Node0 -> Node2 via Edge
   auto meta0 = egr::AutogradMeta();
diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
index 7f180fa1076fd4d806be6b0f18d6fe46e123b48e..523f7102af04ddf6c102f6d2030e8e318c9846d3 100644
--- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
@@ -58,6 +58,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
   auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+  auto_grad_meta->SetStopGradient(false);
   egr_utils_api::RetainGradForTensor(target_tensor);  // result: 1.0
 
   auto meta = AutogradMeta();
diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc
index 0f8039dade8010ad2af7df4790042a2ed3af3d1d..4f4a33b1a743afdf3f6f5c3652a2d87b3e0499ef 100644
--- a/paddle/fluid/eager/tests/task_tests/hook_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc
@@ -93,6 +93,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
   auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+  auto_grad_meta->SetStopGradient(false);
   target_tensor.set_autograd_meta(
       std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
           auto_grad_meta));
@@ -171,6 +172,7 @@ TEST(RetainGrad, HookAfterRetainGrad) {
   auto_grad_meta->SetGradNode(
       std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
   auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+  auto_grad_meta->SetStopGradient(false);
   target_tensor.set_autograd_meta(
       std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
           auto_grad_meta));
diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc
index e73dfa2ec8b6e45e2f39082a8896ec197a0cdf8d..f50458e556276b581efcace2199573ca2e0a6a1d 100644
--- a/paddle/fluid/eager/utils.cc
+++ b/paddle/fluid/eager/utils.cc
@@ -99,7 +99,12 @@ std::pair<size_t, size_t> EagerUtils::OutRankInfo(
 
 std::shared_ptr<GradNodeBase> EagerUtils::grad_node(
     const egr::EagerTensor& target) {
-  return unsafe_autograd_meta(target)->GetMutableGradNode();
+  auto* meta = nullable_autograd_meta(target);
+  if (meta) {
+    return meta->GetMutableGradNode();
+  } else {
+    return nullptr;
+  }
 }
 
 void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index 46b56f27ff98e36f4e86a27a598f51dc7cbf63b4..4419640ccf328bde65471f840b9bb526dd4ffd6d 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -298,6 +298,21 @@ static PyObject* eager_tensor_method_detach(EagerTensorObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* eager_tensor_method_get_underline_tensor(
+    EagerTensorObject* self, PyObject* args, PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  if (self->eager_tensor.is_dense_tensor()) {
+    auto* tensor = static_cast<paddle::framework::LoDTensor*>(
+        self->eager_tensor.impl().get());
+    VLOG(6) << "tensor: " << tensor->IsInitialized();
+    return ToPyObject(tensor);
+  } else {
+    Py_IncRef(Py_None);
+    return Py_None;
+  }
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 PyMethodDef variable_methods[] = {
     {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy,
      METH_VARARGS | METH_KEYWORDS, NULL},
@@ -315,14 +330,17 @@ PyMethodDef variable_methods[] = {
      METH_VARARGS | METH_KEYWORDS, NULL},
     {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
      METH_VARARGS | METH_KEYWORDS, NULL},
-    {"_is_shared_buffer_to",
+    {"_share_buffer_to",
     (PyCFunction)(void (*)(void))eager_tensor__share_buffer_to,
     METH_VARARGS | METH_KEYWORDS, NULL},
-    {"_share_buffer_with",
+    {"_is_shared_buffer_with",
     (PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with,
     METH_VARARGS | METH_KEYWORDS, NULL},
    {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach,
     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"get_tensor",
+     (PyCFunction)(void (*)(void))eager_tensor_method_get_underline_tensor,
+     METH_VARARGS | METH_KEYWORDS, NULL},
    {NULL, NULL, 0, NULL}};
 
 }  // namespace pybind
diff --git a/paddle/fluid/pybind/eager_properties.cc b/paddle/fluid/pybind/eager_properties.cc
index 038a1254d7ef6e243a03c523f0ed357f63f707ad..5f1d809168a42dd5994dca5a0cf471fe5810f7c0 100644
--- a/paddle/fluid/pybind/eager_properties.cc
+++ b/paddle/fluid/pybind/eager_properties.cc
@@ -42,6 +42,18 @@ PyObject* eager_tensor_properties_get_name(EagerTensorObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+PyObject* eager_tensor_properties_get_type(EagerTensorObject* self,
+                                           void* closure) {
+  EAGER_SYNC_TRY
+  if (self->eager_tensor.is_dense_tensor()) {
+    return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR);
+  } else {
+    Py_INCREF(Py_None);
+    return Py_None;
+  }
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value,
                                      void* closure) {
   EAGER_SYNC_TRY
@@ -74,8 +86,13 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self,
     return ToPyObject(*accumulation_grad_node->Grad());
   } else {
     VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name();
-    auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor);
-    return ToPyObject(meta->Grad());
+    auto meta = egr::EagerUtils::nullable_autograd_meta(self->eager_tensor);
+    if (meta) {
+      return ToPyObject(meta->Grad());
+    } else {
+      Py_INCREF(Py_None);
+      return Py_None;
+    }
   }
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
@@ -185,6 +202,8 @@ struct PyGetSetDef variable_properties[] = {
      nullptr, nullptr},
     {"dtype", (getter)eager_tensor_properties_get_dtype, nullptr, nullptr,
      nullptr},
+    {"type", (getter)eager_tensor_properties_get_type, nullptr, nullptr,
+     nullptr},
    {nullptr, nullptr, nullptr, nullptr, nullptr}};
 
 }  // namespace pybind
diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc
index c1049d240795c3fee3ad7b6d0f394c54f22f0e5e..5c74653a719d34b6bafbb2418b1b94bd196f22bc 100644
--- a/paddle/fluid/pybind/eager_utils.cc
+++ b/paddle/fluid/pybind/eager_utils.cc
@@ -450,6 +450,18 @@ PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype) {
   return obj.ptr();
 }
 
+PyObject* ToPyObject(const paddle::framework::proto::VarType& type) {
+  auto obj = ::pybind11::cast(type);
+  obj.inc_ref();
+  return obj.ptr();
+}
+
+PyObject* ToPyObject(const paddle::framework::LoDTensor* value) {
+  auto obj = ::pybind11::cast(value, py::return_value_policy::copy);
+  obj.inc_ref();
+  return obj.ptr();
+}
+
 PyObject* ToPyObject(const void* value) {
   if (value == nullptr) {
     Py_INCREF(Py_None);
diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h
index 20c82c572c325da39bd6f0108d4eef7de410d8b3..e1a7ed2415014e1e25d84577efbf8f3f0a18ce67 100644
--- a/paddle/fluid/pybind/eager_utils.h
+++ b/paddle/fluid/pybind/eager_utils.h
@@ -11,6 +11,7 @@ limitations under the License. */
 #pragma once
 
 #include <Python.h>
+#include "paddle/pten/core/dense_tensor.h"
 #include "pybind11/pybind11.h"
 #include "pybind11/stl.h"
 
@@ -54,7 +55,9 @@ PyObject* ToPyObject(const std::vector& value);
 PyObject* ToPyObject(const std::vector& value);
 PyObject* ToPyObject(const std::vector& value);
 PyObject* ToPyObject(const platform::Place& value);
+PyObject* ToPyObject(const framework::LoDTensor* value);
 PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype);
+PyObject* ToPyObject(const paddle::framework::proto::VarType& type);
 PyObject* ToPyObject(const void* value);
 PyObject* ToPyObject(
     const std::unordered_map>& value);
diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc
index 716e1ac3d30bbaa94671ac00d395d99fce8c3fd3..fe088a95681468cabbae68c88c24f3d769c43215 100644
--- a/paddle/pten/core/dense_tensor.cc
+++ b/paddle/pten/core/dense_tensor.cc
@@ -285,6 +285,9 @@ const paddle::platform::Place& DenseTensor::place() const {
       storage_,
       paddle::platform::errors::PreconditionNotMet(
           "Tensor not initialized yet when Tensor::place() is called."));
+  if (storage_->data_shared()) {
+    return storage_->data_shared()->place();
+  }
   return storage_->place();
 }
 
diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py
index e06e7f52dd671bd59075ddb2ea89df588ce6daec..3cccaceb8e69893410552eab7ae04840267f77a7 100644
--- a/python/paddle/fluid/dygraph/varbase_patch_methods.py
+++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py
@@ -758,10 +758,10 @@ def monkey_patch_varbase():
 
     @framework.dygraph_only
     def _grad_ivar(self):
-        if self.grad._is_initialized():
-            return self.grad
-        else:
-            return None
+        if self.grad is not None:
+            if self.grad._is_initialized():
+                return self.grad
+        return None
 
     @framework.dygraph_only
     def _set_grad_ivar(self, value):
@@ -782,6 +782,10 @@ def monkey_patch_varbase():
     def clone(self):
         return _C_ops_.assign(self)
 
+    @framework.dygraph_only
+    def value(self):
+        return self
+
     if core._in_eager_mode() and not hasattr(core, "eager"):
         return
 
@@ -805,6 +809,7 @@ def monkey_patch_varbase():
         setattr(core.eager.EagerTensor, "_set_grad_ivar", _set_grad_ivar)
         setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient)
        setattr(core.eager.EagerTensor, "clone", clone)
+        setattr(core.eager.EagerTensor, "value", value)
    else:
         setattr(core.VarBase, "__name__", "Tensor")
         setattr(core.VarBase, "grad", grad)
diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
index 3ab7981cdb1a4b4bf3b0229439a2d4c47a0c713b..9630462b4963a961f34d601a9e0d812e34da69f4 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -109,7 +109,7 @@ class EagerDtypeTestCase(unittest.TestCase):
                          core.VarDesc.VarType.COMPLEX128)
 
 
-class EagerTensorPropertiesTestCase(unittest.TestCase):
+class EagerTensorPropertiesAndMethodsTestCase(unittest.TestCase):
     def constructor(self, place):
         egr_tensor = core.eager.EagerTensor()
         self.assertEqual(egr_tensor.persistable, False)
@@ -645,7 +645,8 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
         self.assertTrue(tensor3.stop_gradient, True)
         self.assertTrue(tensor3.place.is_cpu_place())
 
-    def test_share_buffer_to():
+    def test_share_buffer_to(self):
+        with _test_eager_guard():
            arr = np.ones([4, 16, 16, 32]).astype('float32')
            arr1 = np.zeros([4, 16]).astype('float32')
            arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones(
@@ -661,7 +662,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
            else:
                tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
                                           core.CPUPlace())
-            self.assertTrue(np.array_equal(tensor.numpy(), arr1))
+            self.assertTrue(np.array_equal(tensor.numpy(), arr))
            self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
            tensor2._share_buffer_to(tensor)
            self.assertTrue(np.array_equal(tensor.numpy(), arr2))
@@ -694,6 +695,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
            self.assertEqual(tensor.stop_gradient, False)
            tensor.stop_gradient = True
            self.assertEqual(tensor.stop_gradient, True)
+            self.assertEqual(tensor.type, core.VarDesc.VarType.LOD_TENSOR)
 
    def test_global_properties(self):
        print("Test_global_properties")
@@ -714,6 +716,25 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
                self.assertTrue(core.eager._get_expected_place().is_cpu_place())
        core._disable_eager_mode()
 
+    def test_value(self):
+        with _test_eager_guard():
+            arr = np.random.rand(4, 16, 16, 32).astype('float64')
+
+            egr_tensor0 = core.eager.EagerTensor(value=arr)
+            self.assertEqual(egr_tensor0.persistable, False)
+            self.assertTrue("generated" in egr_tensor0.name)
+            self.assertEqual(egr_tensor0.shape, [4, 16, 16, 32])
+            self.assertTrue(
+                egr_tensor0.place._equals(
+                    paddle.fluid.framework._current_expected_place()))
+            self.assertEqual(egr_tensor0.dtype, core.VarDesc.VarType.FP64)
+            self.assertEqual(egr_tensor0.stop_gradient, True)
+            self.assertTrue(egr_tensor0.value().get_tensor()._dtype(),
+                            core.VarDesc.VarType.FP64)
+            self.assertTrue(egr_tensor0.value().get_tensor()._place(),
+                            paddle.fluid.framework._current_expected_place())
+            self.assertTrue(egr_tensor0.value().get_tensor()._is_initialized())
+
 
class EagerParamBaseUsageTestCase(unittest.TestCase):
    def test_print(self):
@@ -803,6 +824,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase):
        self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace()))
        self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4))
        self.assertTrue(np.array_equal(egr_tensor12.gradient(), None))
+        egr_tensor12.stop_gradient = False
        egr_tensor12.backward()
        self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr))
 
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
index d2e1a4fbb18828bad39b0f5c87c37aea8d7a27b5..44d73612b1cb5e0bce1e561ab25884355b083d77 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
@@ -181,6 +181,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
            self.func_auto_prune2()
        self.func_auto_prune2()
 
+    # TODO(jiabin): Support this when we support better split tensor
    def test_auto_prune3(self):
        with fluid.dygraph.guard():
            case3 = AutoPruneLayer3(input_size=784)
@@ -217,7 +218,7 @@ class TestImperativeAutoPrune(unittest.TestCase):
            self.assertTrue(case4.linear.weight._grad_ivar() is not None)
            self.assertTrue((part2.gradient() == 0).all())
 
-    def test_auto_prune6(self):
+    def func_auto_prune6(self):
        with fluid.dygraph.guard():
            value0 = np.arange(26).reshape(2, 13).astype("float32")
            value1 = np.arange(6).reshape(2, 3).astype("float32")
@@ -235,7 +236,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
            self.assertTrue(linear.weight.gradient() is None)
            self.assertTrue(out1.gradient() is None)
 
-    def test_auto_prune7(self):
+    def test_auto_prune6(self):
+        with _test_eager_guard():
+            self.func_auto_prune6()
+        self.func_auto_prune6()
+
+    def func_auto_prune7(self):
        with fluid.dygraph.guard():
            value0 = np.arange(26).reshape(2, 13).astype("float32")
            value1 = np.arange(6).reshape(2, 3).astype("float32")
@@ -253,7 +259,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
            self.assertTrue(linear.weight.gradient() is None)
            self.assertTrue(out1.gradient() is None)
 
-    def test_auto_prune8(self):
+    def test_auto_prune7(self):
+        with _test_eager_guard():
+            self.func_auto_prune7()
+        self.func_auto_prune7()
+
+    def func_auto_prune8(self):
        with fluid.dygraph.guard():
            value0 = np.arange(26).reshape(2, 13).astype("float32")
            value1 = np.arange(6).reshape(2, 3).astype("float32")
@@ -278,7 +289,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
            self.assertFalse(
                np.array_equal(linear_origin, linear.weight.numpy()))
 
-    def test_auto_prune9(self):
+    def test_auto_prune8(self):
+        with _test_eager_guard():
+            self.func_auto_prune8()
+        self.func_auto_prune8()
+
+    def func_auto_prune9(self):
        with fluid.dygraph.guard():
            value0 = np.arange(26).reshape(2, 13).astype("float32")
            value1 = np.arange(6).reshape(2, 3).astype("float32")
@@ -307,7 +323,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
            except ValueError as e:
                assert type(e) == ValueError
 
-    def test_auto_prune10(self):
+    def test_auto_prune9(self):
+        with _test_eager_guard():
+            self.func_auto_prune9()
+        self.func_auto_prune9()
+
+    def func_auto_prune10(self):
        with fluid.dygraph.guard():
            value0 = np.arange(26).reshape(2, 13).astype("float32")
            value1 = np.arange(6).reshape(2, 3).astype("float32")
@@ -321,12 +342,18 @@ class TestImperativeAutoPrune(unittest.TestCase):
            out2 = linear2(b)
            out1.stop_gradient = True
            out = fluid.layers.concat(input=[out1, out2, c], axis=1)
+            #TODO(jiabin): In Eager Mode we don't actually need sort_sum_gradient, this test should be removed when we don't support fluid anymore.
            fluid.set_flags({'FLAGS_sort_sum_gradient': True})
            out.backward()
            self.assertTrue(linear.weight.gradient() is None)
            self.assertTrue(out1.gradient() is None)
 
-    def test_auto_prune_with_optimizer(self):
+    def test_auto_prune10(self):
+        with _test_eager_guard():
+            self.func_auto_prune10()
+        self.func_auto_prune10()
+
+    def func_auto_prune_with_optimizer(self):
        vocab_size = 100
        size = 20
        batch_size = 16
@@ -341,7 +368,6 @@ class TestImperativeAutoPrune(unittest.TestCase):
        grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001)
        optimizer = fluid.optimizer.AdamOptimizer(
            0.001, parameter_list=model.parameters(), grad_clip=grad_clip)
-
        indices = fluid.dygraph.to_variable(indices)
        embed = fluid.dygraph.to_variable(embed)
        dummy_loss = model(embed)
@@ -374,7 +400,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
            assert model.embed1.weight._grad_ivar() is None
            assert model.linear_1.weight._grad_ivar() is None
 
-    def test_case2_prune_no_grad_branch(self):
+    def test_auto_prune_with_optimizer(self):
+        with _test_eager_guard():
+            self.func_auto_prune_with_optimizer()
+        self.func_auto_prune_with_optimizer()
+
+    def func_case2_prune_no_grad_branch(self):
        with fluid.dygraph.guard():
            value1 = np.arange(784).reshape(1, 784)
            value2 = np.arange(1).reshape(1, 1)
@@ -386,7 +417,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
            self.assertTrue(case3.linear2.weight._grad_ivar() is None)
            self.assertTrue(case3.linear.weight._grad_ivar() is not None)
 
-    def test_case3_prune_no_grad_branch2(self):
+    def test_case2_prune_no_grad_branch(self):
+        with _test_eager_guard():
+            self.func_case2_prune_no_grad_branch()
+        self.func_case2_prune_no_grad_branch()
+
+    def func_case3_prune_no_grad_branch2(self):
        with fluid.dygraph.guard():
            value1 = np.arange(1).reshape(1, 1)
            linear = fluid.dygraph.Linear(1, 1, act=None)
@@ -399,13 +435,23 @@ class TestImperativeAutoPrune(unittest.TestCase):
            loss.backward()
            self.assertTrue(linear.weight._grad_ivar() is None)
 
-    def test_case4_with_no_grad_op_maker(self):
+    def test_case3_prune_no_grad_branch2(self):
+        with _test_eager_guard():
+            self.func_case3_prune_no_grad_branch2()
+        self.func_case3_prune_no_grad_branch2()
+
+    def func_case4_with_no_grad_op_maker(self):
        with fluid.dygraph.guard():
            out = fluid.layers.gaussian_random(shape=[20, 30])
            loss = fluid.layers.mean(out)
            loss.backward()
            self.assertTrue(out._grad_ivar() is None)
 
+    def test_case4_with_no_grad_op_maker(self):
+        with _test_eager_guard():
+            self.func_case4_with_no_grad_op_maker()
+        self.func_case4_with_no_grad_op_maker()
+
 
 if __name__ == '__main__':
     unittest.main()
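
Taken together, the pybind and monkey-patch changes above give eager tensors a `type` property and a `value().get_tensor()` chain similar to what dygraph `VarBase` already exposes. A minimal usage sketch, assuming an eager-capable build and the `_test_eager_guard` helper used by the tests above:

import numpy as np
import paddle
from paddle.fluid import core
from paddle.fluid.framework import _test_eager_guard

with _test_eager_guard():
    arr = np.random.rand(4, 16, 16, 32).astype('float64')
    t = core.eager.EagerTensor(value=arr)

    # New `type` property: dense eager tensors report LOD_TENSOR.
    assert t.type == core.VarDesc.VarType.LOD_TENSOR

    # New `value()` / `get_tensor()` chain exposes the underlying LoDTensor.
    inner = t.value().get_tensor()
    assert inner._is_initialized()
    # _dtype() and _place() are available on the returned tensor as well.
    print(inner._dtype(), inner._place())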