From f81569e37cbe61106255e1b52757010ffe84bf58 Mon Sep 17 00:00:00 2001 From: Jiabin Yang Date: Mon, 17 Jan 2022 09:44:07 +0800 Subject: [PATCH] Support auto prune logic in eager mode (#38960) * support test_auto_prune_partial * support rest of autoprune strategy in eager mode --- .../auto_code_generator/eager_generator.cc | 2 +- paddle/fluid/eager/backward.cc | 49 +++++++++++--- paddle/fluid/eager/eager_tensor.h | 8 +++ .../eager/tests/task_tests/backward_test.cc | 8 ++- .../cross_batch_accumulation_test.cc | 1 + .../fluid/eager/tests/task_tests/hook_test.cc | 2 + paddle/fluid/eager/utils.cc | 7 +- paddle/fluid/pybind/eager_method.cc | 22 ++++++- paddle/fluid/pybind/eager_properties.cc | 23 ++++++- paddle/fluid/pybind/eager_utils.cc | 12 ++++ paddle/fluid/pybind/eager_utils.h | 3 + paddle/pten/core/dense_tensor.cc | 3 + .../fluid/dygraph/varbase_patch_methods.py | 13 ++-- .../tests/unittests/test_egr_python_api.py | 28 +++++++- .../unittests/test_imperative_auto_prune.py | 66 ++++++++++++++++--- 15 files changed, 211 insertions(+), 36 deletions(-) diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index 2c3207b116e..11e033e1e59 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -1852,7 +1852,7 @@ static std::string GenerateGradNodeCCContents( " %s\n" " return outputs;\n"; generated_grad_function_body = paddle::string::Sprintf( - BWD_RETURN_TEMPLATE, outs_size, generated_grad_function_body); + BWD_RETURN_TEMPLATE, in_vars.size(), generated_grad_function_body); // [Generation] Get Full Grad Function const char* GRAD_FUNCTION_TEMPLATE = diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index 01cb1b81e34..b0e3d81df3a 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -103,7 +103,17 @@ void RunBackward(const std::vector& tensors, VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first << ", rank: " << input_info.second; // Get target GradNodeBase from target tensors - GradNodeBase* grad_node = auto_grad_meta->GetMutableGradNode().get(); + auto shared_grad_node = auto_grad_meta->GetMutableGradNode(); + + if (shared_grad_node == nullptr || shared_grad_node.get() == nullptr || + auto_grad_meta->StopGradient()) { + VLOG(3) << "Skip auto grad since there is no grad op for var or loss is " + "stop_gradient=True: " + << tensor.name(); + continue; + } + + GradNodeBase* grad_node = shared_grad_node.get(); // Prepare GradTensorHolder if (!node_input_buffers_dict.count(grad_node)) { @@ -192,19 +202,38 @@ void RunBackward(const std::vector& tensors, // Since we make edge has as same rank as bwd outputs, we indexing them // with // the same rank(i, j) - VLOG(6) << "Get Edge with slot: " << i << ", rank: " << j; - egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j]; - if (!grad_output_tensor.defined() || - !grad_output_tensor.initialized()) { - VLOG(6) << "We get grad_output_tensor with slot: " << i - << ", rank: " << j << " as uninitialized or undefined tensor"; - } - GradNodeBase* next_node = edge.GetMutableGradNode().get(); + auto next_node_shared = edge.GetMutableGradNode(); // Next node could be nullptr if it is leaf tensor with no // AccumulationNode attached // Or it could also originated from dispensable inputs - if (!next_node) continue; + if (!next_node_shared || !next_node_shared.get() || + grad_output_tensors[i].empty()) { + continue; + } + 
PADDLE_ENFORCE_LT( + j, grad_output_tensors[i].size(), + paddle::platform::errors::Fatal( + "Rank of grad_output_tensors should be less than " + "grad_output_tensors[i].size(), which is: %d. This error may " + "indicate autoprune or autograd api error. ", + grad_output_tensors.size())); + egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j]; + + if ((!grad_output_tensor.defined() || + !grad_output_tensor.initialized())) { + if (!grad_output_tensor.Var().IsInitialized()) { + VLOG(6) + << "We get grad_output_tensor with slot: " << i + << ", rank: " << j + << " as uninitialized or undefined in both tensor and variable"; + } + } + VLOG(6) << "Get Edge and grad_output_tensor with slot: " << i + << ", rank: " << j + << " 's name is: " << grad_output_tensor.name(); + + auto* next_node = next_node_shared.get(); if (!node_input_buffers_dict.count(next_node)) { node_input_buffers_dict[next_node] = diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h index c58c0b9e66e..8b8423c6173 100644 --- a/paddle/fluid/eager/eager_tensor.h +++ b/paddle/fluid/eager/eager_tensor.h @@ -164,6 +164,14 @@ class EagerTensor final { */ void reset() { tensor_->reset(); } + /** + * @brief Determine whether tensor is DenseTensor + * + * @return true + * @return false + */ + bool is_dense_tensor() const { return tensor_->is_dense_tensor(); } + /** * @brief Transfer the current Tensor to the specified device and return. * diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc index 3737fd95ad6..8f0e6cc5e41 100644 --- a/paddle/fluid/eager/tests/task_tests/backward_test.cc +++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc @@ -56,6 +56,7 @@ TEST(Backward, SingleNodeEmptyGrad) { auto_grad_meta->SetGradNode( std::dynamic_pointer_cast(node0_ptr)); auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + auto_grad_meta->SetStopGradient(false); // Connect Tensor and AccumulationNode via AutoGradMeta auto acc_node_ptr = std::make_shared(); @@ -119,7 +120,7 @@ TEST(Backward, SingleNodeCustomGrad) { auto_grad_meta->SetGradNode( std::dynamic_pointer_cast(node0_ptr)); auto_grad_meta->SetSingleOutRankWithSlot(0, 0); - + auto_grad_meta->SetStopGradient(false); // Connect Tensor and AccumulationNode via AutoGradMeta auto acc_node_ptr = std::make_shared(); @@ -189,7 +190,7 @@ TEST(Backward, LinearNodes) { auto_grad_meta->SetGradNode( std::dynamic_pointer_cast(node0_ptr)); auto_grad_meta->SetSingleOutRankWithSlot(0, 0); - + auto_grad_meta->SetStopGradient(false); // Connect Node0 -> Node1 via Edge auto meta0 = egr::AutogradMeta(); meta0.SetStopGradient(false); @@ -281,13 +282,14 @@ TEST(Backward, WithAccumulation) { auto_grad_meta0->SetGradNode( std::dynamic_pointer_cast(node0_ptr)); auto_grad_meta0->SetSingleOutRankWithSlot(0, 0); - + auto_grad_meta0->SetStopGradient(false); // Connect Inp1 and Node1 via AutoGradMeta AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&(target_tensors[1])); auto_grad_meta1->SetGradNode( std::dynamic_pointer_cast(node1_ptr)); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); + auto_grad_meta1->SetStopGradient(false); // Connect Node0 -> Node2 via Edge auto meta0 = egr::AutogradMeta(); diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc index 7f180fa1076..523f7102af0 100644 --- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc +++ 
b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc @@ -58,6 +58,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) { auto_grad_meta->SetGradNode( std::dynamic_pointer_cast(scale_node_ptr)); auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + auto_grad_meta->SetStopGradient(false); egr_utils_api::RetainGradForTensor(target_tensor); // result: 1.0 auto meta = AutogradMeta(); diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc index 0f8039dade8..4f4a33b1a74 100644 --- a/paddle/fluid/eager/tests/task_tests/hook_test.cc +++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc @@ -93,6 +93,7 @@ TEST(RetainGrad, HookBeforeRetainGrad) { auto_grad_meta->SetGradNode( std::dynamic_pointer_cast(scale_node_ptr)); auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + auto_grad_meta->SetStopGradient(false); target_tensor.set_autograd_meta( std::dynamic_pointer_cast( auto_grad_meta)); @@ -171,6 +172,7 @@ TEST(RetainGrad, HookAfterRetainGrad) { auto_grad_meta->SetGradNode( std::dynamic_pointer_cast(scale_node_ptr)); auto_grad_meta->SetSingleOutRankWithSlot(0, 0); + auto_grad_meta->SetStopGradient(false); target_tensor.set_autograd_meta( std::dynamic_pointer_cast( auto_grad_meta)); diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index e73dfa2ec8b..f50458e5562 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -99,7 +99,12 @@ std::pair EagerUtils::OutRankInfo( std::shared_ptr EagerUtils::grad_node( const egr::EagerTensor& target) { - return unsafe_autograd_meta(target)->GetMutableGradNode(); + auto* meta = nullable_autograd_meta(target); + if (meta) { + return meta->GetMutableGradNode(); + } else { + return nullptr; + } } void EagerUtils::SetHistory(std::vector* autograd_metas, diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index 46b56f27ff9..4419640ccf3 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -298,6 +298,21 @@ static PyObject* eager_tensor_method_detach(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } +static PyObject* eager_tensor_method_get_underline_tensor( + EagerTensorObject* self, PyObject* args, PyObject* kwargs) { + EAGER_SYNC_TRY + if (self->eager_tensor.is_dense_tensor()) { + auto* tensor = static_cast( + self->eager_tensor.impl().get()); + VLOG(6) << "tensor: " << tensor->IsInitialized(); + return ToPyObject(tensor); + } else { + Py_IncRef(Py_None); + return Py_None; + } + EAGER_CATCH_AND_THROW_RETURN_NULL +} + PyMethodDef variable_methods[] = { {"numpy", (PyCFunction)(void (*)(void))eager_tensor_method_numpy, METH_VARARGS | METH_KEYWORDS, NULL}, @@ -315,14 +330,17 @@ PyMethodDef variable_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads, METH_VARARGS | METH_KEYWORDS, NULL}, - {"_is_shared_buffer_to", + {"_share_buffer_to", (PyCFunction)(void (*)(void))eager_tensor__share_buffer_to, METH_VARARGS | METH_KEYWORDS, NULL}, - {"_share_buffer_with", + {"_is_shared_buffer_with", (PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with, METH_VARARGS | METH_KEYWORDS, NULL}, {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach, METH_VARARGS | METH_KEYWORDS, NULL}, + {"get_tensor", + (PyCFunction)(void (*)(void))eager_tensor_method_get_underline_tensor, + METH_VARARGS | METH_KEYWORDS, NULL}, {NULL, NULL, 0, NULL}}; } // namespace pybind diff --git a/paddle/fluid/pybind/eager_properties.cc 
b/paddle/fluid/pybind/eager_properties.cc index 038a1254d7e..5f1d809168a 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -42,6 +42,18 @@ PyObject* eager_tensor_properties_get_name(EagerTensorObject* self, EAGER_CATCH_AND_THROW_RETURN_NULL } +PyObject* eager_tensor_properties_get_type(EagerTensorObject* self, + void* closure) { + EAGER_SYNC_TRY + if (self->eager_tensor.is_dense_tensor()) { + return ToPyObject(paddle::framework::proto::VarType::LOD_TENSOR); + } else { + Py_INCREF(Py_None); + return Py_None; + } + EAGER_CATCH_AND_THROW_RETURN_NULL +} + int eager_tensor_properties_set_name(EagerTensorObject* self, PyObject* value, void* closure) { EAGER_SYNC_TRY @@ -74,8 +86,13 @@ PyObject* eager_tensor_properties_get_grad(EagerTensorObject* self, return ToPyObject(*accumulation_grad_node->Grad()); } else { VLOG(6) << "Get grad for tensor: " << self->eager_tensor.name(); - auto meta = egr::EagerUtils::unsafe_autograd_meta(self->eager_tensor); - return ToPyObject(meta->Grad()); + auto meta = egr::EagerUtils::nullable_autograd_meta(self->eager_tensor); + if (meta) { + return ToPyObject(meta->Grad()); + } else { + Py_INCREF(Py_None); + return Py_None; + } } EAGER_CATCH_AND_THROW_RETURN_NULL } @@ -185,6 +202,8 @@ struct PyGetSetDef variable_properties[] = { nullptr, nullptr}, {"dtype", (getter)eager_tensor_properties_get_dtype, nullptr, nullptr, nullptr}, + {"type", (getter)eager_tensor_properties_get_type, nullptr, nullptr, + nullptr}, {nullptr, nullptr, nullptr, nullptr, nullptr}}; } // namespace pybind diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index c1049d24079..5c74653a719 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -450,6 +450,18 @@ PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype) { return obj.ptr(); } +PyObject* ToPyObject(const paddle::framework::proto::VarType& type) { + auto obj = ::pybind11::cast(type); + obj.inc_ref(); + return obj.ptr(); +} + +PyObject* ToPyObject(const paddle::framework::LoDTensor* value) { + auto obj = ::pybind11::cast(value, py::return_value_policy::copy); + obj.inc_ref(); + return obj.ptr(); +} + PyObject* ToPyObject(const void* value) { if (value == nullptr) { Py_INCREF(Py_None); diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 20c82c572c3..e1a7ed24150 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -11,6 +11,7 @@ limitations under the License. 
*/ #pragma once #include +#include "paddle/pten/core/dense_tensor.h" #include "pybind11/pybind11.h" #include "pybind11/stl.h" @@ -54,7 +55,9 @@ PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const platform::Place& value); +PyObject* ToPyObject(const framework::LoDTensor* value); PyObject* ToPyObject(const paddle::framework::proto::VarType::Type& dtype); +PyObject* ToPyObject(const paddle::framework::proto::VarType& type); PyObject* ToPyObject(const void* value); PyObject* ToPyObject( const std::unordered_map>& value); diff --git a/paddle/pten/core/dense_tensor.cc b/paddle/pten/core/dense_tensor.cc index 716e1ac3d30..fe088a95681 100644 --- a/paddle/pten/core/dense_tensor.cc +++ b/paddle/pten/core/dense_tensor.cc @@ -285,6 +285,9 @@ const paddle::platform::Place& DenseTensor::place() const { storage_, paddle::platform::errors::PreconditionNotMet( "Tensor not initialized yet when Tensor::place() is called.")); + if (storage_->data_shared()) { + return storage_->data_shared()->place(); + } return storage_->place(); } diff --git a/python/paddle/fluid/dygraph/varbase_patch_methods.py b/python/paddle/fluid/dygraph/varbase_patch_methods.py index e06e7f52dd6..3cccaceb8e6 100644 --- a/python/paddle/fluid/dygraph/varbase_patch_methods.py +++ b/python/paddle/fluid/dygraph/varbase_patch_methods.py @@ -758,10 +758,10 @@ def monkey_patch_varbase(): @framework.dygraph_only def _grad_ivar(self): - if self.grad._is_initialized(): - return self.grad - else: - return None + if self.grad is not None: + if self.grad._is_initialized(): + return self.grad + return None @framework.dygraph_only def _set_grad_ivar(self, value): @@ -782,6 +782,10 @@ def monkey_patch_varbase(): def clone(self): return _C_ops_.assign(self) + @framework.dygraph_only + def value(self): + return self + if core._in_eager_mode() and not hasattr(core, "eager"): return @@ -805,6 +809,7 @@ def monkey_patch_varbase(): setattr(core.eager.EagerTensor, "_set_grad_ivar", _set_grad_ivar) setattr(core.eager.EagerTensor, "clear_gradient", clear_gradient) setattr(core.eager.EagerTensor, "clone", clone) + setattr(core.eager.EagerTensor, "value", value) else: setattr(core.VarBase, "__name__", "Tensor") setattr(core.VarBase, "grad", grad) diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index 3ab7981cdb1..9630462b496 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -109,7 +109,7 @@ class EagerDtypeTestCase(unittest.TestCase): core.VarDesc.VarType.COMPLEX128) -class EagerTensorPropertiesTestCase(unittest.TestCase): +class EagerTensorPropertiesAndMethodsTestCase(unittest.TestCase): def constructor(self, place): egr_tensor = core.eager.EagerTensor() self.assertEqual(egr_tensor.persistable, False) @@ -645,7 +645,8 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): self.assertTrue(tensor3.stop_gradient, True) self.assertTrue(tensor3.place.is_cpu_place()) - def test_share_buffer_to(): + def test_share_buffer_to(self): + with _test_eager_guard(): arr = np.ones([4, 16, 16, 32]).astype('float32') arr1 = np.zeros([4, 16]).astype('float32') arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones( @@ -661,7 +662,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): else: tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32, core.CPUPlace()) - 
self.assertTrue(np.array_equal(tensor.numpy(), arr1)) + self.assertTrue(np.array_equal(tensor.numpy(), arr)) self.assertTrue(np.array_equal(tensor2.numpy(), arr2)) tensor2._share_buffer_to(tensor) self.assertTrue(np.array_equal(tensor.numpy(), arr2)) @@ -694,6 +695,7 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): self.assertEqual(tensor.stop_gradient, False) tensor.stop_gradient = True self.assertEqual(tensor.stop_gradient, True) + self.assertEqual(tensor.type, core.VarDesc.VarType.LOD_TENSOR) def test_global_properties(self): print("Test_global_properties") @@ -714,6 +716,25 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): self.assertTrue(core.eager._get_expected_place().is_cpu_place()) core._disable_eager_mode() + def test_value(self): + with _test_eager_guard(): + arr = np.random.rand(4, 16, 16, 32).astype('float64') + + egr_tensor0 = core.eager.EagerTensor(value=arr) + self.assertEqual(egr_tensor0.persistable, False) + self.assertTrue("generated" in egr_tensor0.name) + self.assertEqual(egr_tensor0.shape, [4, 16, 16, 32]) + self.assertTrue( + egr_tensor0.place._equals( + paddle.fluid.framework._current_expected_place())) + self.assertEqual(egr_tensor0.dtype, core.VarDesc.VarType.FP64) + self.assertEqual(egr_tensor0.stop_gradient, True) + self.assertTrue(egr_tensor0.value().get_tensor()._dtype(), + core.VarDesc.VarType.FP64) + self.assertTrue(egr_tensor0.value().get_tensor()._place(), + paddle.fluid.framework._current_expected_place()) + self.assertTrue(egr_tensor0.value().get_tensor()._is_initialized()) + class EagerParamBaseUsageTestCase(unittest.TestCase): def test_print(self): @@ -803,6 +824,7 @@ class EagerParamBaseUsageTestCase(unittest.TestCase): self.assertTrue(egr_tensor12.place._equals(paddle.fluid.CPUPlace())) self.assertTrue(np.array_equal(egr_tensor12.numpy(), arr4)) self.assertTrue(np.array_equal(egr_tensor12.gradient(), None)) + egr_tensor12.stop_gradient = False egr_tensor12.backward() self.assertTrue(np.array_equal(egr_tensor12.gradient(), arr)) diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index d2e1a4fbb18..44d73612b1c 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -181,6 +181,7 @@ class TestImperativeAutoPrune(unittest.TestCase): self.func_auto_prune2() self.func_auto_prune2() + # TODO(jiabin): Support this when we support better split tensor def test_auto_prune3(self): with fluid.dygraph.guard(): case3 = AutoPruneLayer3(input_size=784) @@ -217,7 +218,7 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) - def test_auto_prune6(self): + def func_auto_prune6(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") @@ -235,7 +236,12 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(linear.weight.gradient() is None) self.assertTrue(out1.gradient() is None) - def test_auto_prune7(self): + def test_auto_prune6(self): + with _test_eager_guard(): + self.func_auto_prune6() + self.func_auto_prune6() + + def func_auto_prune7(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") @@ -253,7 +259,12 @@ class TestImperativeAutoPrune(unittest.TestCase): 
self.assertTrue(linear.weight.gradient() is None) self.assertTrue(out1.gradient() is None) - def test_auto_prune8(self): + def test_auto_prune7(self): + with _test_eager_guard(): + self.func_auto_prune7() + self.func_auto_prune7() + + def func_auto_prune8(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") @@ -278,7 +289,12 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertFalse( np.array_equal(linear_origin, linear.weight.numpy())) - def test_auto_prune9(self): + def test_auto_prune8(self): + with _test_eager_guard(): + self.func_auto_prune8() + self.func_auto_prune8() + + def func_auto_prune9(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") @@ -307,7 +323,12 @@ class TestImperativeAutoPrune(unittest.TestCase): except ValueError as e: assert type(e) == ValueError - def test_auto_prune10(self): + def test_auto_prune9(self): + with _test_eager_guard(): + self.func_auto_prune9() + self.func_auto_prune9() + + def func_auto_prune10(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") value1 = np.arange(6).reshape(2, 3).astype("float32") @@ -321,12 +342,18 @@ class TestImperativeAutoPrune(unittest.TestCase): out2 = linear2(b) out1.stop_gradient = True out = fluid.layers.concat(input=[out1, out2, c], axis=1) + #TODO(jiabin): In Eager Mode we don't actually need sort_sum_gradient, this test should be removed when we don't support fluid anymore. fluid.set_flags({'FLAGS_sort_sum_gradient': True}) out.backward() self.assertTrue(linear.weight.gradient() is None) self.assertTrue(out1.gradient() is None) - def test_auto_prune_with_optimizer(self): + def test_auto_prune10(self): + with _test_eager_guard(): + self.func_auto_prune10() + self.func_auto_prune10() + + def func_auto_prune_with_optimizer(self): vocab_size = 100 size = 20 batch_size = 16 @@ -341,7 +368,6 @@ class TestImperativeAutoPrune(unittest.TestCase): grad_clip = fluid.clip.GradientClipByGlobalNorm(0.001) optimizer = fluid.optimizer.AdamOptimizer( 0.001, parameter_list=model.parameters(), grad_clip=grad_clip) - indices = fluid.dygraph.to_variable(indices) embed = fluid.dygraph.to_variable(embed) dummy_loss = model(embed) @@ -374,7 +400,12 @@ class TestImperativeAutoPrune(unittest.TestCase): assert model.embed1.weight._grad_ivar() is None assert model.linear_1.weight._grad_ivar() is None - def test_case2_prune_no_grad_branch(self): + def test_auto_prune_with_optimizer(self): + with _test_eager_guard(): + self.func_auto_prune_with_optimizer() + self.func_auto_prune_with_optimizer() + + def func_case2_prune_no_grad_branch(self): with fluid.dygraph.guard(): value1 = np.arange(784).reshape(1, 784) value2 = np.arange(1).reshape(1, 1) @@ -386,7 +417,12 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case3.linear2.weight._grad_ivar() is None) self.assertTrue(case3.linear.weight._grad_ivar() is not None) - def test_case3_prune_no_grad_branch2(self): + def test_case2_prune_no_grad_branch(self): + with _test_eager_guard(): + self.func_case2_prune_no_grad_branch() + self.func_case2_prune_no_grad_branch() + + def func_case3_prune_no_grad_branch2(self): with fluid.dygraph.guard(): value1 = np.arange(1).reshape(1, 1) linear = fluid.dygraph.Linear(1, 1, act=None) @@ -399,13 +435,23 @@ class TestImperativeAutoPrune(unittest.TestCase): loss.backward() self.assertTrue(linear.weight._grad_ivar() is 
None) - def test_case4_with_no_grad_op_maker(self): + def test_case3_prune_no_grad_branch2(self): + with _test_eager_guard(): + self.func_case3_prune_no_grad_branch2() + self.func_case3_prune_no_grad_branch2() + + def func_case4_with_no_grad_op_maker(self): with fluid.dygraph.guard(): out = fluid.layers.gaussian_random(shape=[20, 30]) loss = fluid.layers.mean(out) loss.backward() self.assertTrue(out._grad_ivar() is None) + def test_case4_with_no_grad_op_maker(self): + with _test_eager_guard(): + self.func_case4_with_no_grad_op_maker() + self.func_case4_with_no_grad_op_maker() + if __name__ == '__main__': unittest.main() -- GitLab
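
For reference, the eager-mode auto-prune behavior implemented and tested above is: once an output tensor is marked stop_gradient (or has no grad node attached), RunBackward skips that edge, so parameters reachable only through the pruned output receive no gradient. The sketch below mirrors the pattern of func_auto_prune6/func_auto_prune10 in the test diff; the Linear constructor arguments and the _test_eager_guard import path are assumed from the surrounding test file rather than shown directly in the hunks.

import numpy as np
import paddle.fluid as fluid
from paddle.fluid.framework import _test_eager_guard  # assumed import path

with fluid.dygraph.guard():
    with _test_eager_guard():  # run under the eager mode this patch targets
        value0 = np.arange(26).reshape(2, 13).astype("float32")
        value1 = np.arange(6).reshape(2, 3).astype("float32")
        value2 = np.arange(10).reshape(2, 5).astype("float32")
        linear = fluid.dygraph.Linear(13, 5, dtype="float32")   # dims assumed to match value0
        linear2 = fluid.dygraph.Linear(3, 3, dtype="float32")   # dims assumed to match value1
        a = fluid.dygraph.to_variable(value0)
        b = fluid.dygraph.to_variable(value1)
        c = fluid.dygraph.to_variable(value2)
        out1 = linear(a)
        out2 = linear2(b)
        out1.stop_gradient = True  # prune the branch that flows through `linear`
        out = fluid.layers.concat(input=[out1, out2, c], axis=1)
        out.backward()
        # The pruned branch is skipped during backward, so it yields no gradients;
        # linear2, by contrast, still receives its gradient through out2.
        assert linear.weight.gradient() is None
        assert out1.gradient() is None

The None results for the pruned branch rely on the changes above to _grad_ivar in varbase_patch_methods.py and to eager_tensor_properties_get_grad in eager_properties.cc, which return None when a tensor has no grad instead of touching an uninitialized one.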