From 2f50ae99ad874dfd6e196756f2a36547053756cb Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Wed, 23 Mar 2022 14:09:15 +0800 Subject: [PATCH] Support initializing specific grad tensors to zero for selected operators (#39963) * Supported Complex2Real Conversion for Eager Dygraph * Supported Complex2Real Conversion for Eager Dygraph * Enabled complex type promotion test for matmul_v2 * Fix CI issues * Support initializing specific grad tensors to zero for selected operators * Merged adj_edges_ with GradSlotMeta * Fixed monir issue * Adjusted num runs * Recovered Eager performance tests configurations * Recovered Eager performance tests configurations * Adjusted performance tests configurations * Fixed Minor Issues with performance tests * Moved out Edge from GradSlotMeta * Fixed issues from merge * Fixed typo * Addressed review comments * Fixed merge issues * Fixed minor issues * Fixed minor issue * Fixed major issues and enabled auto_prune test cases * Fixed issues from merge --- .../eager/accumulation/accumulation_node.cc | 5 ++-- .../eager/accumulation/accumulation_node.h | 2 +- .../eager_generated/backwards/scale_node.cc | 5 ++-- .../eager_generated/backwards/scale_node.h | 2 +- .../auto_code_generator/eager_generator.cc | 29 ++++++++++++++----- .../final_state_generator/eager_gen.py | 25 +++++++++++----- .../custom_operator/custom_operator_node.cc | 4 +-- .../custom_operator/custom_operator_node.h | 5 ++-- paddle/fluid/eager/grad_node_info.cc | 20 +++++++++++++ paddle/fluid/eager/grad_node_info.h | 6 +++- paddle/fluid/eager/grad_tensor_holder.h | 2 +- .../accumulation_node_test.cc | 8 +++-- .../data_structure_tests/grad_node_test.h | 2 +- .../tests/task_tests/eager_utils_test.cc | 16 ++++++++++ .../eager/to_static/run_program_op_node.h | 2 +- paddle/fluid/eager/utils.cc | 25 ++++++++++++++++ paddle/fluid/eager/utils.h | 7 +++++ .../unittests/test_imperative_auto_prune.py | 21 ++++++++++++-- 18 files changed, 151 insertions(+), 35 deletions(-) diff --git a/paddle/fluid/eager/accumulation/accumulation_node.cc b/paddle/fluid/eager/accumulation/accumulation_node.cc index 9c4089af092..10696dbacd3 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.cc +++ b/paddle/fluid/eager/accumulation/accumulation_node.cc @@ -39,8 +39,9 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor, } std::vector> GradNodeAccumulation:: -operator()(const std::vector>& grads, - bool create_graph) { +operator()( + std::vector>& grads, // NOLINT + bool create_graph) { VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation"; PADDLE_ENFORCE(grads.size() == 1, paddle::platform::errors::Fatal( diff --git a/paddle/fluid/eager/accumulation/accumulation_node.h b/paddle/fluid/eager/accumulation/accumulation_node.h index a91a0b6e34c..2e38d7e9e91 100644 --- a/paddle/fluid/eager/accumulation/accumulation_node.h +++ b/paddle/fluid/eager/accumulation/accumulation_node.h @@ -35,7 +35,7 @@ class GradNodeAccumulation : public GradNodeBase { // Functor: perform backward computations virtual std::vector> operator()( - const std::vector>& grads, + std::vector>& grads, // NOLINT bool create_graph = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc index 0bc998a03a8..d9f5447a88e 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc +++ 
b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -145,8 +145,9 @@ void GradNodeScale::SetTensorWrappers_X( void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; } std::vector> GradNodeScale:: -operator()(const std::vector>& grads, - bool create_graph) { +operator()( + std::vector>& grads, // NOLINT + bool create_graph) { // 1. Check Output Size PADDLE_ENFORCE( ((grads.size() == 1) && (grads[0].size() == 1)), diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h index e263f73a6b8..0b942d2a067 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h @@ -39,7 +39,7 @@ class GradNodeScale : public GradNodeBase { // Functor: perform backward computations virtual std::vector> operator()( - const std::vector>& grads, + std::vector>& grads, // NOLINT bool create_graph = false) override; void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index df2cdc35626..22981759642 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -47,6 +47,9 @@ std::unordered_map> static std::unordered_map operators_with_attrs = {}; +static std::unordered_set ops_to_fill_zero_for_empty_grads = { + "split"}; + /* --- Black Ops list that's NO NEED to apply code generation --- */ static std::unordered_set black_ops_list = {"run_program"}; @@ -2243,11 +2246,21 @@ static std::string GenerateGradNodeCCContents( // [Generation] Get Full Grad Function const char* GRAD_FUNCTION_TEMPLATE = "std::vector> " - "GradNode%s::operator()(const " - "std::vector>& grads, " - "bool create_graph) {\n%s\n}"; - std::string grad_function_str = paddle::string::Sprintf( - GRAD_FUNCTION_TEMPLATE, fwd_op_type, generated_grad_function_body); + "GradNode%s::operator()(" + "std::vector>& grads, bool " + "create_graph) {\n" + "%s" + "%s" + "\n}"; + std::string fill_zero_str = ""; + if (ops_to_fill_zero_for_empty_grads.count(fwd_op_type)) { + fill_zero_str = + "egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, " + "this->InputMeta());\n"; + } + std::string grad_function_str = + paddle::string::Sprintf(GRAD_FUNCTION_TEMPLATE, fwd_op_type, + fill_zero_str, generated_grad_function_body); VLOG(6) << "Generated returns"; @@ -2279,9 +2292,9 @@ static std::string GenerateGradNodeHeaderContents( " ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n" "\n" " virtual std::vector> " - "operator()(const " - "std::vector>& grads, const " - "bool create_graph = false) " + "operator()(" + "std::vector>& grads, bool " + "create_graph = false) " "override;\n" "\n" " void ClearTensorWrappers() override { \n" diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 92cee056d52..1de050d1230 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -17,6 +17,8 @@ import re import argparse import os +ops_to_fill_zero_for_empty_grads = set(list("split")) + # For API dispatch used at python-level # { op_name : [arg_name, ...] 
} core_ops_returns_info = {} @@ -598,7 +600,8 @@ class {} : public egr::GradNodeBase {{ ~{}() override = default; virtual std::vector> operator()( - const std::vector>& grads, bool create_graph = false) override; + std::vector>& grads, bool create_graph = false) override; + std::string name() override {{ return \" {} \"; }} void ClearTensorWrappers() override {{ @@ -656,10 +659,11 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map, for _, (ttype, fwd_position, grad_api_position) in backward_grad_input_map.items(): if IsPlainTensorType(ttype): - grad_api_args[grad_api_position] = f"grads[{fwd_position}][0]" + grad_api_args[ + grad_api_position] = f"hooked_grads[{fwd_position}][0]" else: assert IsVectorTensorType(ttype) - grad_api_args[grad_api_position] = f"grads[{fwd_position}]" + grad_api_args[grad_api_position] = f"hooked_grads[{fwd_position}]" for name, _, _, grad_api_position in backward_attrs_list: saved_attribute_name = GetSavedName(name) @@ -687,23 +691,30 @@ def GenerateNodeDefinition(fwd_api_name, bwd_api_name, backward_fwd_input_map, grad_node_name = GetGradNodeName(fwd_api_name) + fill_zero_str = "" + if fwd_api_name in ops_to_fill_zero_for_empty_grads: + fill_zero_str = "egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, this->InputMeta());\n" + if len(namespace) > 0: grad_api_namespace = f"paddle::experimental::{namespace}" else: grad_api_namespace = f"paddle::experimental" FUNCTION_TEMPLATE = """ -std::vector> {}::operator()(const std::vector>& grads, bool create_graph) {{ +std::vector> {}::operator()(std::vector>& grads, bool create_graph) {{ + {} + auto hooked_grads = ApplyGradientHooks(grads); + // Call grad_api function - VLOG(3) << \"Finally State Running: \" << \"{}\"; + VLOG(3) << \"Final State Running: \" << \"{}\"; auto grad_api_returns = {}::{}({}); {} }} """ node_definition_str = FUNCTION_TEMPLATE.format( - grad_node_name, grad_node_name, grad_api_namespace, bwd_api_name, - grad_api_args_str, returns_str) + grad_node_name, fill_zero_str, grad_node_name, grad_api_namespace, + bwd_api_name, grad_api_args_str, returns_str) return node_definition_str diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.cc b/paddle/fluid/eager/custom_operator/custom_operator_node.cc index 72af1cc4b06..08ca3bed5a6 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.cc +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.cc @@ -20,8 +20,8 @@ namespace egr { std::vector> RunCustomOpNode:: -operator()(const std::vector>& grads, - bool create_graph) { +operator()(std::vector>& grads, + bool create_graph) { // NOLINT paddle::CustomOpKernelContext ctx; auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs( egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]); diff --git a/paddle/fluid/eager/custom_operator/custom_operator_node.h b/paddle/fluid/eager/custom_operator/custom_operator_node.h index 6ece2658575..33b56fc8c86 100644 --- a/paddle/fluid/eager/custom_operator/custom_operator_node.h +++ b/paddle/fluid/eager/custom_operator/custom_operator_node.h @@ -37,8 +37,9 @@ class RunCustomOpNode : public GradNodeBase { // Functor: perform backward computations virtual std::vector> operator()( - const std::vector>& grads, - bool create_graph) override; + std::vector>& grads, + bool create_graph = false) // NOLINT + override; std::string name() { return paddle::string::Sprintf("RunCustomOpNode: %s_grad", op_type_); diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc 
index 1d44d842b08..25610a3f95f 100644 --- a/paddle/fluid/eager/grad_node_info.cc +++ b/paddle/fluid/eager/grad_node_info.cc @@ -102,6 +102,7 @@ const std::vector>& GradNodeBase::OutputMeta() const { void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, size_t slot_rank) { + VLOG(6) << "Set GradSlotMeta for Grad Inputs"; auto* fwd_out_meta = egr::EagerUtils::nullable_autograd_meta(fwd_out); PADDLE_ENFORCE_LE( slot_rank, (bwd_in_meta_.size() - 1), @@ -117,6 +118,12 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, auto& meta = metas[0]; meta.SetStopGradient(fwd_out_meta->StopGradient()); + if (!fwd_out.is_initialized()) { + VLOG(6) + << "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor"; + return; + } + // Record TensorMeta if (phi::DenseTensor::classof(fwd_out.impl().get())) { // Only Copy Meta @@ -128,7 +135,9 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, paddle::platform::errors::Fatal( "Attempting to copy DenseTensorMeta with phi::DataType::UNDEFINED," "which is illegal.")); + meta.SetTensorMeta(dense_tensor->meta()); + meta.SetPlace(fwd_out.inner_place()); if (paddle::framework::IsComplexType( paddle::framework::TransToProtoVarType(dense_tensor->type()))) { @@ -143,6 +152,7 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out, void GradNodeBase::SetGradInMeta( const std::vector& fwd_out, size_t slot_rank) { + VLOG(6) << "Set GradSlotMeta for Grad Inputs"; size_t slot_size = fwd_out.size(); PADDLE_ENFORCE_LE( slot_rank, (bwd_in_meta_.size() - 1), @@ -172,6 +182,12 @@ void GradNodeBase::SetGradInMeta( meta.SetStopGradient(fwd_out_meta->StopGradient()); } + if (!fwd_out_tensor.is_initialized()) { + VLOG(6) + << "Skip Configuring GradSlotMeta for uninitialized GradInput Tensor"; + return; + } + // Record TensorMeta if (phi::DenseTensor::classof(fwd_out_tensor.impl().get())) { // Only Copy Meta @@ -184,6 +200,8 @@ void GradNodeBase::SetGradInMeta( "with phi::DataType::UNDEFINED," "which is illegal.")); meta.SetTensorMeta(dense_tensor->meta()); + meta.SetPlace(fwd_out_tensor.inner_place()); + if (paddle::framework::IsComplexType( paddle::framework::TransToProtoVarType(dense_tensor->type()))) { need_complex_to_real_ = true; @@ -228,6 +246,7 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in, "with phi::DataType::UNDEFINED," "which is illegal.")); meta.SetTensorMeta(dense_tensor->meta()); + meta.SetPlace(fwd_in.inner_place()); } } else { VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta with " @@ -272,6 +291,7 @@ void GradNodeBase::SetGradOutMeta( "phi::DataType::UNDEFINED," "which is illegal.")); meta.SetTensorMeta(dense_tensor->meta()); + meta.SetPlace(fwd_in_tensor.inner_place()); } } else { VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta " diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 28c12717a24..4dec1c1f9f4 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -76,8 +76,12 @@ class GradSlotMeta { return *meta_.get(); } + void SetPlace(const phi::Place& place) { place_ = place; } + const phi::Place& GetPlace() const { return place_; } + private: bool stop_gradient_{false}; + phi::Place place_; std::shared_ptr meta_ = nullptr; }; @@ -102,7 +106,7 @@ class GradNodeBase { * is better choice to fit this format. 
* **/ virtual std::vector> operator()( - const std::vector>& grads, + std::vector>& grads, // NOLINT bool create_graph = false) = 0; virtual void ClearTensorWrappers() = 0; diff --git a/paddle/fluid/eager/grad_tensor_holder.h b/paddle/fluid/eager/grad_tensor_holder.h index 8c00f9161b6..db03789ea76 100644 --- a/paddle/fluid/eager/grad_tensor_holder.h +++ b/paddle/fluid/eager/grad_tensor_holder.h @@ -53,7 +53,7 @@ class GradTensorHolder { return buffer_[pos]; } - const std::vector>& Buffers() { + std::vector>& Buffers() { return buffer_; } diff --git a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc index 28682ab0fe0..6c6c7fd25e5 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc +++ b/paddle/fluid/eager/tests/data_structure_tests/accumulation_node_test.cc @@ -80,13 +80,15 @@ TEST(AccumulationNode, Tensor) { grad_meta->SetStopGradient(false); // operator() - paddle::experimental::Tensor ret_et0 = node->operator()({{et0}})[0][0]; + std::vector> et0_vec = {{et0}}; + paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; auto* ret_et0_ptr = std::dynamic_pointer_cast(ret_et0.impl()) ->data(); CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f)); - paddle::experimental::Tensor ret_et1 = node->operator()({{et1}})[0][0]; + std::vector> et1_vec = {{et1}}; + paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; auto* ret_et1_ptr = std::dynamic_pointer_cast(ret_et1.impl()) @@ -121,7 +123,7 @@ TEST(AccumulationNode, Tensor) { std::make_shared(reduce_hook_1)); // operator() - paddle::experimental::Tensor _ret = node->operator()({{et0}})[0][0]; + paddle::experimental::Tensor _ret = node->operator()(et0_vec)[0][0]; // Check operator() result, should be 36.0 auto* _ret_ptr = std::dynamic_pointer_cast(_ret.impl()) diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h index 0b167203735..dff12fdfc34 100644 --- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h +++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_test.h @@ -32,7 +32,7 @@ class GradTestNode : public egr::GradNodeBase { GradTestNode() : GradNodeBase() { val_ = 1.0; } std::string name() override { return "GradTestNode"; } std::vector> operator()( - const std::vector>& grads, + std::vector>& grads, bool create_graph = false) override { val_ = std::dynamic_pointer_cast(grads[0][0].impl()) ->data()[0]; diff --git a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc index 217055e4e9e..7486e711641 100644 --- a/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc +++ b/paddle/fluid/eager/tests/task_tests/eager_utils_test.cc @@ -247,4 +247,20 @@ TEST(EagerUtils, GetGradAccumulationNode) { ASSERT_ANY_THROW(egr::EagerUtils::GetGradAccumulationNode(t0)); } +TEST(EagerUtils, FillZeroForEmptyGradInputs) { + std::vector> grads = { + std::vector(1)}; + std::vector> slot_metas = { + std::vector(1)}; + + phi::DenseTensorMeta tensor_meta; + tensor_meta.dtype = paddle::experimental::DataType::FLOAT32; + tensor_meta.dims = {2, 4}; + slot_metas[0][0].SetTensorMeta(tensor_meta); + slot_metas[0][0].SetPlace(phi::CPUPlace()); + + EagerUtils::FillZeroForEmptyGradInputs(&grads, slot_metas); + eager_test::CompareTensorWithValue(grads[0][0], 0.0); +} + } // namespace egr diff --git 
a/paddle/fluid/eager/to_static/run_program_op_node.h b/paddle/fluid/eager/to_static/run_program_op_node.h index 4eaa64d3ac6..c83e16e9a1e 100644 --- a/paddle/fluid/eager/to_static/run_program_op_node.h +++ b/paddle/fluid/eager/to_static/run_program_op_node.h @@ -370,7 +370,7 @@ class GradNodeRunProgram : public egr::GradNodeBase { ~GradNodeRunProgram() override = default; // Functor: perform backward computations virtual std::vector> operator()( - const std::vector> &grads, + std::vector> &grads, // NOLINT bool create_graph) override { VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram"; PADDLE_ENFORCE_EQ( diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 048087903a4..20faae95281 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -20,6 +20,7 @@ #include "paddle/phi/api/all.h" #include "paddle/phi/common/layout.h" +#include "paddle/phi/core/compat/convert_utils.h" #include "paddle/phi/core/tensor_meta.h" #include "paddle/fluid/framework/data_layout.h" @@ -392,4 +393,28 @@ std::shared_ptr EagerUtils::GetGradAccumulationNode( } } +void EagerUtils::FillZeroForEmptyGradInputs( + std::vector>* in_grads, + const std::vector>& grad_in_metas) { + for (size_t i = 0; i < in_grads->size(); i++) { + for (size_t j = 0; j < (*in_grads)[0].size(); j++) { + paddle::experimental::Tensor& grad = (*in_grads)[i][j]; + if (!grad.is_initialized()) { + const GradSlotMeta& grad_in_meta = grad_in_metas[i][j]; + PADDLE_ENFORCE( + grad_in_meta.HasTensorMeta(), + paddle::platform::errors::Fatal( + "Unable to fill empty grad inputs due to empty GradSlotMeta")); + + const auto& tensor_meta = grad_in_meta.GetTensorMeta(); + phi::Place place = grad_in_meta.GetPlace(); + + auto tensor_with_zero = paddle::experimental::full( + phi::vectorize(tensor_meta.dims), 0.0, tensor_meta.dtype, place); + grad.set_impl(tensor_with_zero.impl()); + } + } + } +} + } // namespace egr diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index fbd080ef70e..396837f101c 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -217,6 +217,13 @@ class EagerUtils { const std::vector& tensors); static std::shared_ptr GetGradAccumulationNode( const paddle::experimental::Tensor& tensor); + + /** + * Fill Zero + * **/ + static void FillZeroForEmptyGradInputs( + std::vector>* out_grads, + const std::vector>& grad_out_metas); }; } // namespace egr diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index 44d73612b1c..39b79dd4ba2 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -182,7 +182,7 @@ class TestImperativeAutoPrune(unittest.TestCase): self.func_auto_prune2() # TODO(jiabin): Support this when we support better split tensor - def test_auto_prune3(self): + def func_auto_prune3(self): with fluid.dygraph.guard(): case3 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") @@ -194,7 +194,12 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case3.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) - def test_auto_prune4(self): + def test_auto_prune3(self): + with _test_eager_guard(): + self.func_auto_prune3() + self.func_auto_prune3() + + def func_auto_prune4(self): with fluid.dygraph.guard(): case4 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 
784).astype("float32") @@ -206,7 +211,12 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 1).all()) - def test_auto_prune5(self): + def test_auto_prune4(self): + with _test_eager_guard(): + self.func_auto_prune4() + self.func_auto_prune4() + + def func_auto_prune5(self): with fluid.dygraph.guard(): case4 = AutoPruneLayer3(input_size=784) value1 = np.arange(784).reshape(1, 784).astype("float32") @@ -218,6 +228,11 @@ class TestImperativeAutoPrune(unittest.TestCase): self.assertTrue(case4.linear.weight._grad_ivar() is not None) self.assertTrue((part2.gradient() == 0).all()) + def test_auto_prune5(self): + with _test_eager_guard(): + self.func_auto_prune5() + self.func_auto_prune5() + def func_auto_prune6(self): with fluid.dygraph.guard(): value0 = np.arange(26).reshape(2, 13).astype("float32") -- GitLab
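For context on the behavior this patch enables: when only some outputs of a multi-output op such as split reach the loss, the grad node now receives uninitialized gradients for the pruned outputs and fills them with zeros built from the dims, dtype, and place recorded in GradSlotMeta, instead of handing empty tensors to the grad kernel. A minimal sketch of that scenario, assuming Paddle 2.x eager dygraph mode (the layer and shapes are illustrative, not the AutoPruneLayer3 case from the unit test):

import numpy as np
import paddle

x = paddle.to_tensor(np.arange(784).reshape(1, 784).astype("float32"))

linear = paddle.nn.Linear(784, 20)
feature = linear(x)  # shape [1, 20]

# Only part1 contributes to the loss; part2 is pruned from the backward pass.
part1, part2 = paddle.split(feature, num_or_sections=[10, 10], axis=1)
loss = paddle.mean(part1)
loss.backward()

# split's grad kernel needs a gradient for every output slot. The missing
# grad for part2 is zero-filled by FillZeroForEmptyGradInputs, so backward
# completes and the linear weight receives a gradient.
print(linear.weight.grad.shape)  # [784, 20]; Paddle stores Linear weight as [in, out]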
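Both code generators gate the zero-fill on an allow-list keyed by the forward op name. A condensed Python sketch of that decision, mirroring the names used in eager_gen.py (fill_zero_prefix is a hypothetical helper added here only to illustrate the membership test; the set is written with whole op names as its elements):

# The set holds whole op names; membership is checked against fwd_api_name.
ops_to_fill_zero_for_empty_grads = {"split"}

def fill_zero_prefix(fwd_api_name):
    # Returns the C++ statement prepended to the generated operator() body,
    # or an empty string when the op does not need zero-filled grad inputs.
    if fwd_api_name in ops_to_fill_zero_for_empty_grads:
        return ("egr::EagerUtils::FillZeroForEmptyGradInputs("
                "&grads, this->InputMeta());\n")
    return ""

assert fill_zero_prefix("split")
assert not fill_zero_prefix("matmul_v2")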