From 4640955c3ac0e3629f5bbdcf649823f5a146e99f Mon Sep 17 00:00:00 2001
From: Jiabin Yang
Date: Wed, 12 Jan 2022 16:02:33 +0800
Subject: [PATCH] support test_auto_prune_partial (#38871)

---
 paddle/fluid/eager/api/utils/tensor_utils.cc  |  1 -
 paddle/fluid/eager/backward.cc                |  4 +-
 paddle/fluid/eager/eager_tensor.h             |  8 ++--
 paddle/fluid/eager/grad_node_info.cc          | 45 +++----------------
 paddle/fluid/eager/grad_node_info.h           |  1 -
 .../grad_node_info_test.cc                    |  4 +-
 .../eager/tests/task_tests/backward_test.cc   | 28 +++++++++---
 .../cross_batch_accumulation_test.cc          |  4 +-
 .../fluid/eager/tests/task_tests/hook_test.cc |  8 +++-
 paddle/fluid/pybind/eager_method.cc           | 44 ++++++++++++++++++
 .../tests/unittests/test_egr_python_api.py    | 27 +++++++++++
 .../unittests/test_imperative_auto_prune.py   | 15 ++++++-
 12 files changed, 130 insertions(+), 59 deletions(-)

diff --git a/paddle/fluid/eager/api/utils/tensor_utils.cc b/paddle/fluid/eager/api/utils/tensor_utils.cc
index ad6c34b7cf8..115c9144df2 100644
--- a/paddle/fluid/eager/api/utils/tensor_utils.cc
+++ b/paddle/fluid/eager/api/utils/tensor_utils.cc
@@ -49,7 +49,6 @@ egr::EagerTensor CreateTensorWithValue(const pten::DDim& ddim,
   egr::EagerTensor out = egr::EagerTensor();
   out.set_tensor(std::make_shared(tensor));
   auto meta = EagerUtils::autograd_meta(&out);
-
   if (is_leaf) {
     auto accumulation_node = std::make_shared();
     meta->SetGradNode(accumulation_node);
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index 9a760c03728..01cb1b81e34 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -181,7 +181,9 @@ void RunBackward(const std::vector& tensors,
     PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
                    paddle::platform::errors::Fatal(
                        "Number of edges should be either empty ( for leaf node "
-                       ") or the same as number of output grad tensors"));
+                       ") or the same as number of output grad tensors, but we "
+                       "got edges size: %d, grad_output size: %d",
+                       edges.size(), grad_output_tensors.size()));
 
     for (size_t i = 0; i < edges.size(); i++) {
       for (size_t j = 0; j < edges[i].size(); j++) {
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index 72fe5732e96..80faad9080f 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -195,7 +195,6 @@ class EagerTensor final {
     }
     tensor_->copy_(*(src.tensor_.get()), blocking);
   }
-
   /* Part 6: Operator overloading */
   EagerTensor& operator=(const EagerTensor& x) & {
     tensor_ = x.tensor_;
@@ -238,7 +237,7 @@ class EagerTensor final {
     // Contruct framework::Tensor from egr::EagerTensor
     auto tensor_dense =
         std::dynamic_pointer_cast(tensor_->impl());
-    if (tensor_dense) {
+    if (tensor_dense && tensor_dense.get()) {
       paddle::experimental::SharesStorage(tensor_dense.get(),
                                           framework_tensor);
     } else {
@@ -292,11 +291,10 @@ class EagerTensor final {
   template
   void SetImplWithLegacyTensor() {
     const auto& framework_tensor = var_.Get();
-    if (this->initialized()) {
+    if (defined()) {
       VLOG(8) << "Sync Var to initialized tensor for: " << name();
       paddle::experimental::ReMakePtenDenseTensor(
-          framework_tensor,
-          static_cast(this->impl().get()));
+          framework_tensor, static_cast(impl().get()));
     } else {
       VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
       this->set_impl(std::move(
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 0e6f6aa63dd..49bd416d46a 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -47,45 +47,15 @@ void GradNodeBase::AddEdges(std::vector* metas, size_t slot_id) {
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    if (meta) {
+    if (meta && !meta->StopGradient()) {
       auto node = meta->GetMutableGradNode();
       if (node) {
         adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                          meta->OutRankInfo());
       } else {
-        if (!meta->StopGradient()) {
-          meta->SetGradNode(std::make_shared());
-          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                           meta->OutRankInfo());
-        }
-      }
-    }
-  }
-}
-
-void GradNodeBase::AddEdges(const std::vector& metas,
-                            size_t slot_id) {
-  PADDLE_ENFORCE_LT(
-      slot_id, adj_edges_.size(),
-      paddle::platform::errors::InvalidArgument(
-          "Given slot id is out of range of adj_edges outter size, "
-          "adj_edges is designed to has the same size of grad "
-          "inputs's slot num."));
-  for (const auto& meta : metas) {
-    // adj_edges has as same rank as fwd inputs, and record it's output rank
-    // from
-    // its pre-ops
-    if (meta) {
-      auto node = meta->GetMutableGradNode();
-      if (node) {
+        meta->SetGradNode(std::make_shared());
         adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                          meta->OutRankInfo());
-      } else {
-        if (!meta->StopGradient()) {
-          meta->SetGradNode(std::make_shared());
-          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                           meta->OutRankInfo());
-        }
       }
     }
   }
@@ -98,17 +68,16 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
           "Given slot id is out of range of adj_edges outter size, "
          "adj_edges is designed to has the same size of grad "
          "inputs's slot num."));
-  if (meta) {
+  if (meta && !meta->StopGradient()) {
+    VLOG(6) << "Add Edges for slot: " << slot_id;
     auto node = meta->GetMutableGradNode();
     if (node) {
       adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                        meta->OutRankInfo());
     } else {
-      if (!meta->StopGradient()) {
-        meta->SetGradNode(std::make_shared());
-        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                         meta->OutRankInfo());
-      }
+      meta->SetGradNode(std::make_shared());
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
     }
   }
 }
diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h
index 545b577f4bd..f15c50ef751 100644
--- a/paddle/fluid/eager/grad_node_info.h
+++ b/paddle/fluid/eager/grad_node_info.h
@@ -106,7 +106,6 @@ class GradNodeBase {
    * This one is called slot by slot
    * **/
   void AddEdges(std::vector* metas, size_t slot_id);
-  void AddEdges(const std::vector& metas, size_t slot_id);
   void AddEdges(AutogradMeta* meta, size_t slot_id);
 
   /**
diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
index aebb0553e28..a89fb019d5b 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
@@ -56,15 +56,17 @@ TEST(GradNodeInfo, GradNodeBase) {
   VLOG(6) << "Test Add Edges";
   egr::Edge edge0(grad_test_node1, 1, 2);
   auto auto_grad0 = std::make_shared(edge0);
+  auto_grad0->SetStopGradient(false);
   egr::Edge edge1(grad_test_node1, 3, 4);
   auto auto_grad1 = std::make_shared(edge1);
+  auto_grad1->SetStopGradient(false);
   grad_test_node0->AddEdges(auto_grad0.get(), 0);
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
            size_t(1));
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
            size_t(2));
   std::vector metas = {auto_grad1.get()};
-  grad_test_node0->AddEdges(metas, 1);
+  grad_test_node0->AddEdges(&metas, 1);
   CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
            size_t(3));
   CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second,
diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc
index 0ec86b7cc36..3737fd95ad6 100644
--- a/paddle/fluid/eager/tests/task_tests/backward_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc
@@ -69,9 +69,11 @@ TEST(Backward, SingleNodeEmptyGrad) {
 
     // Connect Node0 -> AccumulationNode via Edge
     auto meta = egr::AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    node0_ptr->AddEdges({&meta}, 0);
+    std::vector res = {&meta};
+    node0_ptr->AddEdges(&res, 0);
   }
   std::vector outs = {target_tensor};
   // Run Backward
@@ -130,9 +132,11 @@ TEST(Backward, SingleNodeCustomGrad) {
 
     // Connect Node0 -> AccumulationNode via Edge
     auto meta = egr::AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    node0_ptr->AddEdges({&meta}, 0);
+    std::vector res = {&meta};
+    node0_ptr->AddEdges(&res, 0);
   }
 
   // Run Backward
@@ -188,9 +192,11 @@ TEST(Backward, LinearNodes) {
 
     // Connect Node0 -> Node1 via Edge
     auto meta0 = egr::AutogradMeta();
+    meta0.SetStopGradient(false);
     meta0.SetSingleOutRankWithSlot(0, 0);
     meta0.SetGradNode(node1_ptr);
-    node0_ptr->AddEdges({&meta0}, 0);
+    std::vector res0 = {&meta0};
+    node0_ptr->AddEdges(&res0, 0);
 
     // Connect Tensor and AccumulationNode via AutoGradMeta
     auto acc_node_ptr = std::make_shared();
@@ -204,9 +210,11 @@ TEST(Backward, LinearNodes) {
 
     // Connect Node1 -> AccumulationNode via Edge
     auto meta1 = egr::AutogradMeta();
+    meta1.SetStopGradient(false);
     meta1.SetSingleOutRankWithSlot(0, 0);
     meta1.SetGradNode(acc_node_ptr);
-    node1_ptr->AddEdges({&meta1}, 0);
+    std::vector res1 = {&meta1};
+    node1_ptr->AddEdges(&res1, 0);
   }
 
   // Use Empty Grad Tensor
@@ -283,15 +291,19 @@ TEST(Backward, WithAccumulation) {
 
     // Connect Node0 -> Node2 via Edge
    auto meta0 = egr::AutogradMeta();
+    meta0.SetStopGradient(false);
     meta0.SetSingleOutRankWithSlot(0, 0);
     meta0.SetGradNode(node2_ptr);
-    node0_ptr->AddEdges({&meta0}, 0);
+    std::vector res0 = {&meta0};
+    node0_ptr->AddEdges(&res0, 0);
 
     // Connect Node1 -> Node2 via Edge
     auto meta1 = egr::AutogradMeta();
+    meta1.SetStopGradient(false);
     meta1.SetSingleOutRankWithSlot(0, 0);
     meta1.SetGradNode(node2_ptr);
-    node1_ptr->AddEdges({&meta1}, 0);
+    std::vector res1 = {&meta1};
+    node1_ptr->AddEdges(&res1, 0);
 
     // Connect Tensor and AccumulationNode via AutoGradMeta
     auto acc_node_ptr = std::make_shared();
@@ -305,9 +317,11 @@ TEST(Backward, WithAccumulation) {
 
     // Connect Node2 -> AccumulationNode via Edge
     auto meta2 = egr::AutogradMeta();
+    meta2.SetStopGradient(false);
     meta2.SetSingleOutRankWithSlot(0, 0);
     meta2.SetGradNode(acc_node_ptr);
-    node2_ptr->AddEdges({&meta2}, 0);
+    std::vector res2 = {&meta2};
+    node2_ptr->AddEdges(&res2, 0);
   }
 
   RunBackward(target_tensors, grad_tensors);
diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
index 52e10b2b1b8..7f180fa1076 100644
--- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
@@ -62,8 +62,10 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
 
   auto meta = AutogradMeta();
   meta.SetSingleOutRankWithSlot(0, 0);
+  meta.SetStopGradient(false);
   meta.SetGradNode(acc_node_ptr);
-  scale_node_ptr->AddEdges({&meta}, 0);
+  std::vector res = {&meta};
+  scale_node_ptr->AddEdges(&res, 0);
 
   AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
   auto_grad_meta1->SetGradNode(
diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc
index 4ec49bfa566..0f8039dade8 100644
--- a/paddle/fluid/eager/tests/task_tests/hook_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc
@@ -105,9 +105,11 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
   // Connect ScaleNode -> AccumulationNode via Edge
   {
     auto meta = AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    scale_node_ptr->AddEdges({&meta}, 0);
+    std::vector res = {&meta};
+    scale_node_ptr->AddEdges(&res, 0);
   }
 
   // Retain Grad for leaf tensor1
@@ -180,9 +182,11 @@ TEST(RetainGrad, HookAfterRetainGrad) {
   // Connect ScaleNode -> AccumulationNode via Edge
   {
     auto meta = AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    scale_node_ptr->AddEdges({&meta}, 0);
+    std::vector res = {&meta};
+    scale_node_ptr->AddEdges(&res, 0);
   }
 
   // Retain Grad for leaf tensor1
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index c56fe5be4da..a0067f9c64f 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -234,6 +234,44 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }
 
+static PyObject* eager_tensor__share_buffer_to(EagerTensorObject* self,
+                                               PyObject* args,
+                                               PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  egr::EagerTensor* src_ptr =
+      &(reinterpret_cast(PyTuple_GET_ITEM(args, 0))
+            ->eager_tensor);
+  PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true,
+                    platform::errors::InvalidArgument(
+                        "Tensor %s has not been initialized! Please initialize "
+                        "the source tensor before sharing its buffer.",
+                        self->eager_tensor.name()));
+  src_ptr->set_impl(self->eager_tensor.impl());
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* eager_tensor__is_shared_buffer_with(EagerTensorObject* self,
+                                                     PyObject* args,
+                                                     PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  egr::EagerTensor src_tensor =
+      CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0);
+  PADDLE_ENFORCE_EQ(src_tensor.initialized(), true,
+                    platform::errors::InvalidArgument(
+                        "Tensor %s has not been initialized! Please initialize "
please initialize " + "src tensor before share_buffer_with to other.", + src_tensor.name())); + bool res = false; + if (!self->eager_tensor.defined() || !src_tensor.defined()) { + return ToPyObject(res); + } + res = (self->eager_tensor.impl().get() == src_tensor.impl().get()); + return ToPyObject(res); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + static PyObject* eager_tensor_method_detach(EagerTensorObject* self, PyObject* args, PyObject* kwargs) { EAGER_SYNC_TRY @@ -278,6 +316,12 @@ PyMethodDef variable_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_is_shared_buffer_to", + (PyCFunction)(void (*)(void))eager_tensor__share_buffer_to, + METH_VARARGS | METH_KEYWORDS, NULL}, + {"_share_buffer_with", + (PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with, + METH_VARARGS | METH_KEYWORDS, NULL}, {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach, METH_VARARGS | METH_KEYWORDS, NULL}, {NULL, NULL, 0, NULL}}; diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py index e4576fe2ea8..3ab7981cdb1 100644 --- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py +++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py @@ -645,6 +645,33 @@ class EagerTensorPropertiesTestCase(unittest.TestCase): self.assertTrue(tensor3.stop_gradient, True) self.assertTrue(tensor3.place.is_cpu_place()) + def test_share_buffer_to(): + arr = np.ones([4, 16, 16, 32]).astype('float32') + arr1 = np.zeros([4, 16]).astype('float32') + arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones( + [4, 16, 16, 32]).astype('float32') + tensor = None + tensor2 = None + tensor = paddle.to_tensor(arr, core.VarDesc.VarType.FP32, + core.CPUPlace()) + tensor3 = core.eager.EagerTensor() + if core.is_compiled_with_cuda(): + tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32, + core.CUDAPlace(0)) + else: + tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32, + core.CPUPlace()) + self.assertTrue(np.array_equal(tensor.numpy(), arr1)) + self.assertTrue(np.array_equal(tensor2.numpy(), arr2)) + tensor2._share_buffer_to(tensor) + self.assertTrue(np.array_equal(tensor.numpy(), arr2)) + self.assertTrue(np.array_equal(tensor2.numpy(), arr2)) + self.assertTrue(tensor._is_shared_buffer_with(tensor2)) + self.assertTrue(tensor2._is_shared_buffer_with(tensor)) + tensor._share_buffer_to(tensor3) + self.assertTrue(np.array_equal(tensor3.numpy(), arr2)) + self.assertTrue(tensor3._is_shared_buffer_with(tensor)) + def test_properties(self): print("Test_properties") with _test_eager_guard(): diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py index b82a058ae4e..d2e1a4fbb18 100644 --- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py +++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py @@ -15,6 +15,7 @@ import unittest import paddle.fluid as fluid import numpy as np +from paddle.fluid.framework import _test_eager_guard class AutoPruneLayer0(fluid.Layer): @@ -145,7 +146,7 @@ class MyLayer2(fluid.Layer): class TestImperativeAutoPrune(unittest.TestCase): - def test_auto_prune(self): + def func_auto_prune(self): with fluid.dygraph.guard(): case1 = AutoPruneLayer0(input_size=5) value1 = np.arange(25).reshape(5, 5).astype("float32") @@ -157,7 +158,12 @@ class 
         self.assertTrue(case1.linear2.weight._grad_ivar() is not None)
         self.assertTrue(case1.linear1.weight._grad_ivar() is not None)
 
-    def test_auto_prune2(self):
+    def test_auto_prune(self):
+        with _test_eager_guard():
+            self.func_auto_prune()
+        self.func_auto_prune()
+
+    def func_auto_prune2(self):
         with fluid.dygraph.guard():
             case2 = AutoPruneLayer1(input_size=5)
             value1 = np.arange(25).reshape(5, 5).astype("float32")
@@ -170,6 +176,11 @@ class TestImperativeAutoPrune(unittest.TestCase):
         self.assertTrue(case2.linear2.weight._grad_ivar() is None)
         self.assertTrue(case2.linear1.weight._grad_ivar() is not None)
 
+    def test_auto_prune2(self):
+        with _test_eager_guard():
+            self.func_auto_prune2()
+        self.func_auto_prune2()
+
     def test_auto_prune3(self):
         with fluid.dygraph.guard():
             case3 = AutoPruneLayer3(input_size=784)
-- 
GitLab