diff --git a/paddle/fluid/eager/api/utils/tensor_utils.cc b/paddle/fluid/eager/api/utils/tensor_utils.cc
index ad6c34b7cf86cd4d7338539cde704c6aff3b6b11..115c9144df222dc48e8b9bc8febbc82959d90dcd 100644
--- a/paddle/fluid/eager/api/utils/tensor_utils.cc
+++ b/paddle/fluid/eager/api/utils/tensor_utils.cc
@@ -49,7 +49,6 @@ egr::EagerTensor CreateTensorWithValue(const pten::DDim& ddim,
   egr::EagerTensor out = egr::EagerTensor();
   out.set_tensor(std::make_shared<pten::DenseTensor>(tensor));
   auto meta = EagerUtils::autograd_meta(&out);
-
   if (is_leaf) {
     auto accumulation_node = std::make_shared<GradNodeAccumulation>();
     meta->SetGradNode(accumulation_node);
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
index 9a760c03728cdd47f6a3a8ba9d20bf196832166d..01cb1b81e341e95faf6d4c63819bb7db789a245b 100644
--- a/paddle/fluid/eager/backward.cc
+++ b/paddle/fluid/eager/backward.cc
@@ -181,7 +181,9 @@ void RunBackward(const std::vector<egr::EagerTensor>& tensors,
     PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
                    paddle::platform::errors::Fatal(
                        "Number of edges should be either empty ( for leaf node "
-                       ") or the same as number of output grad tensors"));
+                       ") or the same as number of output grad tensors, but we "
+                       "got %d edges and %d output grad tensors",
+                       edges.size(), grad_output_tensors.size()));

     for (size_t i = 0; i < edges.size(); i++) {
       for (size_t j = 0; j < edges[i].size(); j++) {
diff --git a/paddle/fluid/eager/eager_tensor.h b/paddle/fluid/eager/eager_tensor.h
index 72fe5732e9620af8e008aa96ca751a51f3ae4af7..80faad9080ffe0c3daf33ed28ed2e8000372f9b5 100644
--- a/paddle/fluid/eager/eager_tensor.h
+++ b/paddle/fluid/eager/eager_tensor.h
@@ -195,7 +195,6 @@ class EagerTensor final {
     }
     tensor_->copy_(*(src.tensor_.get()), blocking);
   }
-
   /* Part 6: Operator overloading */
   EagerTensor& operator=(const EagerTensor& x) & {
     tensor_ = x.tensor_;
@@ -238,7 +237,7 @@ class EagerTensor final {
     // Contruct framework::Tensor from egr::EagerTensor
     auto tensor_dense =
         std::dynamic_pointer_cast<pten::DenseTensor>(tensor_->impl());
-    if (tensor_dense) {
+    if (tensor_dense && tensor_dense.get()) {
       paddle::experimental::SharesStorage(tensor_dense.get(),
                                           framework_tensor);
     } else {
@@ -292,11 +291,10 @@ class EagerTensor final {
   template <typename LEGACY_TYPE, typename TYPE>
   void SetImplWithLegacyTensor() {
     const auto& framework_tensor = var_.Get<LEGACY_TYPE>();
-    if (this->initialized()) {
+    if (defined()) {
       VLOG(8) << "Sync Var to initialized tensor for: " << name();
       paddle::experimental::ReMakePtenDenseTensor(
-          framework_tensor,
-          static_cast<TYPE*>(this->impl().get()));
+          framework_tensor, static_cast<TYPE*>(impl().get()));
     } else {
       VLOG(8) << "Sync Var to uninitialized tensor for: " << name();
       this->set_impl(std::move(
diff --git a/paddle/fluid/eager/grad_node_info.cc b/paddle/fluid/eager/grad_node_info.cc
index 0e6f6aa63dd0f32db73bd5f8417749956449aba8..49bd416d46a764a14b41ec2d2383e4bfccafc24a 100644
--- a/paddle/fluid/eager/grad_node_info.cc
+++ b/paddle/fluid/eager/grad_node_info.cc
@@ -47,45 +47,15 @@ void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
     // adj_edges has as same rank as fwd inputs, and record it's output rank
     // from
     // its pre-ops
-    if (meta) {
+    if (meta && !meta->StopGradient()) {
       auto node = meta->GetMutableGradNode();
       if (node) {
         adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                          meta->OutRankInfo());
       } else {
-        if (!meta->StopGradient()) {
-          meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
-          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                           meta->OutRankInfo());
-        }
-      }
-    }
-  }
-}
-
-void GradNodeBase::AddEdges(const std::vector<AutogradMeta*>& metas,
-                            size_t slot_id) {
-  PADDLE_ENFORCE_LT(
-      slot_id, adj_edges_.size(),
-      paddle::platform::errors::InvalidArgument(
-          "Given slot id is out of range of adj_edges outter size, "
-          "adj_edges is designed to has the same size of grad "
-          "inputs's slot num."));
-  for (const auto& meta : metas) {
-    // adj_edges has as same rank as fwd inputs, and record it's output rank
-    // from
-    // its pre-ops
-    if (meta) {
-      auto node = meta->GetMutableGradNode();
-      if (node) {
+        meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
         adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                          meta->OutRankInfo());
-      } else {
-        if (!meta->StopGradient()) {
-          meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
-          adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                           meta->OutRankInfo());
-        }
       }
     }
   }
@@ -98,17 +68,16 @@ void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
           "Given slot id is out of range of adj_edges outter size, "
           "adj_edges is designed to has the same size of grad "
          "inputs's slot num."));
-  if (meta) {
+  if (meta && !meta->StopGradient()) {
+    VLOG(6) << "Add Edges for slot: " << slot_id;
     auto node = meta->GetMutableGradNode();
     if (node) {
       adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
                                        meta->OutRankInfo());
     } else {
-      if (!meta->StopGradient()) {
-        meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
-        adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                         meta->OutRankInfo());
-      }
+      meta->SetGradNode(std::make_shared<GradNodeAccumulation>());
+      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
+                                       meta->OutRankInfo());
     }
   }
 }
diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h
index 545b577f4bda9ba088daaf44da6915e1ce212f9a..f15c50ef75190da94316acbb5d06afa5f54fc02d 100644
--- a/paddle/fluid/eager/grad_node_info.h
+++ b/paddle/fluid/eager/grad_node_info.h
@@ -106,7 +106,6 @@ class GradNodeBase {
    * This one is called slot by slot
    * **/
   void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
-  void AddEdges(const std::vector<AutogradMeta*>& metas, size_t slot_id);
   void AddEdges(AutogradMeta* meta, size_t slot_id);

   /**
diff --git a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
index aebb0553e28b6c5bad591169949077be153df8a5..a89fb019d5b37ccff090374c7dc406471470f392 100644
--- a/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
+++ b/paddle/fluid/eager/tests/data_structure_tests/grad_node_info_test.cc
@@ -56,15 +56,17 @@ TEST(GradNodeInfo, GradNodeBase) {
   VLOG(6) << "Test Add Edges";
   egr::Edge edge0(grad_test_node1, 1, 2);
   auto auto_grad0 = std::make_shared<egr::AutogradMeta>(edge0);
+  auto_grad0->SetStopGradient(false);
   egr::Edge edge1(grad_test_node1, 3, 4);
   auto auto_grad1 = std::make_shared<egr::AutogradMeta>(edge1);
+  auto_grad1->SetStopGradient(false);
   grad_test_node0->AddEdges(auto_grad0.get(), 0);
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
            size_t(1));
   CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
            size_t(2));
   std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
-  grad_test_node0->AddEdges(metas, 1);
+  grad_test_node0->AddEdges(&metas, 1);
   CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
            size_t(3));
   CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second,
diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc
index 0ec86b7cc360c714444e9a2adb4875dd823992f5..3737fd95ad64d96d48e6f503308dede0d995d81c 100644
--- a/paddle/fluid/eager/tests/task_tests/backward_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc
@@ -69,9 +69,11 @@ TEST(Backward, SingleNodeEmptyGrad) {
     // Connect Node0 -> AccumulationNode via Edge
     auto meta = egr::AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    node0_ptr->AddEdges({&meta}, 0);
+    std::vector<egr::AutogradMeta*> res = {&meta};
+    node0_ptr->AddEdges(&res, 0);
   }
   std::vector<egr::EagerTensor> outs = {target_tensor};
   // Run Backward
@@ -130,9 +132,11 @@ TEST(Backward, SingleNodeCustomGrad) {
     // Connect Node0 -> AccumulationNode via Edge
     auto meta = egr::AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    node0_ptr->AddEdges({&meta}, 0);
+    std::vector<egr::AutogradMeta*> res = {&meta};
+    node0_ptr->AddEdges(&res, 0);
   }

   // Run Backward
@@ -188,9 +192,11 @@ TEST(Backward, LinearNodes) {
     // Connect Node0 -> Node1 via Edge
     auto meta0 = egr::AutogradMeta();
+    meta0.SetStopGradient(false);
     meta0.SetSingleOutRankWithSlot(0, 0);
     meta0.SetGradNode(node1_ptr);
-    node0_ptr->AddEdges({&meta0}, 0);
+    std::vector<egr::AutogradMeta*> res0 = {&meta0};
+    node0_ptr->AddEdges(&res0, 0);

     // Connect Tensor and AccumulationNode via AutoGradMeta
     auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
@@ -204,9 +210,11 @@
     // Connect Node1 -> AccumulationNode via Edge
     auto meta1 = egr::AutogradMeta();
+    meta1.SetStopGradient(false);
     meta1.SetSingleOutRankWithSlot(0, 0);
     meta1.SetGradNode(acc_node_ptr);
-    node1_ptr->AddEdges({&meta1}, 0);
+    std::vector<egr::AutogradMeta*> res1 = {&meta1};
+    node1_ptr->AddEdges(&res1, 0);
   }

   // Use Empty Grad Tensor
@@ -283,15 +291,19 @@ TEST(Backward, WithAccumulation) {
     // Connect Node0 -> Node2 via Edge
     auto meta0 = egr::AutogradMeta();
+    meta0.SetStopGradient(false);
     meta0.SetSingleOutRankWithSlot(0, 0);
     meta0.SetGradNode(node2_ptr);
-    node0_ptr->AddEdges({&meta0}, 0);
+    std::vector<egr::AutogradMeta*> res0 = {&meta0};
+    node0_ptr->AddEdges(&res0, 0);

     // Connect Node1 -> Node2 via Edge
     auto meta1 = egr::AutogradMeta();
+    meta1.SetStopGradient(false);
     meta1.SetSingleOutRankWithSlot(0, 0);
     meta1.SetGradNode(node2_ptr);
-    node1_ptr->AddEdges({&meta1}, 0);
+    std::vector<egr::AutogradMeta*> res1 = {&meta1};
+    node1_ptr->AddEdges(&res1, 0);

     // Connect Tensor and AccumulationNode via AutoGradMeta
     auto acc_node_ptr = std::make_shared<egr::GradNodeAccumulation>();
@@ -305,9 +317,11 @@ TEST(Backward, WithAccumulation) {
     // Connect Node2 -> AccumulationNode via Edge
     auto meta2 = egr::AutogradMeta();
+    meta2.SetStopGradient(false);
     meta2.SetSingleOutRankWithSlot(0, 0);
     meta2.SetGradNode(acc_node_ptr);
-    node2_ptr->AddEdges({&meta2}, 0);
+    std::vector<egr::AutogradMeta*> res2 = {&meta2};
+    node2_ptr->AddEdges(&res2, 0);
   }

   RunBackward(target_tensors, grad_tensors);
diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
index 52e10b2b1b8a094e40b96537e36cc66c49f1714f..7f180fa1076fd4d806be6b0f18d6fe46e123b48e 100644
--- a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
@@ -62,8 +62,10 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
   auto meta = AutogradMeta();
   meta.SetSingleOutRankWithSlot(0, 0);
+  meta.SetStopGradient(false);
   meta.SetGradNode(acc_node_ptr);
-  scale_node_ptr->AddEdges({&meta}, 0);
+  std::vector<AutogradMeta*> res = {&meta};
+  scale_node_ptr->AddEdges(&res, 0);

   AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
   auto_grad_meta1->SetGradNode(
diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc
index 4ec49bfa56676445913bdb5d0507bdfd00d18b61..0f8039dade8010ad2af7df4790042a2ed3af3d1d 100644
--- a/paddle/fluid/eager/tests/task_tests/hook_test.cc
+++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc
@@ -105,9 +105,11 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
   // Connect ScaleNode -> AccumulationNode via Edge
   {
     auto meta = AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    scale_node_ptr->AddEdges({&meta}, 0);
+    std::vector<AutogradMeta*> res = {&meta};
+    scale_node_ptr->AddEdges(&res, 0);
   }

   // Retain Grad for leaf tensor1
@@ -180,9 +182,11 @@ TEST(RetainGrad, HookAfterRetainGrad) {
   // Connect ScaleNode -> AccumulationNode via Edge
   {
     auto meta = AutogradMeta();
+    meta.SetStopGradient(false);
     meta.SetSingleOutRankWithSlot(0, 0);
     meta.SetGradNode(acc_node_ptr);
-    scale_node_ptr->AddEdges({&meta}, 0);
+    std::vector<AutogradMeta*> res = {&meta};
+    scale_node_ptr->AddEdges(&res, 0);
   }

   // Retain Grad for leaf tensor1
diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc
index c56fe5be4da6982cc9e9c776093198ff8f9dd5c2..a0067f9c64fb157fe675e9ad848b65fce7ba621e 100644
--- a/paddle/fluid/pybind/eager_method.cc
+++ b/paddle/fluid/pybind/eager_method.cc
@@ -234,6 +234,44 @@ static PyObject* eager_tensor__zero_grads(EagerTensorObject* self,
   EAGER_CATCH_AND_THROW_RETURN_NULL
 }

+static PyObject* eager_tensor__share_buffer_to(EagerTensorObject* self,
+                                               PyObject* args,
+                                               PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  egr::EagerTensor* src_ptr =
+      &(reinterpret_cast<EagerTensorObject*>(PyTuple_GET_ITEM(args, 0))
+            ->eager_tensor);
+  PADDLE_ENFORCE_EQ(self->eager_tensor.initialized(), true,
+                    platform::errors::InvalidArgument(
+                        "Tensor %s has not been initialized! Please initialize "
+                        "the source tensor before sharing its buffer.",
+                        self->eager_tensor.name()));
+  src_ptr->set_impl(self->eager_tensor.impl());
+  Py_INCREF(Py_None);
+  return Py_None;
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
+static PyObject* eager_tensor__is_shared_buffer_with(EagerTensorObject* self,
+                                                     PyObject* args,
+                                                     PyObject* kwargs) {
+  EAGER_SYNC_TRY
+  egr::EagerTensor src_tensor =
+      CastPyArg2EagerTensor(PyTuple_GET_ITEM(args, 0), 0);
+  PADDLE_ENFORCE_EQ(src_tensor.initialized(), true,
+                    platform::errors::InvalidArgument(
+                        "Tensor %s has not been initialized! Please initialize "
+                        "the source tensor before checking buffer sharing.",
+                        src_tensor.name()));
+  bool res = false;
+  if (!self->eager_tensor.defined() || !src_tensor.defined()) {
+    return ToPyObject(res);
+  }
+  res = (self->eager_tensor.impl().get() == src_tensor.impl().get());
+  return ToPyObject(res);
+  EAGER_CATCH_AND_THROW_RETURN_NULL
+}
+
 static PyObject* eager_tensor_method_detach(EagerTensorObject* self,
                                             PyObject* args, PyObject* kwargs) {
   EAGER_SYNC_TRY
@@ -278,6 +316,12 @@ PyMethodDef variable_methods[] = {
      METH_VARARGS | METH_KEYWORDS, NULL},
     {"_zero_grads", (PyCFunction)(void (*)(void))eager_tensor__zero_grads,
      METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_share_buffer_to",
+     (PyCFunction)(void (*)(void))eager_tensor__share_buffer_to,
+     METH_VARARGS | METH_KEYWORDS, NULL},
+    {"_is_shared_buffer_with",
+     (PyCFunction)(void (*)(void))eager_tensor__is_shared_buffer_with,
+     METH_VARARGS | METH_KEYWORDS, NULL},
     {"detach", (PyCFunction)(void (*)(void))eager_tensor_method_detach,
      METH_VARARGS | METH_KEYWORDS, NULL},
     {NULL, NULL, 0, NULL}};
diff --git a/python/paddle/fluid/tests/unittests/test_egr_python_api.py b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
index e4576fe2ea8bda9dcbcda6b206053b61f22fb4c2..3ab7981cdb1a4b4bf3b0229439a2d4c47a0c713b 100644
--- a/python/paddle/fluid/tests/unittests/test_egr_python_api.py
+++ b/python/paddle/fluid/tests/unittests/test_egr_python_api.py
@@ -645,6 +645,33 @@ class EagerTensorPropertiesTestCase(unittest.TestCase):
         self.assertTrue(tensor3.stop_gradient, True)
         self.assertTrue(tensor3.place.is_cpu_place())

+    def test_share_buffer_to(self):
+        arr = np.ones([4, 16, 16, 32]).astype('float32')
+        arr2 = np.ones([4, 16, 16, 32]).astype('float32') + np.ones(
+            [4, 16, 16, 32]).astype('float32')
+        tensor = None
+        tensor2 = None
+        tensor = paddle.to_tensor(arr, core.VarDesc.VarType.FP32,
+                                  core.CPUPlace())
+        tensor3 = core.eager.EagerTensor()
+        if core.is_compiled_with_cuda():
+            tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
+                                       core.CUDAPlace(0))
+        else:
+            tensor2 = paddle.to_tensor(arr2, core.VarDesc.VarType.FP32,
+                                       core.CPUPlace())
+        self.assertTrue(np.array_equal(tensor.numpy(), arr))
+        self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
+        tensor2._share_buffer_to(tensor)
+        self.assertTrue(np.array_equal(tensor.numpy(), arr2))
+        self.assertTrue(np.array_equal(tensor2.numpy(), arr2))
+        self.assertTrue(tensor._is_shared_buffer_with(tensor2))
+        self.assertTrue(tensor2._is_shared_buffer_with(tensor))
+        tensor._share_buffer_to(tensor3)
+        self.assertTrue(np.array_equal(tensor3.numpy(), arr2))
+        self.assertTrue(tensor3._is_shared_buffer_with(tensor))
+
     def test_properties(self):
         print("Test_properties")
         with _test_eager_guard():
diff --git a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
index b82a058ae4eb155103f1b5cb2af68ebf2334f934..d2e1a4fbb18828bad39b0f5c87c37aea8d7a27b5 100644
--- a/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
+++ b/python/paddle/fluid/tests/unittests/test_imperative_auto_prune.py
@@ -15,6 +15,7 @@
 import unittest
 import paddle.fluid as fluid
 import numpy as np
+from paddle.fluid.framework import _test_eager_guard


 class AutoPruneLayer0(fluid.Layer):
@@ -145,7 +146,7 @@ class MyLayer2(fluid.Layer):


 class TestImperativeAutoPrune(unittest.TestCase):
-    def test_auto_prune(self):
+    def func_auto_prune(self):
         with fluid.dygraph.guard():
             case1 = AutoPruneLayer0(input_size=5)
             value1 = np.arange(25).reshape(5, 5).astype("float32")
@@ -157,7 +158,12 @@ class TestImperativeAutoPrune(unittest.TestCase):
             self.assertTrue(case1.linear2.weight._grad_ivar() is not None)
             self.assertTrue(case1.linear1.weight._grad_ivar() is not None)

-    def test_auto_prune2(self):
+    def test_auto_prune(self):
+        with _test_eager_guard():
+            self.func_auto_prune()
+        self.func_auto_prune()
+
+    def func_auto_prune2(self):
         with fluid.dygraph.guard():
             case2 = AutoPruneLayer1(input_size=5)
             value1 = np.arange(25).reshape(5, 5).astype("float32")
@@ -170,6 +176,11 @@ class TestImperativeAutoPrune(unittest.TestCase):
             self.assertTrue(case2.linear2.weight._grad_ivar() is None)
             self.assertTrue(case2.linear1.weight._grad_ivar() is not None)

+    def test_auto_prune2(self):
+        with _test_eager_guard():
+            self.func_auto_prune2()
+        self.func_auto_prune2()
+
     def test_auto_prune3(self):
         with fluid.dygraph.guard():
             case3 = AutoPruneLayer3(input_size=784)
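
Usage sketch (illustrative only, not part of the patch): the snippet below mirrors the unit test added above and assumes the method table registers `_share_buffer_to` and `_is_shared_buffer_with` as shown, with `_test_eager_guard` imported from `paddle.fluid.framework`; tensor construction follows the same `paddle.to_tensor(data, dtype, place)` form used in the test.

    import numpy as np
    import paddle
    import paddle.fluid.core as core
    from paddle.fluid.framework import _test_eager_guard

    with _test_eager_guard():
        src_arr = np.ones([4, 16]).astype('float32')
        dst_arr = np.zeros([4, 16]).astype('float32')
        src = paddle.to_tensor(src_arr, core.VarDesc.VarType.FP32, core.CPUPlace())
        dst = paddle.to_tensor(dst_arr, core.VarDesc.VarType.FP32, core.CPUPlace())
        # _share_buffer_to points `dst` at the DenseTensor impl backing `src`,
        # so both tensors alias the same storage afterwards.
        src._share_buffer_to(dst)
        assert dst._is_shared_buffer_with(src)
        assert np.array_equal(dst.numpy(), src_arr)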