From d2934a701b3e0aa19552bd0e37308ac246b10166 Mon Sep 17 00:00:00 2001
From: Zhanlue Yang
Date: Sat, 27 Nov 2021 17:50:12 +0800
Subject: [PATCH] Added RunBackward and HookUtils to Eager Dygraph (#37599)

---
 paddle/fluid/eager/CMakeLists.txt             |   1 +
 paddle/fluid/eager/api/CMakeLists.txt         |   2 +-
 paddle/fluid/eager/api/all.h                  |   2 +
 paddle/fluid/eager/api/utils/CMakeLists.txt   |   1 +
 paddle/fluid/eager/api/utils/hook_utils.cc    |  93 +++++
 paddle/fluid/eager/api/utils/hook_utils.h     |  30 ++
 paddle/fluid/eager/backward.cc                | 212 ++++++
 paddle/fluid/eager/backward.h                 |  31 ++
 paddle/fluid/eager/tests/CMakeLists.txt       |   2 +-
 .../eager/tests/task_tests/CMakeLists.txt     |   3 +
 .../eager/tests/task_tests/backward_test.cc   | 320 ++++++++++++++++++
 .../cross_batch_accumulation_test.cc          |  88 +++++
 .../fluid/eager/tests/task_tests/hook_test.cc | 214 ++++++++++++
 13 files changed, 997 insertions(+), 2 deletions(-)
 create mode 100644 paddle/fluid/eager/api/utils/hook_utils.cc
 create mode 100644 paddle/fluid/eager/api/utils/hook_utils.h
 create mode 100644 paddle/fluid/eager/backward.cc
 create mode 100644 paddle/fluid/eager/backward.h
 create mode 100644 paddle/fluid/eager/tests/task_tests/backward_test.cc
 create mode 100644 paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
 create mode 100644 paddle/fluid/eager/tests/task_tests/hook_test.cc

diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt
index 87866624b70..9746305c6c6 100644
--- a/paddle/fluid/eager/CMakeLists.txt
+++ b/paddle/fluid/eager/CMakeLists.txt
@@ -6,3 +6,4 @@ cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api)
 cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api)
 cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation)
 cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta)
+cc_library(backward SRCS backward.cc DEPS grad_tensor_holder utils autograd_meta grad_node_info)
diff --git a/paddle/fluid/eager/api/CMakeLists.txt b/paddle/fluid/eager/api/CMakeLists.txt
index c7c58a54365..4c241fd5b72 100644
--- a/paddle/fluid/eager/api/CMakeLists.txt
+++ b/paddle/fluid/eager/api/CMakeLists.txt
@@ -1,4 +1,4 @@
 add_subdirectory(utils)
 add_subdirectory(generated)
 
-cc_library(eager_api SRCS all.cc DEPS global_utils eager_scale)
+cc_library(eager_api SRCS all.cc DEPS tensor_utils hook_utils global_utils eager_scale)
diff --git a/paddle/fluid/eager/api/all.h b/paddle/fluid/eager/api/all.h
index 6750073c3d3..bc5f1f3fb83 100644
--- a/paddle/fluid/eager/api/all.h
+++ b/paddle/fluid/eager/api/all.h
@@ -16,3 +16,5 @@
 
 #include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h"
 #include "paddle/fluid/eager/api/utils/global_utils.h"
+#include "paddle/fluid/eager/api/utils/hook_utils.h"
+#include "paddle/fluid/eager/api/utils/tensor_utils.h"
diff --git a/paddle/fluid/eager/api/utils/CMakeLists.txt b/paddle/fluid/eager/api/utils/CMakeLists.txt
index 600212cae71..a785b1ddb5a 100644
--- a/paddle/fluid/eager/api/utils/CMakeLists.txt
+++ b/paddle/fluid/eager/api/utils/CMakeLists.txt
@@ -1,2 +1,3 @@
 cc_library(tensor_utils SRCS tensor_utils.cc DEPS pten pten_api autograd_meta grad_node_info accumulation_node)
+cc_library(hook_utils SRCS hook_utils.cc DEPS pten tensor_utils autograd_meta grad_node_info utils accumulation_node)
 cc_library(global_utils SRCS global_utils.cc DEPS place)
diff --git a/paddle/fluid/eager/api/utils/hook_utils.cc b/paddle/fluid/eager/api/utils/hook_utils.cc
new file mode 100644
index 00000000000..7f85d014fa8
--- /dev/null
+++ b/paddle/fluid/eager/api/utils/hook_utils.cc
@@ -0,0 +1,93 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/eager/api/utils/hook_utils.h"
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/api/utils/tensor_utils.h"
+#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/utils.h"
+#include "paddle/pten/core/dense_tensor.h"
+
+namespace egr {
+
+void RegisterGradientHookForTensor(
+    const egr::EagerTensor& tensor,
+    std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook) {
+  // Find grad_node and out_rank from AutogradMeta
+  std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
+  auto rank_info = EagerUtils::unsafe_autograd_meta(tensor)->OutRankInfo();
+
+  grad_node->RegisterGradientHook(rank_info.first, rank_info.second, hook);
+}
+
+void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
+                                 const std::function<void(void)>& hook) {
+  // Find grad_node and out_rank from AutogradMeta
+  std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
+
+  grad_node->RegisterReduceHook(hook);
+}
+
+void RetainGradForTensor(const egr::EagerTensor& tensor) {
+  // TODO(jiabin): Support more tensor types here
+  AutogradMeta* meta = EagerUtils::unsafe_autograd_meta(tensor);
+  egr::EagerTensor* grad_tensor = meta->MutableGrad();
+
+  // Define Hook
+  std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
+      [grad_tensor](const egr::EagerTensor& t) {
+        if (!grad_tensor) {
+          PADDLE_THROW(paddle::platform::errors::Fatal(
+              "Detected null grad_tensor. "
+              "The grad tensor in AutogradMeta should not be nullptr."));
+        }
+        if (t.defined()) {
+          // Simply copy impl() to grad_tensor
+          grad_tensor->set_impl(t.impl());
+          return *grad_tensor;
+        } else {
+          PADDLE_ENFORCE_EQ(
+              t.Var().IsInitialized(), true,
+              paddle::platform::errors::Fatal(
+                  "Detected uninitialized variable, causing segmentation "
+                  "fault inside the hook. "
+                  "Variable %s has to be initialized before we can set it. "
+                  "Please check the tensor initialization status.",
+                  t.name()));
+          grad_tensor->MutableVar()
+              ->GetMutable<paddle::framework::LoDTensor>()
+              ->ShareDataWith(t.Var().Get<paddle::framework::LoDTensor>());
+          return *grad_tensor;
+        }
+      };
+
+  if (IsLeafTensor(tensor)) {
+    // Add RetainGrad as PostHook to AccumulationNode
+    std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(tensor);
+    PADDLE_ENFORCE(
+        grad_node.get() != nullptr,
+        paddle::platform::errors::Fatal(
+            "Detected null grad_node. "
+            "A leaf tensor should hold a grad_node of type "
+            "GradNodeAccumulation."));
+    auto accumulation_grad_node =
+        std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node);
+    accumulation_grad_node->RetainGrad(hook);
+
+  } else {
+    // Append to GradientHooks
+    RegisterGradientHookForTensor(tensor, hook);
+  }
+}
+
+}  // namespace egr
diff --git a/paddle/fluid/eager/api/utils/hook_utils.h b/paddle/fluid/eager/api/utils/hook_utils.h
new file mode 100644
index 00000000000..bf320f0b15d
--- /dev/null
+++ b/paddle/fluid/eager/api/utils/hook_utils.h
@@ -0,0 +1,30 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/fluid/eager/grad_node_info.h"
+#include "paddle/pten/api/all.h"
+namespace egr {
+
+void RegisterGradientHookForTensor(
+    const egr::EagerTensor& tensor,
+    std::function<egr::EagerTensor(const egr::EagerTensor&)>& hook);
+
+void RegisterReduceHookForTensor(const egr::EagerTensor& tensor,
+                                 const std::function<void(void)>& hook);
+void RetainGradForTensor(const egr::EagerTensor& tensor);
+
+}  // namespace egr
diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc
new file mode 100644
index 00000000000..9288c23a34a
--- /dev/null
+++ b/paddle/fluid/eager/backward.cc
@@ -0,0 +1,212 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/eager/backward.h"
+#include <queue>
+
+#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/grad_node_info.h"
+#include "paddle/fluid/eager/grad_tensor_holder.h"
+#include "paddle/fluid/eager/utils.h"
+
+#include "paddle/fluid/platform/enforce.h"
+#include "paddle/fluid/platform/errors.h"
+
+#include "glog/logging.h"
+
+namespace egr {
+
+std::unordered_map<GradNodeBase*, int> getInDegreeMap(
+    const std::queue<GradNodeBase*>& init_queue) {
+  // Calculate in_degree for each node.
+  // This pass could be removed entirely if in_degree were recorded during
+  // the forward pass.
+  std::unordered_map<GradNodeBase*, int> node_in_degree_map;
+
+  // Copy nodes
+  std::queue<GradNodeBase*> queue = init_queue;
+  std::unordered_set<GradNodeBase*> visited;
+
+  // Visit each node exactly once in any order
+  while (!queue.empty()) {
+    GradNodeBase* node = queue.front();
+    queue.pop();
+
+    if (visited.count(node)) {
+      continue;
+    }
+    visited.insert(node);
+
+    // Find and append next nodes
+    const std::vector<std::vector<Edge>>& edges = node->GetEdges();
+    for (const auto& edge_list : edges) {
+      for (const Edge& edge : edge_list) {
+        GradNodeBase* next_node = edge.GetMutableGradNode().get();
+        // Update in_degree
+        if (!node_in_degree_map.count(next_node))
+          node_in_degree_map[next_node] = 0;
+        node_in_degree_map[next_node]++;
+        queue.push(next_node);
+      }
+    }
+  }
+
+  return node_in_degree_map;
+}
+
+void RunBackward(const std::vector<egr::EagerTensor>& tensors,
+                 const std::vector<egr::EagerTensor>& grad_tensors,
+                 bool retain_graph) {
+  VLOG(6) << "Start Backward";
+  // * Gradient hooks should be applied at node level
+  // * Inplace version checks should be performed at node level
+  // * Cross-batch accumulation happens during the forward pass
+
+  /* --- Initialization --- */
+  // 1. Init queue with starting nodes
+  // 2. Prepare initial input buffers
+  std::queue<GradNodeBase*> queue;
+  std::unordered_map<GradNodeBase*, std::unique_ptr<GradTensorHolder>>
+      node_input_buffers_dict;
+  for (size_t i = 0; i < tensors.size(); i++) {
+    const egr::EagerTensor& tensor = tensors[i];
+
+    AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(tensor);
+    // Get grad input info from target tensors
+    auto input_info = auto_grad_meta->OutRankInfo();
+
+    VLOG(2) << "Out Rank of Tensor is slot: " << input_info.first
+            << ", rank: " << input_info.second;
+    // Get target GradNodeBase from target tensors
+    GradNodeBase* grad_node = auto_grad_meta->GetMutableGradNode().get();
+
+    PADDLE_ENFORCE(grad_node,
+                   paddle::platform::errors::Fatal(
+                       "Detected null grad_node. "
+                       "Grad node is nullptr for grad input tensor %d.",
+                       i));
+    // Prepare GradTensorHolder
+    if (!node_input_buffers_dict.count(grad_node)) {
+      VLOG(6) << "Create Value for grad input tensor " << i;
+      node_input_buffers_dict[grad_node] =
+          std::make_unique<GradTensorHolder>(grad_node->InputMeta());
+    }
+
+    if (grad_tensors.size() > 0) {
+      PADDLE_ENFORCE(
+          grad_tensors.size() == tensors.size(),
+          paddle::platform::errors::Fatal(
+              "Detected size mismatch between tensors and grad_tensors. "
+              "grad_tensors should either be empty or have the same size "
+              "as tensors."));
+      // Feed the given grad tensor if one is provided
+      VLOG(6) << "Fill grad input tensor " << i << " with given grad tensor";
+      node_input_buffers_dict[grad_node]->add(
+          input_info.first, input_info.second, grad_tensors[i]);
+
+    } else {
+      VLOG(6) << "Fill grad input tensor " << i << " with 1.0";
+      // Initialize tensor with 1.0.
+      // The forward tensor "tensor" is passed to indicate the tensor type,
+      // dtype and dims; GradTensorHolder will initialize another tensor with
+      // the same tensor type, dtype and dims, filled with 1.0.
+      node_input_buffers_dict[grad_node]->add(
+          input_info.first, input_info.second, tensor, true /*fill_one=true*/);
+    }
+
+    // Prepare queue
+    queue.push(grad_node);
+  }
+
+  VLOG(6) << "Update In degree Map for backward";
+  // 3. Compute in_degree for each node
+  std::unordered_map<GradNodeBase*, int> node_in_degree_map =
+      getInDegreeMap(queue);
+
+  /* --- Topological Visit --- */
+  // 1. Pop queue
+  // 2. Run node
+  //    |- node(grads)
+  //    |- Prepare for next node
+  // 3. Update queue
+  VLOG(6) << "Run Backward";
+  while (!queue.empty()) {
+    GradNodeBase* node = queue.front();
+    queue.pop();
+
+    // Run node: this is where hooks are invoked
+    PADDLE_ENFORCE(
+        node_input_buffers_dict.count(node),
+        paddle::platform::errors::Fatal(
+            "Unable to find the node in the input buffer map. "
+            "Trying to run a node without configuring its "
+            "GradTensorHolder."));
+
+    std::unique_ptr<GradTensorHolder> node_input_buffer =
+        std::move(node_input_buffers_dict[node]);
+    VLOG(6) << "Run Backward Kernel with input_buffer";
+    // Run backward node and get outputs
+    std::vector<std::vector<egr::EagerTensor>> grad_output_tensors =
+        (*node)(node_input_buffer->Buffers());
+    // TODO(jiabin): Should we erase it or find a more efficient way?
+    node_input_buffers_dict.erase(node);
+
+    // Prepare GradTensorHolder for next node
+    const std::vector<std::vector<Edge>>& edges = node->GetEdges();
+
+    PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
+                   paddle::platform::errors::Fatal(
+                       "The number of edges should either be empty (for a "
+                       "leaf node) or match the number of output grad "
+                       "tensors."));
+
+    for (size_t i = 0; i < edges.size(); i++) {
+      for (size_t j = 0; j < edges[i].size(); j++) {
+        const Edge& edge = edges[i][j];
+        auto edge_rank = edge.GetEdgeRankInfo();
+        // Since each edge carries the same rank as its corresponding bwd
+        // output, we index both with the same rank (i, j)
+        VLOG(6) << "Get Edge with slot: " << i << ", rank: " << j;
+        egr::EagerTensor& grad_output_tensor = grad_output_tensors[i][j];
+        if (!grad_output_tensor.defined() ||
+            !grad_output_tensor.initialized()) {
+          VLOG(6) << "We get grad_output_tensor with slot: " << i
+                  << ", rank: " << j
+                  << " as uninitialized or undefined tensor";
+        }
+        GradNodeBase* next_node = edge.GetMutableGradNode().get();
+
+        if (!node_input_buffers_dict.count(next_node)) {
+          node_input_buffers_dict[next_node] =
+              std::make_unique<GradTensorHolder>(next_node->InputMeta());
+        }
+        VLOG(6) << "Sum grad inputs for edge slot: " << edge_rank.first
+                << ", rank: " << edge_rank.second;
+        node_input_buffers_dict[next_node]->add(
+            edge_rank.first, edge_rank.second, grad_output_tensor);
+
+        // Update queue
+        node_in_degree_map[next_node]--;
+        PADDLE_ENFORCE(node_in_degree_map[next_node] >= 0,
+                       paddle::platform::errors::Fatal(
+                           "Detected an in-degree value smaller than zero. "
+                           "A node's in-degree cannot be negative."));
+        if (node_in_degree_map[next_node] == 0) {
+          queue.push(next_node);
+        }
+      }
+    }
+  }
+}
+
+}  // namespace egr
diff --git a/paddle/fluid/eager/backward.h b/paddle/fluid/eager/backward.h
new file mode 100644
index 00000000000..b077fb37e87
--- /dev/null
+++ b/paddle/fluid/eager/backward.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/fluid/eager/eager_tensor.h"
+#include "paddle/pten/api/all.h"
+
+namespace egr {
+
+// RunBackward():
+//  tensors: the target tensors from which the backward graph is traversed
+//  grad_tensors: each grad_tensors[i] keeps the initial gradient value for
+//  its corresponding tensors[i]
+void RunBackward(const std::vector<egr::EagerTensor> &tensors,
+                 const std::vector<egr::EagerTensor> &grad_tensors,
+                 bool retain_graph = false);
+
+// Reserved for gradient()
+
+}  // namespace egr
diff --git a/paddle/fluid/eager/tests/CMakeLists.txt b/paddle/fluid/eager/tests/CMakeLists.txt
index adff9572761..eb5c25b3387 100644
--- a/paddle/fluid/eager/tests/CMakeLists.txt
+++ b/paddle/fluid/eager/tests/CMakeLists.txt
@@ -1,4 +1,4 @@
-set(eager_deps pten pten_api tensor_utils utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node)
+set(eager_deps pten pten_api hook_utils tensor_utils utils global_utils backward pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node)
 set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy)
 
 add_subdirectory(data_structure_tests)
diff --git a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
index 37e2bc1d079..7c1e8ba0237 100644
--- a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
+++ b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt
@@ -1,3 +1,6 @@
 cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps})
 cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps})
 cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
+cc_test(test_egr_task_backward SRCS backward_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
+cc_test(test_egr_task_hook SRCS hook_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
+cc_test(test_egr_task_cross_batch SRCS cross_batch_accumulation_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node)
diff --git a/paddle/fluid/eager/tests/task_tests/backward_test.cc b/paddle/fluid/eager/tests/task_tests/backward_test.cc
new file mode 100644
index 00000000000..d63cff23ba9
--- /dev/null
+++ b/paddle/fluid/eager/tests/task_tests/backward_test.cc
@@ -0,0 +1,320 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
+#include "paddle/fluid/eager/api/utils/tensor_utils.h"
+#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/backward.h"
+#include "paddle/fluid/eager/grad_node_info.h"
+#include "paddle/fluid/eager/tests/test_utils.h"
+
+#include "paddle/fluid/eager/api/all.h"
+
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/tensor_meta.h"
+
+using namespace egr;  // NOLINT
+
+namespace eager_test {
+
+TEST(Backward, SingleNodeEmptyGrad) {
+  // Prepare Device Contexts
+  InitEnv(paddle::platform::CPUPlace());
+
+  // Prepare Inputs
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  // Create Target Tensor
+  egr::EagerTensor target_tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+
+  egr::EagerTensor leaf_tensor;
+  {
+    // Create Scale Node
+    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node0_ptr->SetAttributes_scale(5.0 /*scale*/);
+
+    // Set grad in/out meta
+    node0_ptr->SetDefaultGradInOutMeta();
+    AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&target_tensor);
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+
+    // Connect Tensor and AccumulationNode via AutoGradMeta
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto_grad_meta1->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+
+    egr::RetainGradForTensor(leaf_tensor);
+
+    // Connect Node0 -> AccumulationNode via Edge
+    auto meta = egr::AutogradMeta();
+    meta.SetSingleOutRankWithSlot(0, 0);
+    meta.SetGradNode(acc_node_ptr);
+    node0_ptr->AddEdges({&meta}, 0);
+  }
+  std::vector<egr::EagerTensor> outs = {target_tensor};
+  // Run Backward
+  RunBackward(outs, {});
+
+  // Check Output Value
+  CompareGradTensorWithValue<float>(leaf_tensor, 5.0);
+}
+
+TEST(Backward, SingleNodeCustomGrad) {
+  // Prepare Device Contexts
+  InitEnv(paddle::platform::CPUPlace());
+
+  // Prepare Inputs
+  std::vector<egr::EagerTensor> target_tensors;
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  // Create Target Tensor
+  egr::EagerTensor tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  target_tensors.emplace_back(std::move(tensor));
+
+  std::vector<egr::EagerTensor> grad_tensors;
+  // Create Grad Tensor
+  egr::EagerTensor grad_tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/);
+  grad_tensors.emplace_back(std::move(grad_tensor));
+
+  egr::EagerTensor leaf_tensor;
+  {
+    // Create Scale Node
+    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node0_ptr->SetAttributes_scale(5.0 /*scale*/);
+
+    // Set grad in/out meta
+    node0_ptr->SetDefaultGradInOutMeta();
+
+    // Connect Tensor and Node via AutoGradMeta
+    AutogradMeta* auto_grad_meta =
+        EagerUtils::autograd_meta(&(target_tensors[0]));
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+
+    // Connect Tensor and AccumulationNode via AutoGradMeta
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto_grad_meta1->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+
+    egr::RetainGradForTensor(leaf_tensor);
+
+    // Connect Node0 -> AccumulationNode via Edge
+    auto meta = egr::AutogradMeta();
+    meta.SetSingleOutRankWithSlot(0, 0);
+    meta.SetGradNode(acc_node_ptr);
+    node0_ptr->AddEdges({&meta}, 0);
+  }
+
+  // Run Backward
+  RunBackward(target_tensors, grad_tensors);
+
+  // Check Output Value
+  CompareGradTensorWithValue<float>(leaf_tensor, 50.0);
+}
+
+/*
+Node1
+  |
+Node0
+  |
+ inp0
+*/
+TEST(Backward, LinearNodes) {
+  // Prepare Device Contexts
+  InitEnv(paddle::platform::CPUPlace());
+
+  // Prepare Inputs
+  std::vector<egr::EagerTensor> target_tensors;
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  // Create Target Tensor
+  egr::EagerTensor tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  target_tensors.emplace_back(std::move(tensor));
+
+  egr::EagerTensor leaf_tensor;
+  {
+    // Create Node0
+    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node0_ptr->SetAttributes_scale(5.0 /*scale*/);
+
+    // Set grad in/out meta for node0
+    node0_ptr->SetDefaultGradInOutMeta();
+
+    // Create Node1
+    auto node1_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node1_ptr->SetAttributes_scale(10.0 /*scale*/);
+
+    // Set grad in/out meta for node1
+    node1_ptr->SetDefaultGradInOutMeta();
+
+    // Connect Input Tensor and Node0 via AutoGradMeta
+    AutogradMeta* auto_grad_meta =
+        EagerUtils::autograd_meta(&(target_tensors[0]));
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+
+    // Connect Node0 -> Node1 via Edge
+    auto meta0 = egr::AutogradMeta();
+    meta0.SetSingleOutRankWithSlot(0, 0);
+    meta0.SetGradNode(node1_ptr);
+    node0_ptr->AddEdges({&meta0}, 0);
+
+    // Connect Tensor and AccumulationNode via AutoGradMeta
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto_grad_meta1->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+
+    egr::RetainGradForTensor(leaf_tensor);
+
+    // Connect Node1 -> AccumulationNode via Edge
+    auto meta1 = egr::AutogradMeta();
+    meta1.SetSingleOutRankWithSlot(0, 0);
+    meta1.SetGradNode(acc_node_ptr);
+    node1_ptr->AddEdges({&meta1}, 0);
+  }
+
+  // Use Empty Grad Tensor
+  RunBackward(target_tensors, {});
+
+  // Check Output Value
+  CompareGradTensorWithValue<float>(leaf_tensor, 50.0);
+}
+
+/*
+    Node2
+    |   |
+Node0   Node1
+  |       |
+ inp0    inp1
+*/
+TEST(Backward, WithAccumulation) {
+  // Prepare Device Contexts
+  InitEnv(paddle::platform::CPUPlace());
+
+  // Prepare Inputs
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  // Create Target Tensor
+  std::vector<egr::EagerTensor> target_tensors;
+  egr::EagerTensor tensor0 = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  egr::EagerTensor tensor1 = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  target_tensors.emplace_back(std::move(tensor0));
+  target_tensors.emplace_back(std::move(tensor1));
+
+  // Create Grad Tensor
+  std::vector<egr::EagerTensor> grad_tensors;
+  egr::EagerTensor grad_tensor0 = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/);
+  egr::EagerTensor grad_tensor1 = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 10.0 /*value*/, false /*is_leaf*/);
+  grad_tensors.emplace_back(std::move(grad_tensor0));
+  grad_tensors.emplace_back(std::move(grad_tensor1));
+
+  egr::EagerTensor leaf_tensor;
+  {
+    // Create Node0
+    auto node0_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node0_ptr->SetAttributes_scale(5.0 /*scale*/);
+    node0_ptr->SetDefaultGradInOutMeta();
+
+    // Create Node1
+    auto node1_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node1_ptr->SetAttributes_scale(10.0 /*scale*/);
+    node1_ptr->SetDefaultGradInOutMeta();
+    // Create Node2
+    auto node2_ptr = std::make_shared<GradNodeScale>(1, 1);
+    node2_ptr->SetAttributes_scale(20.0 /*scale*/);
+    node2_ptr->SetDefaultGradInOutMeta();
+    // Connect Inp0 and Node0 via AutoGradMeta
+    AutogradMeta* auto_grad_meta0 =
+        EagerUtils::autograd_meta(&(target_tensors[0]));
+    auto_grad_meta0->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(node0_ptr));
+    auto_grad_meta0->SetSingleOutRankWithSlot(0, 0);
+
+    // Connect Inp1 and Node1 via AutoGradMeta
+    AutogradMeta* auto_grad_meta1 =
+        EagerUtils::autograd_meta(&(target_tensors[1]));
+    auto_grad_meta1->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(node1_ptr));
+    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+
+    // Connect Node0 -> Node2 via Edge
+    auto meta0 = egr::AutogradMeta();
+    meta0.SetSingleOutRankWithSlot(0, 0);
+    meta0.SetGradNode(node2_ptr);
+    node0_ptr->AddEdges({&meta0}, 0);
+
+    // Connect Node1 -> Node2 via Edge
+    auto meta1 = egr::AutogradMeta();
+    meta1.SetSingleOutRankWithSlot(0, 0);
+    meta1.SetGradNode(node2_ptr);
+    node1_ptr->AddEdges({&meta1}, 0);
+
+    // Connect Tensor and AccumulationNode via AutoGradMeta
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+    AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto_grad_meta2->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta2->SetSingleOutRankWithSlot(0, 0);
+
+    egr::RetainGradForTensor(leaf_tensor);
+
+    // Connect Node2 -> AccumulationNode via Edge
+    auto meta2 = egr::AutogradMeta();
+    meta2.SetSingleOutRankWithSlot(0, 0);
+    meta2.SetGradNode(acc_node_ptr);
+    node2_ptr->AddEdges({&meta2}, 0);
+  }
+
+  RunBackward(target_tensors, grad_tensors);
+
+  CompareGradTensorWithValue<float>(leaf_tensor, 2500.0);
+}
+
+}  // namespace eager_test
diff --git a/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
new file mode 100644
index 00000000000..e1e138cdee8
--- /dev/null
+++ b/paddle/fluid/eager/tests/task_tests/cross_batch_accumulation_test.cc
@@ -0,0 +1,88 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
+#include "paddle/fluid/eager/api/utils/tensor_utils.h"
+#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/backward.h"
+#include "paddle/fluid/eager/grad_node_info.h"
+
+#include "paddle/fluid/eager/api/all.h"
+
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/tensor_meta.h"
+
+#include "paddle/fluid/eager/tests/test_utils.h"
+
+using namespace egr;  // NOLINT
+
+namespace eager_test {
+
+TEST(CrossBatchAccumulation, SingleScaleNode) {
+  InitEnv(paddle::platform::CPUPlace());
+
+  std::vector<egr::EagerTensor> target_tensors;
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  egr::EagerTensor tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  target_tensors.emplace_back(std::move(tensor));
+  egr::EagerTensor& target_tensor = target_tensors[0];
+
+  egr::EagerTensor leaf_tensor = egr::EagerTensor();
+  {
+    auto scale_node_ptr = std::make_shared<GradNodeScale>(1, 1);
+    scale_node_ptr->SetAttributes_scale(5.0 /*scale*/);
+
+    scale_node_ptr->SetDefaultGradInOutMeta();
+
+    auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+    AutogradMeta* auto_grad_meta = EagerUtils::autograd_meta(&target_tensor);
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+    RetainGradForTensor(target_tensor);  // result: 1.0
+
+    auto meta = AutogradMeta();
+    meta.SetSingleOutRankWithSlot(0, 0);
+    meta.SetGradNode(acc_node_ptr);
+    scale_node_ptr->AddEdges({&meta}, 0);
+
+    AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
+    auto_grad_meta1->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
+    RetainGradForTensor(leaf_tensor);
+  }
+
+  RunBackward(target_tensors, {});
+
+  CompareGradTensorWithValue<float>(target_tensor, 1.0);
+  CompareGradTensorWithValue<float>(leaf_tensor, 5.0);
+
+  RunBackward(target_tensors, {});
+
+  CompareGradTensorWithValue<float>(target_tensor, 1.0);
+  CompareGradTensorWithValue<float>(leaf_tensor, 10.0);
+}
+
+}  // namespace eager_test
diff --git a/paddle/fluid/eager/tests/task_tests/hook_test.cc b/paddle/fluid/eager/tests/task_tests/hook_test.cc
new file mode 100644
index 00000000000..326240d0cb7
--- /dev/null
+++ b/paddle/fluid/eager/tests/task_tests/hook_test.cc
@@ -0,0 +1,214 @@
+// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <sstream>
+
+#include "glog/logging.h"
+#include "gtest/gtest.h"
+
+#include "paddle/fluid/eager/accumulation/accumulation_node.h"
+#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h"
+#include "paddle/fluid/eager/autograd_meta.h"
+#include "paddle/fluid/eager/backward.h"
+#include "paddle/fluid/eager/grad_node_info.h"
+
+#include "paddle/fluid/eager/api/all.h"
+
+#include "paddle/pten/core/dense_tensor.h"
+#include "paddle/pten/core/tensor_meta.h"
+
+#include "paddle/fluid/eager/tests/test_utils.h"
+
+using namespace egr;  // NOLINT
+
+namespace eager_test {
+
+egr::EagerTensor hook_function(const egr::EagerTensor& t) {
+  auto t_dense = std::dynamic_pointer_cast<pten::DenseTensor>(t.impl());
+
+  auto ret_meta = pten::DenseTensorMeta(t_dense->dtype(), t_dense->dims(),
+                                        t_dense->layout());
+  auto place = t_dense->place();
+  size_t bytes_size =
+      paddle::framework::product(t_dense->dims()) * SizeOf(t_dense->dtype());
+  auto ret_dense = std::make_shared<pten::DenseTensor>(
+      pten::make_intrusive<paddle::experimental::SharedStorage>(
+          paddle::memory::Alloc(place, bytes_size), 0),
+      std::move(ret_meta));
+
+  float* t_ptr = t_dense->mutable_data<float>();
+  float* ret_ptr = ret_dense->mutable_data<float>();
+  for (int i = 0; i < ret_dense->numel(); i++) {
+    ret_ptr[i] = t_ptr[i] + 3.0;
+  }
+
+  auto ret_impl = std::dynamic_pointer_cast<pten::TensorBase>(ret_dense);
+  egr::EagerTensor ret = egr::EagerTensor();
+  ret.set_impl(ret_impl);
+
+  return ret;
+}
+
+TEST(RetainGrad, HookBeforeRetainGrad) {
+  InitEnv(paddle::platform::CPUPlace());
+
+  // Prepare Inputs
+  std::vector<egr::EagerTensor> target_tensors;
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  // Create Target Tensor
+  egr::EagerTensor tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  target_tensors.emplace_back(std::move(tensor));
+  egr::EagerTensor& target_tensor = target_tensors[0];
+
+  // Create ScaleNode
+  auto scale_node_ptr = std::make_shared<GradNodeScale>(1, 1);
+  scale_node_ptr->SetAttributes_scale(5.0 /*scale*/);
+
+  // Set grad in/out meta for node0
+  scale_node_ptr->SetDefaultGradInOutMeta();
+
+  // Create AccumulationNode
+  auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+  // Connect Input Tensor and ScaleNode via AutoGradMeta
+  // Apply RetainGrad
+  {
+    // ScaleNode Hook: +3
+    std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
+        &hook_function;
+
+    auto auto_grad_meta = std::make_shared<AutogradMeta>();
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+    target_tensor.set_autograd_meta(
+        std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
+            auto_grad_meta));
+
+    RegisterGradientHookForTensor(target_tensor, hook);
+    RetainGradForTensor(target_tensor);  // result: 1.0 + 3.0 = 4.0
+  }
+
+  // Connect ScaleNode -> AccumulationNode via Edge
+  {
+    auto meta = AutogradMeta();
+    meta.SetSingleOutRankWithSlot(0, 0);
+    meta.SetGradNode(acc_node_ptr);
+    scale_node_ptr->AddEdges({&meta}, 0);
+  }
+
+  // Retain Grad for leaf tensor
+  egr::EagerTensor leaf_tensor = egr::EagerTensor();
+  {
+    // AccumulationNode Hook: +3
+    std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
+        &hook_function;
+
+    auto auto_grad_meta = std::make_shared<AutogradMeta>();
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+    leaf_tensor.set_autograd_meta(
+        std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
+            auto_grad_meta));
+
+    RegisterGradientHookForTensor(leaf_tensor, hook);
+    RetainGradForTensor(leaf_tensor);  // result: 4.0*5.0 + 3.0 = 23.0
+  }
+
+  RunBackward(target_tensors, {});
+
+  CompareGradTensorWithValue<float>(target_tensor, 4.0);
+  CompareGradTensorWithValue<float>(leaf_tensor, 23.0);
+}
+
+TEST(RetainGrad, HookAfterRetainGrad) {
+  InitEnv(paddle::platform::CPUPlace());
+
+  // Prepare Inputs
+  std::vector<egr::EagerTensor> target_tensors;
+  paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32});
+
+  // Create Target Tensor
+  egr::EagerTensor tensor = CreateTensorWithValue(
+      ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32,
+      pten::DataLayout::NCHW, 1.0 /*value*/, false /*is_leaf*/);
+  target_tensors.emplace_back(std::move(tensor));
+  egr::EagerTensor& target_tensor = target_tensors[0];
+
+  // Create ScaleNode
+  auto scale_node_ptr = std::make_shared<GradNodeScale>(1, 1);
+  scale_node_ptr->SetAttributes_scale(5.0 /*scale*/);
+  // Set grad in/out meta for node0
+  scale_node_ptr->SetDefaultGradInOutMeta();
+  // Create AccumulationNode
+  auto acc_node_ptr = std::make_shared<GradNodeAccumulation>();
+
+  // Connect Input Tensor and ScaleNode via AutoGradMeta
+  // Apply RetainGrad
+  {
+    // ScaleNode Hook: +3
+    std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
+        &hook_function;
+
+    auto auto_grad_meta = std::make_shared<AutogradMeta>();
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(scale_node_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+    target_tensor.set_autograd_meta(
+        std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
+            auto_grad_meta));
+
+    RetainGradForTensor(target_tensor);  // result: 1.0
+    RegisterGradientHookForTensor(target_tensor, hook);
+  }
+
+  // Connect ScaleNode -> AccumulationNode via Edge
+  {
+    auto meta = AutogradMeta();
+    meta.SetSingleOutRankWithSlot(0, 0);
+    meta.SetGradNode(acc_node_ptr);
+    scale_node_ptr->AddEdges({&meta}, 0);
+  }
+
+  // Retain Grad for leaf tensor
+  egr::EagerTensor leaf_tensor = egr::EagerTensor();
+  {
+    // AccumulationNode Hook: +3
+    std::function<egr::EagerTensor(const egr::EagerTensor&)> hook =
+        &hook_function;
+
+    auto auto_grad_meta = std::make_shared<AutogradMeta>();
+    auto_grad_meta->SetGradNode(
+        std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
+    auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
+    leaf_tensor.set_autograd_meta(
+        std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
+            auto_grad_meta));
+
+    RetainGradForTensor(leaf_tensor);  // RetainGrad for the leaf tensor gets
+                                       // postponed, result: 4.0*5.0 + 3.0 =
+                                       // 23.0
+    RegisterGradientHookForTensor(leaf_tensor, hook);
+  }
+
+  RunBackward(target_tensors, {});
+  CompareGradTensorWithValue<float>(target_tensor, 1.0);
+  CompareGradTensorWithValue<float>(leaf_tensor, 23.0);
+}
+
+}  // namespace eager_test
-- 
GitLab