From 04e3b62f0cf3e372b081f3a95cc90ac78d7478c9 Mon Sep 17 00:00:00 2001 From: Zhanlue Yang Date: Fri, 26 Nov 2021 18:07:53 +0800 Subject: [PATCH] [Eager] Supported Eager Dygraph Forward & AutoGrad functions (#37323) --- paddle/fluid/eager/CMakeLists.txt | 4 +- paddle/fluid/eager/api/CMakeLists.txt | 3 + paddle/fluid/eager/api/all.h | 1 + .../fluid/eager/api/generated/CMakeLists.txt | 1 + .../generated/eager_generated/CMakeLists.txt | 2 + .../eager_generated/backwards/CMakeLists.txt | 1 + .../eager_generated/backwards/scale_node.cc | 172 ++++++++++++ .../eager_generated/backwards/scale_node.h | 54 ++++ .../eager_generated/forwards/CMakeLists.txt | 1 + .../eager_generated/forwards/scale.cc | 100 +++++++ .../eager_generated/forwards/scale.h | 23 ++ paddle/fluid/eager/tests/CMakeLists.txt | 4 +- .../eager/tests/task_tests/CMakeLists.txt | 1 + .../tests/task_tests/forward_autograd_test.cc | 251 ++++++++++++++++++ 14 files changed, 615 insertions(+), 3 deletions(-) create mode 100644 paddle/fluid/eager/api/generated/CMakeLists.txt create mode 100644 paddle/fluid/eager/api/generated/eager_generated/CMakeLists.txt create mode 100644 paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt create mode 100644 paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc create mode 100644 paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h create mode 100644 paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt create mode 100644 paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc create mode 100644 paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h create mode 100644 paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc diff --git a/paddle/fluid/eager/CMakeLists.txt b/paddle/fluid/eager/CMakeLists.txt index 2eb5cc55cd6..87866624b70 100644 --- a/paddle/fluid/eager/CMakeLists.txt +++ b/paddle/fluid/eager/CMakeLists.txt @@ -1,8 +1,8 @@ -add_subdirectory(accumulation) add_subdirectory(api) +add_subdirectory(accumulation) add_subdirectory(tests) -cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api) cc_library(autograd_meta SRCS autograd_meta.cc DEPS pten pten_api) +cc_library(grad_node_info SRCS grad_node_info.cc DEPS pten pten_api) cc_library(grad_tensor_holder SRCS grad_tensor_holder.cc DEPS grad_node_info gradient_accumulation) cc_library(utils SRCS utils.cc DEPS pten pten_api global_utils layer proto_desc operator op_registry variable_helper memcpy scale_op autograd_meta) diff --git a/paddle/fluid/eager/api/CMakeLists.txt b/paddle/fluid/eager/api/CMakeLists.txt index 512d2b1553c..c7c58a54365 100644 --- a/paddle/fluid/eager/api/CMakeLists.txt +++ b/paddle/fluid/eager/api/CMakeLists.txt @@ -1 +1,4 @@ add_subdirectory(utils) +add_subdirectory(generated) + +cc_library(eager_api SRCS all.cc DEPS global_utils eager_scale) diff --git a/paddle/fluid/eager/api/all.h b/paddle/fluid/eager/api/all.h index 4d873ad95a4..6750073c3d3 100644 --- a/paddle/fluid/eager/api/all.h +++ b/paddle/fluid/eager/api/all.h @@ -14,4 +14,5 @@ // #pragma once +#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h" #include "paddle/fluid/eager/api/utils/global_utils.h" diff --git a/paddle/fluid/eager/api/generated/CMakeLists.txt b/paddle/fluid/eager/api/generated/CMakeLists.txt new file mode 100644 index 00000000000..41fadef153b --- /dev/null +++ b/paddle/fluid/eager/api/generated/CMakeLists.txt @@ -0,0 +1 @@ +add_subdirectory(eager_generated) diff --git a/paddle/fluid/eager/api/generated/eager_generated/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/CMakeLists.txt new file mode 100644 index 00000000000..15871597cd9 --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/CMakeLists.txt @@ -0,0 +1,2 @@ +add_subdirectory(backwards) +add_subdirectory(forwards) diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt new file mode 100644 index 00000000000..1084f0ec573 --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(scale_node SRCS scale_node.cc DEPS global_utils pten pten_api grad_node_info) diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc new file mode 100644 index 00000000000..e1e8f09ddff --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.cc @@ -0,0 +1,172 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" +#include "paddle/fluid/eager/api/utils/global_utils.h" +#include "paddle/fluid/eager/eager_tensor.h" + +#include "paddle/pten/api/all.h" + +#include "paddle/fluid/platform/device_context.h" +#include "paddle/fluid/platform/enforce.h" +#include "paddle/fluid/platform/errors.h" + +#include "glog/logging.h" + +namespace egr { + +template +static void ScaleDeviceDispatch(const pten::DenseTensor& dense_tensor, + const DeviceContext& dev_ctx, float scale, + float bias, bool bias_after_scale, + pten::DenseTensor* dense_out) { + switch (dense_tensor.dtype()) { + case pten::DataType::FLOAT64: { + pten::Scale(dev_ctx, dense_tensor /* tensor */, scale /* scale */, + bias /* bias */, + bias_after_scale /* bias_after_scale */, + dense_out /* out tensor */); + break; + } + case pten::DataType::FLOAT32: { + pten::Scale(dev_ctx, dense_tensor /* tensor */, scale /* scale */, + bias /* bias */, + bias_after_scale /* bias_after_scale */, + dense_out /* out tensor */); + break; + } + case pten::DataType::INT64: { + pten::Scale(dev_ctx, dense_tensor /* tensor */, + scale /* scale */, bias /* bias */, + bias_after_scale /* bias_after_scale */, + dense_out /* out tensor */); + break; + } + case pten::DataType::INT32: { + pten::Scale(dev_ctx, dense_tensor /* tensor */, + scale /* scale */, bias /* bias */, + bias_after_scale /* bias_after_scale */, + dense_out /* out tensor */); + break; + } + default: { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Detected unsupported data type." + "Only Float64, Float32, Int64, Int32 are supported for now.")); + break; + } + } +} + +void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, + bool bias_after_scale, egr::EagerTensor* out) { + // TODO(jiabin): Support multiple tensor here, Create DenseTensor is not a + // proper way to Demo it + // Run Forward Function + auto dense_tensor = std::dynamic_pointer_cast(x.impl()); + // Init output tensor + auto tensor_meta = pten::DenseTensorMeta( + dense_tensor->dtype(), dense_tensor->dims(), dense_tensor->layout()); + auto place = dense_tensor->place(); + size_t bytes_size = paddle::framework::product(dense_tensor->dims()) * + SizeOf(dense_tensor->dtype()); + auto dense_out = std::make_shared( + pten::make_intrusive( + paddle::memory::Alloc(place, bytes_size), 0), + std::move(tensor_meta)); + // Handle Device Context + const paddle::platform::Place& expected_kernel_place = + Controller::Instance().GetExpectedPlace(); + paddle::platform::DeviceContextPool& pool = + paddle::platform::DeviceContextPool::Instance(); + + if (expected_kernel_place == paddle::platform::CPUPlace()) { + auto* dev_ctx = dynamic_cast( + pool.Get(expected_kernel_place)); + if (!dev_ctx) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Cannot convert device_context to CPUDeviceContext." + "This indicates backend mismatch." + "Pleas double check your expected place")); + } + ScaleDeviceDispatch( + *dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale, + dense_out.get()); + +#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) + } else if (expected_kernel_place == paddle::platform::CUDAPlace()) { + auto* dev_ctx = dynamic_cast( + pool.Get(expected_kernel_place)); + if (!dev_ctx) { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Cannot convert device_context to CUDADeviceContext." + "This indicates backend mismatch." + "Pleas double check your expected place")); + } + ScaleDeviceDispatch( + *dense_tensor.get(), *dev_ctx, scale, bias, bias_after_scale, + dense_out.get()); +#endif + } else { + PADDLE_THROW(paddle::platform::errors::Fatal( + "Detected unsupported backend." + "Only CPU and CUDA Backend are supported for now." + "Please double check if your backend falls into the above two " + "categories.")); + } + + out->set_impl(dense_out); +} + +void GradNodeScale::SetTensorWrappers_X( + const std::vector& tensors) { + // Does nothing for scale +} + +void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; } + +std::vector> GradNodeScale::operator()( + const std::vector>& grads) { + // 1. Check Output Size + PADDLE_ENFORCE( + ((grads.size() == 1) && (grads[0].size() == 1)), + paddle::platform::errors::Fatal( + "ScaleGradNode takes exactly 1 grad tensor." + "However received: %d", + "This indicates an issue with Eager Dygraph Backward logic", + grads.size())); + std::vector> outs; + // 2. Create needed out parttern + egr::EagerTensor out; + // Apply Gradient Hooks + if (GradientHooksRegistered()) { + // TODO(jiabin): Shall we apply hook slot by slot here or accept + // vector> to apply all hooks? + std::vector> hooked_grads = + ApplyGradientHooks(grads); + ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */, + true /* bias_after_scale */, &out); + } else { + ScaleAPI(grads[0][0], scale_, 0.0 /* bias */, true /* bias_after_scale */, + &out); + } + + // Apply Reduce Hooks + if (ReduceHooksRegistered()) { + ApplyReduceHooks(); + } + return {{out}}; +} + +} // namespace egr diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h new file mode 100644 index 00000000000..1618662516c --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h @@ -0,0 +1,54 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/tensor_wrapper.h" + +/* + Each Operation has a specific GradNode inheritted from GradNodeBase + A specific GradNode defines + 1. Input Tensors + 2. overrides operator() to perform actual backward computations + + TODO: Generate GradNode via auto-code-generation +*/ +namespace egr { + +void ScaleAPI(const egr::EagerTensor& x, float scale, float bias, + bool bias_after_scale, egr::EagerTensor* out); + +class GradNodeScale : public GradNodeBase { + public: + // Constructor: configure fwd input tensors to grad node + GradNodeScale(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~GradNodeScale() override = default; + + // Functor: perform backward computations + virtual std::vector> operator()( + const std::vector>& grads) override; + + void SetTensorWrappers_X(const std::vector& tensors); + + void SetAttributes_scale(float scale); + + // Members: define fwd input tensors + // For Scale there is no fwd input tensor needed + private: + float scale_{1.0}; +}; + +} // namespace egr diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt new file mode 100644 index 00000000000..ed04e0b6f5a --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt @@ -0,0 +1 @@ +cc_library(eager_scale SRCS scale.cc DEPS pten_api pten autograd_meta scale_node) diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc new file mode 100644 index 00000000000..a8b3421baac --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.cc @@ -0,0 +1,100 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * This File should be automatically generated by coding auto generator. + * All ops C++ autograd logic is defined here, in Python-C extension API + * system we try to avoid any autograd related code, and move them all to + * here. + * + * Currently, we just manually do some fwd autograd here. And we will replace + * them with auto code generator later. + * **/ + +#include "paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h" +#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/eager_tensor.h" +#include "paddle/fluid/eager/utils.h" + +#include "paddle/pten/api/all.h" +#include "paddle/pten/include/core.h" + +namespace egr { + +egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias, + bool bias_after_scale, bool trace_backward) { + // 1. Run Forward + // 1.1 Create outputs + egr::EagerTensor out; + // 1.2 Need by original op, we assemble ins, outs, attrs here + + // 1.3 Call forward C++ api + ScaleAPI(x, scale, bias, bias_after_scale, &out); + + // 2. Build Backward Depends + // 2.1 Get AutogradMetas for all ins and outs + auto p_autograd_in = EagerUtils::unsafe_autograd_meta(x); + // NOTE: Call EagerUtils::multi_autograd_meta when we have vector of outputs + auto p_autograd_out = EagerUtils::autograd_meta(&out); + + // 2.2 Add GradNode + // 2.2.1 ComputeRequireGrad + // TODO(jiabin) : make this function accept different kinds of input + // TODO(zhanlve): which one is more efficient: + // 1. construct a vector of pointers + // 2. call "ComputeRequireGrad" multiple times + bool require_any_grad = + EagerUtils::ComputeRequireGrad(trace_backward, p_autograd_in); + if (require_any_grad) { + EagerUtils::PassStopGradient(false /*generate_grad*/, p_autograd_out); + + // 2.2.2 Set OutRankInfo for outputs this needs to be as same as Edges's + // input_rank_ + /** Note: + // 1. We provide EagerUtils::SetMultiOutRank(vector), + // since we have some of Operator has servel slot name with duplicate + outputs. + // 2. We call AutogradMeta's SetOutput Rank only when we have single output + with + // single slot name. + **/ + p_autograd_out->SetSingleOutRankWithSlot(0, 0); + + // Init GradNode + auto scale_node = std::make_shared(/* fwd_in_slot_num */ 1, + /* bwd_in_slot_num */ 1); + + // Pass Attributes to GradNode + scale_node->SetAttributes_scale(scale); + + // Set Next Edges + scale_node->AddEdges(*p_autograd_in, /*slot id*/ 0); + + // Set TensorWrappers + scale_node->SetTensorWrappers_X({x}); + + // Set Grad out rank as same as fwd input and set stop gradient to bwd + scale_node->SetGradOutMeta(*p_autograd_in, /*slot id*/ 0); + // Set Grad out rank as same as fwd input and set stop gradient to bwd + scale_node->SetGradInMeta(*p_autograd_out, /*slot id*/ 0); + + // Set History for output set current Grad Node for + EagerUtils::SetHistory(p_autograd_out, scale_node); + } + + return out; +} + +} // namespace egr diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h new file mode 100644 index 00000000000..13b03c3b760 --- /dev/null +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/scale.h @@ -0,0 +1,23 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include "paddle/fluid/eager/eager_tensor.h" +namespace egr { + +egr::EagerTensor scale(const egr::EagerTensor& x, float scale, float bias, + bool bias_after_scale, bool trace_backward); + +} // namespace egr diff --git a/paddle/fluid/eager/tests/CMakeLists.txt b/paddle/fluid/eager/tests/CMakeLists.txt index 9837d049abd..adff9572761 100644 --- a/paddle/fluid/eager/tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/CMakeLists.txt @@ -1,3 +1,5 @@ -set(eager_deps pten pten_api utils tensor_utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node) +set(eager_deps pten pten_api tensor_utils utils global_utils pten_tensor autograd_meta grad_node_info grad_tensor_holder gradient_accumulation accumulation_node) +set(fluid_deps tracer layer proto_desc operator op_registry variable_helper memcpy) + add_subdirectory(data_structure_tests) add_subdirectory(task_tests) diff --git a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt index d6f384ab667..37e2bc1d079 100644 --- a/paddle/fluid/eager/tests/task_tests/CMakeLists.txt +++ b/paddle/fluid/eager/tests/task_tests/CMakeLists.txt @@ -1,2 +1,3 @@ cc_test(test_egr_task_tensor_utils SRCS tensor_utils_test.cc DEPS ${eager_deps}) cc_test(test_egr_task_eager_utils SRCS eager_utils_test.cc DEPS ${eager_deps}) +cc_test(test_egr_task_forward_autograd SRCS forward_autograd_test.cc DEPS ${eager_deps} ${fluid_deps} eager_scale scale_node) diff --git a/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc new file mode 100644 index 00000000000..6e23226cde4 --- /dev/null +++ b/paddle/fluid/eager/tests/task_tests/forward_autograd_test.cc @@ -0,0 +1,251 @@ +// Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include + +#include "glog/logging.h" +#include "gtest/gtest.h" + +#include "paddle/fluid/eager/api/all.h" +#include "paddle/fluid/eager/api/generated/eager_generated/backwards/scale_node.h" +#include "paddle/fluid/eager/api/utils/tensor_utils.h" +#include "paddle/fluid/eager/autograd_meta.h" +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/tests/test_utils.h" + +#include "paddle/pten/core/dense_tensor.h" +#include "paddle/pten/core/tensor_meta.h" + +// TODO(jiabin): remove nolint here!!! +using namespace egr; // NOLINT + +namespace eager_test { + +TEST(Forward, SingleNode) { + // Prepare Device Contexts + InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor t = CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(t)); + egr::EagerTensor& tensor = target_tensors[0]; + EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); + + // Run Forward + float scale = 2.0; + float bias = 3.0; + egr::EagerTensor out = egr::scale( + tensor, scale, bias, true /*bias_after_scale*/, true /*trace_backward*/); + + // Examine Forward Output + CompareTensorWithValue(out, 13.0); + + // Examine GradNode + { + // 1. GradNode + AutogradMeta* meta = EagerUtils::autograd_meta(&out); + GradNodeBase* grad_node = meta->GradNode(); + GradNodeScale* scale_node = dynamic_cast(grad_node); + + CHECK_NOTNULL(scale_node); + CHECK_EQ(static_cast(meta->OutRankInfo().first), 0); + CHECK_EQ(static_cast(meta->OutRankInfo().second), 0); + } +} + +/* + inp + | +Node0 + | +Node1 + | + out +*/ +TEST(Forward, LinearNodes) { + InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor t = CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(t)); + egr::EagerTensor& tensor = target_tensors[0]; + EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); + + // Run Forward Node 0 + float scale0 = 2.0; + float bias0 = 3.0; + egr::EagerTensor out0 = + egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, + true /*trace_backward*/); + + // Run Forward Node 1 + float scale1 = 5.0; + float bias1 = 10.0; + egr::EagerTensor out1 = egr::scale( + out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); + + // Examine Forward Output 0 + CompareTensorWithValue(out0, 13.0); + + // Examine Forward Output 1 + CompareTensorWithValue(out1, 75.0); + + // Examine GradNode + { + // 1. GradNode + // Node 0 + AutogradMeta* meta0 = EagerUtils::autograd_meta(&out0); + GradNodeBase* grad_node0 = meta0->GradNode(); + GradNodeScale* scale_node0 = dynamic_cast(grad_node0); + + CHECK_NOTNULL(scale_node0); + CHECK_EQ(static_cast(meta0->OutRankInfo().first), 0); + CHECK_EQ(static_cast(meta0->OutRankInfo().second), 0); + + // Node 1 + AutogradMeta* meta1 = EagerUtils::autograd_meta(&out1); + GradNodeBase* grad_node1 = meta1->GradNode(); + GradNodeScale* scale_node1 = dynamic_cast(grad_node1); + + CHECK_NOTNULL(scale_node1); + CHECK_EQ(static_cast(meta1->OutRankInfo().first), 0); + CHECK_EQ(static_cast(meta1->OutRankInfo().second), 0); + + // 2. TensorWrapper: No TensorWrapper for ScaleNode + // 3. NextEdges: Node 1 -> Node 0 + const std::vector>& node1_edges = grad_node1->GetEdges(); + const auto& node1_edge = node1_edges[0]; + + CHECK_EQ(static_cast(node1_edge[0].GetEdgeRankInfo().first), 0); + CHECK_EQ(static_cast(node1_edge[0].GetEdgeRankInfo().second), 0); + CHECK_EQ(node1_edge[0].GetGradNode(), grad_node0); + } +} + +/* + inp + | + Node0 + ____|____ + | | + Node1 Node2 + | | + out1 out2 +*/ +TEST(Forward, BranchedNodes) { + InitEnv(paddle::platform::CPUPlace()); + + // Prepare Inputs + std::vector target_tensors; + paddle::framework::DDim ddim = paddle::framework::make_ddim({4, 16, 16, 32}); + + // Create Target Tensor + egr::EagerTensor t = CreateTensorWithValue( + ddim, paddle::platform::CPUPlace(), pten::DataType::FLOAT32, + pten::DataLayout::NCHW, 5.0 /*value*/, false /*is_leaf*/); + target_tensors.emplace_back(std::move(t)); + egr::EagerTensor& tensor = target_tensors[0]; + EagerUtils::autograd_meta(&tensor)->SetStopGradient(false); + + // Run Forward Node 0 + float scale0 = 2.0; + float bias0 = 3.0; + egr::EagerTensor out0 = + egr::scale(tensor, scale0, bias0, true /*bias_after_scale*/, + true /*trace_backward*/); + + // Run Forward Node 1 + float scale1 = 5.0; + float bias1 = 10.0; + egr::EagerTensor out1 = egr::scale( + out0, scale1, bias1, true /*bias_after_scale*/, true /*trace_backward*/); + + // Run Forward Node 2 + float scale2 = 10.0; + float bias2 = 20.0; + egr::EagerTensor out2 = egr::scale( + out0, scale2, bias2, true /*bias_after_scale*/, true /*trace_backward*/); + + // Examine Forward Output 0 + CompareTensorWithValue(out0, 13.0); + + // Examine Forward Output 1 + CompareTensorWithValue(out1, 75.0); + + // Examine Forward Output 2 + CompareTensorWithValue(out2, 150.0); + + // Examine GradNode + { + // 1. GradNode + // Node 0 + AutogradMeta* meta0 = EagerUtils::autograd_meta(&out0); + GradNodeBase* grad_node0 = meta0->GradNode(); + GradNodeScale* scale_node0 = dynamic_cast(grad_node0); + + CHECK_NOTNULL(scale_node0); + CHECK_EQ(static_cast(meta0->OutRankInfo().first), 0); + CHECK_EQ(static_cast(meta0->OutRankInfo().second), 0); + + // Node 1 + AutogradMeta* meta1 = EagerUtils::autograd_meta(&out1); + GradNodeBase* grad_node1 = meta1->GradNode(); + GradNodeScale* scale_node1 = dynamic_cast(grad_node1); + + CHECK_NOTNULL(scale_node1); + CHECK_EQ(static_cast(meta1->OutRankInfo().first), 0); + CHECK_EQ(static_cast(meta1->OutRankInfo().second), 0); + + // Node 2 + AutogradMeta* meta2 = EagerUtils::autograd_meta(&out2); + GradNodeBase* grad_node2 = meta2->GradNode(); + GradNodeScale* scale_node2 = dynamic_cast(grad_node2); + + CHECK_NOTNULL(scale_node2); + CHECK_EQ(static_cast(meta2->OutRankInfo().first), 0); + CHECK_EQ(static_cast(meta2->OutRankInfo().second), 0); + + // 2. TensorWrapper: No TensorWrapper for ScaleNode + // 3. NextEdges + // Node 1 -> Node 0 + const std::vector>& node1_edges = grad_node1->GetEdges(); + const Edge& node1_edge = node1_edges[0][0]; + + CHECK_EQ(static_cast(node1_edge.GetEdgeRankInfo().first), 0); + CHECK_EQ(static_cast(node1_edge.GetEdgeRankInfo().second), 0); + CHECK_EQ(node1_edge.GetGradNode(), grad_node0); + + // Node 2 -> Node 0 + const std::vector>& node2_edges = grad_node2->GetEdges(); + const Edge& node2_edge = node2_edges[0][0]; + + CHECK_EQ(static_cast(node2_edge.GetEdgeRankInfo().first), 0); + CHECK_EQ(static_cast(node2_edge.GetEdgeRankInfo().second), 0); + CHECK_EQ(node2_edge.GetGradNode(), grad_node0); + } +} + +} // namespace eager_test -- GitLab