diff --git a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt index f704d2a49184b9d4591177a62177ced075ea0bcf..fbd552ef00da70172bc35b0a0a4efbf9204ff126 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/backwards/CMakeLists.txt @@ -7,6 +7,6 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( final_dygraph_node SRCS nodes.cc - DEPS ${eager_deps}) + DEPS ${eager_deps} ${eager_manual_nodes}) add_dependencies(final_dygraph_node eager_final_state_codegen) endif() diff --git a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt index 8d6df647999bd00317816c51c0c3296861096686..66053baa5813b2467b334ac58e7318b089712b44 100644 --- a/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt +++ b/paddle/fluid/eager/api/generated/eager_generated/forwards/CMakeLists.txt @@ -7,6 +7,6 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER)) cc_library( final_dygraph_function SRCS dygraph_functions.cc - DEPS ${eager_deps}) + DEPS ${eager_deps} ${eager_manual_functions}) add_dependencies(final_dygraph_function eager_final_state_codegen) endif() diff --git a/paddle/fluid/eager/api/manual/CMakeLists.txt b/paddle/fluid/eager/api/manual/CMakeLists.txt index ebfcaad2eeac736726fb5bc190972cf2b2c1a123..e6db90ccc5bbe59e3a24553ac9b2d75a6b084082 100644 --- a/paddle/fluid/eager/api/manual/CMakeLists.txt +++ b/paddle/fluid/eager/api/manual/CMakeLists.txt @@ -6,4 +6,11 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER)) set(fluid_manual_nodes ${fluid_manual_nodes} PARENT_SCOPE) + add_subdirectory(eager_manual) + set(eager_manual_functions + ${eager_manual_functions} + PARENT_SCOPE) + set(eager_manual_nodes + ${eager_manual_nodes} + PARENT_SCOPE) endif() diff --git a/paddle/fluid/eager/api/manual/eager_manual/CMakeLists.txt b/paddle/fluid/eager/api/manual/eager_manual/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..09420f368507da7a552b43f760ac981b4f397156 --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/CMakeLists.txt @@ -0,0 +1,8 @@ +add_subdirectory(forwards) +add_subdirectory(nodes) +set(eager_manual_functions + ${eager_manual_functions} + PARENT_SCOPE) +set(eager_manual_nodes + ${eager_manual_nodes} + PARENT_SCOPE) diff --git a/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h b/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h new file mode 100644 index 0000000000000000000000000000000000000000..0f06831068161797519134c565982ec0c6da02f0 --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h @@ -0,0 +1,30 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include "paddle/phi/api/include/tensor.h" + +paddle::experimental::Tensor conv2d_final_state_dygraph_function( + const paddle::experimental::Tensor& input, + const paddle::experimental::Tensor& filter, + std::vector<int> strides, + std::vector<int> paddings, + std::string paddding_algorithm, + int groups, + std::vector<int> dilations, + std::string data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search); diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/CMakeLists.txt b/paddle/fluid/eager/api/manual/eager_manual/forwards/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..0ed2f26c0b25554ac2147e747c05bfb2e97264ae --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/CMakeLists.txt @@ -0,0 +1,10 @@ +cc_library( + conv2d_fwd_function + SRCS conv2d_fwd_function.cc + DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS}) + +add_dependencies(conv2d_fwd_function eager_codegen) + +set(eager_manual_functions + conv2d_fwd_function + PARENT_SCOPE) diff --git a/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc new file mode 100644 index 0000000000000000000000000000000000000000..f7bff6fb8899738c7e777b0a72b5ff1a55257a7f --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc @@ -0,0 +1,153 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include "paddle/fluid/eager/amp_utils.h" +#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" +#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h" +#include "paddle/fluid/eager/api/utils/global_utils.h" +#include "paddle/fluid/eager/eager_amp_auto_cast.h" +#include "paddle/fluid/eager/nan_inf_utils.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" + +#pragma GCC diagnostic ignored "-Wunused-variable" +DECLARE_bool(check_nan_inf); + +paddle::experimental::Tensor conv2d_final_state_dygraph_function( + const paddle::experimental::Tensor& input, + const paddle::experimental::Tensor& filter, + std::vector strides, + std::vector paddings, + std::string paddding_algorithm, + int groups, + std::vector dilations, + std::string data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search) { + // Dygraph Record Event + paddle::platform::RecordEvent dygraph_entrance_record_event( + "conv2d dygraph", paddle::platform::TracerEventType::Operator, 1); + + // AMP Logic + if (egr::Controller::Instance().GetAMPLevel() != + paddle::imperative::AmpLevel::O0) { + VLOG(5) << "Check and Prepare For AMP"; + auto op_name = phi::TransToFluidOpName("conv2d"); + paddle::small_vector, + egr::kSlotSmallVectorSize> + amp_tensors_vector = {{input}, {filter}}; + + auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector); + + auto NEW_input = + egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name); + auto NEW_filter = + egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name); + + { + paddle::imperative::AutoCastGuard guard( + egr::Controller::Instance().GetCurrentTracer(), + paddle::imperative::AmpLevel::O0); + return conv2d_final_state_dygraph_function(NEW_input, + NEW_filter, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search); + } + } + + // Get Input AutoGradMeta + egr::AutogradMeta* input_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(input); + egr::AutogradMeta* filter_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(filter); + // Forward API Call + VLOG(3) << "Final State Running: " + << "conv2d_final_state_dygraph_function"; + auto api_result = paddle::experimental::conv2d(input, + filter, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search); + // Check NaN and Inf if needed + if (FLAGS_check_nan_inf) { + egr::CheckTensorHasNanOrInf("conv2d", api_result); + } + + // Get Outputs + auto& out = api_result; + + // Get Output AutoGradMeta + egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out); + bool trace_backward = egr::Controller::Instance().HasGrad(); + bool require_any_grad = egr::EagerUtils::ComputeRequireGrad( + trace_backward, input_autograd_meta, filter_autograd_meta); + + // Check Inplace if needed + + // Node Creation + if (require_any_grad) { + paddle::platform::RecordEvent node_creation_record_event( + "conv2d node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + egr::EagerUtils::PassStopGradient(false, out_autograd_meta); + + // Node Construction + auto grad_node = + std::shared_ptr(new Conv2dGradNodeFinal(1, 2)); + // SetAttributes if needed + grad_node->SetAttributestrides(strides); + grad_node->SetAttributepaddings(paddings); + grad_node->SetAttributepaddding_algorithm(paddding_algorithm); + grad_node->SetAttributegroups(groups); + 
grad_node->SetAttributedilations(dilations); + grad_node->SetAttributedata_format(data_format); + grad_node->SetAttributeuse_addto(use_addto); + grad_node->SetAttributeworkspace_size_MB(workspace_size_MB); + grad_node->SetAttributeexhaustive_search(exhaustive_search); + // Set TensorWrappers for Forward Inputs if needed + grad_node->SetTensorWrapperinput(input); + grad_node->SetTensorWrapperfilter(filter); + // SetGradOutMeta & SetEdges + grad_node->SetGradOutMeta(input, 0); + grad_node->SetGradOutMeta(filter, 1); + // SetOutRank & SetHistory & SetGradInMeta & RetainGrad + if (out_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0); + } + if (out_autograd_meta) { + egr::EagerUtils::SetHistory(out_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(out, 0); + egr::EagerUtils::CheckAndRetainGrad(out); + // Set TensorWrappers for Forward Outputs if needed + } + + // Returns + return out; +} diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt b/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt new file mode 100644 index 0000000000000000000000000000000000000000..21642fbd6495c9534b79a354811bfecf730bfb62 --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt @@ -0,0 +1,8 @@ +cc_library( + conv2d_nodes + SRCS conv2d_nodes.cc + DEPS ${eager_deps} ${fluid_deps}) + +set(eager_manual_nodes + conv2d_nodes + PARENT_SCOPE) diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc new file mode 100644 index 0000000000000000000000000000000000000000..ce8d647cb9ece3777036645e25e0fc0ed96f19ed --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc @@ -0,0 +1,308 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "glog/logging.h" +#include "paddle/fluid/eager/api/utils/global_utils.h" +#include "paddle/fluid/eager/nan_inf_utils.h" +#include "paddle/fluid/eager/to_static/run_program_op_node.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/imperative/tracer.h" +#include "paddle/fluid/platform/profiler/event_tracing.h" +#include "paddle/phi/api/all.h" +#include "paddle/phi/api/backward/backward_api.h" +#include "paddle/phi/api/backward/sparse_bw_api.h" + +#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h" +#include "paddle/phi/api/include/sparse_api.h" +DECLARE_bool(check_nan_inf); + +paddle::small_vector, + egr::kSlotSmallVectorSize> +Conv2dGradNodeFinal::operator()( + paddle::small_vector, + egr::kSlotSmallVectorSize>& grads, + bool create_graph, + bool is_new_grad) { + // Fill Zero For GradIn Tensors + VLOG(3) << " Running Conv2dGradNodeFinal: " << this; + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); + + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + auto input = egr::EagerUtils::RecoverTensorWrapper(&this->input_); + auto filter = egr::EagerUtils::RecoverTensorWrapper(&this->filter_); + auto& grad_out = hooked_grads[0][0]; + auto& strides = this->strides_; + auto& paddings = this->paddings_; + auto& paddding_algorithm = this->paddding_algorithm_; + auto& groups = this->groups_; + auto& dilations = this->dilations_; + auto& data_format = this->data_format_; + auto& use_addto = this->use_addto_; + auto& workspace_size_MB = this->workspace_size_MB_; + auto& exhaustive_search = this->exhaustive_search_; + // Prepare Grad function call + + const auto& out_metas = OutputMeta(); + paddle::small_vector, + egr::kSlotSmallVectorSize> + returns(2); + for (int i = 0; i < 2; ++i) { + out_metas[i].size() == 0 ? returns[i].resize(1) + : returns[i].resize(out_metas[i].size()); + } + + auto* api_output_0 = + (out_metas[0].empty() || out_metas[0][0].IsStopGradient()) + ? nullptr + : &returns[0][0]; + auto* api_output_1 = + (out_metas[1].empty() || out_metas[1][0].IsStopGradient()) + ? nullptr + : &returns[1][0]; + // Runtime check if we need next grad + bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph; + + // Inplace Check + + // Inplace Strategy + + // Call grad_api function + VLOG(3) << "Final State Running: Conv2dGradNodeFinal"; + + paddle::experimental::conv2d_grad(input, + filter, + grad_out, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search, + api_output_0, + api_output_1); + // Check NaN and Inf id needed + if (FLAGS_check_nan_inf) { + egr::CheckTensorHasNanOrInf("conv2d_grad", returns); + } + + // Get GradOut autograd_meta + + auto& grad_input = returns[0][0]; + egr::AutogradMeta* grad_input_autograd_meta = + returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_input) + : nullptr; + if (grad_input_autograd_meta) + grad_input_autograd_meta->SetStopGradient(false); + VLOG(3) << "Conv2dGradNodeFinal grad_input_autograd_meta: " + << grad_input_autograd_meta; + + auto& grad_filter = returns[1][0]; + egr::AutogradMeta* grad_filter_autograd_meta = + returns[1][0].initialized() ? 
egr::EagerUtils::autograd_meta(&grad_filter) + : nullptr; + if (grad_filter_autograd_meta) + grad_filter_autograd_meta->SetStopGradient(false); + VLOG(3) << "Conv2dGradNodeFinal grad_filter_autograd_meta: " + << grad_filter_autograd_meta; + + // Create Grad Node + if (trace_backward) { + paddle::platform::RecordEvent node_creation_record_event( + "conv2d_grad node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + // Node Construction + auto grad_node = std::shared_ptr<Conv2dDoubleGradNodeFinal>( + new Conv2dDoubleGradNodeFinal(2, 3)); + // SetAttributes if needed + grad_node->SetAttributestrides(strides); + grad_node->SetAttributepaddings(paddings); + grad_node->SetAttributepaddding_algorithm(paddding_algorithm); + grad_node->SetAttributegroups(groups); + grad_node->SetAttributedilations(dilations); + grad_node->SetAttributedata_format(data_format); + grad_node->SetAttributeuse_addto(use_addto); + grad_node->SetAttributeworkspace_size_MB(workspace_size_MB); + grad_node->SetAttributeexhaustive_search(exhaustive_search); + // Set TensorWrappers for Forward Inputs if needed + grad_node->SetTensorWrapperinput(input); + grad_node->SetTensorWrapperfilter(filter); + grad_node->SetTensorWrappergrad_out(grad_out); + // SetGradOutMeta & SetEdges + if (grad_filter_autograd_meta) { + grad_node->SetGradOutMeta(input, 0); + } + if (grad_input_autograd_meta) { + grad_node->SetGradOutMeta(filter, 1); + grad_node->SetGradOutMeta(grad_out, 2); + } + // SetOutRank & SetHistory & SetGradInMeta & RetainGrad + if (grad_input_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_input_autograd_meta, 0); + } + if (grad_filter_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_filter_autograd_meta, 1); + } + if (grad_input_autograd_meta) { + egr::EagerUtils::SetHistory(grad_input_autograd_meta, grad_node); + } + if (grad_filter_autograd_meta) { + egr::EagerUtils::SetHistory(grad_filter_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(grad_input, 0); + grad_node->SetGradInMeta(grad_filter, 1); + egr::EagerUtils::CheckAndRetainGrad(grad_input); + egr::EagerUtils::CheckAndRetainGrad(grad_filter); + // Set TensorWrappers for Forward Outputs if needed + } + + // Return + if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns); + return returns; +} + +paddle::small_vector<std::vector<paddle::experimental::Tensor>, + egr::kSlotSmallVectorSize> +Conv2dDoubleGradNodeFinal::operator()( + paddle::small_vector<std::vector<paddle::experimental::Tensor>, + egr::kSlotSmallVectorSize>& grads, + bool create_graph, + bool is_new_grad) { + // Fill Zero For GradIn Tensors + const auto& input_metas = this->InputMeta(); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[0][0], + input_metas[0][0]); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[1][0], + input_metas[1][0]); + + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); + + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + auto input = egr::EagerUtils::RecoverTensorWrapper(&this->input_); + auto filter = egr::EagerUtils::RecoverTensorWrapper(&this->filter_); + auto grad_out = egr::EagerUtils::RecoverTensorWrapper(&this->grad_out_); + auto& grad_input_grad = hooked_grads[0][0]; + + paddle::optional<paddle::experimental::Tensor> grad_input_grad_optional; + if (grad_input_grad.initialized()) + grad_input_grad_optional = + paddle::make_optional<paddle::experimental::Tensor>(grad_input_grad); + + auto& grad_filter_grad = hooked_grads[1][0]; + + paddle::optional<paddle::experimental::Tensor> grad_filter_grad_optional; + if (grad_filter_grad.initialized()) + grad_filter_grad_optional = + paddle::make_optional<paddle::experimental::Tensor>(grad_filter_grad); + + auto& strides = this->strides_;
+ auto& paddings = this->paddings_; + auto& paddding_algorithm = this->paddding_algorithm_; + auto& groups = this->groups_; + auto& dilations = this->dilations_; + auto& data_format = this->data_format_; + auto& use_addto = this->use_addto_; + auto& workspace_size_MB = this->workspace_size_MB_; + auto& exhaustive_search = this->exhaustive_search_; + // Prepare Grad function call + + const auto& out_metas = OutputMeta(); + paddle::small_vector<std::vector<paddle::experimental::Tensor>, + egr::kSlotSmallVectorSize> + returns(3); + for (int i = 0; i < 3; ++i) { + out_metas[i].size() == 0 ? returns[i].resize(1) + : returns[i].resize(out_metas[i].size()); + } + + auto* api_output_0 = + (out_metas[0].empty() || out_metas[0][0].IsStopGradient()) + ? nullptr + : &returns[0][0]; + auto* api_output_1 = + (out_metas[1].empty() || out_metas[1][0].IsStopGradient()) + ? nullptr + : &returns[1][0]; + auto* api_output_2 = + (out_metas[2].empty() || out_metas[2][0].IsStopGradient()) + ? nullptr + : &returns[2][0]; + // Runtime check if we need next grad + + // Inplace Check + + // Inplace Strategy + + // Call grad_api function + VLOG(3) << "Final State Running: Conv2dGradGradNodeFinal"; + + paddle::experimental::conv2d_grad_grad(input, + filter, + grad_out, + grad_input_grad_optional, + grad_filter_grad_optional, + strides, + paddings, + paddding_algorithm, + groups, + dilations, + data_format, + use_addto, + workspace_size_MB, + exhaustive_search, + api_output_0, + api_output_1, + api_output_2); + // Check NaN and Inf if needed + if (FLAGS_check_nan_inf) { + egr::CheckTensorHasNanOrInf("conv2d_grad_grad", returns); + } + + // Get GradOut autograd_meta + + auto& input_grad = returns[0][0]; + egr::AutogradMeta* input_grad_autograd_meta = + returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&input_grad) + : nullptr; + if (input_grad_autograd_meta) + input_grad_autograd_meta->SetStopGradient(false); + + auto& filter_grad = returns[1][0]; + egr::AutogradMeta* filter_grad_autograd_meta = + returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&filter_grad) + : nullptr; + if (filter_grad_autograd_meta) + filter_grad_autograd_meta->SetStopGradient(false); + + auto& grad_out_grad = returns[2][0]; + egr::AutogradMeta* grad_out_grad_autograd_meta = + returns[2][0].initialized() + ? egr::EagerUtils::autograd_meta(&grad_out_grad) + : nullptr; + if (grad_out_grad_autograd_meta) + grad_out_grad_autograd_meta->SetStopGradient(false); + + // Create Grad Node + + // Return + if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns); + return returns; +} diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h b/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h new file mode 100644 index 0000000000000000000000000000000000000000..f202b64f0b7096d872905a10a629b1f68feaaa85 --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h @@ -0,0 +1,182 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#pragma once +#include "paddle/fluid/eager/grad_node_info.h" +#include "paddle/fluid/eager/tensor_wrapper.h" + +class Conv2dGradNodeFinal : public egr::GradNodeBase { + public: + Conv2dGradNodeFinal() : egr::GradNodeBase() {} + Conv2dGradNodeFinal(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~Conv2dGradNodeFinal() override = default; + + virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, + egr::kSlotSmallVectorSize> + operator()( + paddle::small_vector<std::vector<paddle::experimental::Tensor>, // NOLINT + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, // NOLINT + bool is_new_grad = false) override; // NOLINT + std::string name() override { return "Conv2dGradNodeFinal"; } + + void ClearTensorWrappers() override { + input_.clear(); + filter_.clear(); + + SetIsTensorWrappersCleared(true); + } + + std::shared_ptr<GradNodeBase> Copy() const override { + auto copied_node = + std::shared_ptr<Conv2dGradNodeFinal>(new Conv2dGradNodeFinal(*this)); + VLOG(3) << "Copy Conv2dGradNodeFinal: " << this + << " to: " << copied_node.get(); + return copied_node; + } + + // SetTensorWrapperX, SetTensorWrapperY, ... + void SetTensorWrapperinput(const paddle::experimental::Tensor& input) { + input_ = egr::TensorWrapper(input, false); + } + void SetTensorWrapperfilter(const paddle::experimental::Tensor& filter) { + filter_ = egr::TensorWrapper(filter, false); + } + + // SetAttributes + void SetAttributestrides(const std::vector<int>& strides) { + strides_ = strides; + } + void SetAttributepaddings(const std::vector<int>& paddings) { + paddings_ = paddings; + } + void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) { + paddding_algorithm_ = paddding_algorithm; + } + void SetAttributegroups(const int& groups) { groups_ = groups; } + void SetAttributedilations(const std::vector<int>& dilations) { + dilations_ = dilations; + } + void SetAttributedata_format(const std::string& data_format) { + data_format_ = data_format; + } + void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; } + void SetAttributeworkspace_size_MB(const int& workspace_size_MB) { + workspace_size_MB_ = workspace_size_MB; + } + void SetAttributeexhaustive_search(const bool& exhaustive_search) { + exhaustive_search_ = exhaustive_search; + } + + private: + // TensorWrappers + egr::TensorWrapper input_; + egr::TensorWrapper filter_; + + // Attributes + std::vector<int> strides_; + std::vector<int> paddings_; + std::string paddding_algorithm_; + int groups_; + std::vector<int> dilations_; + std::string data_format_; + bool use_addto_; + int workspace_size_MB_; + bool exhaustive_search_; +}; + +class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase { + public: + Conv2dDoubleGradNodeFinal() : egr::GradNodeBase() {} + Conv2dDoubleGradNodeFinal(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~Conv2dDoubleGradNodeFinal() override = default; + + virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, + egr::kSlotSmallVectorSize> + operator()( + paddle::small_vector<std::vector<paddle::experimental::Tensor>, // NOLINT + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, // NOLINT + bool is_new_grad = false) override; // NOLINT + std::string name() override { return "Conv2dDoubleGradNodeFinal"; } + + void ClearTensorWrappers() override { + input_.clear(); + filter_.clear(); + grad_out_.clear(); + + SetIsTensorWrappersCleared(true); + } + + std::shared_ptr<GradNodeBase> Copy() const override { + auto copied_node = std::shared_ptr<Conv2dDoubleGradNodeFinal>( + new Conv2dDoubleGradNodeFinal(*this)); + return copied_node; + } + + // SetTensorWrapperX, SetTensorWrapperY,
... + void SetTensorWrapperinput(const paddle::experimental::Tensor& input) { + input_ = egr::TensorWrapper(input, false); + } + void SetTensorWrapperfilter(const paddle::experimental::Tensor& filter) { + filter_ = egr::TensorWrapper(filter, false); + } + void SetTensorWrappergrad_out(const paddle::experimental::Tensor& grad_out) { + grad_out_ = egr::TensorWrapper(grad_out, false); + } + + // SetAttributes + void SetAttributestrides(const std::vector<int>& strides) { + strides_ = strides; + } + void SetAttributepaddings(const std::vector<int>& paddings) { + paddings_ = paddings; + } + void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) { + paddding_algorithm_ = paddding_algorithm; + } + void SetAttributegroups(const int& groups) { groups_ = groups; } + void SetAttributedilations(const std::vector<int>& dilations) { + dilations_ = dilations; + } + void SetAttributedata_format(const std::string& data_format) { + data_format_ = data_format; + } + void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; } + void SetAttributeworkspace_size_MB(const int& workspace_size_MB) { + workspace_size_MB_ = workspace_size_MB; + } + void SetAttributeexhaustive_search(const bool& exhaustive_search) { + exhaustive_search_ = exhaustive_search; + } + + private: + // TensorWrappers + egr::TensorWrapper input_; + egr::TensorWrapper filter_; + egr::TensorWrapper grad_out_; + + // Attributes + std::vector<int> strides_; + std::vector<int> paddings_; + std::string paddding_algorithm_; + int groups_; + std::vector<int> dilations_; + std::string data_format_; + bool use_addto_; + int workspace_size_MB_; + bool exhaustive_search_; +}; diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index d406f00b250396bdb2718d827ea89cc225e9a905..a6f5a36e389a921c5db44f69aece80e8b9ad8a0e 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -40,6 +40,8 @@ from codegen_utils import AssertMessage, GetIndent # keeping the code compatible, here we also skip inplace check in new dygraph temporarily, # and this will be fixed in the futrue.
inplace_check_blacklist = set(["assign_out_"]) +# # --- Black Ops list that's NO NEED to apply backward code generation +black_ops_list = ["conv2d", "conv2d_grad", "conv2d_grad_grad"] ########### @@ -154,9 +156,7 @@ paddle::small_vector, egr::kSlotSmallV {} // Prepare Grad function call {} - // Get GradIn autograd_meta -{} - // Compute Require Grad + // Runtime check if we need next grad {} // Inplace Check {} @@ -229,6 +229,27 @@ FORWARD_BODY_TEMPLATE = \ }} """ +HIHGER_ORDER_DERIVATIVE_VALUE_TEMPLATE = \ +""" if(trace_backward) {{ +{} + // Node Construction +{} + // SetAttributes if needed +{} + // Set TensorWrappers for Forward Inputs if needed +{} + // SetGradOutMeta & SetEdges +{} + // SetOutRank & SetHistory & SetGradInMeta & RetainGrad +{} +{} +{} +{} + // Set TensorWrappers for Forward Outputs if needed +{} + }} +""" + NAMESPACE_WRAPPER_TEMPLATE = \ """ namespace {} {{ @@ -252,7 +273,7 @@ NODE_CC_FILE_TEMPLATE = \ #include "paddle/fluid/eager/nan_inf_utils.h" #include "paddle/phi/api/include/sparse_api.h" - +#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h" DECLARE_bool(check_nan_inf); {} """ @@ -279,7 +300,7 @@ FORWARD_CC_FILE_TEMPLATE = \ #include "paddle/fluid/eager/eager_amp_auto_cast.h" #include "paddle/phi/backends/gpu/gpu_info.h" #include "paddle/fluid/eager/nan_inf_utils.h" - +#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" DECLARE_bool(check_nan_inf); {} {} @@ -294,7 +315,7 @@ FORWARD_H_FILE_TEMPLATE = \ #include "paddle/fluid/eager/utils.h" #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/eager/to_static/run_program_op_func.h" - +#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" {} {} """ @@ -584,7 +605,6 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): self.backward_api_name = forward_api_contents['backward'] self.backward_forward_str = grad_api_contents['forward'] - backward_args_str = grad_api_contents['args'] backward_returns_str = grad_api_contents['output'] @@ -663,7 +683,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): backward_output_pos ] - def GenerateNodeCreationCodes(self): + def GenerateNodeCreationCodes(self, for_backward=False): forward_api_name = self.forward_api_name forward_inputs_position_map = self.forward_inputs_position_map forward_outputs_position_map = self.forward_outputs_position_map @@ -794,13 +814,21 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): node_event_name = forward_api_name + " node_creation" node_creation_event_str = f"{indent}paddle::platform::RecordEvent node_creation_record_event(\"{node_event_name}\", paddle::platform::TracerEventType::OperatorInner, 1);\n" + if not for_backward: + self.node_creation_str = FORWARD_BODY_TEMPLATE.format( + node_creation_event_str, pass_stop_gradient_args_str, + node_construction_str, set_attributes_str, + set_input_tensor_wrappers_str, set_grad_out_meta_str, + set_out_rank_str, set_history_str, set_grad_in_meta_str, + set_retain_grad_str, set_output_tensor_wrappers_str) + else: + self.node_creation_str = HIHGER_ORDER_DERIVATIVE_VALUE_TEMPLATE.format( + node_creation_event_str, node_construction_str, + set_attributes_str, set_input_tensor_wrappers_str, + set_grad_out_meta_str, set_out_rank_str, set_history_str, + set_grad_in_meta_str, set_retain_grad_str, + set_output_tensor_wrappers_str) - self.node_creation_str = FORWARD_BODY_TEMPLATE.format( - node_creation_event_str, pass_stop_gradient_args_str, - node_construction_str, set_attributes_str, - 
set_input_tensor_wrappers_str, set_grad_out_meta_str, - set_out_rank_str, set_history_str, set_grad_in_meta_str, - set_retain_grad_str, set_output_tensor_wrappers_str) self.grad_node_out_list = grad_node_out_list def run(self): @@ -1234,7 +1262,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): next_node_generator = DygraphFunctionGeneratorBase( forward_api_contents, backward_api_contents, namespace) next_node_generator.run() - next_node_generator.GenerateNodeCreationCodes() + next_node_generator.GenerateNodeCreationCodes(True) next_grad_node_creation_str = next_node_generator.node_creation_str next_grad_node_out_list = next_node_generator.grad_node_out_list @@ -1342,6 +1370,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): inplace_grad_input_str = "" inplaced_tensor_wrapper = False inplace_check_str = "" + optional_inplace_var_name = [] # Grad Ins from TensorWrappers for name, (_, is_fwd_input, grad_api_position), in backward_forward_inputs_map.items(): @@ -1351,6 +1380,13 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): is_optional = (name in self.optional_inputs) tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name});" if backward_inplace_map and name in backward_inplace_map.keys(): + if len(next_grad_node_creation_str) > 0: + if (transformed_tensor_name + in backward_forward_inputs_map_next) and ( + backward_forward_inputs_map_next[ + transformed_tensor_name][1]): + optional_inplace_var_name.append( + transformed_tensor_name) tensor_wrapper_intermidiate_tensor_str = f"(&this->{tensor_wrapper_name})->get_intermidiate_tensor()" inplace_check_str += CHECK_BACKWARD_INPLACE_TEMPLATE.format( transformed_tensor_name, transformed_tensor_name, name, @@ -1371,7 +1407,6 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): get_grad_in_args_list.append(tensor_wrapper_recover_str) - optional_inplace_check = False # Grad Ins from grads for name, (ttype, fwd_position, grad_api_position) in backward_grad_inputs_map.items(): @@ -1388,7 +1423,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): in backward_forward_inputs_map_next) and ( backward_forward_inputs_map_next[ transformed_tensor_name][1]): - optional_inplace_check = False + optional_inplace_var_name.append( + transformed_tensor_name) grads_tensor_str = f"grads[{fwd_position}][0]" inplace_check_str += CHECK_BACKWARD_INPLACE_TEMPLATE.format( transformed_tensor_name, transformed_tensor_name, name, @@ -1441,25 +1477,25 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): transformed_tensor_name = self.TransformToNextGradName(name) out_index = out_index + 1 grad_api_args.append(f"api_output_{out_index}") - if not optional_inplace_check: - optional_inplace_str = "VLOG(6) << \"No Inplace should happend for wrappered input\";" + if inplace_grad_input_str in optional_inplace_var_name: + optional_inplace_str = "VLOG(6) << \"No Inplace should happend for wrappered input: {inplace_grad_input_str}\";" else: optional_inplace_str = f"""if (api_output_{out_index} != nullptr && can_be_inplaced) {{ - egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index}); - }}""" + egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index}); + }}""" if IsPlainTensorType(ttype): if backward_inplace_map and name in backward_inplace_map.values( ): - inplace_str = f"""if (api_output_{out_index} != nullptr && can_be_inplaced) {{ - 
egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index}); - }}""" + inplace_str = f""" if (api_output_{out_index} != nullptr && can_be_inplaced) {{ + egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index}); + }}""" if len(next_grad_node_creation_str) > 0: inplace_for_grad_outs_str += f""" - if (!require_any_grad) {{ - {inplace_str} - }}else{{ + if (trace_backward) {{ {optional_inplace_str} + }} else {{ + {inplace_str} }}""" else: inplace_for_grad_outs_str += inplace_str @@ -1490,84 +1526,53 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): backward_api_name, "returns") # Prepare for Node Creation if Necessary - inputs_autograd_meta_str = "" outputs_autograd_meta_str = "" - compute_require_grad_str = "" + compute_require_next_grad_str = "" if len(next_grad_node_creation_str) > 0: - # 1. Get Grad Input AutoGradMeta - inputs_autograd_meta_list = [] - compute_require_grad_args_list = ["trace_backward"] - for name, (ttype, pos, - grad_api_position) in backward_grad_inputs_map.items(): - transformed_tensor_name = self.TransformToNextGradName(name) - if transformed_tensor_name in next_grad_node_out_list: - input_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - if IsPlainTensorType(ttype): - input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" - else: - assert IsVectorTensorType(ttype) - input_autograd_meta_vec_name = GetAutoGradMetaVectorName( - transformed_tensor_name) - input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" - input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" - - inputs_autograd_meta_list.append(input_autograd_meta) - compute_require_grad_args_list.append( - input_autograd_meta_name) - - # 2. Get TensorWrapper AutoGradMeta - for name, (ttype, _, pos), in backward_forward_inputs_map.items(): - transformed_tensor_name = self.TransformToNextGradName(name) - if transformed_tensor_name in next_grad_node_out_list: - input_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - if IsPlainTensorType(ttype): - input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" - else: - assert IsVectorTensorType(ttype) - input_autograd_meta_vec_name = GetAutoGradMetaVectorName( - transformed_tensor_name) - input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" - input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" - - inputs_autograd_meta_list.append(input_autograd_meta) - compute_require_grad_args_list.append( - input_autograd_meta_name) - - inputs_autograd_meta_str = "\n".join(inputs_autograd_meta_list) - compute_require_grad_args_str = ",".join( - compute_require_grad_args_list) - - # 3. 
Get Output AutoGradMeta - outputs_autograd_meta_list = [] - num_fwd_outputs = len(backward_grad_outputs_map.keys()) - for name, (rtype, pos, - grad_api_position) in backward_grad_outputs_map.items(): - transformed_tensor_name = self.TransformToNextGradName(name) - - output_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - output_autograd_meta_vec_name = GetAutoGradMetaVectorName( - transformed_tensor_name) - if IsPlainTensorType(rtype): - output_autograd_meta = f""" + compute_require_next_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n" + + # 3. Get Output AutoGradMeta + outputs_autograd_meta_list = [] + # TODO(jiabin): Optimize this with SetStopGradient instead of Pass Stop gradient + + num_fwd_outputs = len(backward_grad_outputs_map.keys()) + for name, (rtype, pos, + grad_api_position) in backward_grad_outputs_map.items(): + transformed_tensor_name = self.TransformToNextGradName(name) + + output_autograd_meta_name = GetAutoGradMetaName( + transformed_tensor_name) + output_autograd_meta_vec_name = GetAutoGradMetaVectorName( + transformed_tensor_name) + if IsPlainTensorType(rtype): + output_autograd_meta = f""" auto& {transformed_tensor_name} = returns[{pos}][0]; - egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr;""" + egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr; + if ({output_autograd_meta_name}) {output_autograd_meta_name}->SetStopGradient(false); + """ + else: + assert IsVectorTensorType(rtype) + if len(next_grad_node_creation_str) > 0: + output_autograd_meta = f""" + auto& {transformed_tensor_name} = returns[{pos}]; + std::vector<egr::AutogradMeta*> {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name}); + std::vector<egr::AutogradMeta*>* {output_autograd_meta_name} = &{output_autograd_meta_vec_name}; + for(auto* meta : {output_autograd_meta_vec_name}){{ + meta->SetStopGradient(false); + }} +""" else: - assert IsVectorTensorType(rtype) output_autograd_meta = f""" - auto& {transformed_tensor_name} = returns[{pos}]; - std::vector<egr::AutogradMeta*> {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name}); - std::vector<egr::AutogradMeta*>* {output_autograd_meta_name} = &{output_autograd_meta_vec_name}; + auto& {transformed_tensor_name} = returns[{pos}]; + std::vector<egr::AutogradMeta*> {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name}); + for(auto* meta : {output_autograd_meta_vec_name}){{ + meta->SetStopGradient(false); + }} """ + outputs_autograd_meta_list.append(output_autograd_meta) - outputs_autograd_meta_list.append(output_autograd_meta) - outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list) - - compute_require_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n" - compute_require_grad_str += f"{indent}bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({compute_require_grad_args_str});" + outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list) returns_str = f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" returns_str += f"{indent}return returns;\n" @@ -1576,11 +1581,10 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): self.node_definition_str = GRAD_FUNCTION_TEMPLATE.format( grad_node_name, fill_zero_str, get_grad_in_args_str, - grad_function_prepare_str,
inputs_autograd_meta_str, - compute_require_grad_str, inplace_check_str, - inplace_for_grad_outs_str, grad_node_name, grad_function_call_str, - check_nan_inf_str, outputs_autograd_meta_str, - next_grad_node_creation_str, returns_str) + grad_function_prepare_str, compute_require_next_grad_str, + inplace_check_str, inplace_for_grad_outs_str, grad_node_name, + grad_function_call_str, check_nan_inf_str, + outputs_autograd_meta_str, next_grad_node_creation_str, returns_str) def run(self): super().run() @@ -1631,6 +1635,7 @@ class DygraphForwardAndNodesGenerator(GeneratorBase): if 'backward' not in forward_api_contents.keys(): return None backward_api_name = forward_api_contents['backward'] + if backward_api_name in black_ops_list: return None assert backward_api_name in grad_api_dict.keys(), AssertMessage( backward_api_name, grad_api_dict.keys()) backward_api_contents = grad_api_dict[backward_api_name] @@ -1646,7 +1651,7 @@ class DygraphForwardAndNodesGenerator(GeneratorBase): backward_api_contents = self.GetBackwardAPIContents( forward_api_contents) if backward_api_contents is None: continue - + if forward_api_contents['api'] in black_ops_list: continue # Generate Dygraph Forward Function function_generator = DygraphForwardFunctionGenerator( forward_api_contents, backward_api_contents, namespace) diff --git a/paddle/fluid/eager/backward.cc b/paddle/fluid/eager/backward.cc index 26165c59e0153e3d49dfa628f6c53e399b48bfdb..c4797029abf3c292fe737a870be6eb98a6686c88 100644 --- a/paddle/fluid/eager/backward.cc +++ b/paddle/fluid/eager/backward.cc @@ -52,7 +52,14 @@ class GeneralGrad { AutogradMeta* auto_grad_meta = EagerUtils::unsafe_autograd_meta(inputs[i]); auto* target_node = auto_grad_meta->GetMutableGradNode().get(); - + VLOG(8) << "Get no grad vars' grad_node: " << target_node->name() + << ", " << target_node << " with output rank info: " + << auto_grad_meta->OutRankInfo().first << ", " + << auto_grad_meta->OutRankInfo().second; + if (is_no_grad_vars) { + (no_grad_var_nodes_inputmeta_map_)[target_node] = auto_grad_meta; + continue; + } if (orig_to_copied_node_mapping_.count(target_node)) { target_node = orig_to_copied_node_mapping_[target_node].get(); } else { @@ -67,11 +74,8 @@ class GeneralGrad { "stop_gradient=True.", msg, i)); - if (is_no_grad_vars) { - (no_grad_var_nodes_inputmeta_map_)[target_node] = auto_grad_meta; - } else { // normal input - (input_target_nodes_inputmeta_map_)[target_node] = auto_grad_meta; - } + // normal input + (input_target_nodes_inputmeta_map_)[target_node] = auto_grad_meta; } } } @@ -305,8 +309,6 @@ class GeneralGrad { const std::unordered_map>& node_input_buffers_dict) { - // Get no_grad_vars's GradNodes and InputMeta Info - GetTargetNodesInfo(no_grad_vars, true /* is_no_grad_vars */); // Get inputs's GradNodes and InputMeta Info GetTargetNodesInfo(inputs, false /* is_no_grad_vars */); // Purify potentialstartup_ops, remove those nodes that are the same as @@ -402,6 +404,21 @@ class GeneralGrad { std::shared_ptr orig_next_node = orig_edge.GetMutableGradNode(); + + if (no_grad_var_nodes_inputmeta_map_.count(orig_next_node.get()) && + (no_grad_var_nodes_inputmeta_map_[orig_next_node.get()] + ->OutRankInfo() == orig_edge.GetEdgeRankInfo())) { + VLOG(3) << "Get no grad edge from grad_node: " << orig_node->name() + << " : " << orig_node << " to:" << orig_next_node->name() + << ", " << orig_next_node.get() + << " with output rank info: " + << orig_edge.GetEdgeRankInfo().first << ", " + << orig_edge.GetEdgeRankInfo().second; + // Stop no grad var's preceding node + 
copied_node->MutableOutputMeta()[i][j].SetStopGradient(true); + copied_edge.Clear(); + continue; + } if (!orig_next_node) continue; // Copy Next Node @@ -638,6 +655,9 @@ std::vector RunBackward( } if (is_general_grad) { + // Get no_grad_vars's GradNodes and InputMeta Info + GeneralGrad::Instance().GetTargetNodesInfo(no_grad_vars, + true /* is_no_grad_vars */); // Copy Backward Graph GeneralGrad::Instance().ReconstructBackwardGraph(orig_queue); } @@ -696,19 +716,6 @@ std::vector RunBackward( node); } - // no_grad_vars - if (!no_grad_vars.empty() && is_general_grad) { - auto iter = - GeneralGrad::Instance().GetNoGradVarNodesInputMetaMap()->find(node); - if (iter != - GeneralGrad::Instance().GetNoGradVarNodesInputMetaMap()->end()) { - VLOG(6) << "Change the input buffer[slot][rank] by Zeros"; - auto rank_info = (iter->second)->OutRankInfo(); - node_input_buffer->SetBufferSlotRankZeros(rank_info.first, - rank_info.second); - } - } - // Check input EnforceGradNodeHasInput(node); @@ -750,7 +757,8 @@ std::vector RunBackward( // Since we make edge has as same rank as bwd outputs, we indexing them // with the same rank(i, j) auto next_node_shared = edge.GetMutableGradNode(); - VLOG(3) << "Found pending node: " << next_node_shared->name(); + VLOG(3) << "Found pending node: " << next_node_shared->name() << ": " + << next_node_shared.get(); // Next node could be nullptr if it is leaf tensor with no // AccumulationNode attached // Or it could also originated from dispensable inputs @@ -800,6 +808,8 @@ std::vector RunBackward( // Update queue node_in_degree_map[next_node]--; + VLOG(6) << next_node->name() + << " ref_cnt is: " << node_in_degree_map[next_node]; PADDLE_ENFORCE( node_in_degree_map[next_node] >= 0, diff --git a/paddle/fluid/eager/grad_node_info.h b/paddle/fluid/eager/grad_node_info.h index 269753f3c04f9642db6ae99d30508bb4793d7f4f..2f8ca2bb4209537e4119676d831794b5b72db35e 100644 --- a/paddle/fluid/eager/grad_node_info.h +++ b/paddle/fluid/eager/grad_node_info.h @@ -106,6 +106,12 @@ class Edge { } } + void Clear() { + grad_node_.reset(); + in_slot_id_ = 0; + in_rank_ = 0; + } + private: size_t in_slot_id_; size_t in_rank_; diff --git a/paddle/fluid/eager/grad_tensor_holder.cc b/paddle/fluid/eager/grad_tensor_holder.cc index c8d8b9ab548c07734e017c44e5e134521b6adbf2..231d81b5e73a62a89c1ee3207056ceff48d79d4d 100644 --- a/paddle/fluid/eager/grad_tensor_holder.cc +++ b/paddle/fluid/eager/grad_tensor_holder.cc @@ -24,6 +24,7 @@ namespace egr { void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) { + // Set not grad var to zero and set stop gradient as default value: true buffer_[slot_id][rank] = paddle::experimental::zeros_like(buffer_[slot_id][rank]); } @@ -59,8 +60,15 @@ void GradTensorHolder::CopyValueFromTensor( if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) { // Perform deep copy here buffer_tensor.copy_(t, t.place(), false); - buffer_tensor.set_autograd_meta(t.mutable_autograd_meta()); - + auto* meta = egr::EagerUtils::autograd_meta(&buffer_tensor); + auto* origin_meta = egr::EagerUtils::nullable_autograd_meta(t); + if (origin_meta) { + auto grad_node = origin_meta->GetMutableGradNode(); + if (grad_node && grad_node.get()) { + meta->SetGradNode(origin_meta->GetMutableGradNode()); + } + meta->WeakGrad() = origin_meta->WeakGrad(); + } } else { PADDLE_THROW(paddle::platform::errors::Fatal( "Cannot copy grad_tensors' value to grad tensor holders," @@ -81,10 +89,10 @@ void GradTensorHolder::CopyValueFromTensor( "Only Support DENSE_TENSOR, SPARSE_COO_TENSOR, 
SPARSE_CSR_TENSOR " "now.")); } - egr::EagerUtils::autograd_meta(&(buffer_[slot_id][rank])) - ->SetStopGradient(false); } } + egr::EagerUtils::autograd_meta(&(buffer_[slot_id][rank])) + ->SetStopGradient(false); } void GradTensorHolder::add(size_t slot_id, diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index 66c13c66de9fcc772540e1d7e3b281ad7070f0fd..a6fd57ac6a4bc290defbf44c15f2d008c79987de 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -28,6 +28,7 @@ #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/grad_node_info.h" #include "paddle/fluid/eager/utils.h" +#include "paddle/phi/api/lib/utils/allocator.h" namespace egr { class TensorWrapper { @@ -57,9 +58,12 @@ class TensorWrapper { // Only Copy Meta phi::DenseTensor* dense_tensor = static_cast(tensor.impl().get()); - auto tw_dense_tensor = std::make_shared(); - tw_dense_tensor->set_meta(dense_tensor->meta()); - intermidiate_tensor_.set_impl(tw_dense_tensor); + // TODO(jiabin): It's not a good idea to set memory size to zero, find + // another way and change this. + intermidiate_tensor_.set_impl( + std::move(std::make_shared( + std::make_shared(nullptr, 0, tensor.place()), + std::move(dense_tensor->meta())))); } else { PADDLE_THROW(paddle::platform::errors::Fatal( "Unrecognized tensor type for no_need_buffer feature")); diff --git a/paddle/fluid/imperative/partial_grad_engine.cc b/paddle/fluid/imperative/partial_grad_engine.cc index 7c0243caf6abf0f480d8dfe506259536090fa5d1..f445632de8c5d1168c87997fcfddfd9ec7f0191e 100644 --- a/paddle/fluid/imperative/partial_grad_engine.cc +++ b/paddle/fluid/imperative/partial_grad_engine.cc @@ -98,6 +98,7 @@ static void GetGraphInfoBetweenTargets( auto &grad_node = output_target->GradVarBase()->GradNode(); if (visited.count(grad_node.get()) == 0) { for (auto &op : *grad_node) { + VLOG(10) << "Pushed op: " << op.Type(); q.emplace(&op, grad_node.get()); } } @@ -141,6 +142,8 @@ static void GetGraphInfoBetweenTargets( for (auto &pending_node : node->GradPendingNodes()) { for (auto &pending_op : *pending_node) { preceding_ops[&pending_op].insert(op); + VLOG(10) << "Find preceding op of: " << pending_op.Type() + << " is: " << op->Type(); } if (visited.count(pending_node.get()) == 0) { visited.insert(pending_node.get()); @@ -175,6 +178,7 @@ static void GetGraphInfoBetweenTargets( std::queue> op_queue; std::unordered_set, HashPair> op_base_visited; for (auto &endpoint_op : endpoint_ops) { + VLOG(10) << "Emplaced endpoint op: " << endpoint_op->Type(); op_queue.emplace(endpoint_op, nullptr); op_base_visited.emplace(endpoint_op, nullptr); } @@ -186,14 +190,18 @@ static void GetGraphInfoBetweenTargets( op_queue.pop(); + VLOG(10) << "Get op: " << op->Type(); + bool is_valid = false; for (auto &output_pair : op->GetOutsMap()) { if (!output_pair.second.IsGrad()) { + VLOG(10) << "Continueded output for : " << op->Type(); continue; } for (auto &out_var : output_pair.second) { if (out_var && target_vars.count(out_var.get()) > 0) { + VLOG(10) << "Find target output for : " << op->Type(); is_valid = true; break; } @@ -211,11 +219,13 @@ static void GetGraphInfoBetweenTargets( is_valid = false; for (auto &input_pair : op->GetInsMap()) { if (!input_pair.second.IsGrad()) { + VLOG(10) << "Continueded input for : " << op->Type(); continue; } for (auto &in_var : input_pair.second) { if (in_var && no_grad_var_grad.count(in_var.get()) == 0) { + VLOG(10) << "Find not no grad var in input for : " << op->Type(); 
target_vars.insert(in_var.get()); is_valid = true; } @@ -240,7 +250,10 @@ static void GetGraphInfoBetweenTargets( auto iter = preceding_ops.find(op); if (iter != preceding_ops.end()) { for (auto &preceding_op : iter->second) { + VLOG(10) << "Scan preceding op: " << preceding_op->Type() << " for " + << op->Type(); if (op_base_visited.count(std::make_pair(preceding_op, op)) == 0) { + VLOG(10) << "Emplace op: " << preceding_op->Type(); op_queue.emplace(preceding_op, op); op_base_visited.emplace(preceding_op, op); } @@ -648,6 +661,7 @@ PartialGradTask::PartialGradTask( platform::errors::Unimplemented( "only_inputs=False is not supported yet")); + VLOG(10) << "no_grad_vars size: " << no_grad_vars.size(); for (auto &var : no_grad_vars) { if (var && var->GradVarBase()) { no_grad_var_grad_.insert(var->GradVarBase()->SharedVar().get()); @@ -853,6 +867,7 @@ std::vector> PartialGradTask::Run() { } for (auto &pending_op : iter->second) { + VLOG(10) << "Find pending op" << pending_op->Type(); auto dep_iter = op_deps_.find(pending_op); PADDLE_ENFORCE_EQ( dep_iter != op_deps_.end(), @@ -862,6 +877,7 @@ std::vector> PartialGradTask::Run() { if (--(dep_iter->second) == 0) { q.push(pending_op); } + VLOG(10) << "Pending op deps: " << dep_iter->second; } } diff --git a/paddle/fluid/imperative/prepared_operator.h b/paddle/fluid/imperative/prepared_operator.h index 7ed4346ed82c2b16894f2749e7d061138274e205..c0ff0914401b55b1abf556e6a9eec57ac1b7f4e0 100644 --- a/paddle/fluid/imperative/prepared_operator.h +++ b/paddle/fluid/imperative/prepared_operator.h @@ -82,7 +82,7 @@ std::shared_ptr> PrepareData( auto& template_var = name_pair.second[i]; SetForwardDataTypeOfGradVar(template_var); const auto* tensor = GetTensorFromVar(template_var->Var()); - if (tensor && tensor->IsInitialized()) { + if (tensor && tensor->IsInitialized() && (tensor->memory_size() != 0)) { auto kernel_type_for_var = op.GetKernelTypeForVar( name_pair.first, *tensor, expected_kernel_key); if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) { @@ -91,7 +91,8 @@ std::shared_ptr> PrepareData( VLOG(3) << "Transform Variable " << GetNameFromVar(template_var) << " from " << kernel_type_for_var << " to " << expected_kernel_key; - + VLOG(3) << GetNameFromVar(template_var) + << " memory size is: " << tensor->memory_size(); if (CheckCachedKey(template_var, expected_kernel_key)) { VLOG(3) << "Hit variable_wrapper cache: key=" << expected_kernel_key; @@ -634,7 +635,8 @@ void PreparePhiData(const phi::Kernel& phi_kernel, for (size_t offset = 0; offset < ins_vector.size(); ++offset) { auto& var = ins_vector[offset]; const auto* tensor_in = GetTensorFromVar(var->Var()); - if (tensor_in && tensor_in->IsInitialized()) { + if (tensor_in && tensor_in->IsInitialized() && + (tensor_in->memory_size() != 0)) { if (in_def.backend == phi::Backend::ALL_BACKEND) { continue; } diff --git a/paddle/fluid/pybind/eager.cc b/paddle/fluid/pybind/eager.cc index 7f54f472bdcd5bf52ae19f1b4513e02eccc74e8d..f436d0e96b5dc93eedab4897a8181d773f2b50f5 100644 --- a/paddle/fluid/pybind/eager.cc +++ b/paddle/fluid/pybind/eager.cc @@ -98,10 +98,11 @@ void EmptyTensorInitializer(TensorObject* self, } if (!autograd_meta->GetMutableGradNode()) { - VLOG(3) << "Tensor(" << name - << ") have not GradNode, add GradNodeAccumulation for it."; autograd_meta->SetGradNode( std::make_shared(autograd_meta)); + VLOG(3) << "Tensor(" << name + << ") have not GradNode, add GradNodeAccumulation" + << autograd_meta->GradNode() << " for it."; } } diff --git a/paddle/fluid/pybind/eager_properties.cc 
b/paddle/fluid/pybind/eager_properties.cc index a7f11fc963ebe65f8610a3347efd49d27833d1d6..12e262b3f7cb55313d42f2fdc4300738bc28e73a 100644 --- a/paddle/fluid/pybind/eager_properties.cc +++ b/paddle/fluid/pybind/eager_properties.cc @@ -95,6 +95,7 @@ PyObject* tensor_properties_get_grad(TensorObject* self, void* closure) { EAGER_TRY VLOG(6) << "Get grad for tensor: " << self->tensor.name(); auto meta = egr::EagerUtils::nullable_autograd_meta(self->tensor); + VLOG(6) << meta << " initialized: " << meta->Grad().initialized(); if (meta && meta->Grad().initialized()) { return ToPyObject(meta->Grad()); } else { diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index f50323cef216c4a6d17b206f18ae1215dc5eecb4..5a5aa9638a3bed80787df0185028d5b5fc600bf6 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -257,8 +257,8 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} ${bw_api_source_file} COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}" - DEPENDS ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} ${bw_api_gen_file} - ${api_gen_base} + DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base} + ${legacy_bw_api_yaml_file} VERBATIM) # generate sparse api diff --git a/paddle/phi/api/yaml/legacy_backward.yaml b/paddle/phi/api/yaml/legacy_backward.yaml index a4589120cc4752500bc5348e66045d936ab721c2..4af32c7e4cfa035107fe84b0db0e0a2469138e67 100644 --- a/paddle/phi/api/yaml/legacy_backward.yaml +++ b/paddle/phi/api/yaml/legacy_backward.yaml @@ -133,6 +133,17 @@ func : asinh_grad inplace : (out_grad -> x_grad) +- backward_api : assign_double_grad + forward : assign_grad (Tensor grad_out) -> Tensor(grad_x) + args : (Tensor grad_x_grad) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + kernel : + func : assign + backward: assign_triple_grad + inplace : (grad_x_grad -> grad_out_grad) + - backward_api : assign_grad forward : assign (Tensor x) -> Tensor(out) args : (Tensor out_grad) @@ -141,6 +152,7 @@ func : UnchangedInferMeta kernel : func : assign + backward: assign_double_grad inplace : (out_grad -> x_grad) - backward_api : assign_out__grad @@ -153,6 +165,16 @@ func : assign inplace : (out_grad -> x_grad) +- backward_api : assign_triple_grad + forward : assign_double_grad (Tensor grad_out) -> Tensor(grad_x) + args : (Tensor grad_x_grad) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + kernel : + func : assign + inplace : (grad_x_grad -> grad_out_grad) + - backward_api : atan_grad forward : atan (Tensor x) -> Tensor(out) args : (Tensor x, Tensor out_grad) @@ -1823,6 +1845,16 @@ func : sinh_grad inplace : (out_grad -> x_grad) +- backward_api : slice_double_grad + forward : slice_grad (Tensor input, Tensor grad_out, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(grad_input) + args : (Tensor grad_input_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) + output : Tensor(grad_out_grad) + infer_meta : + func : UnchangedInferMeta + param : [grad_input_grad] + kernel : + func : slice + - backward_api : slice_grad forward : slice (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(out) args : (Tensor input, Tensor out_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) @@ -1832,6 +1864,7 @@ param : 
[input] kernel : func : slice_grad + backward : slice_double_grad no_need_buffer : input - backward_api : soft_shrink_grad
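
A usage-level sketch of what this patch enables (not part of the diff itself): with conv2d blacklisted from codegen and bound to the manual Conv2dGradNodeFinal / Conv2dDoubleGradNodeFinal, a second-order gradient can flow through conv2d_grad_grad in the final-state dygraph mode. The Python below is an illustration only; the tensor shapes and variable names are assumptions, and it relies on the public paddle.grad and paddle.nn.functional.conv2d APIs rather than anything introduced by this patch.

# Illustrative double-grad check for the manually bound conv2d nodes (assumed shapes).
import paddle
import paddle.nn.functional as F

x = paddle.randn([2, 3, 8, 8])   # NCHW input
w = paddle.randn([4, 3, 3, 3])   # [out_channels, in_channels, kH, kW] filter
x.stop_gradient = False
w.stop_gradient = False

y = F.conv2d(x, w)
# create_graph=True keeps the backward graph, so Conv2dGradNodeFinal can
# record a Conv2dDoubleGradNodeFinal for the next-order derivative.
dx = paddle.grad([y.sum()], [x], create_graph=True)[0]
# Differentiating the first-order gradient w.r.t. the filter exercises the
# conv2d_grad_grad path wired up by this patch.
dw2 = paddle.grad([dx.sum()], [w])[0]
print(dw2.shape)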