diff --git a/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h b/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h index 92bd900177fee720b338f0a1f56e8fadf0fe04dd..6c3e3c494705eb309137e98ad97185f455aee091 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h +++ b/paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h @@ -26,6 +26,10 @@ paddle::Tensor conv2d_ad_func(const paddle::Tensor& input, std::vector dilations, int groups, std::string data_format); +paddle::Tensor multiply_ad_func(const paddle::Tensor& x, + const paddle::Tensor& y); +paddle::Tensor& multiply__ad_func(paddle::Tensor& x, // NOLINT + const paddle::Tensor& y); std::tuple, egr::kSlotSmallVectorSize> + amp_tensors_vector = {{x}, {y}}; + + auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector); + + auto new_x = egr::EagerAmpAutoCast("x", x, amp_dst_dtype, op_name); + auto new_y = egr::EagerAmpAutoCast("y", y, amp_dst_dtype, op_name); + + { + paddle::imperative::AutoCastGuard guard( + egr::Controller::Instance().GetCurrentTracer(), + paddle::imperative::AmpLevel::O0); + return multiply_ad_func(new_x, new_y); + } + } + + // Layout autotune + + if (egr::Controller::Instance().UseLayoutAutoTune()) { + paddle::small_vector, egr::kSlotSmallVectorSize> + tensors_vector = {{x}, {y}}; + + auto op_name = phi::TransToFluidOpName("multiply"); + auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector); + auto new_x = transformer->TransInTensor("x", x); + auto new_y = transformer->TransInTensor("y", y); + + VLOG(5) << "Check and Prepare For LAYOUT " << op_name; + paddle::imperative::LayoutAutotuneGuard guard( + egr::Controller::Instance().GetCurrentTracer(), false); + paddle::Tensor out = multiply_ad_func(new_x, new_y); + + transformer->SetOutTensorLayout(&out); + + // Returns + return out; + } + + // Get Input AutoGradMeta + egr::AutogradMeta* x_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(x); + egr::AutogradMeta* y_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(y); + + VLOG(5) << "Running C++ API: " + << "multiply"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Forward API Call + auto api_result = paddle::experimental::multiply(x, y); + // Check NaN and Inf if needed + + if (FLAGS_check_nan_inf) { + egr::CheckTensorHasNanOrInf("multiply", api_result); + } + + // Get Outputs + auto& out = api_result; + + // Get Output AutoGradMeta + egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out); + bool trace_backward = egr::Controller::Instance().HasGrad(); + bool require_any_grad = egr::EagerUtils::ComputeRequireGrad( + trace_backward, x_autograd_meta, y_autograd_meta); + + // Check Inplace if needed + + // Node Creation + if (require_any_grad) { + paddle::platform::RecordEvent node_creation_record_event( + "multiply node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + egr::EagerUtils::PassStopGradient(false, out_autograd_meta); + + // Node Construction + auto grad_node = + std::shared_ptr(new MultiplyGradNode(1, 2)); + + // SetAttributes if needed + grad_node->SetAttributeaxis(-1); + // Set TensorWrappers for Forward Inputs if needed + if (paddle::platform::is_gpu_place(x.place())) { + if (x_autograd_meta != nullptr && x_autograd_meta->StopGradient() && + y_autograd_meta != nullptr && !y_autograd_meta->StopGradient()) { + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrapperNoNeedBuffery(y); + } else if (x_autograd_meta != nullptr && + !x_autograd_meta->StopGradient() && + y_autograd_meta != nullptr && + y_autograd_meta->StopGradient()) { + grad_node->SetTensorWrapperNoNeedBufferx(x); + grad_node->SetTensorWrappery(y); + } else { + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrappery(y); + } + } else { + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrappery(y); + } + // SetGradOutMeta & SetEdges + grad_node->SetGradOutMeta(x, 0); + grad_node->SetGradOutMeta(y, 1); + // SetOutRank & SetHistory & SetGradInMeta + if (out_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0); + } + if (out_autograd_meta) { + egr::EagerUtils::SetHistory(out_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(out, 0); + // Set TensorWrappers for Forward Outputs if needed + } + + VLOG(4) << "Finish AD API: multiply"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_OUT_TEMPLATE = " \n( out , [%s]), "; + std::string output_out_str = paddle::string::Sprintf( + TENSOR_OUT_TEMPLATE, egr::EagerUtils::TensorStr(out)); + output_str += output_out_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Returns + return out; +} + +paddle::Tensor& multiply__ad_func(paddle::Tensor& x, // NOLINT + const paddle::Tensor& y) { + FLAGS_tensor_operants_mode = "eager"; + VLOG(3) << "Running AD API: " + << "multiply_"; + // Dygraph Record Event + paddle::platform::RecordEvent dygraph_entrance_record_event( + "multiply_ dygraph", paddle::platform::TracerEventType::Operator, 1); + + // AMP Logic + + VLOG(5) + << " No AMP for multiply__ad_func because it is a inplace or cast api. "; + // Layout autotune + + if (egr::Controller::Instance().UseLayoutAutoTune()) { + paddle::small_vector, egr::kSlotSmallVectorSize> + tensors_vector = {{x}, {y}}; + + auto op_name = phi::TransToFluidOpName("multiply_"); + auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector); + auto new_x = transformer->TransInTensor("x", x); + auto new_y = transformer->TransInTensor("y", y); + + VLOG(5) << "Check and Prepare For LAYOUT " << op_name; + paddle::imperative::LayoutAutotuneGuard guard( + egr::Controller::Instance().GetCurrentTracer(), false); + paddle::Tensor& out = multiply__ad_func(new_x, new_y); + + transformer->SetOutTensorLayout(&out); + + // Returns + return out; + } + + // Get Input AutoGradMeta + egr::AutogradMeta* x_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(x); + egr::AutogradMeta* y_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(y); + + VLOG(5) << "Running C++ API: " + << "multiply_"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Forward API Call + auto& api_result = paddle::experimental::multiply_(x, y); + // Check NaN and Inf if needed + + if (FLAGS_check_nan_inf) { + egr::CheckTensorHasNanOrInf("multiply_", api_result); + } + + // Get Outputs + auto& out = api_result; + + // Get Output AutoGradMeta + egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out); + bool trace_backward = egr::Controller::Instance().HasGrad(); + bool require_any_grad = egr::EagerUtils::ComputeRequireGrad( + trace_backward, x_autograd_meta, y_autograd_meta); + + // Check Inplace if needed + + egr::EagerUtils::CheckInplace(x, x_autograd_meta, require_any_grad); + + // Bump Inplace Version + x.bump_inplace_version(); + VLOG(3) << "Tensor(" << x.name() << ") uses Inplace Strategy."; + + // Node Creation + if (require_any_grad) { + paddle::platform::RecordEvent node_creation_record_event( + "multiply node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + egr::EagerUtils::PassStopGradient(false, out_autograd_meta); + + // Node Construction + auto grad_node = + std::shared_ptr(new MultiplyGradNode(1, 2)); + + // SetAttributes if needed + grad_node->SetAttributeaxis(-1); + // Set TensorWrappers for Forward Inputs if needed + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrappery(y); + // SetGradOutMeta & SetEdges + grad_node->SetGradOutMeta(x, 0); + grad_node->SetGradOutMeta(y, 1); + // SetOutRank & SetHistory & SetGradInMeta + if (out_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0); + } + if (out_autograd_meta) { + egr::EagerUtils::SetHistory(out_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(out, 0); + // Set TensorWrappers for Forward Outputs if needed + } + + VLOG(4) << "Finish AD API: multiply_"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_OUT_TEMPLATE = " \n( out , [%s]), "; + std::string output_out_str = paddle::string::Sprintf( + TENSOR_OUT_TEMPLATE, egr::EagerUtils::TensorStr(out)); + output_str += output_out_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Returns + return out; +} + +namespace sparse { + +paddle::Tensor multiply_ad_func(const paddle::Tensor& x, + const paddle::Tensor& y) { + FLAGS_tensor_operants_mode = "eager"; + VLOG(3) << "Running AD API: " + << "multiply"; + // Dygraph Record Event + paddle::platform::RecordEvent dygraph_entrance_record_event( + "multiply dygraph", paddle::platform::TracerEventType::Operator, 1); + + // AMP Logic + if (egr::Controller::Instance().GetAMPLevel() != + paddle::imperative::AmpLevel::O0) { + VLOG(5) << "Check and Prepare For AMP"; + auto op_name = phi::TransToFluidOpName("multiply"); + paddle::small_vector, egr::kSlotSmallVectorSize> + amp_tensors_vector = {{x}, {y}}; + + auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector); + + auto new_x = egr::EagerAmpAutoCast("x", x, amp_dst_dtype, op_name); + auto new_y = egr::EagerAmpAutoCast("y", y, amp_dst_dtype, op_name); + + { + paddle::imperative::AutoCastGuard guard( + egr::Controller::Instance().GetCurrentTracer(), + paddle::imperative::AmpLevel::O0); + return multiply_ad_func(new_x, new_y); + } + } + + // Layout autotune + + if (egr::Controller::Instance().UseLayoutAutoTune()) { + paddle::small_vector, egr::kSlotSmallVectorSize> + tensors_vector = {{x}, {y}}; + + auto op_name = phi::TransToFluidOpName("multiply"); + auto transformer = egr::EagerLayoutAutotune(op_name, tensors_vector); + auto new_x = transformer->TransInTensor("x", x); + auto new_y = transformer->TransInTensor("y", y); + + VLOG(5) << "Check and Prepare For LAYOUT " << op_name; + paddle::imperative::LayoutAutotuneGuard guard( + egr::Controller::Instance().GetCurrentTracer(), false); + paddle::Tensor out = multiply_ad_func(new_x, new_y); + + transformer->SetOutTensorLayout(&out); + + // Returns + return out; + } + + // Get Input AutoGradMeta + egr::AutogradMeta* x_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(x); + egr::AutogradMeta* y_autograd_meta = + egr::EagerUtils::nullable_autograd_meta(y); + + VLOG(5) << "Running C++ API: " + << "multiply"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Forward API Call + auto api_result = paddle::experimental::sparse::multiply(x, y); + // Check NaN and Inf if needed + + if (FLAGS_check_nan_inf) { + egr::CheckTensorHasNanOrInf("multiply", api_result); + } + + // Get Outputs + auto& out = api_result; + + // Get Output AutoGradMeta + egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out); + bool trace_backward = egr::Controller::Instance().HasGrad(); + bool require_any_grad = egr::EagerUtils::ComputeRequireGrad( + trace_backward, x_autograd_meta, y_autograd_meta); + + // Check Inplace if needed + + // Node Creation + if (require_any_grad) { + paddle::platform::RecordEvent node_creation_record_event( + "multiply node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + egr::EagerUtils::PassStopGradient(false, out_autograd_meta); + + // Node Construction + auto grad_node = + std::shared_ptr(new MultiplyGradNode(1, 2)); + // SetAttributes if needed + + // Set TensorWrappers for Forward Inputs if needed + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrappery(y); + // SetGradOutMeta & SetEdges + grad_node->SetGradOutMeta(x, 0); + grad_node->SetGradOutMeta(y, 1); + // SetOutRank & SetHistory & SetGradInMeta + if (out_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0); + } + if (out_autograd_meta) { + egr::EagerUtils::SetHistory(out_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(out, 0); + // Set TensorWrappers for Forward Outputs if needed + } + + VLOG(4) << "Finish AD API: multiply"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_OUT_TEMPLATE = " \n( out , [%s]), "; + std::string output_out_str = paddle::string::Sprintf( + TENSOR_OUT_TEMPLATE, egr::EagerUtils::TensorStr(out)); + output_str += output_out_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Returns + return out; +} + +} // namespace sparse diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt b/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt index 5b195441816c89080a1f362a7908d77febd404ed..efdcaa70131e6833ff1a42e121f613fad2f311f6 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/CMakeLists.txt @@ -2,4 +2,5 @@ set(eager_manual_nodes ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc + ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc PARENT_SCOPE) diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc b/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc new file mode 100644 index 0000000000000000000000000000000000000000..81e8a22f91db83b9857b5495ce023182e67cd2b5 --- /dev/null +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/multiply_node.cc @@ -0,0 +1,942 @@ +// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include "glog/logging.h" +#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h" +#include "paddle/fluid/eager/api/utils/global_utils.h" +#include "paddle/fluid/eager/nan_inf_utils.h" +#include "paddle/fluid/eager/utils.h" +#include "paddle/fluid/framework/op_registry.h" +#include "paddle/fluid/imperative/tracer.h" +#include "paddle/fluid/prim/api/all.h" +#include "paddle/fluid/prim/api/composite_backward/composite_backward_api.h" +#include "paddle/fluid/prim/utils/utils.h" +#include "paddle/phi/api/all.h" +#include "paddle/phi/api/backward/backward_api.h" +#include "paddle/phi/api/backward/sparse_bw_api.h" +#include "paddle/phi/api/include/sparse_api.h" +#include "paddle/phi/api/lib/api_custom_impl.h" +#include "paddle/phi/core/flags.h" + +DECLARE_bool(check_nan_inf); + +paddle::small_vector, egr::kSlotSmallVectorSize> +MultiplyGradNode::operator()( + paddle::small_vector, + egr::kSlotSmallVectorSize>& grads, + bool create_graph, + bool is_new_grad) { + VLOG(3) << "Running AD API GRAD: " + << "multiply_grad"; + // Fill Zero For GradIn Tensors + const auto& input_metas = this->InputMeta(); + egr::EagerUtils::FillZeroForEmptyGradInput(&grads[0][0], input_metas[0][0]); + + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); + + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_); + auto y = egr::EagerUtils::RecoverTensorWrapper(&this->y_); + auto& grad_out = hooked_grads[0][0]; + auto& axis = this->axis_; + // Prepare Grad function call + + const auto& out_metas = OutputMeta(); + paddle::small_vector, egr::kSlotSmallVectorSize> + returns(2); + for (int i = 0; i < 2; ++i) { + out_metas[i].size() == 0 ? returns[i].resize(1) + : returns[i].resize(out_metas[i].size()); + } + + auto* api_output_0 = + (out_metas[0].empty() || out_metas[0][0].IsStopGradient()) + ? nullptr + : &returns[0][0]; + auto* api_output_1 = + (out_metas[1].empty() || out_metas[1][0].IsStopGradient()) + ? nullptr + : &returns[1][0]; + // Runtime check if we need next grad + bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph; + + // Inplace Check + + // Inplace Strategy + + VLOG(5) << "Running C++ API: " + << "multiply_grad"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_GRAD_OUT_TEMPLATE = " \n( grad_out , [%s]), "; + std::string input_grad_out_str = paddle::string::Sprintf( + TENSOR_GRAD_OUT_TEMPLATE, egr::EagerUtils::TensorStr(grad_out)); + input_str += input_grad_out_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Call grad_api function + + if (paddle::prim::PrimCommonUtils::IsEagerPrimEnabled()) { + bool original_global_grad = egr::Controller::Instance().HasGrad(); + if (!create_graph) { + egr::Controller::Instance().SetHasGrad(create_graph); + } + paddle::prim::multiply_grad( + x, y, grad_out, axis, api_output_0, api_output_1); + VLOG(4) << "Composite api multiply_grad is called "; + if (!create_graph) { + egr::Controller::Instance().SetHasGrad(original_global_grad); + } + } else { + paddle::experimental::multiply_grad( + x, y, grad_out, axis, api_output_0, api_output_1); + VLOG(4) << "Fused api multiply_grad is called "; + } + + // Check NaN and Inf id needed + + if (FLAGS_check_nan_inf) { + try { + egr::CheckTensorHasNanOrInf("multiply_grad", returns); + } catch (...) { + LOG(WARNING) << "There are nan/inf in (multiply_grad)"; + std::rethrow_exception(std::current_exception()); + } + } + + // Get GradOut autograd_meta + + auto& grad_x = returns[0][0]; + egr::AutogradMeta* grad_x_autograd_meta = + returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_x) + : nullptr; + if (grad_x_autograd_meta) grad_x_autograd_meta->SetStopGradient(false); + + auto& grad_y = returns[1][0]; + egr::AutogradMeta* grad_y_autograd_meta = + returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_y) + : nullptr; + if (grad_y_autograd_meta) grad_y_autograd_meta->SetStopGradient(false); + + // Create Grad Node + + if (!paddle::prim::PrimCommonUtils::IsEagerPrimEnabled()) { + if (trace_backward) { + paddle::platform::RecordEvent node_creation_record_event( + "multiply_grad node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + // Node Construction + auto grad_node = std::shared_ptr( + new MultiplyDoubleGradNode(2, 3)); + // SetAttributes if needed + grad_node->SetAttributeaxis(axis); + // Set TensorWrappers for Forward Inputs if needed + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrappergrad_out(grad_out); + // SetGradOutMeta & SetEdges + grad_node->SetGradOutMeta(x, 0); + grad_node->SetGradOutMeta(y, 1); + grad_node->SetGradOutMeta(grad_out, 2); + // SetOutRank & SetHistory & SetGradInMeta + if (grad_x_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_x_autograd_meta, 0); + } + if (grad_y_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_y_autograd_meta, 1); + } + if (grad_x_autograd_meta) { + egr::EagerUtils::SetHistory(grad_x_autograd_meta, grad_node); + } + if (grad_y_autograd_meta) { + egr::EagerUtils::SetHistory(grad_y_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(grad_x, 0); + grad_node->SetGradInMeta(grad_y, 1); + // Set TensorWrappers for Forward Outputs if needed + } + } + + VLOG(4) << "Finish AD API GRAD: multiply_grad"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_GRAD_OUT_TEMPLATE = " \n( grad_out , [%s]), "; + std::string input_grad_out_str = paddle::string::Sprintf( + TENSOR_GRAD_OUT_TEMPLATE, egr::EagerUtils::TensorStr(grad_out)); + input_str += input_grad_out_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_GRAD_X_TEMPLATE = " \n ( grad_x , [%s]), "; + std::string output_grad_x_str = paddle::string::Sprintf( + TENSOR_GRAD_X_TEMPLATE, egr::EagerUtils::TensorStr(grad_x)); + output_str += output_grad_x_str; + const char* TENSOR_GRAD_Y_TEMPLATE = " \n ( grad_y , [%s]), "; + std::string output_grad_y_str = paddle::string::Sprintf( + TENSOR_GRAD_Y_TEMPLATE, egr::EagerUtils::TensorStr(grad_y)); + output_str += output_grad_y_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Return + if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns); + return returns; +} + +paddle::small_vector, egr::kSlotSmallVectorSize> +MultiplyDoubleGradNode::operator()( + paddle::small_vector, + egr::kSlotSmallVectorSize>& grads, + bool create_graph, + bool is_new_grad) { + VLOG(3) << "Running AD API GRAD: " + << "multiply_double_grad"; + // Fill Zero For GradIn Tensors + const auto& input_metas = this->InputMeta(); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[0][0], + input_metas[0][0]); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[1][0], + input_metas[1][0]); + + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); + + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_); + auto y = egr::EagerUtils::RecoverTensorWrapper(&this->y_); + auto fwd_grad_out = egr::EagerUtils::RecoverTensorWrapper(&this->grad_out_); + auto& fwd_grad_grad_x = hooked_grads[0][0]; + + paddle::optional fwd_grad_grad_x_optional; + if (fwd_grad_grad_x.initialized()) + fwd_grad_grad_x_optional = + paddle::make_optional(fwd_grad_grad_x); + + auto& fwd_grad_grad_y = hooked_grads[1][0]; + + paddle::optional fwd_grad_grad_y_optional; + if (fwd_grad_grad_y.initialized()) + fwd_grad_grad_y_optional = + paddle::make_optional(fwd_grad_grad_y); + + auto& axis = this->axis_; + // Prepare Grad function call + + const auto& out_metas = OutputMeta(); + paddle::small_vector, egr::kSlotSmallVectorSize> + returns(3); + for (int i = 0; i < 3; ++i) { + out_metas[i].size() == 0 ? returns[i].resize(1) + : returns[i].resize(out_metas[i].size()); + } + + auto* api_output_0 = + (out_metas[0].empty() || out_metas[0][0].IsStopGradient()) + ? nullptr + : &returns[0][0]; + auto* api_output_1 = + (out_metas[1].empty() || out_metas[1][0].IsStopGradient()) + ? nullptr + : &returns[1][0]; + auto* api_output_2 = + (out_metas[2].empty() || out_metas[2][0].IsStopGradient()) + ? nullptr + : &returns[2][0]; + // Runtime check if we need next grad + bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph; + + // Inplace Check + + bool can_be_inplaced = false; + if (fwd_grad_grad_x.initialized()) { + VLOG(10) << fwd_grad_grad_x.name() << "(grad_x_grad) use_count: " + << fwd_grad_grad_x.impl().use_count(); + if (fwd_grad_grad_x.impl().use_count() == 1 || + (fwd_grad_grad_x.impl().use_count() == 2 && + fwd_grad_grad_x.impl().get() == grads[0][0].impl().get())) { + can_be_inplaced = true; + } + } + // Inplace Strategy + + if (trace_backward) { + VLOG(6) << "No Inplace should happend for wrappered input: " + "{inplace_grad_input_str}"; + } else { + if (api_output_2 != nullptr && can_be_inplaced) { + egr::EagerUtils::HandleViewBetweenInputAndOutput(fwd_grad_grad_x, + api_output_2); + } + } + + VLOG(5) << "Running C++ API: " + << "multiply_double_grad"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_FWD_GRAD_GRAD_X_TEMPLATE = + " \n( fwd_grad_grad_x , [%s]), "; + std::string input_fwd_grad_grad_x_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_X_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_x)); + input_str += input_fwd_grad_grad_x_str; + const char* TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE = + " \n( fwd_grad_grad_y , [%s]), "; + std::string input_fwd_grad_grad_y_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_y)); + input_str += input_fwd_grad_grad_y_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_FWD_GRAD_OUT_TEMPLATE = " \n( fwd_grad_out , [%s]), "; + std::string input_fwd_grad_out_str = paddle::string::Sprintf( + TENSOR_FWD_GRAD_OUT_TEMPLATE, egr::EagerUtils::TensorStr(fwd_grad_out)); + input_str += input_fwd_grad_out_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Call grad_api function + + paddle::experimental::multiply_double_grad(x, + y, + fwd_grad_out, + fwd_grad_grad_x_optional, + fwd_grad_grad_y_optional, + axis, + api_output_0, + api_output_1, + api_output_2); + + // Check NaN and Inf id needed + + if (FLAGS_check_nan_inf) { + try { + egr::CheckTensorHasNanOrInf("multiply_double_grad", returns); + } catch (...) { + LOG(WARNING) << "There are nan/inf in (multiply_double_grad)"; + std::rethrow_exception(std::current_exception()); + } + } + + // Get GradOut autograd_meta + + auto& grad_x = returns[0][0]; + egr::AutogradMeta* grad_x_autograd_meta = + returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_x) + : nullptr; + if (grad_x_autograd_meta) grad_x_autograd_meta->SetStopGradient(false); + + auto& grad_y = returns[1][0]; + egr::AutogradMeta* grad_y_autograd_meta = + returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_y) + : nullptr; + if (grad_y_autograd_meta) grad_y_autograd_meta->SetStopGradient(false); + + auto& grad_grad_out = returns[2][0]; + egr::AutogradMeta* grad_grad_out_autograd_meta = + returns[2][0].initialized() + ? egr::EagerUtils::autograd_meta(&grad_grad_out) + : nullptr; + if (grad_grad_out_autograd_meta) + grad_grad_out_autograd_meta->SetStopGradient(false); + + // Create Grad Node + + if (!paddle::prim::PrimCommonUtils::IsEagerPrimEnabled()) { + if (trace_backward) { + paddle::platform::RecordEvent node_creation_record_event( + "multiply_double_grad node_creation", + paddle::platform::TracerEventType::OperatorInner, + 1); + + // Node Construction + auto grad_node = std::shared_ptr( + new MultiplyTripleGradNode(3, 5)); + // SetAttributes if needed + grad_node->SetAttributeaxis(-1); + // Set TensorWrappers for Forward Inputs if needed + grad_node->SetTensorWrapperx(x); + grad_node->SetTensorWrappery(y); + grad_node->SetTensorWrapperfwd_grad_out(fwd_grad_out); + grad_node->SetTensorWrapperfwd_grad_grad_x(fwd_grad_grad_x); + grad_node->SetTensorWrapperfwd_grad_grad_y(fwd_grad_grad_y); + // SetGradOutMeta & SetEdges + grad_node->SetGradOutMeta(x, 0); + grad_node->SetGradOutMeta(y, 1); + grad_node->SetGradOutMeta(fwd_grad_out, 2); + grad_node->SetGradOutMeta(fwd_grad_grad_x, 3); + grad_node->SetGradOutMeta(fwd_grad_grad_y, 4); + // SetOutRank & SetHistory & SetGradInMeta + if (grad_x_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_x_autograd_meta, 0); + } + if (grad_y_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_y_autograd_meta, 1); + } + if (grad_grad_out_autograd_meta) { + egr::EagerUtils::SetOutRankWithSlot(grad_grad_out_autograd_meta, 2); + } + if (grad_x_autograd_meta) { + egr::EagerUtils::SetHistory(grad_x_autograd_meta, grad_node); + } + if (grad_y_autograd_meta) { + egr::EagerUtils::SetHistory(grad_y_autograd_meta, grad_node); + } + if (grad_grad_out_autograd_meta) { + egr::EagerUtils::SetHistory(grad_grad_out_autograd_meta, grad_node); + } + grad_node->SetGradInMeta(grad_x, 0); + grad_node->SetGradInMeta(grad_y, 1); + grad_node->SetGradInMeta(grad_grad_out, 2); + // Set TensorWrappers for Forward Outputs if needed + } + } + + VLOG(4) << "Finish AD API GRAD: multiply_double_grad"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_FWD_GRAD_GRAD_X_TEMPLATE = + " \n( fwd_grad_grad_x , [%s]), "; + std::string input_fwd_grad_grad_x_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_X_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_x)); + input_str += input_fwd_grad_grad_x_str; + const char* TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE = + " \n( fwd_grad_grad_y , [%s]), "; + std::string input_fwd_grad_grad_y_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_y)); + input_str += input_fwd_grad_grad_y_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_FWD_GRAD_OUT_TEMPLATE = " \n( fwd_grad_out , [%s]), "; + std::string input_fwd_grad_out_str = paddle::string::Sprintf( + TENSOR_FWD_GRAD_OUT_TEMPLATE, egr::EagerUtils::TensorStr(fwd_grad_out)); + input_str += input_fwd_grad_out_str; + const char* TENSOR_GRAD_X_TEMPLATE = " \n ( grad_x , [%s]), "; + std::string output_grad_x_str = paddle::string::Sprintf( + TENSOR_GRAD_X_TEMPLATE, egr::EagerUtils::TensorStr(grad_x)); + output_str += output_grad_x_str; + const char* TENSOR_GRAD_Y_TEMPLATE = " \n ( grad_y , [%s]), "; + std::string output_grad_y_str = paddle::string::Sprintf( + TENSOR_GRAD_Y_TEMPLATE, egr::EagerUtils::TensorStr(grad_y)); + output_str += output_grad_y_str; + const char* TENSOR_GRAD_GRAD_OUT_TEMPLATE = " \n ( grad_grad_out , [%s]), "; + std::string output_grad_grad_out_str = + paddle::string::Sprintf(TENSOR_GRAD_GRAD_OUT_TEMPLATE, + egr::EagerUtils::TensorStr(grad_grad_out)); + output_str += output_grad_grad_out_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Return + if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns); + return returns; +} + +paddle::small_vector, egr::kSlotSmallVectorSize> +MultiplyTripleGradNode::operator()( + paddle::small_vector, + egr::kSlotSmallVectorSize>& grads, + bool create_graph, + bool is_new_grad) { + VLOG(3) << "Running AD API GRAD: " + << "multiply_triple_grad"; + // Fill Zero For GradIn Tensors + const auto& input_metas = this->InputMeta(); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[0][0], + input_metas[0][0]); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[1][0], + input_metas[1][0]); + egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[2][0], + input_metas[2][0]); + + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); + + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_); + auto y = egr::EagerUtils::RecoverTensorWrapper(&this->y_); + auto fwd_grad_out = + egr::EagerUtils::RecoverTensorWrapper(&this->fwd_grad_out_); + auto fwd_grad_grad_x = + egr::EagerUtils::RecoverTensorWrapper(&this->fwd_grad_grad_x_); + + paddle::optional fwd_grad_grad_x_optional; + if (fwd_grad_grad_x.impl()) + fwd_grad_grad_x_optional = + paddle::make_optional(fwd_grad_grad_x); + + auto fwd_grad_grad_y = + egr::EagerUtils::RecoverTensorWrapper(&this->fwd_grad_grad_y_); + + paddle::optional fwd_grad_grad_y_optional; + if (fwd_grad_grad_y.impl()) + fwd_grad_grad_y_optional = + paddle::make_optional(fwd_grad_grad_y); + + auto& grad_x_grad = hooked_grads[0][0]; + + paddle::optional grad_x_grad_optional; + if (grad_x_grad.initialized()) + grad_x_grad_optional = paddle::make_optional(grad_x_grad); + + auto& grad_y_grad = hooked_grads[1][0]; + + paddle::optional grad_y_grad_optional; + if (grad_y_grad.initialized()) + grad_y_grad_optional = paddle::make_optional(grad_y_grad); + + auto& grad_grad_out_grad = hooked_grads[2][0]; + + paddle::optional grad_grad_out_grad_optional; + if (grad_grad_out_grad.initialized()) + grad_grad_out_grad_optional = + paddle::make_optional(grad_grad_out_grad); + + auto& axis = this->axis_; + // Prepare Grad function call + + const auto& out_metas = OutputMeta(); + paddle::small_vector, egr::kSlotSmallVectorSize> + returns(5); + for (int i = 0; i < 5; ++i) { + out_metas[i].size() == 0 ? returns[i].resize(1) + : returns[i].resize(out_metas[i].size()); + } + + auto* api_output_0 = + (out_metas[0].empty() || out_metas[0][0].IsStopGradient()) + ? nullptr + : &returns[0][0]; + auto* api_output_1 = + (out_metas[1].empty() || out_metas[1][0].IsStopGradient()) + ? nullptr + : &returns[1][0]; + auto* api_output_2 = + (out_metas[2].empty() || out_metas[2][0].IsStopGradient()) + ? nullptr + : &returns[2][0]; + auto* api_output_3 = + (out_metas[3].empty() || out_metas[3][0].IsStopGradient()) + ? nullptr + : &returns[3][0]; + auto* api_output_4 = + (out_metas[4].empty() || out_metas[4][0].IsStopGradient()) + ? nullptr + : &returns[4][0]; + // Runtime check if we need next grad + bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph; + + // Inplace Check + + // Inplace Strategy + + VLOG(5) << "Running C++ API: " + << "multiply_triple_grad"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_GRAD_X_GRAD_TEMPLATE = " \n( grad_x_grad , [%s]), "; + std::string input_grad_x_grad_str = paddle::string::Sprintf( + TENSOR_GRAD_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_x_grad)); + input_str += input_grad_x_grad_str; + const char* TENSOR_GRAD_Y_GRAD_TEMPLATE = " \n( grad_y_grad , [%s]), "; + std::string input_grad_y_grad_str = paddle::string::Sprintf( + TENSOR_GRAD_Y_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_y_grad)); + input_str += input_grad_y_grad_str; + const char* TENSOR_GRAD_GRAD_OUT_GRAD_TEMPLATE = + " \n( grad_grad_out_grad , [%s]), "; + std::string input_grad_grad_out_grad_str = + paddle::string::Sprintf(TENSOR_GRAD_GRAD_OUT_GRAD_TEMPLATE, + egr::EagerUtils::TensorStr(grad_grad_out_grad)); + input_str += input_grad_grad_out_grad_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_FWD_GRAD_OUT_TEMPLATE = " \n( fwd_grad_out , [%s]), "; + std::string input_fwd_grad_out_str = paddle::string::Sprintf( + TENSOR_FWD_GRAD_OUT_TEMPLATE, egr::EagerUtils::TensorStr(fwd_grad_out)); + input_str += input_fwd_grad_out_str; + const char* TENSOR_FWD_GRAD_GRAD_X_TEMPLATE = + " \n( fwd_grad_grad_x , [%s]), "; + std::string input_fwd_grad_grad_x_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_X_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_x)); + input_str += input_fwd_grad_grad_x_str; + const char* TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE = + " \n( fwd_grad_grad_y , [%s]), "; + std::string input_fwd_grad_grad_y_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_y)); + input_str += input_fwd_grad_grad_y_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Call grad_api function + + paddle::experimental::multiply_triple_grad(x, + y, + fwd_grad_out, + fwd_grad_grad_x_optional, + fwd_grad_grad_y_optional, + grad_x_grad_optional, + grad_y_grad_optional, + grad_grad_out_grad_optional, + axis, + api_output_0, + api_output_1, + api_output_2, + api_output_3, + api_output_4); + // Check NaN and Inf id needed + + if (FLAGS_check_nan_inf) { + try { + egr::CheckTensorHasNanOrInf("multiply_triple_grad", returns); + } catch (...) { + LOG(WARNING) << "There are nan/inf in (multiply_triple_grad)"; + std::rethrow_exception(std::current_exception()); + } + } + + // Get GradOut autograd_meta + + auto& x_grad = returns[0][0]; + egr::AutogradMeta* x_grad_autograd_meta = + returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad) + : nullptr; + if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false); + + auto& y_grad = returns[1][0]; + egr::AutogradMeta* y_grad_autograd_meta = + returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&y_grad) + : nullptr; + if (y_grad_autograd_meta) y_grad_autograd_meta->SetStopGradient(false); + + auto& fwd_grad_out_grad = returns[2][0]; + egr::AutogradMeta* fwd_grad_out_grad_autograd_meta = + returns[2][0].initialized() + ? egr::EagerUtils::autograd_meta(&fwd_grad_out_grad) + : nullptr; + if (fwd_grad_out_grad_autograd_meta) + fwd_grad_out_grad_autograd_meta->SetStopGradient(false); + + auto& fwd_grad_grad_x_grad = returns[3][0]; + egr::AutogradMeta* fwd_grad_grad_x_grad_autograd_meta = + returns[3][0].initialized() + ? egr::EagerUtils::autograd_meta(&fwd_grad_grad_x_grad) + : nullptr; + if (fwd_grad_grad_x_grad_autograd_meta) + fwd_grad_grad_x_grad_autograd_meta->SetStopGradient(false); + + auto& fwd_grad_grad_y_grad = returns[4][0]; + egr::AutogradMeta* fwd_grad_grad_y_grad_autograd_meta = + returns[4][0].initialized() + ? egr::EagerUtils::autograd_meta(&fwd_grad_grad_y_grad) + : nullptr; + if (fwd_grad_grad_y_grad_autograd_meta) + fwd_grad_grad_y_grad_autograd_meta->SetStopGradient(false); + + // Create Grad Node + if (trace_backward) { + PADDLE_THROW(phi::errors::Unavailable( + "The Op multiply_triple_grad doesn't have any grad" + "op. If you don't intend calculating higher order" + "derivatives, please set `create_graph`to False.")); + } + VLOG(4) << "Finish AD API GRAD: multiply_triple_grad"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_GRAD_X_GRAD_TEMPLATE = " \n( grad_x_grad , [%s]), "; + std::string input_grad_x_grad_str = paddle::string::Sprintf( + TENSOR_GRAD_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_x_grad)); + input_str += input_grad_x_grad_str; + const char* TENSOR_GRAD_Y_GRAD_TEMPLATE = " \n( grad_y_grad , [%s]), "; + std::string input_grad_y_grad_str = paddle::string::Sprintf( + TENSOR_GRAD_Y_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(grad_y_grad)); + input_str += input_grad_y_grad_str; + const char* TENSOR_GRAD_GRAD_OUT_GRAD_TEMPLATE = + " \n( grad_grad_out_grad , [%s]), "; + std::string input_grad_grad_out_grad_str = + paddle::string::Sprintf(TENSOR_GRAD_GRAD_OUT_GRAD_TEMPLATE, + egr::EagerUtils::TensorStr(grad_grad_out_grad)); + input_str += input_grad_grad_out_grad_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_FWD_GRAD_OUT_TEMPLATE = " \n( fwd_grad_out , [%s]), "; + std::string input_fwd_grad_out_str = paddle::string::Sprintf( + TENSOR_FWD_GRAD_OUT_TEMPLATE, egr::EagerUtils::TensorStr(fwd_grad_out)); + input_str += input_fwd_grad_out_str; + const char* TENSOR_FWD_GRAD_GRAD_X_TEMPLATE = + " \n( fwd_grad_grad_x , [%s]), "; + std::string input_fwd_grad_grad_x_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_X_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_x)); + input_str += input_fwd_grad_grad_x_str; + const char* TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE = + " \n( fwd_grad_grad_y , [%s]), "; + std::string input_fwd_grad_grad_y_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_GRAD_Y_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_y)); + input_str += input_fwd_grad_grad_y_str; + const char* TENSOR_X_GRAD_TEMPLATE = " \n ( x_grad , [%s]), "; + std::string output_x_grad_str = paddle::string::Sprintf( + TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(x_grad)); + output_str += output_x_grad_str; + const char* TENSOR_Y_GRAD_TEMPLATE = " \n ( y_grad , [%s]), "; + std::string output_y_grad_str = paddle::string::Sprintf( + TENSOR_Y_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(y_grad)); + output_str += output_y_grad_str; + const char* TENSOR_FWD_GRAD_OUT_GRAD_TEMPLATE = + " \n ( fwd_grad_out_grad , [%s]), "; + std::string output_fwd_grad_out_grad_str = + paddle::string::Sprintf(TENSOR_FWD_GRAD_OUT_GRAD_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_out_grad)); + output_str += output_fwd_grad_out_grad_str; + const char* TENSOR_FWD_GRAD_GRAD_X_GRAD_TEMPLATE = + " \n ( fwd_grad_grad_x_grad , [%s]), "; + std::string output_fwd_grad_grad_x_grad_str = paddle::string::Sprintf( + TENSOR_FWD_GRAD_GRAD_X_GRAD_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_x_grad)); + output_str += output_fwd_grad_grad_x_grad_str; + const char* TENSOR_FWD_GRAD_GRAD_Y_GRAD_TEMPLATE = + " \n ( fwd_grad_grad_y_grad , [%s]), "; + std::string output_fwd_grad_grad_y_grad_str = paddle::string::Sprintf( + TENSOR_FWD_GRAD_GRAD_Y_GRAD_TEMPLATE, + egr::EagerUtils::TensorStr(fwd_grad_grad_y_grad)); + output_str += output_fwd_grad_grad_y_grad_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Return + if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns); + return returns; +} + +namespace sparse { +paddle::small_vector, egr::kSlotSmallVectorSize> +MultiplyGradNode::operator()( + paddle::small_vector, + egr::kSlotSmallVectorSize>& grads, + bool create_graph, + bool is_new_grad) { + VLOG(3) << "Running AD API GRAD: " + << "multiply_grad"; + // Fill Zero For GradIn Tensors + const auto& input_metas = this->InputMeta(); + egr::EagerUtils::FillZeroForEmptyGradInput(&grads[0][0], input_metas[0][0]); + + // Apply Gradient Hooks + auto hooked_grads = ApplyGradientHooks(grads); + + // Collect GradIn Tensors, Attrs and Recovered TensorWrappers + auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_); + auto y = egr::EagerUtils::RecoverTensorWrapper(&this->y_); + auto& out_grad = hooked_grads[0][0]; + // Prepare Grad function call + + const auto& out_metas = OutputMeta(); + paddle::small_vector, egr::kSlotSmallVectorSize> + returns(2); + for (int i = 0; i < 2; ++i) { + out_metas[i].size() == 0 ? returns[i].resize(1) + : returns[i].resize(out_metas[i].size()); + } + + auto* api_output_0 = + (out_metas[0].empty() || out_metas[0][0].IsStopGradient()) + ? nullptr + : &returns[0][0]; + auto* api_output_1 = + (out_metas[1].empty() || out_metas[1][0].IsStopGradient()) + ? nullptr + : &returns[1][0]; + // Runtime check if we need next grad + bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph; + + // Inplace Check + + // Inplace Strategy + + VLOG(5) << "Running C++ API: " + << "multiply_grad"; + // Before log info + + if (VLOG_IS_ON(3)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), "; + std::string input_out_grad_str = paddle::string::Sprintf( + TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(out_grad)); + input_str += input_out_grad_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str); + } + + // Call grad_api function + + paddle::experimental::sparse::multiply_grad( + x, y, out_grad, api_output_0, api_output_1); + // Check NaN and Inf id needed + + if (FLAGS_check_nan_inf) { + try { + egr::CheckTensorHasNanOrInf("multiply_grad", returns); + } catch (...) { + LOG(WARNING) << "There are nan/inf in (multiply_grad)"; + std::rethrow_exception(std::current_exception()); + } + } + + // Get GradOut autograd_meta + + auto& x_grad = returns[0][0]; + egr::AutogradMeta* x_grad_autograd_meta = + returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad) + : nullptr; + if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false); + + auto& y_grad = returns[1][0]; + egr::AutogradMeta* y_grad_autograd_meta = + returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&y_grad) + : nullptr; + if (y_grad_autograd_meta) y_grad_autograd_meta->SetStopGradient(false); + + // Create Grad Node + if (trace_backward) { + PADDLE_THROW(phi::errors::Unavailable( + "The Op multiply_grad doesn't have any grad" + "op. If you don't intend calculating higher order" + "derivatives, please set `create_graph`to False.")); + } + VLOG(4) << "Finish AD API GRAD: multiply_grad"; + // LOG IF DEBUG + + if (VLOG_IS_ON(4)) { + const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } "; + + std::string input_str = ""; + std::string output_str = ""; + const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), "; + std::string input_out_grad_str = paddle::string::Sprintf( + TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(out_grad)); + input_str += input_out_grad_str; + const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), "; + std::string input_x_str = paddle::string::Sprintf( + TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x)); + input_str += input_x_str; + const char* TENSOR_Y_TEMPLATE = " \n( y , [%s]), "; + std::string input_y_str = paddle::string::Sprintf( + TENSOR_Y_TEMPLATE, egr::EagerUtils::TensorStr(y)); + input_str += input_y_str; + const char* TENSOR_X_GRAD_TEMPLATE = " \n ( x_grad , [%s]), "; + std::string output_x_grad_str = paddle::string::Sprintf( + TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(x_grad)); + output_str += output_x_grad_str; + const char* TENSOR_Y_GRAD_TEMPLATE = " \n ( y_grad , [%s]), "; + std::string output_y_grad_str = paddle::string::Sprintf( + TENSOR_Y_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(y_grad)); + output_str += output_y_grad_str; + VLOG(4) << paddle::string::Sprintf( + INPUT_PRINT_TEMPLATE, input_str, output_str); + } + + // Return + if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns); + return returns; +} + +} // namespace sparse diff --git a/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h b/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h index 0c827868fc68fbb0fe92489f949cc565c4620ceb..e5496800197e4fce8254eb7ab084c21dd22c1497 100644 --- a/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h +++ b/paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h @@ -205,6 +205,177 @@ class AddNGradNodeFinal : public egr::GradNodeBase { // Attributes }; +class MultiplyGradNode : public egr::GradNodeBase { + public: + MultiplyGradNode() : egr::GradNodeBase() {} + MultiplyGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~MultiplyGradNode() override = default; + + virtual paddle::small_vector, + egr::kSlotSmallVectorSize> + operator()(paddle::small_vector, // NOLINT + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override; + std::string name() override { return "MultiplyGradNode"; } + + void ClearTensorWrappers() override { + x_.clear(); + y_.clear(); + + SetIsTensorWrappersCleared(true); + } + + std::shared_ptr Copy() const override { + auto copied_node = + std::shared_ptr(new MultiplyGradNode(*this)); + return copied_node; + } + + // SetTensorWrapperX, SetTensorWrapperY, ... + void SetTensorWrapperx(const paddle::Tensor& x) { + x_ = egr::TensorWrapper(x, false); + } + void SetTensorWrappery(const paddle::Tensor& y) { + y_ = egr::TensorWrapper(y, false); + } + + void SetTensorWrapperNoNeedBufferx(const paddle::Tensor& x) { + x_ = egr::TensorWrapper(x, true); + } + void SetTensorWrapperNoNeedBuffery(const paddle::Tensor& y) { + y_ = egr::TensorWrapper(y, true); + } + + // SetAttributes + void SetAttributeaxis(const int& axis) { axis_ = axis; } + + private: + // TensorWrappers + egr::TensorWrapper x_; + egr::TensorWrapper y_; + + // Attributes + int axis_ = -1; +}; + +class MultiplyDoubleGradNode : public egr::GradNodeBase { + public: + MultiplyDoubleGradNode() : egr::GradNodeBase() {} + MultiplyDoubleGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~MultiplyDoubleGradNode() override = default; + + virtual paddle::small_vector, + egr::kSlotSmallVectorSize> + operator()(paddle::small_vector, // NOLINT + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override; + std::string name() override { return "MultiplyDoubleGradNode"; } + + void ClearTensorWrappers() override { + x_.clear(); + y_.clear(); + grad_out_.clear(); + + SetIsTensorWrappersCleared(true); + } + + std::shared_ptr Copy() const override { + auto copied_node = std::shared_ptr( + new MultiplyDoubleGradNode(*this)); + return copied_node; + } + + // SetTensorWrapperX, SetTensorWrapperY, ... + void SetTensorWrapperx(const paddle::Tensor& x) { + x_ = egr::TensorWrapper(x, false); + } + void SetTensorWrappery(const paddle::Tensor& y) { + y_ = egr::TensorWrapper(y, false); + } + void SetTensorWrappergrad_out(const paddle::Tensor& grad_out) { + grad_out_ = egr::TensorWrapper(grad_out, false); + } + + // SetAttributes + void SetAttributeaxis(const int& axis) { axis_ = axis; } + + private: + // TensorWrappers + egr::TensorWrapper x_; + egr::TensorWrapper y_; + egr::TensorWrapper grad_out_; + + // Attributes + int axis_ = -1; +}; + +class MultiplyTripleGradNode : public egr::GradNodeBase { + public: + MultiplyTripleGradNode() : egr::GradNodeBase() {} + MultiplyTripleGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~MultiplyTripleGradNode() override = default; + + virtual paddle::small_vector, + egr::kSlotSmallVectorSize> + operator()(paddle::small_vector, // NOLINT + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override; + std::string name() override { return "MultiplyTripleGradNode"; } + + void ClearTensorWrappers() override { + x_.clear(); + y_.clear(); + fwd_grad_out_.clear(); + fwd_grad_grad_x_.clear(); + fwd_grad_grad_y_.clear(); + + SetIsTensorWrappersCleared(true); + } + + std::shared_ptr Copy() const override { + auto copied_node = std::shared_ptr( + new MultiplyTripleGradNode(*this)); + return copied_node; + } + + // SetTensorWrapperX, SetTensorWrapperY, ... + void SetTensorWrapperx(const paddle::Tensor& x) { + x_ = egr::TensorWrapper(x, false); + } + void SetTensorWrappery(const paddle::Tensor& y) { + y_ = egr::TensorWrapper(y, false); + } + void SetTensorWrapperfwd_grad_out(const paddle::Tensor& fwd_grad_out) { + fwd_grad_out_ = egr::TensorWrapper(fwd_grad_out, false); + } + void SetTensorWrapperfwd_grad_grad_x(const paddle::Tensor& fwd_grad_grad_x) { + fwd_grad_grad_x_ = egr::TensorWrapper(fwd_grad_grad_x, false); + } + void SetTensorWrapperfwd_grad_grad_y(const paddle::Tensor& fwd_grad_grad_y) { + fwd_grad_grad_y_ = egr::TensorWrapper(fwd_grad_grad_y, false); + } + + // SetAttributes + void SetAttributeaxis(const int& axis) { axis_ = axis; } + + private: + // TensorWrappers + egr::TensorWrapper x_; + egr::TensorWrapper y_; + egr::TensorWrapper fwd_grad_out_; + egr::TensorWrapper fwd_grad_grad_x_; + egr::TensorWrapper fwd_grad_grad_y_; + + // Attributes + int axis_ = -1; +}; + class SyncBatchNormGradNode : public egr::GradNodeBase { public: SyncBatchNormGradNode() : egr::GradNodeBase() {} @@ -374,4 +545,50 @@ class SyncBatchNormGradNode : public egr::GradNodeBase { bool trainable_statistics_; }; +class MultiplyGradNode : public egr::GradNodeBase { + public: + MultiplyGradNode() : egr::GradNodeBase() {} + MultiplyGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num) + : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {} + ~MultiplyGradNode() override = default; + + virtual paddle::small_vector, + egr::kSlotSmallVectorSize> + operator()(paddle::small_vector, // NOLINT + egr::kSlotSmallVectorSize>& grads, // NOLINT + bool create_graph = false, + bool is_new_grad = false) override; + std::string name() override { return "MultiplyGradNode"; } + + void ClearTensorWrappers() override { + x_.clear(); + y_.clear(); + + SetIsTensorWrappersCleared(true); + } + + std::shared_ptr Copy() const override { + auto copied_node = + std::shared_ptr(new MultiplyGradNode(*this)); + return copied_node; + } + + // SetTensorWrapperX, SetTensorWrapperY, ... + void SetTensorWrapperx(const paddle::Tensor& x) { + x_ = egr::TensorWrapper(x, false); + } + void SetTensorWrappery(const paddle::Tensor& y) { + y_ = egr::TensorWrapper(y, false); + } + + // SetAttributes + + private: + // TensorWrappers + egr::TensorWrapper x_; + egr::TensorWrapper y_; + + // Attributes +}; + } // namespace sparse diff --git a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py index 4e105d138b7e8e8a8e60d2382136fca4544cdff2..81ef56d04d0953af30f8392fb63f30efff778ae4 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/eager_gen.py @@ -58,6 +58,8 @@ black_ops_list = [ "add_n", "add_n_grad", "sync_batch_norm_", + "multiply", + "multiply_grad", ] diff --git a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py index 16cd6f0ffd7fcaf5ca1ea42db4c34fef06f8e557..bb6744111a5fe5fcef43b54099a5a25b2882d400 100644 --- a/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py +++ b/paddle/fluid/eager/auto_code_generator/generator/python_c_gen.py @@ -166,6 +166,7 @@ PYTHON_C_WRAPPER_TEMPLATE = """ #include "paddle/fluid/platform/profiler/event_tracing.h" #include "paddle/fluid/pybind/op_function_common.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" #include "paddle/fluid/pybind/eager_custom_python_api.h" #include "paddle/fluid/pybind/eager.h" #include "paddle/fluid/eager/amp_utils.h" diff --git a/paddle/fluid/prim/api/auto_code_generated/eager_gen.py b/paddle/fluid/prim/api/auto_code_generated/eager_gen.py index ec8419169f9b5a47e813d58d623615538545630a..106fa1ce586489b8fd7237ff87ae5651d64d72bd 100644 --- a/paddle/fluid/prim/api/auto_code_generated/eager_gen.py +++ b/paddle/fluid/prim/api/auto_code_generated/eager_gen.py @@ -351,6 +351,7 @@ def eager_source_include(): return """ #include "paddle/fluid/eager/api/all.h" #include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h" +#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h" #include "paddle/fluid/prim/api/generated_prim/prim_generated_api.h" """