“aed8803a405bc4f88e9a65e3c4ab6bb63afd741b”上不存在“...tools/git@gitcode.net:s920243400/PaddleDetection.git”
未验证 提交 336160cf 编写于 作者: W wanghuancoder 提交者: GitHub

force sync batch norm grad sequential (#52268)

* force sync batch norm grad sequential
上级 551ff882
...@@ -26,3 +26,40 @@ paddle::Tensor conv2d_ad_func(const paddle::Tensor& input, ...@@ -26,3 +26,40 @@ paddle::Tensor conv2d_ad_func(const paddle::Tensor& input,
std::vector<int> dilations, std::vector<int> dilations,
int groups, int groups,
std::string data_format); std::string data_format);
// AD (autograd) entry point for the inplace dense sync_batch_norm_ op.
// Returns (out, mean_out, variance_out, saved_mean, saved_variance,
// reserve_space). `mean` and `variance` are updated in place, which is why
// they are taken by non-const reference and returned as reference tuple
// slots (slots 1 and 2).
std::tuple<paddle::Tensor,
           paddle::Tensor&,
           paddle::Tensor&,
           paddle::Tensor,
           paddle::Tensor,
           paddle::Tensor>
sync_batch_norm__ad_func(const paddle::Tensor& x,
                         paddle::Tensor& mean,      // NOLINT
                         paddle::Tensor& variance,  // NOLINT
                         const paddle::Tensor& scale,
                         const paddle::Tensor& bias,
                         bool is_test,
                         float momentum,
                         float epsilon,
                         std::string data_layout,
                         bool use_global_stats,
                         bool trainable_statistics);
namespace sparse {
// Sparse-tensor counterpart of sync_batch_norm__ad_func. Same contract as
// the dense overload: (out, mean_out&, variance_out&, saved_mean,
// saved_variance, reserve_space), with `mean`/`variance` mutated in place.
std::tuple<paddle::Tensor,
           paddle::Tensor&,
           paddle::Tensor&,
           paddle::Tensor,
           paddle::Tensor,
           paddle::Tensor>
sync_batch_norm__ad_func(const paddle::Tensor& x,
                         paddle::Tensor& mean,      // NOLINT
                         paddle::Tensor& variance,  // NOLINT
                         const paddle::Tensor& scale,
                         const paddle::Tensor& bias,
                         bool is_test,
                         float momentum,
                         float epsilon,
                         std::string data_layout,
                         bool use_global_stats,
                         bool trainable_statistics);
}  // namespace sparse
# List of manually written eager-mode forward-function sources, exported to
# the parent scope. (The side-by-side diff paste duplicated every line; this
# is the reconstructed, valid CMake form, adding the new sync_batch_norm
# forward source.)
set(eager_manual_functions
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/add_n_fwd_func.cc
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/conv2d_fwd_function.cc
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/forwards/sync_batch_norm_fwd_func.cc
    PARENT_SCOPE)
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/fluid/eager/eager_layout_auto_tune.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/api/include/sparse_api.h"
#pragma GCC diagnostic ignored "-Wunused-variable"
DECLARE_bool(check_nan_inf);
DECLARE_string(tensor_operants_mode);
// AD API for the inplace dense sync_batch_norm_ op.
//
// Pipeline: (1) optional layout autotune, which re-enters this function with
// layout-transformed inputs; (2) the actual forward call
// paddle::experimental::sync_batch_norm_; (3) if any input requires grad,
// construction of a SyncBatchNormGradNode wired with 6 grad-in slots
// (out, mean_out, variance_out, saved_mean, saved_variance, reserve_space)
// and 5 grad-out slots, of which only x (0), scale (3) and bias (4) receive
// gradients.
//
// `mean` and `variance` are mutated in place (non-const references, returned
// as reference tuple slots 1 and 2).
std::tuple<paddle::Tensor,
           paddle::Tensor&,
           paddle::Tensor&,
           paddle::Tensor,
           paddle::Tensor,
           paddle::Tensor>
sync_batch_norm__ad_func(const paddle::Tensor& x,
                         paddle::Tensor& mean,      // NOLINT
                         paddle::Tensor& variance,  // NOLINT
                         const paddle::Tensor& scale,
                         const paddle::Tensor& bias,
                         bool is_test,
                         float momentum,
                         float epsilon,
                         std::string data_layout,
                         bool use_global_stats,
                         bool trainable_statistics) {
  FLAGS_tensor_operants_mode = "eager";
  VLOG(3) << "Running AD API: "
          << "sync_batch_norm_";
  // Dygraph Record Event
  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "sync_batch_norm_ dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  // AMP Logic
  VLOG(5) << " No AMP for sync_batch_norm__ad_func because it is a inplace or "
             "cast api. ";
  // Layout autotune: transform inputs to the tuned layout, then recurse into
  // this function once with autotune disabled via the guard below.
  if (egr::Controller::Instance().UseLayoutAutoTune()) {
    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
        tensors_vector = {{x}, {mean}, {variance}, {scale}, {bias}};
    auto op_name = phi::TransToFluidOpName("sync_batch_norm_");
    auto transformer = egr::EagerLayoutAutotune<std::string>(
        op_name, tensors_vector, &data_layout);
    auto new_x = transformer->TransInTensor("x", x);
    auto new_mean = transformer->TransInTensor("mean", mean);
    auto new_variance = transformer->TransInTensor("variance", variance);
    auto new_scale = transformer->TransInTensor("scale", scale);
    auto new_bias = transformer->TransInTensor("bias", bias);
    VLOG(5) << "Check and Prepare For LAYOUT " << op_name;
    // Guard disables layout autotune for the recursive call below.
    paddle::imperative::LayoutAutotuneGuard guard(
        egr::Controller::Instance().GetCurrentTracer(), false);
    std::tuple<paddle::Tensor,
               paddle::Tensor&,
               paddle::Tensor&,
               paddle::Tensor,
               paddle::Tensor,
               paddle::Tensor>
        api_result = sync_batch_norm__ad_func(new_x,
                                              new_mean,
                                              new_variance,
                                              new_scale,
                                              new_bias,
                                              is_test,
                                              momentum,
                                              epsilon,
                                              data_layout,
                                              use_global_stats,
                                              trainable_statistics);
    auto& out = std::get<0>(api_result);
    transformer->SetOutTensorLayout(&out);
    auto& mean_out = std::get<1>(api_result);
    transformer->SetOutTensorLayout(&mean_out);
    auto& variance_out = std::get<2>(api_result);
    transformer->SetOutTensorLayout(&variance_out);
    auto& saved_mean = std::get<3>(api_result);
    transformer->SetOutTensorLayout(&saved_mean);
    auto& saved_variance = std::get<4>(api_result);
    transformer->SetOutTensorLayout(&saved_variance);
    auto& reserve_space = std::get<5>(api_result);
    transformer->SetOutTensorLayout(&reserve_space);
    // Returns
    return std::tuple<paddle::Tensor,
                      paddle::Tensor&,
                      paddle::Tensor&,
                      paddle::Tensor,
                      paddle::Tensor,
                      paddle::Tensor>{
        out, mean_out, variance_out, saved_mean, saved_variance, reserve_space};
  }
  // Get Input AutoGradMeta (nullable: inputs may have no grad meta)
  egr::AutogradMeta* x_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(x);
  egr::AutogradMeta* mean_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(mean);
  egr::AutogradMeta* variance_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(variance);
  egr::AutogradMeta* scale_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(scale);
  egr::AutogradMeta* bias_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(bias);
  VLOG(5) << "Running C++ API: "
          << "sync_batch_norm_";
  // Before log info
  if (VLOG_IS_ON(3)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_MEAN_TEMPLATE = " \n( mean , [%s]), ";
    std::string input_mean_str = paddle::string::Sprintf(
        TENSOR_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(mean));
    input_str += input_mean_str;
    const char* TENSOR_VARIANCE_TEMPLATE = " \n( variance , [%s]), ";
    std::string input_variance_str = paddle::string::Sprintf(
        TENSOR_VARIANCE_TEMPLATE, egr::EagerUtils::TensorStr(variance));
    input_str += input_variance_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str);
  }
  // Forward API Call
  auto api_result =
      paddle::experimental::sync_batch_norm_(x,
                                             mean,
                                             variance,
                                             scale,
                                             bias,
                                             is_test,
                                             momentum,
                                             epsilon,
                                             data_layout,
                                             use_global_stats,
                                             trainable_statistics);
  // Check NaN and Inf if needed
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("sync_batch_norm_", api_result);
  }
  // Get Outputs
  auto& out = std::get<0>(api_result);
  auto& mean_out = std::get<1>(api_result);
  auto& variance_out = std::get<2>(api_result);
  auto& saved_mean = std::get<3>(api_result);
  auto& saved_variance = std::get<4>(api_result);
  auto& reserve_space = std::get<5>(api_result);
  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);
  egr::AutogradMeta* mean_out_autograd_meta =
      egr::EagerUtils::autograd_meta(&mean_out);
  egr::AutogradMeta* variance_out_autograd_meta =
      egr::EagerUtils::autograd_meta(&variance_out);
  egr::AutogradMeta* saved_mean_autograd_meta =
      egr::EagerUtils::autograd_meta(&saved_mean);
  egr::AutogradMeta* saved_variance_autograd_meta =
      egr::EagerUtils::autograd_meta(&saved_variance);
  egr::AutogradMeta* reserve_space_autograd_meta =
      egr::EagerUtils::autograd_meta(&reserve_space);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward,
                                          x_autograd_meta,
                                          mean_autograd_meta,
                                          variance_autograd_meta,
                                          scale_autograd_meta,
                                          bias_autograd_meta);
  // Check Inplace if needed
  // Node Creation
  if (require_any_grad) {
    paddle::platform::RecordEvent node_creation_record_event(
        "sync_batch_norm_ node_creation",
        paddle::platform::TracerEventType::OperatorInner,
        1);
    egr::EagerUtils::PassStopGradient(false,
                                      out_autograd_meta,
                                      mean_out_autograd_meta,
                                      variance_out_autograd_meta,
                                      saved_mean_autograd_meta,
                                      saved_variance_autograd_meta,
                                      reserve_space_autograd_meta);
    // Node Construction: 6 backward-input slots, 5 backward-output slots.
    auto grad_node =
        std::shared_ptr<SyncBatchNormGradNode>(new SyncBatchNormGradNode(6, 5));
    // Register this node to be executed sequentially during backward
    // (per the "force sync batch norm grad sequential" change) —
    // presumably so the cross-device communication in the grad kernel runs
    // in the same order on every rank; confirm against controller docs.
    egr::Controller::Instance().PushBackForceSequentialNodes(grad_node.get());
    // SetAttributes if needed
    grad_node->SetAttributemomentum(momentum);
    grad_node->SetAttributeepsilon(epsilon);
    grad_node->SetAttributedata_layout(data_layout);
    grad_node->SetAttributeis_test(is_test);
    grad_node->SetAttributeuse_global_stats(use_global_stats);
    grad_node->SetAttributetrainable_statistics(trainable_statistics);
    // Set TensorWrappers for Forward Inputs if needed
    grad_node->SetTensorWrapperx(x);
    grad_node->SetTensorWrapperscale(scale);
    grad_node->SetTensorWrapperbias(bias);
    // SetGradOutMeta & SetEdges — only x (slot 0), scale (3), bias (4)
    // receive gradients; mean/variance (slots 1, 2) do not.
    grad_node->SetGradOutMeta(x, 0);
    grad_node->SetGradOutMeta(scale, 3);
    grad_node->SetGradOutMeta(bias, 4);
    // SetOutRank & SetHistory & SetGradInMeta
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
    }
    if (mean_out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(mean_out_autograd_meta, 1);
    }
    if (variance_out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(variance_out_autograd_meta, 2);
    }
    if (saved_mean_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(saved_mean_autograd_meta, 3);
    }
    if (saved_variance_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(saved_variance_autograd_meta, 4);
    }
    if (reserve_space_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(reserve_space_autograd_meta, 5);
    }
    if (out_autograd_meta) {
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    if (mean_out_autograd_meta) {
      egr::EagerUtils::SetHistory(mean_out_autograd_meta, grad_node);
    }
    if (variance_out_autograd_meta) {
      egr::EagerUtils::SetHistory(variance_out_autograd_meta, grad_node);
    }
    if (saved_mean_autograd_meta) {
      egr::EagerUtils::SetHistory(saved_mean_autograd_meta, grad_node);
    }
    if (saved_variance_autograd_meta) {
      egr::EagerUtils::SetHistory(saved_variance_autograd_meta, grad_node);
    }
    if (reserve_space_autograd_meta) {
      egr::EagerUtils::SetHistory(reserve_space_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
    grad_node->SetGradInMeta(mean_out, 1);
    grad_node->SetGradInMeta(variance_out, 2);
    grad_node->SetGradInMeta(saved_mean, 3);
    grad_node->SetGradInMeta(saved_variance, 4);
    grad_node->SetGradInMeta(reserve_space, 5);
    // Set TensorWrappers for Forward Outputs if needed
    grad_node->SetTensorWrappersaved_mean(saved_mean);
    grad_node->SetTensorWrappersaved_variance(saved_variance);
    grad_node->SetTensorWrapperreserve_space(reserve_space);
  }
  VLOG(4) << "Finish AD API: sync_batch_norm_";
  // LOG IF DEBUG
  if (VLOG_IS_ON(4)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_MEAN_TEMPLATE = " \n( mean , [%s]), ";
    std::string input_mean_str = paddle::string::Sprintf(
        TENSOR_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(mean));
    input_str += input_mean_str;
    const char* TENSOR_VARIANCE_TEMPLATE = " \n( variance , [%s]), ";
    std::string input_variance_str = paddle::string::Sprintf(
        TENSOR_VARIANCE_TEMPLATE, egr::EagerUtils::TensorStr(variance));
    input_str += input_variance_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    const char* TENSOR_OUT_TEMPLATE = " \n( out , [%s]), ";
    std::string output_out_str = paddle::string::Sprintf(
        TENSOR_OUT_TEMPLATE, egr::EagerUtils::TensorStr(out));
    output_str += output_out_str;
    const char* TENSOR_MEAN_OUT_TEMPLATE = " \n( mean_out , [%s]), ";
    std::string output_mean_out_str = paddle::string::Sprintf(
        TENSOR_MEAN_OUT_TEMPLATE, egr::EagerUtils::TensorStr(mean_out));
    output_str += output_mean_out_str;
    const char* TENSOR_VARIANCE_OUT_TEMPLATE = " \n( variance_out , [%s]), ";
    std::string output_variance_out_str = paddle::string::Sprintf(
        TENSOR_VARIANCE_OUT_TEMPLATE, egr::EagerUtils::TensorStr(variance_out));
    output_str += output_variance_out_str;
    const char* TENSOR_SAVED_MEAN_TEMPLATE = " \n( saved_mean , [%s]), ";
    std::string output_saved_mean_str = paddle::string::Sprintf(
        TENSOR_SAVED_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(saved_mean));
    output_str += output_saved_mean_str;
    const char* TENSOR_SAVED_VARIANCE_TEMPLATE =
        " \n( saved_variance , [%s]), ";
    std::string output_saved_variance_str =
        paddle::string::Sprintf(TENSOR_SAVED_VARIANCE_TEMPLATE,
                                egr::EagerUtils::TensorStr(saved_variance));
    output_str += output_saved_variance_str;
    const char* TENSOR_RESERVE_SPACE_TEMPLATE = " \n( reserve_space , [%s]), ";
    std::string output_reserve_space_str =
        paddle::string::Sprintf(TENSOR_RESERVE_SPACE_TEMPLATE,
                                egr::EagerUtils::TensorStr(reserve_space));
    output_str += output_reserve_space_str;
    VLOG(4) << paddle::string::Sprintf(
        INPUT_PRINT_TEMPLATE, input_str, output_str);
  }
  // Returns
  return std::tuple<paddle::Tensor,
                    paddle::Tensor&,
                    paddle::Tensor&,
                    paddle::Tensor,
                    paddle::Tensor,
                    paddle::Tensor>{
      out, mean_out, variance_out, saved_mean, saved_variance, reserve_space};
}
namespace sparse {
// AD API for the inplace sparse sync_batch_norm_ op. Mirrors the dense
// overload exactly except that the forward call dispatches to
// paddle::experimental::sparse::sync_batch_norm_ and the grad node is the
// sparse::SyncBatchNormGradNode. Grad-out slots used: x (0), scale (3),
// bias (4); mean/variance are updated in place and carry no gradient.
std::tuple<paddle::Tensor,
           paddle::Tensor&,
           paddle::Tensor&,
           paddle::Tensor,
           paddle::Tensor,
           paddle::Tensor>
sync_batch_norm__ad_func(const paddle::Tensor& x,
                         paddle::Tensor& mean,      // NOLINT
                         paddle::Tensor& variance,  // NOLINT
                         const paddle::Tensor& scale,
                         const paddle::Tensor& bias,
                         bool is_test,
                         float momentum,
                         float epsilon,
                         std::string data_layout,
                         bool use_global_stats,
                         bool trainable_statistics) {
  FLAGS_tensor_operants_mode = "eager";
  VLOG(3) << "Running AD API: "
          << "sync_batch_norm_";
  // Dygraph Record Event
  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "sync_batch_norm_ dygraph",
      paddle::platform::TracerEventType::Operator,
      1);
  // AMP Logic
  VLOG(5) << " No AMP for sync_batch_norm__ad_func because it is a inplace or "
             "cast api. ";
  // Layout autotune: transform inputs, then recurse once with autotune
  // disabled by the guard below.
  if (egr::Controller::Instance().UseLayoutAutoTune()) {
    paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
        tensors_vector = {{x}, {mean}, {variance}, {scale}, {bias}};
    auto op_name = phi::TransToFluidOpName("sync_batch_norm_");
    auto transformer = egr::EagerLayoutAutotune<std::string>(
        op_name, tensors_vector, &data_layout);
    auto new_x = transformer->TransInTensor("x", x);
    auto new_mean = transformer->TransInTensor("mean", mean);
    auto new_variance = transformer->TransInTensor("variance", variance);
    auto new_scale = transformer->TransInTensor("scale", scale);
    auto new_bias = transformer->TransInTensor("bias", bias);
    VLOG(5) << "Check and Prepare For LAYOUT " << op_name;
    paddle::imperative::LayoutAutotuneGuard guard(
        egr::Controller::Instance().GetCurrentTracer(), false);
    std::tuple<paddle::Tensor,
               paddle::Tensor&,
               paddle::Tensor&,
               paddle::Tensor,
               paddle::Tensor,
               paddle::Tensor>
        api_result = sync_batch_norm__ad_func(new_x,
                                              new_mean,
                                              new_variance,
                                              new_scale,
                                              new_bias,
                                              is_test,
                                              momentum,
                                              epsilon,
                                              data_layout,
                                              use_global_stats,
                                              trainable_statistics);
    auto& out = std::get<0>(api_result);
    transformer->SetOutTensorLayout(&out);
    auto& mean_out = std::get<1>(api_result);
    transformer->SetOutTensorLayout(&mean_out);
    auto& variance_out = std::get<2>(api_result);
    transformer->SetOutTensorLayout(&variance_out);
    auto& saved_mean = std::get<3>(api_result);
    transformer->SetOutTensorLayout(&saved_mean);
    auto& saved_variance = std::get<4>(api_result);
    transformer->SetOutTensorLayout(&saved_variance);
    auto& reserve_space = std::get<5>(api_result);
    transformer->SetOutTensorLayout(&reserve_space);
    // Returns
    return std::tuple<paddle::Tensor,
                      paddle::Tensor&,
                      paddle::Tensor&,
                      paddle::Tensor,
                      paddle::Tensor,
                      paddle::Tensor>{
        out, mean_out, variance_out, saved_mean, saved_variance, reserve_space};
  }
  // Get Input AutoGradMeta (nullable: inputs may have no grad meta)
  egr::AutogradMeta* x_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(x);
  egr::AutogradMeta* mean_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(mean);
  egr::AutogradMeta* variance_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(variance);
  egr::AutogradMeta* scale_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(scale);
  egr::AutogradMeta* bias_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(bias);
  VLOG(5) << "Running C++ API: "
          << "sync_batch_norm_";
  // Before log info
  if (VLOG_IS_ON(3)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_MEAN_TEMPLATE = " \n( mean , [%s]), ";
    std::string input_mean_str = paddle::string::Sprintf(
        TENSOR_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(mean));
    input_str += input_mean_str;
    const char* TENSOR_VARIANCE_TEMPLATE = " \n( variance , [%s]), ";
    std::string input_variance_str = paddle::string::Sprintf(
        TENSOR_VARIANCE_TEMPLATE, egr::EagerUtils::TensorStr(variance));
    input_str += input_variance_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str);
  }
  // Forward API Call (sparse dispatch)
  auto api_result =
      paddle::experimental::sparse::sync_batch_norm_(x,
                                                     mean,
                                                     variance,
                                                     scale,
                                                     bias,
                                                     is_test,
                                                     momentum,
                                                     epsilon,
                                                     data_layout,
                                                     use_global_stats,
                                                     trainable_statistics);
  // Check NaN and Inf if needed
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("sync_batch_norm_", api_result);
  }
  // Get Outputs
  auto& out = std::get<0>(api_result);
  auto& mean_out = std::get<1>(api_result);
  auto& variance_out = std::get<2>(api_result);
  auto& saved_mean = std::get<3>(api_result);
  auto& saved_variance = std::get<4>(api_result);
  auto& reserve_space = std::get<5>(api_result);
  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);
  egr::AutogradMeta* mean_out_autograd_meta =
      egr::EagerUtils::autograd_meta(&mean_out);
  egr::AutogradMeta* variance_out_autograd_meta =
      egr::EagerUtils::autograd_meta(&variance_out);
  egr::AutogradMeta* saved_mean_autograd_meta =
      egr::EagerUtils::autograd_meta(&saved_mean);
  egr::AutogradMeta* saved_variance_autograd_meta =
      egr::EagerUtils::autograd_meta(&saved_variance);
  egr::AutogradMeta* reserve_space_autograd_meta =
      egr::EagerUtils::autograd_meta(&reserve_space);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad =
      egr::EagerUtils::ComputeRequireGrad(trace_backward,
                                          x_autograd_meta,
                                          mean_autograd_meta,
                                          variance_autograd_meta,
                                          scale_autograd_meta,
                                          bias_autograd_meta);
  // Check Inplace if needed
  // Node Creation
  if (require_any_grad) {
    paddle::platform::RecordEvent node_creation_record_event(
        "sync_batch_norm_ node_creation",
        paddle::platform::TracerEventType::OperatorInner,
        1);
    egr::EagerUtils::PassStopGradient(false,
                                      out_autograd_meta,
                                      mean_out_autograd_meta,
                                      variance_out_autograd_meta,
                                      saved_mean_autograd_meta,
                                      saved_variance_autograd_meta,
                                      reserve_space_autograd_meta);
    // Node Construction: 6 backward-input slots, 5 backward-output slots.
    auto grad_node =
        std::shared_ptr<SyncBatchNormGradNode>(new SyncBatchNormGradNode(6, 5));
    // Register this node for sequential execution during backward
    // (per the "force sync batch norm grad sequential" change) —
    // presumably to keep collective-communication order consistent across
    // ranks; confirm against controller docs.
    egr::Controller::Instance().PushBackForceSequentialNodes(grad_node.get());
    // SetAttributes if needed
    grad_node->SetAttributemomentum(momentum);
    grad_node->SetAttributeepsilon(epsilon);
    grad_node->SetAttributedata_layout(data_layout);
    grad_node->SetAttributeis_test(is_test);
    grad_node->SetAttributeuse_global_stats(use_global_stats);
    grad_node->SetAttributetrainable_statistics(trainable_statistics);
    // Set TensorWrappers for Forward Inputs if needed
    grad_node->SetTensorWrapperx(x);
    grad_node->SetTensorWrapperscale(scale);
    grad_node->SetTensorWrapperbias(bias);
    // SetGradOutMeta & SetEdges — only x (slot 0), scale (3), bias (4)
    // receive gradients; mean/variance (slots 1, 2) do not.
    grad_node->SetGradOutMeta(x, 0);
    grad_node->SetGradOutMeta(scale, 3);
    grad_node->SetGradOutMeta(bias, 4);
    // SetOutRank & SetHistory & SetGradInMeta
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
    }
    if (mean_out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(mean_out_autograd_meta, 1);
    }
    if (variance_out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(variance_out_autograd_meta, 2);
    }
    if (saved_mean_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(saved_mean_autograd_meta, 3);
    }
    if (saved_variance_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(saved_variance_autograd_meta, 4);
    }
    if (reserve_space_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(reserve_space_autograd_meta, 5);
    }
    if (out_autograd_meta) {
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    if (mean_out_autograd_meta) {
      egr::EagerUtils::SetHistory(mean_out_autograd_meta, grad_node);
    }
    if (variance_out_autograd_meta) {
      egr::EagerUtils::SetHistory(variance_out_autograd_meta, grad_node);
    }
    if (saved_mean_autograd_meta) {
      egr::EagerUtils::SetHistory(saved_mean_autograd_meta, grad_node);
    }
    if (saved_variance_autograd_meta) {
      egr::EagerUtils::SetHistory(saved_variance_autograd_meta, grad_node);
    }
    if (reserve_space_autograd_meta) {
      egr::EagerUtils::SetHistory(reserve_space_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
    grad_node->SetGradInMeta(mean_out, 1);
    grad_node->SetGradInMeta(variance_out, 2);
    grad_node->SetGradInMeta(saved_mean, 3);
    grad_node->SetGradInMeta(saved_variance, 4);
    grad_node->SetGradInMeta(reserve_space, 5);
    // Set TensorWrappers for Forward Outputs if needed
    grad_node->SetTensorWrappersaved_mean(saved_mean);
    grad_node->SetTensorWrappersaved_variance(saved_variance);
    grad_node->SetTensorWrapperreserve_space(reserve_space);
  }
  VLOG(4) << "Finish AD API: sync_batch_norm_";
  // LOG IF DEBUG
  if (VLOG_IS_ON(4)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_MEAN_TEMPLATE = " \n( mean , [%s]), ";
    std::string input_mean_str = paddle::string::Sprintf(
        TENSOR_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(mean));
    input_str += input_mean_str;
    const char* TENSOR_VARIANCE_TEMPLATE = " \n( variance , [%s]), ";
    std::string input_variance_str = paddle::string::Sprintf(
        TENSOR_VARIANCE_TEMPLATE, egr::EagerUtils::TensorStr(variance));
    input_str += input_variance_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    const char* TENSOR_OUT_TEMPLATE = " \n( out , [%s]), ";
    std::string output_out_str = paddle::string::Sprintf(
        TENSOR_OUT_TEMPLATE, egr::EagerUtils::TensorStr(out));
    output_str += output_out_str;
    const char* TENSOR_MEAN_OUT_TEMPLATE = " \n( mean_out , [%s]), ";
    std::string output_mean_out_str = paddle::string::Sprintf(
        TENSOR_MEAN_OUT_TEMPLATE, egr::EagerUtils::TensorStr(mean_out));
    output_str += output_mean_out_str;
    const char* TENSOR_VARIANCE_OUT_TEMPLATE = " \n( variance_out , [%s]), ";
    std::string output_variance_out_str = paddle::string::Sprintf(
        TENSOR_VARIANCE_OUT_TEMPLATE, egr::EagerUtils::TensorStr(variance_out));
    output_str += output_variance_out_str;
    const char* TENSOR_SAVED_MEAN_TEMPLATE = " \n( saved_mean , [%s]), ";
    std::string output_saved_mean_str = paddle::string::Sprintf(
        TENSOR_SAVED_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(saved_mean));
    output_str += output_saved_mean_str;
    const char* TENSOR_SAVED_VARIANCE_TEMPLATE =
        " \n( saved_variance , [%s]), ";
    std::string output_saved_variance_str =
        paddle::string::Sprintf(TENSOR_SAVED_VARIANCE_TEMPLATE,
                                egr::EagerUtils::TensorStr(saved_variance));
    output_str += output_saved_variance_str;
    const char* TENSOR_RESERVE_SPACE_TEMPLATE = " \n( reserve_space , [%s]), ";
    std::string output_reserve_space_str =
        paddle::string::Sprintf(TENSOR_RESERVE_SPACE_TEMPLATE,
                                egr::EagerUtils::TensorStr(reserve_space));
    output_str += output_reserve_space_str;
    VLOG(4) << paddle::string::Sprintf(
        INPUT_PRINT_TEMPLATE, input_str, output_str);
  }
  // Returns
  return std::tuple<paddle::Tensor,
                    paddle::Tensor&,
                    paddle::Tensor&,
                    paddle::Tensor,
                    paddle::Tensor,
                    paddle::Tensor>{
      out, mean_out, variance_out, saved_mean, saved_variance, reserve_space};
}
}  // namespace sparse
# List of manually written eager-mode backward-node sources, exported to the
# parent scope. (The side-by-side diff paste duplicated every line; this is
# the reconstructed, valid CMake form, adding the new sync_batch_norm node
# source.)
set(eager_manual_nodes
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/conv2d_nodes.cc
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/add_n_node.cc
    ${PADDLE_SOURCE_DIR}/paddle/fluid/eager/api/manual/eager_manual/nodes/sync_batch_norm_node.cc
    PARENT_SCOPE)
...@@ -204,3 +204,174 @@ class AddNGradNodeFinal : public egr::GradNodeBase { ...@@ -204,3 +204,174 @@ class AddNGradNodeFinal : public egr::GradNodeBase {
// Attributes // Attributes
}; };
// Backward node for the dense sync_batch_norm_ op. Constructed by the
// forward AD function with 6 grad-in slots and 5 grad-out slots; stores the
// forward inputs (x, scale, bias) and forward outputs (saved_mean,
// saved_variance, reserve_space) as tensor wrappers, plus the op attributes,
// for use when operator() computes the gradients.
class SyncBatchNormGradNode : public egr::GradNodeBase {
 public:
  SyncBatchNormGradNode() : egr::GradNodeBase() {}
  SyncBatchNormGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
      : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
  ~SyncBatchNormGradNode() override = default;
  // Runs the backward computation; `grads` holds the incoming gradients per
  // grad-in slot. Defined in the node's .cc file.
  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                               egr::kSlotSmallVectorSize>
  operator()(paddle::small_vector<std::vector<paddle::Tensor>,
                                  egr::kSlotSmallVectorSize>& grads,  // NOLINT
             bool create_graph = false,
             bool is_new_grad = false) override;
  std::string name() override { return "SyncBatchNormGradNode"; }
  // Releases all captured tensors once the gradient has been computed.
  void ClearTensorWrappers() override {
    x_.clear();
    scale_.clear();
    bias_.clear();
    saved_mean_.clear();
    saved_variance_.clear();
    reserve_space_.clear();
    SetIsTensorWrappersCleared(true);
  }
  // Shallow-copies this node (wrappers and attributes copied as members).
  std::shared_ptr<GradNodeBase> Copy() const override {
    auto copied_node = std::shared_ptr<SyncBatchNormGradNode>(
        new SyncBatchNormGradNode(*this));
    return copied_node;
  }
  // SetTensorWrapperX, SetTensorWrapperY, ...
  void SetTensorWrapperx(const paddle::Tensor& x) {
    x_ = egr::TensorWrapper(x, false);
  }
  void SetTensorWrapperscale(const paddle::Tensor& scale) {
    scale_ = egr::TensorWrapper(scale, false);
  }
  void SetTensorWrapperbias(const paddle::Tensor& bias) {
    bias_ = egr::TensorWrapper(bias, false);
  }
  void SetTensorWrappersaved_mean(const paddle::Tensor& saved_mean) {
    saved_mean_ = egr::TensorWrapper(saved_mean, false);
  }
  void SetTensorWrappersaved_variance(const paddle::Tensor& saved_variance) {
    saved_variance_ = egr::TensorWrapper(saved_variance, false);
  }
  void SetTensorWrapperreserve_space(const paddle::Tensor& reserve_space) {
    reserve_space_ = egr::TensorWrapper(reserve_space, false);
  }
  // SetAttributes
  void SetAttributemomentum(const float& momentum) { momentum_ = momentum; }
  void SetAttributeepsilon(const float& epsilon) { epsilon_ = epsilon; }
  void SetAttributedata_layout(const std::string& data_layout) {
    data_layout_ = data_layout;
  }
  void SetAttributeis_test(const bool& is_test) { is_test_ = is_test; }
  void SetAttributeuse_global_stats(const bool& use_global_stats) {
    use_global_stats_ = use_global_stats;
  }
  void SetAttributetrainable_statistics(const bool& trainable_statistics) {
    trainable_statistics_ = trainable_statistics;
  }

 private:
  // TensorWrappers: forward inputs/outputs captured for the backward pass.
  egr::TensorWrapper x_;
  egr::TensorWrapper scale_;
  egr::TensorWrapper bias_;
  egr::TensorWrapper saved_mean_;
  egr::TensorWrapper saved_variance_;
  egr::TensorWrapper reserve_space_;
  // Attributes copied from the forward call.
  float momentum_;
  float epsilon_;
  std::string data_layout_;
  bool is_test_;
  bool use_global_stats_;
  bool trainable_statistics_;
};
namespace sparse {
// Backward node for the sparse sync_batch_norm_ op; structurally identical
// to the dense SyncBatchNormGradNode (6 grad-in / 5 grad-out slots, same
// captured wrappers and attributes) but its operator() dispatches to the
// sparse backward kernel (defined in the node's .cc file).
class SyncBatchNormGradNode : public egr::GradNodeBase {
 public:
  SyncBatchNormGradNode() : egr::GradNodeBase() {}
  SyncBatchNormGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
      : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
  ~SyncBatchNormGradNode() override = default;
  // Runs the backward computation for the sparse variant.
  virtual paddle::small_vector<std::vector<paddle::Tensor>,
                               egr::kSlotSmallVectorSize>
  operator()(paddle::small_vector<std::vector<paddle::Tensor>,
                                  egr::kSlotSmallVectorSize>& grads,  // NOLINT
             bool create_graph = false,
             bool is_new_grad = false) override;
  std::string name() override { return "SyncBatchNormGradNode"; }
  // Releases all captured tensors once the gradient has been computed.
  void ClearTensorWrappers() override {
    x_.clear();
    scale_.clear();
    bias_.clear();
    saved_mean_.clear();
    saved_variance_.clear();
    reserve_space_.clear();
    SetIsTensorWrappersCleared(true);
  }
  // Shallow-copies this node (wrappers and attributes copied as members).
  std::shared_ptr<GradNodeBase> Copy() const override {
    auto copied_node = std::shared_ptr<SyncBatchNormGradNode>(
        new SyncBatchNormGradNode(*this));
    return copied_node;
  }
  // SetTensorWrapperX, SetTensorWrapperY, ...
  void SetTensorWrapperx(const paddle::Tensor& x) {
    x_ = egr::TensorWrapper(x, false);
  }
  void SetTensorWrapperscale(const paddle::Tensor& scale) {
    scale_ = egr::TensorWrapper(scale, false);
  }
  void SetTensorWrapperbias(const paddle::Tensor& bias) {
    bias_ = egr::TensorWrapper(bias, false);
  }
  void SetTensorWrappersaved_mean(const paddle::Tensor& saved_mean) {
    saved_mean_ = egr::TensorWrapper(saved_mean, false);
  }
  void SetTensorWrappersaved_variance(const paddle::Tensor& saved_variance) {
    saved_variance_ = egr::TensorWrapper(saved_variance, false);
  }
  void SetTensorWrapperreserve_space(const paddle::Tensor& reserve_space) {
    reserve_space_ = egr::TensorWrapper(reserve_space, false);
  }
  // SetAttributes
  void SetAttributemomentum(const float& momentum) { momentum_ = momentum; }
  void SetAttributeepsilon(const float& epsilon) { epsilon_ = epsilon; }
  void SetAttributedata_layout(const std::string& data_layout) {
    data_layout_ = data_layout;
  }
  void SetAttributeis_test(const bool& is_test) { is_test_ = is_test; }
  void SetAttributeuse_global_stats(const bool& use_global_stats) {
    use_global_stats_ = use_global_stats;
  }
  void SetAttributetrainable_statistics(const bool& trainable_statistics) {
    trainable_statistics_ = trainable_statistics;
  }

 private:
  // TensorWrappers: forward inputs/outputs captured for the backward pass.
  egr::TensorWrapper x_;
  egr::TensorWrapper scale_;
  egr::TensorWrapper bias_;
  egr::TensorWrapper saved_mean_;
  egr::TensorWrapper saved_variance_;
  egr::TensorWrapper reserve_space_;
  // Attributes copied from the forward call.
  float momentum_;
  float epsilon_;
  std::string data_layout_;
  bool is_test_;
  bool use_global_stats_;
  bool trainable_statistics_;
};
}  // namespace sparse
// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "paddle/fluid/eager/api/generated/eager_generated/forwards/dygraph_functions.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/all.h"
#include "paddle/phi/api/backward/backward_api.h"
#include "paddle/phi/api/backward/sparse_bw_api.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/phi/api/lib/api_custom_impl.h"
DECLARE_bool(check_nan_inf);
// AD API GRAD for sync_batch_norm_ (dense path).
//
// grads[0][0] carries out_grad for the forward op's output. Forward-time
// tensors (x, scale, bias, saved_mean, saved_variance and the optional
// reserve_space) are recovered from this node's TensorWrappers.
//
// Returns a 5-slot gradient vector mirroring the forward op's inputs:
// slot 0 = x_grad, slot 3 = scale_grad, slot 4 = bias_grad; slots 1/2
// (mean/variance) receive no gradient here.
//
// sync_batch_norm_grad has no grad op of its own, so create_graph == true
// raises Unavailable.
paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
SyncBatchNormGradNode::operator()(
    paddle::small_vector<std::vector<paddle::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
  VLOG(3) << "Running AD API GRAD: "
          << "sync_batch_norm_grad";
  // Fill Zero For GradIn Tensors
  // Apply Gradient Hooks
  auto hooked_grads = ApplyGradientHooks(grads);
  // Collect GradIn Tensors, Attrs and Recovered TensorWrappers
  auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_);
  auto scale = egr::EagerUtils::RecoverTensorWrapper(&this->scale_);
  auto bias = egr::EagerUtils::RecoverTensorWrapper(&this->bias_);
  auto saved_mean = egr::EagerUtils::RecoverTensorWrapper(&this->saved_mean_);
  auto saved_variance =
      egr::EagerUtils::RecoverTensorWrapper(&this->saved_variance_);
  auto reserve_space =
      egr::EagerUtils::RecoverTensorWrapper(&this->reserve_space_);
  // reserve_space is optional: only pass it on when forward produced one.
  paddle::optional<paddle::Tensor> reserve_space_optional;
  if (reserve_space.impl())
    reserve_space_optional =
        paddle::make_optional<paddle::Tensor>(reserve_space);
  auto& out_grad = hooked_grads[0][0];
  auto& momentum = this->momentum_;
  auto& epsilon = this->epsilon_;
  auto& data_layout = this->data_layout_;
  auto& is_test = this->is_test_;
  auto& use_global_stats = this->use_global_stats_;
  auto& trainable_statistics = this->trainable_statistics_;
  // Prepare Grad function call
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      returns(5);
  for (int i = 0; i < 5; ++i) {
    // Keep one placeholder entry even for slots without meta so the
    // returns[i][0] accesses below stay in-bounds.
    returns[i].resize(out_metas[i].empty() ? 1 : out_metas[i].size());
  }
  // Slot map: 0 -> x_grad, 3 -> scale_grad, 4 -> bias_grad. A nullptr
  // output tells the kernel to skip that gradient (stop_gradient set).
  auto* api_output_0 =
      (out_metas[0].empty() || out_metas[0][0].IsStopGradient())
          ? nullptr
          : &returns[0][0];
  auto* api_output_1 =
      (out_metas[3].empty() || out_metas[3][0].IsStopGradient())
          ? nullptr
          : &returns[3][0];
  auto* api_output_2 =
      (out_metas[4].empty() || out_metas[4][0].IsStopGradient())
          ? nullptr
          : &returns[4][0];
  // Runtime check if we need next grad
  bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;
  // Inplace Check
  // Inplace Strategy
  VLOG(5) << "Running C++ API: "
          << "sync_batch_norm_grad";
  // Before log info
  if (VLOG_IS_ON(3)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} ";
    std::string input_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(out_grad));
    input_str += input_out_grad_str;
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    const char* TENSOR_SAVED_MEAN_TEMPLATE = " \n( saved_mean , [%s]), ";
    std::string input_saved_mean_str = paddle::string::Sprintf(
        TENSOR_SAVED_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(saved_mean));
    input_str += input_saved_mean_str;
    const char* TENSOR_SAVED_VARIANCE_TEMPLATE =
        " \n( saved_variance , [%s]), ";
    std::string input_saved_variance_str =
        paddle::string::Sprintf(TENSOR_SAVED_VARIANCE_TEMPLATE,
                                egr::EagerUtils::TensorStr(saved_variance));
    input_str += input_saved_variance_str;
    const char* TENSOR_RESERVE_SPACE_TEMPLATE = " \n( reserve_space , [%s]), ";
    std::string input_reserve_space_str =
        paddle::string::Sprintf(TENSOR_RESERVE_SPACE_TEMPLATE,
                                egr::EagerUtils::TensorStr(reserve_space));
    input_str += input_reserve_space_str;
    VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str);
  }
  // Call grad_api function
  paddle::experimental::sync_batch_norm_grad(x,
                                             scale,
                                             bias,
                                             saved_mean,
                                             saved_variance,
                                             reserve_space_optional,
                                             out_grad,
                                             momentum,
                                             epsilon,
                                             data_layout,
                                             is_test,
                                             use_global_stats,
                                             trainable_statistics,
                                             api_output_0,
                                             api_output_1,
                                             api_output_2);
  // Check NaN and Inf if needed
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("sync_batch_norm_grad", returns);
  }
  // Get GradOut autograd_meta
  auto& x_grad = returns[0][0];
  egr::AutogradMeta* x_grad_autograd_meta =
      returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad)
                                  : nullptr;
  if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false);
  auto& scale_grad = returns[3][0];
  egr::AutogradMeta* scale_grad_autograd_meta =
      returns[3][0].initialized() ? egr::EagerUtils::autograd_meta(&scale_grad)
                                  : nullptr;
  if (scale_grad_autograd_meta)
    scale_grad_autograd_meta->SetStopGradient(false);
  auto& bias_grad = returns[4][0];
  egr::AutogradMeta* bias_grad_autograd_meta =
      returns[4][0].initialized() ? egr::EagerUtils::autograd_meta(&bias_grad)
                                  : nullptr;
  if (bias_grad_autograd_meta) bias_grad_autograd_meta->SetStopGradient(false);
  // Create Grad Node
  if (trace_backward) {
    // NOTE: literals below are concatenated; keep the trailing spaces so the
    // message reads correctly at runtime.
    PADDLE_THROW(phi::errors::Unavailable(
        "The Op sync_batch_norm_grad doesn't have any grad "
        "op. If you don't intend calculating higher order "
        "derivatives, please set `create_graph` to False."));
  }
  VLOG(4) << "Finish AD API GRAD: sync_batch_norm_grad";
  // LOG IF DEBUG
  if (VLOG_IS_ON(4)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(out_grad));
    input_str += input_out_grad_str;
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    const char* TENSOR_SAVED_MEAN_TEMPLATE = " \n( saved_mean , [%s]), ";
    std::string input_saved_mean_str = paddle::string::Sprintf(
        TENSOR_SAVED_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(saved_mean));
    input_str += input_saved_mean_str;
    const char* TENSOR_SAVED_VARIANCE_TEMPLATE =
        " \n( saved_variance , [%s]), ";
    std::string input_saved_variance_str =
        paddle::string::Sprintf(TENSOR_SAVED_VARIANCE_TEMPLATE,
                                egr::EagerUtils::TensorStr(saved_variance));
    input_str += input_saved_variance_str;
    const char* TENSOR_RESERVE_SPACE_TEMPLATE = " \n( reserve_space , [%s]), ";
    std::string input_reserve_space_str =
        paddle::string::Sprintf(TENSOR_RESERVE_SPACE_TEMPLATE,
                                egr::EagerUtils::TensorStr(reserve_space));
    input_str += input_reserve_space_str;
    const char* TENSOR_X_GRAD_TEMPLATE = " \n ( x_grad , [%s]), ";
    std::string output_x_grad_str = paddle::string::Sprintf(
        TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(x_grad));
    output_str += output_x_grad_str;
    const char* TENSOR_SCALE_GRAD_TEMPLATE = " \n ( scale_grad , [%s]), ";
    std::string output_scale_grad_str = paddle::string::Sprintf(
        TENSOR_SCALE_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(scale_grad));
    output_str += output_scale_grad_str;
    const char* TENSOR_BIAS_GRAD_TEMPLATE = " \n ( bias_grad , [%s]), ";
    std::string output_bias_grad_str = paddle::string::Sprintf(
        TENSOR_BIAS_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(bias_grad));
    output_str += output_bias_grad_str;
    VLOG(4) << paddle::string::Sprintf(
        INPUT_PRINT_TEMPLATE, input_str, output_str);
  }
  // Return
  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);
  return returns;
}
namespace sparse {
// AD API GRAD for sync_batch_norm_ (sparse path).
//
// Identical structure to the dense node above, but dispatches to
// paddle::experimental::sparse::sync_batch_norm_grad. grads[0][0] carries
// out_grad; returns slot 0 = x_grad, slot 3 = scale_grad, slot 4 = bias_grad.
// No second-order grad exists, so create_graph == true raises Unavailable.
paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
SyncBatchNormGradNode::operator()(
    paddle::small_vector<std::vector<paddle::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
  VLOG(3) << "Running AD API GRAD: "
          << "sync_batch_norm_grad";
  // Fill Zero For GradIn Tensors
  // Apply Gradient Hooks
  auto hooked_grads = ApplyGradientHooks(grads);
  // Collect GradIn Tensors, Attrs and Recovered TensorWrappers
  auto x = egr::EagerUtils::RecoverTensorWrapper(&this->x_);
  auto scale = egr::EagerUtils::RecoverTensorWrapper(&this->scale_);
  auto bias = egr::EagerUtils::RecoverTensorWrapper(&this->bias_);
  auto saved_mean = egr::EagerUtils::RecoverTensorWrapper(&this->saved_mean_);
  auto saved_variance =
      egr::EagerUtils::RecoverTensorWrapper(&this->saved_variance_);
  auto reserve_space =
      egr::EagerUtils::RecoverTensorWrapper(&this->reserve_space_);
  // reserve_space is optional: only pass it on when forward produced one.
  paddle::optional<paddle::Tensor> reserve_space_optional;
  if (reserve_space.impl())
    reserve_space_optional =
        paddle::make_optional<paddle::Tensor>(reserve_space);
  auto& out_grad = hooked_grads[0][0];
  auto& momentum = this->momentum_;
  auto& epsilon = this->epsilon_;
  auto& data_layout = this->data_layout_;
  auto& is_test = this->is_test_;
  auto& use_global_stats = this->use_global_stats_;
  auto& trainable_statistics = this->trainable_statistics_;
  // Prepare Grad function call
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::Tensor>, egr::kSlotSmallVectorSize>
      returns(5);
  for (int i = 0; i < 5; ++i) {
    // Keep one placeholder entry even for slots without meta so the
    // returns[i][0] accesses below stay in-bounds.
    returns[i].resize(out_metas[i].empty() ? 1 : out_metas[i].size());
  }
  // Slot map: 0 -> x_grad, 3 -> scale_grad, 4 -> bias_grad. A nullptr
  // output tells the kernel to skip that gradient (stop_gradient set).
  auto* api_output_0 =
      (out_metas[0].empty() || out_metas[0][0].IsStopGradient())
          ? nullptr
          : &returns[0][0];
  auto* api_output_1 =
      (out_metas[3].empty() || out_metas[3][0].IsStopGradient())
          ? nullptr
          : &returns[3][0];
  auto* api_output_2 =
      (out_metas[4].empty() || out_metas[4][0].IsStopGradient())
          ? nullptr
          : &returns[4][0];
  // Runtime check if we need next grad
  bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;
  // Inplace Check
  // Inplace Strategy
  VLOG(5) << "Running C++ API: "
          << "sync_batch_norm_grad";
  // Before log info
  if (VLOG_IS_ON(3)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s]} ";
    std::string input_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(out_grad));
    input_str += input_out_grad_str;
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    const char* TENSOR_SAVED_MEAN_TEMPLATE = " \n( saved_mean , [%s]), ";
    std::string input_saved_mean_str = paddle::string::Sprintf(
        TENSOR_SAVED_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(saved_mean));
    input_str += input_saved_mean_str;
    const char* TENSOR_SAVED_VARIANCE_TEMPLATE =
        " \n( saved_variance , [%s]), ";
    std::string input_saved_variance_str =
        paddle::string::Sprintf(TENSOR_SAVED_VARIANCE_TEMPLATE,
                                egr::EagerUtils::TensorStr(saved_variance));
    input_str += input_saved_variance_str;
    const char* TENSOR_RESERVE_SPACE_TEMPLATE = " \n( reserve_space , [%s]), ";
    std::string input_reserve_space_str =
        paddle::string::Sprintf(TENSOR_RESERVE_SPACE_TEMPLATE,
                                egr::EagerUtils::TensorStr(reserve_space));
    input_str += input_reserve_space_str;
    VLOG(3) << paddle::string::Sprintf(INPUT_PRINT_TEMPLATE, input_str);
  }
  // Call grad_api function (sparse kernel)
  paddle::experimental::sparse::sync_batch_norm_grad(x,
                                                     scale,
                                                     bias,
                                                     saved_mean,
                                                     saved_variance,
                                                     reserve_space_optional,
                                                     out_grad,
                                                     momentum,
                                                     epsilon,
                                                     data_layout,
                                                     is_test,
                                                     use_global_stats,
                                                     trainable_statistics,
                                                     api_output_0,
                                                     api_output_1,
                                                     api_output_2);
  // Check NaN and Inf if needed
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("sync_batch_norm_grad", returns);
  }
  // Get GradOut autograd_meta
  auto& x_grad = returns[0][0];
  egr::AutogradMeta* x_grad_autograd_meta =
      returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&x_grad)
                                  : nullptr;
  if (x_grad_autograd_meta) x_grad_autograd_meta->SetStopGradient(false);
  auto& scale_grad = returns[3][0];
  egr::AutogradMeta* scale_grad_autograd_meta =
      returns[3][0].initialized() ? egr::EagerUtils::autograd_meta(&scale_grad)
                                  : nullptr;
  if (scale_grad_autograd_meta)
    scale_grad_autograd_meta->SetStopGradient(false);
  auto& bias_grad = returns[4][0];
  egr::AutogradMeta* bias_grad_autograd_meta =
      returns[4][0].initialized() ? egr::EagerUtils::autograd_meta(&bias_grad)
                                  : nullptr;
  if (bias_grad_autograd_meta) bias_grad_autograd_meta->SetStopGradient(false);
  // Create Grad Node
  if (trace_backward) {
    // NOTE: literals below are concatenated; keep the trailing spaces so the
    // message reads correctly at runtime.
    PADDLE_THROW(phi::errors::Unavailable(
        "The Op sync_batch_norm_grad doesn't have any grad "
        "op. If you don't intend calculating higher order "
        "derivatives, please set `create_graph` to False."));
  }
  VLOG(4) << "Finish AD API GRAD: sync_batch_norm_grad";
  // LOG IF DEBUG
  if (VLOG_IS_ON(4)) {
    const char* INPUT_PRINT_TEMPLATE = "{ Input: [%s], \n Output: [%s] } ";
    std::string input_str = "";
    std::string output_str = "";
    const char* TENSOR_OUT_GRAD_TEMPLATE = " \n( out_grad , [%s]), ";
    std::string input_out_grad_str = paddle::string::Sprintf(
        TENSOR_OUT_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(out_grad));
    input_str += input_out_grad_str;
    const char* TENSOR_X_TEMPLATE = " \n( x , [%s]), ";
    std::string input_x_str = paddle::string::Sprintf(
        TENSOR_X_TEMPLATE, egr::EagerUtils::TensorStr(x));
    input_str += input_x_str;
    const char* TENSOR_SCALE_TEMPLATE = " \n( scale , [%s]), ";
    std::string input_scale_str = paddle::string::Sprintf(
        TENSOR_SCALE_TEMPLATE, egr::EagerUtils::TensorStr(scale));
    input_str += input_scale_str;
    const char* TENSOR_BIAS_TEMPLATE = " \n( bias , [%s]), ";
    std::string input_bias_str = paddle::string::Sprintf(
        TENSOR_BIAS_TEMPLATE, egr::EagerUtils::TensorStr(bias));
    input_str += input_bias_str;
    const char* TENSOR_SAVED_MEAN_TEMPLATE = " \n( saved_mean , [%s]), ";
    std::string input_saved_mean_str = paddle::string::Sprintf(
        TENSOR_SAVED_MEAN_TEMPLATE, egr::EagerUtils::TensorStr(saved_mean));
    input_str += input_saved_mean_str;
    const char* TENSOR_SAVED_VARIANCE_TEMPLATE =
        " \n( saved_variance , [%s]), ";
    std::string input_saved_variance_str =
        paddle::string::Sprintf(TENSOR_SAVED_VARIANCE_TEMPLATE,
                                egr::EagerUtils::TensorStr(saved_variance));
    input_str += input_saved_variance_str;
    const char* TENSOR_RESERVE_SPACE_TEMPLATE = " \n( reserve_space , [%s]), ";
    std::string input_reserve_space_str =
        paddle::string::Sprintf(TENSOR_RESERVE_SPACE_TEMPLATE,
                                egr::EagerUtils::TensorStr(reserve_space));
    input_str += input_reserve_space_str;
    const char* TENSOR_X_GRAD_TEMPLATE = " \n ( x_grad , [%s]), ";
    std::string output_x_grad_str = paddle::string::Sprintf(
        TENSOR_X_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(x_grad));
    output_str += output_x_grad_str;
    const char* TENSOR_SCALE_GRAD_TEMPLATE = " \n ( scale_grad , [%s]), ";
    std::string output_scale_grad_str = paddle::string::Sprintf(
        TENSOR_SCALE_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(scale_grad));
    output_str += output_scale_grad_str;
    const char* TENSOR_BIAS_GRAD_TEMPLATE = " \n ( bias_grad , [%s]), ";
    std::string output_bias_grad_str = paddle::string::Sprintf(
        TENSOR_BIAS_GRAD_TEMPLATE, egr::EagerUtils::TensorStr(bias_grad));
    output_str += output_bias_grad_str;
    VLOG(4) << paddle::string::Sprintf(
        INPUT_PRINT_TEMPLATE, input_str, output_str);
  }
  // Return
  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);
  return returns;
}
} // namespace sparse
...@@ -40,6 +40,8 @@ class UniqueNameGenerator { ...@@ -40,6 +40,8 @@ class UniqueNameGenerator {
// TODO(jiabin): Now we are using imperative tracer, move it here when we // TODO(jiabin): Now we are using imperative tracer, move it here when we
// deprecate imperative. // deprecate imperative.
class GradNodeBase;
class Controller { class Controller {
public: public:
static Controller& Instance() { return *controller_; } static Controller& Instance() { return *controller_; }
...@@ -119,6 +121,18 @@ class Controller { ...@@ -119,6 +121,18 @@ class Controller {
void ClearFinalBackwardHooks() { final_backward_hooks_.clear(); } void ClearFinalBackwardHooks() { final_backward_hooks_.clear(); }
void ClearForceSequentialNodes() {
while (!force_sequential_nodes_.empty()) {
force_sequential_nodes_.pop();
}
}
void PushBackForceSequentialNodes(GradNodeBase* node) {
force_sequential_nodes_.push(node);
}
std::queue<GradNodeBase*> GetForceSequentialNodes() {
return force_sequential_nodes_;
}
private: private:
Controller() = default; Controller() = default;
static Controller* controller_; static Controller* controller_;
...@@ -132,6 +146,7 @@ class Controller { ...@@ -132,6 +146,7 @@ class Controller {
std::vector<std::vector<std::unordered_map<int, int>>>> std::vector<std::vector<std::unordered_map<int, int>>>>
custom_edges_slot_map_; custom_edges_slot_map_;
std::vector<std::shared_ptr<VoidHook>> final_backward_hooks_; std::vector<std::shared_ptr<VoidHook>> final_backward_hooks_;
std::queue<GradNodeBase*> force_sequential_nodes_;
DISABLE_COPY_AND_ASSIGN(Controller); DISABLE_COPY_AND_ASSIGN(Controller);
}; };
......
...@@ -57,6 +57,7 @@ black_ops_list = [ ...@@ -57,6 +57,7 @@ black_ops_list = [
"conv2d_grad_grad", "conv2d_grad_grad",
"add_n", "add_n",
"add_n_grad", "add_n_grad",
"sync_batch_norm_",
] ]
......
...@@ -111,6 +111,22 @@ std::vector<paddle::Tensor> RunBackward( ...@@ -111,6 +111,22 @@ std::vector<paddle::Tensor> RunBackward(
const std::vector<paddle::Tensor>& no_grad_vars = {}) { const std::vector<paddle::Tensor>& no_grad_vars = {}) {
VLOG(3) << "Start Backward"; VLOG(3) << "Start Backward";
std::queue<GradNodeBase*> force_sequential_nodes_forward_queue =
egr::Controller::Instance().GetForceSequentialNodes();
egr::Controller::Instance().ClearForceSequentialNodes();
std::deque<GradNodeBase*> force_sequential_nodes_queue;
std::set<GradNodeBase*> force_sequential_nodes_set;
std::set<GradNodeBase*> ready_force_sequential_nodes;
auto force_sequential_nodes_size =
force_sequential_nodes_forward_queue.size();
for (size_t i = 0; i < force_sequential_nodes_size; ++i) {
force_sequential_nodes_set.insert(
force_sequential_nodes_forward_queue.front());
force_sequential_nodes_queue.push_front(
force_sequential_nodes_forward_queue.front());
force_sequential_nodes_forward_queue.pop();
}
// *Gradient Hook should happen at node-level // *Gradient Hook should happen at node-level
// *Inplace version check should perform at node-level // *Inplace version check should perform at node-level
// *Cross-batch accumulation happens at forward pass // *Cross-batch accumulation happens at forward pass
...@@ -355,12 +371,34 @@ std::vector<paddle::Tensor> RunBackward( ...@@ -355,12 +371,34 @@ std::vector<paddle::Tensor> RunBackward(
"Node's in-degree cannot be negative.", "Node's in-degree cannot be negative.",
next_node->name())); next_node->name()));
if (node_in_degree_map[next_node] == 0) { auto add_next_node_func = [&node_in_degree_map,
if (dynamic_cast<egr::GradNodeAccumulation*>(next_node)) { &queue](GradNodeBase* next_node) {
queue.push_front(std::move(next_node)); if (node_in_degree_map[next_node] == 0) {
if (dynamic_cast<egr::GradNodeAccumulation*>(next_node)) {
queue.push_front(std::move(next_node));
} else {
queue.push_back(std::move(next_node));
}
}
};
if (force_sequential_nodes_set.count(next_node)) {
if (force_sequential_nodes_queue.front() == next_node) {
force_sequential_nodes_queue.pop_front();
add_next_node_func(next_node);
while (ready_force_sequential_nodes.count(
force_sequential_nodes_queue.front())) {
ready_force_sequential_nodes.erase(
force_sequential_nodes_queue.front());
add_next_node_func(force_sequential_nodes_queue.front());
force_sequential_nodes_queue.pop_front();
}
} else { } else {
queue.push_back(std::move(next_node)); ready_force_sequential_nodes.insert(next_node);
continue;
} }
} else {
add_next_node_func(next_node);
} }
} }
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册