未验证 提交 a7c98ddb 编写于 作者: J Jiabin Yang 提交者: GitHub

Fix higher order deriv with inplace (#44020)

* fix deriv with inplace

* fix double grad bugs

* remove additional file

* fix compat dygraph mode

* fix yaml remove additional yaml

* fix slice double grad error and auto code gen logic error for higher order differentiate

* fix fix_higher_order_deriv

* remove additional include

* fix fix_higher_order_deriv
上级 aa0c885a
......@@ -7,6 +7,6 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
final_dygraph_node
SRCS nodes.cc
DEPS ${eager_deps})
DEPS ${eager_deps} ${eager_manual_nodes})
add_dependencies(final_dygraph_node eager_final_state_codegen)
endif()
......@@ -7,6 +7,6 @@ if(NOT (NOT WITH_PYTHON AND ON_INFER))
cc_library(
final_dygraph_function
SRCS dygraph_functions.cc
DEPS ${eager_deps})
DEPS ${eager_deps} ${eager_manual_functions})
add_dependencies(final_dygraph_function eager_final_state_codegen)
endif()
......@@ -6,4 +6,11 @@ if(NOT ((NOT WITH_PYTHON) AND ON_INFER))
set(fluid_manual_nodes
${fluid_manual_nodes}
PARENT_SCOPE)
add_subdirectory(eager_manual)
set(eager_manual_functions
${eager_manual_functions}
PARENT_SCOPE)
set(eager_manual_nodes
${eager_manual_nodes}
PARENT_SCOPE)
endif()
add_subdirectory(forwards)
add_subdirectory(nodes)
set(eager_manual_functions
${eager_manual_functions}
PARENT_SCOPE)
set(eager_manual_nodes
${eager_manual_nodes}
PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/api/include/tensor.h"
// Hand-written ("manual") eager-mode forward declaration for conv2d.
// The definition lives in the eager_manual forwards library (SRCS
// conv2d_fwd_function.cc); conv2d is excluded from auto code generation so
// its higher-order derivative wiring can be maintained by hand.
// NOTE(review): `paddding_algorithm` (triple 'd') matches the op-attribute
// spelling used by the generated code and its grad nodes — do not "correct"
// it in isolation.
paddle::experimental::Tensor conv2d_final_state_dygraph_function(
    const paddle::experimental::Tensor& input,
    const paddle::experimental::Tensor& filter,
    std::vector<int> strides,
    std::vector<int> paddings,
    std::string paddding_algorithm,
    int groups,
    std::vector<int> dilations,
    std::string data_format,
    bool use_addto,
    int workspace_size_MB,
    bool exhaustive_search);
# Hand-written ("manual") eager forward function for conv2d.
cc_library(
  conv2d_fwd_function
  SRCS conv2d_fwd_function.cc
  DEPS ${eager_deps} ${fluid_deps} ${GLOB_OP_LIB} ${GLOB_OPERATOR_DEPS})
# The generated dygraph sources must exist before this target compiles.
add_dependencies(conv2d_fwd_function eager_codegen)
# Export the target name to the parent scope so it can be linked into
# final_dygraph_function via ${eager_manual_functions}.
set(eager_manual_functions
    conv2d_fwd_function
    PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/fluid/eager/amp_utils.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#pragma GCC diagnostic ignored "-Wunused-variable"
DECLARE_bool(check_nan_inf);
// Hand-written eager ("final state") dygraph forward for conv2d.
// Mirrors the auto-generated forward template:
//   1. record a profiler event,
//   2. run the AMP auto-cast path when mixed precision is enabled,
//   3. call the C++ API paddle::experimental::conv2d,
//   4. if any input requires grad, build a Conv2dGradNodeFinal and attach
//      it to the autograd graph.
// Returns the conv2d output tensor.
paddle::experimental::Tensor conv2d_final_state_dygraph_function(
    const paddle::experimental::Tensor& input,
    const paddle::experimental::Tensor& filter,
    std::vector<int> strides,
    std::vector<int> paddings,
    std::string paddding_algorithm,
    int groups,
    std::vector<int> dilations,
    std::string data_format,
    bool use_addto,
    int workspace_size_MB,
    bool exhaustive_search) {
  // Dygraph Record Event: profiles the whole forward call.
  paddle::platform::RecordEvent dygraph_entrance_record_event(
      "conv2d dygraph", paddle::platform::TracerEventType::Operator, 1);
  // AMP Logic: cast both inputs to the AMP destination dtype, then re-enter
  // this function with the AMP level forced to O0 so the cast is applied
  // exactly once.
  if (egr::Controller::Instance().GetAMPLevel() !=
      paddle::imperative::AmpLevel::O0) {
    VLOG(5) << "Check and Prepare For AMP";
    auto op_name = phi::TransToFluidOpName("conv2d");
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_tensors_vector = {{input}, {filter}};
    auto amp_dst_dtype = egr::GetAmpDestDtype(op_name, amp_tensors_vector);
    auto NEW_input =
        egr::EagerAmpAutoCast("input", input, amp_dst_dtype, op_name);
    auto NEW_filter =
        egr::EagerAmpAutoCast("filter", filter, amp_dst_dtype, op_name);
    {
      // The guard lowers the tracer's AMP level for the recursive call and
      // restores it when this scope exits.
      paddle::imperative::AutoCastGuard guard(
          egr::Controller::Instance().GetCurrentTracer(),
          paddle::imperative::AmpLevel::O0);
      return conv2d_final_state_dygraph_function(NEW_input,
                                                 NEW_filter,
                                                 strides,
                                                 paddings,
                                                 paddding_algorithm,
                                                 groups,
                                                 dilations,
                                                 data_format,
                                                 use_addto,
                                                 workspace_size_MB,
                                                 exhaustive_search);
    }
  }
  // Get Input AutoGradMeta (nullable: an input may not carry grad info).
  egr::AutogradMeta* input_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(input);
  egr::AutogradMeta* filter_autograd_meta =
      egr::EagerUtils::nullable_autograd_meta(filter);
  // Forward API Call
  VLOG(3) << "Final State Running: "
          << "conv2d_final_state_dygraph_function";
  auto api_result = paddle::experimental::conv2d(input,
                                                 filter,
                                                 strides,
                                                 paddings,
                                                 paddding_algorithm,
                                                 groups,
                                                 dilations,
                                                 data_format,
                                                 use_addto,
                                                 workspace_size_MB,
                                                 exhaustive_search);
  // Check NaN and Inf if the runtime flag is set.
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("conv2d", api_result);
  }
  // Get Outputs
  auto& out = api_result;
  // Get Output AutoGradMeta
  egr::AutogradMeta* out_autograd_meta = egr::EagerUtils::autograd_meta(&out);
  bool trace_backward = egr::Controller::Instance().HasGrad();
  bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(
      trace_backward, input_autograd_meta, filter_autograd_meta);
  // Check Inplace if needed: conv2d has no inplace variant, nothing to do.
  // Node Creation — only when some input requires grad. The statement order
  // below (PassStopGradient -> construct node -> attributes -> tensor
  // wrappers -> out-meta/edges -> rank/history/in-meta/retain) follows the
  // generated forward-body template and should not be reordered.
  if (require_any_grad) {
    paddle::platform::RecordEvent node_creation_record_event(
        "conv2d node_creation",
        paddle::platform::TracerEventType::OperatorInner,
        1);
    egr::EagerUtils::PassStopGradient(false, out_autograd_meta);
    // Node Construction: 1 grad-in slot (out), 2 grad-out slots
    // (input, filter).
    auto grad_node =
        std::shared_ptr<Conv2dGradNodeFinal>(new Conv2dGradNodeFinal(1, 2));
    // SetAttributes so the backward can replay the same conv2d config.
    grad_node->SetAttributestrides(strides);
    grad_node->SetAttributepaddings(paddings);
    grad_node->SetAttributepaddding_algorithm(paddding_algorithm);
    grad_node->SetAttributegroups(groups);
    grad_node->SetAttributedilations(dilations);
    grad_node->SetAttributedata_format(data_format);
    grad_node->SetAttributeuse_addto(use_addto);
    grad_node->SetAttributeworkspace_size_MB(workspace_size_MB);
    grad_node->SetAttributeexhaustive_search(exhaustive_search);
    // Set TensorWrappers for Forward Inputs (saved for the backward pass).
    grad_node->SetTensorWrapperinput(input);
    grad_node->SetTensorWrapperfilter(filter);
    // SetGradOutMeta & SetEdges
    grad_node->SetGradOutMeta(input, 0);
    grad_node->SetGradOutMeta(filter, 1);
    // SetOutRank & SetHistory & SetGradInMeta & RetainGrad
    if (out_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(out_autograd_meta, 0);
    }
    if (out_autograd_meta) {
      egr::EagerUtils::SetHistory(out_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(out, 0);
    egr::EagerUtils::CheckAndRetainGrad(out);
    // Set TensorWrappers for Forward Outputs if needed (none for conv2d).
  }
  // Returns
  return out;
}
# Hand-written backward (grad / double-grad) nodes for conv2d.
cc_library(
  conv2d_nodes
  SRCS conv2d_nodes.cc
  DEPS ${eager_deps} ${fluid_deps})
# Export the target name to the parent scope so it can be linked into
# final_dygraph_node via ${eager_manual_nodes}.
set(eager_manual_nodes
    conv2d_nodes
    PARENT_SCOPE)
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "glog/logging.h"
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/eager/to_static/run_program_op_node.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/api/all.h"
#include "paddle/phi/api/backward/backward_api.h"
#include "paddle/phi/api/backward/sparse_bw_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
#include "paddle/phi/api/include/sparse_api.h"
DECLARE_bool(check_nan_inf);
// First-order backward for conv2d.
// Consumes grad(out) (grads slot 0) and produces grad(input) (returns slot
// 0) and grad(filter) (returns slot 1) via
// paddle::experimental::conv2d_grad. When `create_graph` is set and grad
// tracing is on, it also constructs a Conv2dDoubleGradNodeFinal so
// second-order derivatives can be computed.
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     egr::kSlotSmallVectorSize>
Conv2dGradNodeFinal::operator()(
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
  VLOG(3) << " Running Conv2dGradNodeFinal: " << this;
  // Apply gradient hooks registered on this node.
  auto hooked_grads = ApplyGradientHooks(grads);
  // Recover the forward tensors saved as TensorWrappers and grab the
  // attributes recorded at forward time.
  auto input = egr::EagerUtils::RecoverTensorWrapper(&this->input_);
  auto filter = egr::EagerUtils::RecoverTensorWrapper(&this->filter_);
  auto& grad_out = hooked_grads[0][0];
  auto& strides = this->strides_;
  auto& paddings = this->paddings_;
  auto& paddding_algorithm = this->paddding_algorithm_;
  auto& groups = this->groups_;
  auto& dilations = this->dilations_;
  auto& data_format = this->data_format_;
  auto& use_addto = this->use_addto_;
  auto& workspace_size_MB = this->workspace_size_MB_;
  auto& exhaustive_search = this->exhaustive_search_;
  // Prepare output buffers: slot 0 -> grad(input), slot 1 -> grad(filter).
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      returns(2);
  for (int i = 0; i < 2; ++i) {
    out_metas[i].size() == 0 ? returns[i].resize(1)
                             : returns[i].resize(out_metas[i].size());
  }
  // A null api_output_* tells conv2d_grad to skip computing that gradient
  // (no recorded meta, or stop-gradient set).
  auto* api_output_0 =
      (out_metas[0].empty() || out_metas[0][0].IsStopGradient())
          ? nullptr
          : &returns[0][0];
  auto* api_output_1 =
      (out_metas[1].empty() || out_metas[1][0].IsStopGradient())
          ? nullptr
          : &returns[1][0];
  // Runtime check: the next-order grad graph is only built when grad mode
  // is on AND the caller asked for create_graph.
  bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;
  // Call grad_api function
  VLOG(3) << "Final State Running: Conv2dGradNodeFinal";
  paddle::experimental::conv2d_grad(input,
                                    filter,
                                    grad_out,
                                    strides,
                                    paddings,
                                    paddding_algorithm,
                                    groups,
                                    dilations,
                                    data_format,
                                    use_addto,
                                    workspace_size_MB,
                                    exhaustive_search,
                                    api_output_0,
                                    api_output_1);
  // Check NaN and Inf if the runtime flag is set.
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("conv2d_grad", returns);
  }
  // Only initialized outputs get autograd meta; mark them as requiring grad
  // so the double-grad graph can extend from them.
  auto& grad_input = returns[0][0];
  egr::AutogradMeta* grad_input_autograd_meta =
      returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_input)
                                  : nullptr;
  if (grad_input_autograd_meta)
    grad_input_autograd_meta->SetStopGradient(false);
  VLOG(3) << "Conv2dGradNodeFinal grad_input_autograd_meta: "
          << grad_input_autograd_meta;
  auto& grad_filter = returns[1][0];
  egr::AutogradMeta* grad_filter_autograd_meta =
      returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&grad_filter)
                                  : nullptr;
  if (grad_filter_autograd_meta)
    grad_filter_autograd_meta->SetStopGradient(false);
  VLOG(3) << "Conv2dGradNodeFinal grad_filter_autograd_meta: "
          << grad_filter_autograd_meta;
  // Create the double-grad node when higher-order differentiation is needed.
  if (trace_backward) {
    paddle::platform::RecordEvent node_creation_record_event(
        "conv2d_grad node_creation",
        paddle::platform::TracerEventType::OperatorInner,
        1);
    // Node Construction: 2 grad-in slots (grad_input_grad, grad_filter_grad)
    // and 3 grad-out slots (input, filter, grad_out).
    auto grad_node = std::shared_ptr<Conv2dDoubleGradNodeFinal>(
        new Conv2dDoubleGradNodeFinal(2, 3));
    // SetAttributes: replay the same conv2d configuration.
    grad_node->SetAttributestrides(strides);
    grad_node->SetAttributepaddings(paddings);
    grad_node->SetAttributepaddding_algorithm(paddding_algorithm);
    grad_node->SetAttributegroups(groups);
    grad_node->SetAttributedilations(dilations);
    grad_node->SetAttributedata_format(data_format);
    grad_node->SetAttributeuse_addto(use_addto);
    grad_node->SetAttributeworkspace_size_MB(workspace_size_MB);
    grad_node->SetAttributeexhaustive_search(exhaustive_search);
    // Save tensors needed by the double-grad computation.
    grad_node->SetTensorWrapperinput(input);
    grad_node->SetTensorWrapperfilter(filter);
    grad_node->SetTensorWrappergrad_out(grad_out);
    // SetGradOutMeta & SetEdges.
    // NOTE(review): the guards below look crossed on purpose — the edge to
    // `input` is created when grad_filter requires grad, and the edges to
    // `filter`/`grad_out` when grad_input requires grad. This appears to
    // mirror the dependency structure of conv2d's double grad (grad_filter
    // depends on input; grad_input depends on filter and grad_out) — confirm
    // against the code generator before "fixing".
    if (grad_filter_autograd_meta) {
      grad_node->SetGradOutMeta(input, 0);
    }
    if (grad_input_autograd_meta) {
      grad_node->SetGradOutMeta(filter, 1);
      grad_node->SetGradOutMeta(grad_out, 2);
    }
    // SetOutRank & SetHistory & SetGradInMeta & RetainGrad
    if (grad_input_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(grad_input_autograd_meta, 0);
    }
    if (grad_filter_autograd_meta) {
      egr::EagerUtils::SetOutRankWithSlot(grad_filter_autograd_meta, 1);
    }
    if (grad_input_autograd_meta) {
      egr::EagerUtils::SetHistory(grad_input_autograd_meta, grad_node);
    }
    if (grad_filter_autograd_meta) {
      egr::EagerUtils::SetHistory(grad_filter_autograd_meta, grad_node);
    }
    grad_node->SetGradInMeta(grad_input, 0);
    grad_node->SetGradInMeta(grad_filter, 1);
    egr::EagerUtils::CheckAndRetainGrad(grad_input);
    egr::EagerUtils::CheckAndRetainGrad(grad_filter);
  }
  // Convert complex grads back to real if required, then return.
  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);
  return returns;
}
// Second-order (double) backward for conv2d.
// Grad inputs: grad(grad_input) (slot 0, optional) and grad(grad_filter)
// (slot 1, optional). Grad outputs: grad(input), grad(filter) and
// grad(grad_out). No further grad node is created here — third-order
// conv2d differentiation is not generated (conv2d_grad_grad is on the
// codegen black_ops_list).
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     egr::kSlotSmallVectorSize>
Conv2dDoubleGradNodeFinal::operator()(
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>& grads,
    bool create_graph,
    bool is_new_grad) {
  // Fill zeros into empty optional grad inputs so their metas are valid.
  const auto& input_metas = this->InputMeta();
  egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[0][0],
                                                     input_metas[0][0]);
  egr::EagerUtils::FillZeroForEmptyOptionalGradInput(&grads[1][0],
                                                     input_metas[1][0]);
  // Apply gradient hooks registered on this node.
  auto hooked_grads = ApplyGradientHooks(grads);
  // Recover tensors saved at first-order backward time.
  auto input = egr::EagerUtils::RecoverTensorWrapper(&this->input_);
  auto filter = egr::EagerUtils::RecoverTensorWrapper(&this->filter_);
  auto grad_out = egr::EagerUtils::RecoverTensorWrapper(&this->grad_out_);
  // Wrap each hooked grad in paddle::optional only when it is initialized;
  // an empty optional means "not provided" to conv2d_grad_grad.
  auto& grad_input_grad = hooked_grads[0][0];
  paddle::optional<paddle::experimental::Tensor> grad_input_grad_optional;
  if (grad_input_grad.initialized())
    grad_input_grad_optional =
        paddle::make_optional<paddle::experimental::Tensor>(grad_input_grad);
  auto& grad_filter_grad = hooked_grads[1][0];
  paddle::optional<paddle::experimental::Tensor> grad_filter_grad_optional;
  if (grad_filter_grad.initialized())
    grad_filter_grad_optional =
        paddle::make_optional<paddle::experimental::Tensor>(grad_filter_grad);
  // Attributes recorded when the double-grad node was created.
  auto& strides = this->strides_;
  auto& paddings = this->paddings_;
  auto& paddding_algorithm = this->paddding_algorithm_;
  auto& groups = this->groups_;
  auto& dilations = this->dilations_;
  auto& data_format = this->data_format_;
  auto& use_addto = this->use_addto_;
  auto& workspace_size_MB = this->workspace_size_MB_;
  auto& exhaustive_search = this->exhaustive_search_;
  // Prepare output buffers: slot 0 -> grad(input), slot 1 -> grad(filter),
  // slot 2 -> grad(grad_out).
  const auto& out_metas = OutputMeta();
  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
      returns(3);
  for (int i = 0; i < 3; ++i) {
    out_metas[i].size() == 0 ? returns[i].resize(1)
                             : returns[i].resize(out_metas[i].size());
  }
  // A null api_output_* tells conv2d_grad_grad to skip that gradient
  // (no recorded meta, or stop-gradient set).
  auto* api_output_0 =
      (out_metas[0].empty() || out_metas[0][0].IsStopGradient())
          ? nullptr
          : &returns[0][0];
  auto* api_output_1 =
      (out_metas[1].empty() || out_metas[1][0].IsStopGradient())
          ? nullptr
          : &returns[1][0];
  auto* api_output_2 =
      (out_metas[2].empty() || out_metas[2][0].IsStopGradient())
          ? nullptr
          : &returns[2][0];
  // Call grad_api function
  VLOG(3) << "Final State Running: Conv2dGradGradNodeFinal";
  paddle::experimental::conv2d_grad_grad(input,
                                         filter,
                                         grad_out,
                                         grad_input_grad_optional,
                                         grad_filter_grad_optional,
                                         strides,
                                         paddings,
                                         paddding_algorithm,
                                         groups,
                                         dilations,
                                         data_format,
                                         use_addto,
                                         workspace_size_MB,
                                         exhaustive_search,
                                         api_output_0,
                                         api_output_1,
                                         api_output_2);
  // Check NaN and Inf if the runtime flag is set.
  if (FLAGS_check_nan_inf) {
    egr::CheckTensorHasNanOrInf("conv2d_grad_grad", returns);
  }
  // Mark initialized outputs as participating in grad. No further grad node
  // is attached (see the note on third-order differentiation above).
  auto& input_grad = returns[0][0];
  egr::AutogradMeta* input_grad_autograd_meta =
      returns[0][0].initialized() ? egr::EagerUtils::autograd_meta(&input_grad)
                                  : nullptr;
  if (input_grad_autograd_meta)
    input_grad_autograd_meta->SetStopGradient(false);
  auto& filter_grad = returns[1][0];
  egr::AutogradMeta* filter_grad_autograd_meta =
      returns[1][0].initialized() ? egr::EagerUtils::autograd_meta(&filter_grad)
                                  : nullptr;
  if (filter_grad_autograd_meta)
    filter_grad_autograd_meta->SetStopGradient(false);
  auto& grad_out_grad = returns[2][0];
  egr::AutogradMeta* grad_out_grad_autograd_meta =
      returns[2][0].initialized()
          ? egr::EagerUtils::autograd_meta(&grad_out_grad)
          : nullptr;
  if (grad_out_grad_autograd_meta)
    grad_out_grad_autograd_meta->SetStopGradient(false);
  // Convert complex grads back to real if required, then return.
  if (NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);
  return returns;
}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/tensor_wrapper.h"
class Conv2dGradNodeFinal : public egr::GradNodeBase {
public:
Conv2dGradNodeFinal() : egr::GradNodeBase() {}
Conv2dGradNodeFinal(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
: egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
~Conv2dGradNodeFinal() override = default;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, // NOLINT
egr::kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, // NOLINT
bool is_new_grad = false) override; // NOLINT
std::string name() override { return "Conv2dGradNodeFinal"; }
void ClearTensorWrappers() override {
input_.clear();
filter_.clear();
SetIsTensorWrappersCleared(true);
}
std::shared_ptr<GradNodeBase> Copy() const override {
auto copied_node =
std::shared_ptr<Conv2dGradNodeFinal>(new Conv2dGradNodeFinal(*this));
VLOG(3) << "Copy Conv2dGradNodeFinal: " << this
<< " to: " << copied_node.get();
return copied_node;
}
// SetTensorWrapperX, SetTensorWrapperY, ...
void SetTensorWrapperinput(const paddle::experimental::Tensor& input) {
input_ = egr::TensorWrapper(input, false);
}
void SetTensorWrapperfilter(const paddle::experimental::Tensor& filter) {
filter_ = egr::TensorWrapper(filter, false);
}
// SetAttributes
void SetAttributestrides(const std::vector<int>& strides) {
strides_ = strides;
}
void SetAttributepaddings(const std::vector<int>& paddings) {
paddings_ = paddings;
}
void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) {
paddding_algorithm_ = paddding_algorithm;
}
void SetAttributegroups(const int& groups) { groups_ = groups; }
void SetAttributedilations(const std::vector<int>& dilations) {
dilations_ = dilations;
}
void SetAttributedata_format(const std::string& data_format) {
data_format_ = data_format;
}
void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; }
void SetAttributeworkspace_size_MB(const int& workspace_size_MB) {
workspace_size_MB_ = workspace_size_MB;
}
void SetAttributeexhaustive_search(const bool& exhaustive_search) {
exhaustive_search_ = exhaustive_search;
}
private:
// TensorWrappers
egr::TensorWrapper input_;
egr::TensorWrapper filter_;
// Attributes
std::vector<int> strides_;
std::vector<int> paddings_;
std::string paddding_algorithm_;
int groups_;
std::vector<int> dilations_;
std::string data_format_;
bool use_addto_;
int workspace_size_MB_;
bool exhaustive_search_;
};
class Conv2dDoubleGradNodeFinal : public egr::GradNodeBase {
public:
Conv2dDoubleGradNodeFinal() : egr::GradNodeBase() {}
Conv2dDoubleGradNodeFinal(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
: egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
~Conv2dDoubleGradNodeFinal() override = default;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, // NOLINT
egr::kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false, // NOLINT
bool is_new_grad = false) override; // NOLINT
std::string name() override { return "Conv2dDoubleGradNodeFinal"; }
void ClearTensorWrappers() override {
input_.clear();
filter_.clear();
grad_out_.clear();
SetIsTensorWrappersCleared(true);
}
std::shared_ptr<GradNodeBase> Copy() const override {
auto copied_node = std::shared_ptr<Conv2dDoubleGradNodeFinal>(
new Conv2dDoubleGradNodeFinal(*this));
return copied_node;
}
// SetTensorWrapperX, SetTensorWrapperY, ...
void SetTensorWrapperinput(const paddle::experimental::Tensor& input) {
input_ = egr::TensorWrapper(input, false);
}
void SetTensorWrapperfilter(const paddle::experimental::Tensor& filter) {
filter_ = egr::TensorWrapper(filter, false);
}
void SetTensorWrappergrad_out(const paddle::experimental::Tensor& grad_out) {
grad_out_ = egr::TensorWrapper(grad_out, false);
}
// SetAttributes
void SetAttributestrides(const std::vector<int>& strides) {
strides_ = strides;
}
void SetAttributepaddings(const std::vector<int>& paddings) {
paddings_ = paddings;
}
void SetAttributepaddding_algorithm(const std::string& paddding_algorithm) {
paddding_algorithm_ = paddding_algorithm;
}
void SetAttributegroups(const int& groups) { groups_ = groups; }
void SetAttributedilations(const std::vector<int>& dilations) {
dilations_ = dilations;
}
void SetAttributedata_format(const std::string& data_format) {
data_format_ = data_format;
}
void SetAttributeuse_addto(const bool& use_addto) { use_addto_ = use_addto; }
void SetAttributeworkspace_size_MB(const int& workspace_size_MB) {
workspace_size_MB_ = workspace_size_MB;
}
void SetAttributeexhaustive_search(const bool& exhaustive_search) {
exhaustive_search_ = exhaustive_search;
}
private:
// TensorWrappers
egr::TensorWrapper input_;
egr::TensorWrapper filter_;
egr::TensorWrapper grad_out_;
// Attributes
std::vector<int> strides_;
std::vector<int> paddings_;
std::string paddding_algorithm_;
int groups_;
std::vector<int> dilations_;
std::string data_format_;
bool use_addto_;
int workspace_size_MB_;
bool exhaustive_search_;
};
......@@ -40,6 +40,8 @@ from codegen_utils import AssertMessage, GetIndent
# keeping the code compatible, here we also skip inplace check in new dygraph temporarily,
# and this will be fixed in the future.
inplace_check_blacklist = set(["assign_out_"])
# --- Black Ops list: ops that do NOT need backward code generation
black_ops_list = ["conv2d", "conv2d_grad", "conv2d_grad_grad"]
###########
......@@ -154,9 +156,7 @@ paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallV
{}
// Prepare Grad function call
{}
// Get GradIn autograd_meta
{}
// Compute Require Grad
// Runtime check if we need next grad
{}
// Inplace Check
{}
......@@ -229,6 +229,27 @@ FORWARD_BODY_TEMPLATE = \
}}
"""
HIHGER_ORDER_DERIVATIVE_VALUE_TEMPLATE = \
""" if(trace_backward) {{
{}
// Node Construction
{}
// SetAttributes if needed
{}
// Set TensorWrappers for Forward Inputs if needed
{}
// SetGradOutMeta & SetEdges
{}
// SetOutRank & SetHistory & SetGradInMeta & RetainGrad
{}
{}
{}
{}
// Set TensorWrappers for Forward Outputs if needed
{}
}}
"""
NAMESPACE_WRAPPER_TEMPLATE = \
"""
namespace {} {{
......@@ -252,7 +273,7 @@ NODE_CC_FILE_TEMPLATE = \
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/phi/api/include/sparse_api.h"
#include "paddle/fluid/eager/api/manual/eager_manual/nodes/nodes.h"
DECLARE_bool(check_nan_inf);
{}
"""
......@@ -279,7 +300,7 @@ FORWARD_CC_FILE_TEMPLATE = \
#include "paddle/fluid/eager/eager_amp_auto_cast.h"
#include "paddle/phi/backends/gpu/gpu_info.h"
#include "paddle/fluid/eager/nan_inf_utils.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
DECLARE_bool(check_nan_inf);
{}
{}
......@@ -294,7 +315,7 @@ FORWARD_H_FILE_TEMPLATE = \
#include "paddle/fluid/eager/utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/eager/to_static/run_program_op_func.h"
#include "paddle/fluid/eager/api/manual/eager_manual/dygraph_forward_api.h"
{}
{}
"""
......@@ -584,7 +605,6 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
self.backward_api_name = forward_api_contents['backward']
self.backward_forward_str = grad_api_contents['forward']
backward_args_str = grad_api_contents['args']
backward_returns_str = grad_api_contents['output']
......@@ -663,7 +683,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
backward_output_pos
]
def GenerateNodeCreationCodes(self):
def GenerateNodeCreationCodes(self, for_backward=False):
forward_api_name = self.forward_api_name
forward_inputs_position_map = self.forward_inputs_position_map
forward_outputs_position_map = self.forward_outputs_position_map
......@@ -794,13 +814,21 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
node_event_name = forward_api_name + " node_creation"
node_creation_event_str = f"{indent}paddle::platform::RecordEvent node_creation_record_event(\"{node_event_name}\", paddle::platform::TracerEventType::OperatorInner, 1);\n"
if not for_backward:
self.node_creation_str = FORWARD_BODY_TEMPLATE.format(
node_creation_event_str, pass_stop_gradient_args_str,
node_construction_str, set_attributes_str,
set_input_tensor_wrappers_str, set_grad_out_meta_str,
set_out_rank_str, set_history_str, set_grad_in_meta_str,
set_retain_grad_str, set_output_tensor_wrappers_str)
else:
self.node_creation_str = HIHGER_ORDER_DERIVATIVE_VALUE_TEMPLATE.format(
node_creation_event_str, node_construction_str,
set_attributes_str, set_input_tensor_wrappers_str,
set_grad_out_meta_str, set_out_rank_str, set_history_str,
set_grad_in_meta_str, set_retain_grad_str,
set_output_tensor_wrappers_str)
self.grad_node_out_list = grad_node_out_list
def run(self):
......@@ -1234,7 +1262,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
next_node_generator = DygraphFunctionGeneratorBase(
forward_api_contents, backward_api_contents, namespace)
next_node_generator.run()
next_node_generator.GenerateNodeCreationCodes()
next_node_generator.GenerateNodeCreationCodes(True)
next_grad_node_creation_str = next_node_generator.node_creation_str
next_grad_node_out_list = next_node_generator.grad_node_out_list
......@@ -1342,6 +1370,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
inplace_grad_input_str = ""
inplaced_tensor_wrapper = False
inplace_check_str = ""
optional_inplace_var_name = []
# Grad Ins from TensorWrappers
for name, (_, is_fwd_input,
grad_api_position), in backward_forward_inputs_map.items():
......@@ -1351,6 +1380,13 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
is_optional = (name in self.optional_inputs)
tensor_wrapper_recover_str = f"{indent}auto {transformed_tensor_name} = egr::EagerUtils::RecoverTensorWrapper(&this->{tensor_wrapper_name});"
if backward_inplace_map and name in backward_inplace_map.keys():
if len(next_grad_node_creation_str) > 0:
if (transformed_tensor_name
in backward_forward_inputs_map_next) and (
backward_forward_inputs_map_next[
transformed_tensor_name][1]):
optional_inplace_var_name.append(
transformed_tensor_name)
tensor_wrapper_intermidiate_tensor_str = f"(&this->{tensor_wrapper_name})->get_intermidiate_tensor()"
inplace_check_str += CHECK_BACKWARD_INPLACE_TEMPLATE.format(
transformed_tensor_name, transformed_tensor_name, name,
......@@ -1371,7 +1407,6 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
get_grad_in_args_list.append(tensor_wrapper_recover_str)
optional_inplace_check = False
# Grad Ins from grads
for name, (ttype, fwd_position,
grad_api_position) in backward_grad_inputs_map.items():
......@@ -1388,7 +1423,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
in backward_forward_inputs_map_next) and (
backward_forward_inputs_map_next[
transformed_tensor_name][1]):
optional_inplace_check = False
optional_inplace_var_name.append(
transformed_tensor_name)
grads_tensor_str = f"grads[{fwd_position}][0]"
inplace_check_str += CHECK_BACKWARD_INPLACE_TEMPLATE.format(
transformed_tensor_name, transformed_tensor_name, name,
......@@ -1441,8 +1477,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
transformed_tensor_name = self.TransformToNextGradName(name)
out_index = out_index + 1
grad_api_args.append(f"api_output_{out_index}")
if not optional_inplace_check:
optional_inplace_str = "VLOG(6) << \"No Inplace should happend for wrappered input\";"
if inplace_grad_input_str in optional_inplace_var_name:
optional_inplace_str = "VLOG(6) << \"No Inplace should happend for wrappered input: {inplace_grad_input_str}\";"
else:
optional_inplace_str = f"""if (api_output_{out_index} != nullptr && can_be_inplaced) {{
egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index});
......@@ -1451,15 +1487,15 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
if backward_inplace_map and name in backward_inplace_map.values(
):
inplace_str = f"""if (api_output_{out_index} != nullptr && can_be_inplaced) {{
inplace_str = f""" if (api_output_{out_index} != nullptr && can_be_inplaced) {{
egr::EagerUtils::HandleViewBetweenInputAndOutput({inplace_grad_input_str}, api_output_{out_index});
}}"""
if len(next_grad_node_creation_str) > 0:
inplace_for_grad_outs_str += f"""
if (!require_any_grad) {{
{inplace_str}
}}else{{
if (trace_backward) {{
{optional_inplace_str}
}} else {{
{inplace_str}
}}"""
else:
inplace_for_grad_outs_str += inplace_str
......@@ -1490,57 +1526,15 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
backward_api_name, "returns")
# Prepare for Node Creation if Necessary
inputs_autograd_meta_str = ""
outputs_autograd_meta_str = ""
compute_require_grad_str = ""
compute_require_next_grad_str = ""
if len(next_grad_node_creation_str) > 0:
# 1. Get Grad Input AutoGradMeta
inputs_autograd_meta_list = []
compute_require_grad_args_list = ["trace_backward"]
for name, (ttype, pos,
grad_api_position) in backward_grad_inputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name)
if transformed_tensor_name in next_grad_node_out_list:
input_autograd_meta_name = GetAutoGradMetaName(
transformed_tensor_name)
if IsPlainTensorType(ttype):
input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});"
else:
assert IsVectorTensorType(ttype)
input_autograd_meta_vec_name = GetAutoGradMetaVectorName(
transformed_tensor_name)
input_autograd_meta = f"{indent}std::vector<egr::AutogradMeta*> {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n"
input_autograd_meta += f"{indent}std::vector<egr::AutogradMeta*>* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};"
inputs_autograd_meta_list.append(input_autograd_meta)
compute_require_grad_args_list.append(
input_autograd_meta_name)
# 2. Get TensorWrapper AutoGradMeta
for name, (ttype, _, pos), in backward_forward_inputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name)
if transformed_tensor_name in next_grad_node_out_list:
input_autograd_meta_name = GetAutoGradMetaName(
transformed_tensor_name)
if IsPlainTensorType(ttype):
input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});"
else:
assert IsVectorTensorType(ttype)
input_autograd_meta_vec_name = GetAutoGradMetaVectorName(
transformed_tensor_name)
input_autograd_meta = f"{indent}std::vector<egr::AutogradMeta*> {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n"
input_autograd_meta += f"{indent}std::vector<egr::AutogradMeta*>* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};"
inputs_autograd_meta_list.append(input_autograd_meta)
compute_require_grad_args_list.append(
input_autograd_meta_name)
inputs_autograd_meta_str = "\n".join(inputs_autograd_meta_list)
compute_require_grad_args_str = ",".join(
compute_require_grad_args_list)
compute_require_next_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n"
# 3. Get Output AutoGradMeta
outputs_autograd_meta_list = []
# TODO(jiabin): Optimize this with SetStopGradient instead of Pass Stop gradient
num_fwd_outputs = len(backward_grad_outputs_map.keys())
for name, (rtype, pos,
grad_api_position) in backward_grad_outputs_map.items():
......@@ -1553,21 +1547,32 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
if IsPlainTensorType(rtype):
output_autograd_meta = f"""
auto& {transformed_tensor_name} = returns[{pos}][0];
egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr;"""
egr::AutogradMeta* {output_autograd_meta_name} = returns[{pos}][0].initialized() ? egr::EagerUtils::autograd_meta(&{transformed_tensor_name}) : nullptr;
if ({output_autograd_meta_name}) {output_autograd_meta_name}->SetStopGradient(false);
"""
else:
assert IsVectorTensorType(rtype)
if len(next_grad_node_creation_str) > 0:
output_autograd_meta = f"""
auto& {transformed_tensor_name} = returns[{pos}];
std::vector<egr::AutogradMeta*> {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});
std::vector<egr::AutogradMeta*>* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};
for(auto* meta : {output_autograd_meta_vec_name}){{
meta->SetStopGradient(false);
}}
"""
else:
output_autograd_meta = f"""
auto& {transformed_tensor_name} = returns[{pos}];
std::vector<egr::AutogradMeta*> {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});
for(auto* meta : {output_autograd_meta_vec_name}){{
meta->SetStopGradient(false);
}}
"""
outputs_autograd_meta_list.append(output_autograd_meta)
outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list)
compute_require_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n"
compute_require_grad_str += f"{indent}bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({compute_require_grad_args_str});"
outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list)
returns_str = f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n"
returns_str += f"{indent}return returns;\n"
......@@ -1576,11 +1581,10 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
self.node_definition_str = GRAD_FUNCTION_TEMPLATE.format(
grad_node_name, fill_zero_str, get_grad_in_args_str,
grad_function_prepare_str, inputs_autograd_meta_str,
compute_require_grad_str, inplace_check_str,
inplace_for_grad_outs_str, grad_node_name, grad_function_call_str,
check_nan_inf_str, outputs_autograd_meta_str,
next_grad_node_creation_str, returns_str)
grad_function_prepare_str, compute_require_next_grad_str,
inplace_check_str, inplace_for_grad_outs_str, grad_node_name,
grad_function_call_str, check_nan_inf_str,
outputs_autograd_meta_str, next_grad_node_creation_str, returns_str)
def run(self):
super().run()
......@@ -1631,6 +1635,7 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
if 'backward' not in forward_api_contents.keys(): return None
backward_api_name = forward_api_contents['backward']
if backward_api_name in black_ops_list: return None
assert backward_api_name in grad_api_dict.keys(), AssertMessage(
backward_api_name, grad_api_dict.keys())
backward_api_contents = grad_api_dict[backward_api_name]
......@@ -1646,7 +1651,7 @@ class DygraphForwardAndNodesGenerator(GeneratorBase):
backward_api_contents = self.GetBackwardAPIContents(
forward_api_contents)
if backward_api_contents is None: continue
if forward_api_contents['api'] in black_ops_list: continue
# Generate Dygraph Forward Function
function_generator = DygraphForwardFunctionGenerator(
forward_api_contents, backward_api_contents, namespace)
......
......@@ -52,7 +52,14 @@ class GeneralGrad {
AutogradMeta* auto_grad_meta =
EagerUtils::unsafe_autograd_meta(inputs[i]);
auto* target_node = auto_grad_meta->GetMutableGradNode().get();
VLOG(8) << "Get no grad vars' grad_node: " << target_node->name()
<< ", " << target_node << " with output rank info: "
<< auto_grad_meta->OutRankInfo().first << ", "
<< auto_grad_meta->OutRankInfo().second;
if (is_no_grad_vars) {
(no_grad_var_nodes_inputmeta_map_)[target_node] = auto_grad_meta;
continue;
}
if (orig_to_copied_node_mapping_.count(target_node)) {
target_node = orig_to_copied_node_mapping_[target_node].get();
} else {
......@@ -67,14 +74,11 @@ class GeneralGrad {
"stop_gradient=True.",
msg,
i));
if (is_no_grad_vars) {
(no_grad_var_nodes_inputmeta_map_)[target_node] = auto_grad_meta;
} else { // normal input
// normal input
(input_target_nodes_inputmeta_map_)[target_node] = auto_grad_meta;
}
}
}
}
// Purify potential_startup_nodes_, remove nodes those are the same as
// input_target_nodes
......@@ -305,8 +309,6 @@ class GeneralGrad {
const std::unordered_map<GradNodeBase*,
std::unique_ptr<GradTensorHolder>>&
node_input_buffers_dict) {
// Get no_grad_vars's GradNodes and InputMeta Info
GetTargetNodesInfo(no_grad_vars, true /* is_no_grad_vars */);
// Get inputs's GradNodes and InputMeta Info
GetTargetNodesInfo(inputs, false /* is_no_grad_vars */);
// Purify potentialstartup_ops, remove those nodes that are the same as
......@@ -402,6 +404,21 @@ class GeneralGrad {
std::shared_ptr<GradNodeBase> orig_next_node =
orig_edge.GetMutableGradNode();
if (no_grad_var_nodes_inputmeta_map_.count(orig_next_node.get()) &&
(no_grad_var_nodes_inputmeta_map_[orig_next_node.get()]
->OutRankInfo() == orig_edge.GetEdgeRankInfo())) {
VLOG(3) << "Get no grad edge from grad_node: " << orig_node->name()
<< " : " << orig_node << " to:" << orig_next_node->name()
<< ", " << orig_next_node.get()
<< " with output rank info: "
<< orig_edge.GetEdgeRankInfo().first << ", "
<< orig_edge.GetEdgeRankInfo().second;
// Stop no grad var's preceding node
copied_node->MutableOutputMeta()[i][j].SetStopGradient(true);
copied_edge.Clear();
continue;
}
if (!orig_next_node) continue;
// Copy Next Node
......@@ -638,6 +655,9 @@ std::vector<paddle::experimental::Tensor> RunBackward(
}
if (is_general_grad) {
// Get no_grad_vars's GradNodes and InputMeta Info
GeneralGrad::Instance().GetTargetNodesInfo(no_grad_vars,
true /* is_no_grad_vars */);
// Copy Backward Graph
GeneralGrad::Instance().ReconstructBackwardGraph(orig_queue);
}
......@@ -696,19 +716,6 @@ std::vector<paddle::experimental::Tensor> RunBackward(
node);
}
// no_grad_vars
if (!no_grad_vars.empty() && is_general_grad) {
auto iter =
GeneralGrad::Instance().GetNoGradVarNodesInputMetaMap()->find(node);
if (iter !=
GeneralGrad::Instance().GetNoGradVarNodesInputMetaMap()->end()) {
VLOG(6) << "Change the input buffer[slot][rank] by Zeros";
auto rank_info = (iter->second)->OutRankInfo();
node_input_buffer->SetBufferSlotRankZeros(rank_info.first,
rank_info.second);
}
}
// Check input
EnforceGradNodeHasInput(node);
......@@ -750,7 +757,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
// Since we make edge has as same rank as bwd outputs, we indexing them
// with the same rank(i, j)
auto next_node_shared = edge.GetMutableGradNode();
VLOG(3) << "Found pending node: " << next_node_shared->name();
VLOG(3) << "Found pending node: " << next_node_shared->name() << ": "
<< next_node_shared.get();
// Next node could be nullptr if it is leaf tensor with no
// AccumulationNode attached
// Or it could also originated from dispensable inputs
......@@ -800,6 +808,8 @@ std::vector<paddle::experimental::Tensor> RunBackward(
// Update queue
node_in_degree_map[next_node]--;
VLOG(6) << next_node->name()
<< " ref_cnt is: " << node_in_degree_map[next_node];
PADDLE_ENFORCE(
node_in_degree_map[next_node] >= 0,
......
......@@ -106,6 +106,12 @@ class Edge {
}
}
// Detach this edge from the autograd graph: release the shared ownership of
// the next GradNode and reset the (slot, rank) coordinates to their defaults.
// Used to sever edges that lead into no_grad_vars so backward stops there.
void Clear() {
grad_node_.reset();
in_slot_id_ = 0;
in_rank_ = 0;
}
private:
size_t in_slot_id_;
size_t in_rank_;
......
......@@ -24,6 +24,7 @@
namespace egr {
void GradTensorHolder::SetBufferSlotRankZeros(size_t slot_id, size_t rank) {
  // Replace the gradient buffer entry at [slot_id][rank] with an all-zero
  // tensor of the same shape/dtype (zeros_like). Used to blank out the
  // gradient contribution of no_grad_vars.
  buffer_[slot_id][rank] =
      paddle::experimental::zeros_like(buffer_[slot_id][rank]);
}
......@@ -59,8 +60,15 @@ void GradTensorHolder::CopyValueFromTensor(
if ((!buffer_tensor.defined() || !buffer_tensor.initialized())) {
// Perform deep copy here
buffer_tensor.copy_(t, t.place(), false);
buffer_tensor.set_autograd_meta(t.mutable_autograd_meta());
auto* meta = egr::EagerUtils::autograd_meta(&buffer_tensor);
auto* origin_meta = egr::EagerUtils::nullable_autograd_meta(t);
if (origin_meta) {
auto grad_node = origin_meta->GetMutableGradNode();
if (grad_node && grad_node.get()) {
meta->SetGradNode(origin_meta->GetMutableGradNode());
}
meta->WeakGrad() = origin_meta->WeakGrad();
}
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Cannot copy grad_tensors' value to grad tensor holders,"
......@@ -81,10 +89,10 @@ void GradTensorHolder::CopyValueFromTensor(
"Only Support DENSE_TENSOR, SPARSE_COO_TENSOR, SPARSE_CSR_TENSOR "
"now."));
}
egr::EagerUtils::autograd_meta(&(buffer_[slot_id][rank]))
->SetStopGradient(false);
}
}
egr::EagerUtils::autograd_meta(&(buffer_[slot_id][rank]))
->SetStopGradient(false);
}
void GradTensorHolder::add(size_t slot_id,
......
......@@ -28,6 +28,7 @@
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/grad_node_info.h"
#include "paddle/fluid/eager/utils.h"
#include "paddle/phi/api/lib/utils/allocator.h"
namespace egr {
class TensorWrapper {
......@@ -57,9 +58,12 @@ class TensorWrapper {
// Only Copy Meta
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(tensor.impl().get());
auto tw_dense_tensor = std::make_shared<phi::DenseTensor>();
tw_dense_tensor->set_meta(dense_tensor->meta());
intermidiate_tensor_.set_impl(tw_dense_tensor);
// TODO(jiabin): It's not a good idea to set memory size to zero, find
// another way and change this.
intermidiate_tensor_.set_impl(
std::move(std::make_shared<phi::DenseTensor>(
std::make_shared<phi::Allocation>(nullptr, 0, tensor.place()),
std::move(dense_tensor->meta()))));
} else {
PADDLE_THROW(paddle::platform::errors::Fatal(
"Unrecognized tensor type for no_need_buffer feature"));
......
......@@ -98,6 +98,7 @@ static void GetGraphInfoBetweenTargets(
auto &grad_node = output_target->GradVarBase()->GradNode();
if (visited.count(grad_node.get()) == 0) {
for (auto &op : *grad_node) {
VLOG(10) << "Pushed op: " << op.Type();
q.emplace(&op, grad_node.get());
}
}
......@@ -141,6 +142,8 @@ static void GetGraphInfoBetweenTargets(
for (auto &pending_node : node->GradPendingNodes()) {
for (auto &pending_op : *pending_node) {
preceding_ops[&pending_op].insert(op);
VLOG(10) << "Find preceding op of: " << pending_op.Type()
<< " is: " << op->Type();
}
if (visited.count(pending_node.get()) == 0) {
visited.insert(pending_node.get());
......@@ -175,6 +178,7 @@ static void GetGraphInfoBetweenTargets(
std::queue<std::pair<OpBase * /*op*/, OpBase * /*pending op*/>> op_queue;
std::unordered_set<std::pair<OpBase *, OpBase *>, HashPair> op_base_visited;
for (auto &endpoint_op : endpoint_ops) {
VLOG(10) << "Emplaced endpoint op: " << endpoint_op->Type();
op_queue.emplace(endpoint_op, nullptr);
op_base_visited.emplace(endpoint_op, nullptr);
}
......@@ -186,14 +190,18 @@ static void GetGraphInfoBetweenTargets(
op_queue.pop();
VLOG(10) << "Get op: " << op->Type();
bool is_valid = false;
for (auto &output_pair : op->GetOutsMap()) {
if (!output_pair.second.IsGrad()) {
VLOG(10) << "Continueded output for : " << op->Type();
continue;
}
for (auto &out_var : output_pair.second) {
if (out_var && target_vars.count(out_var.get()) > 0) {
VLOG(10) << "Find target output for : " << op->Type();
is_valid = true;
break;
}
......@@ -211,11 +219,13 @@ static void GetGraphInfoBetweenTargets(
is_valid = false;
for (auto &input_pair : op->GetInsMap()) {
if (!input_pair.second.IsGrad()) {
VLOG(10) << "Continueded input for : " << op->Type();
continue;
}
for (auto &in_var : input_pair.second) {
if (in_var && no_grad_var_grad.count(in_var.get()) == 0) {
VLOG(10) << "Find not no grad var in input for : " << op->Type();
target_vars.insert(in_var.get());
is_valid = true;
}
......@@ -240,7 +250,10 @@ static void GetGraphInfoBetweenTargets(
auto iter = preceding_ops.find(op);
if (iter != preceding_ops.end()) {
for (auto &preceding_op : iter->second) {
VLOG(10) << "Scan preceding op: " << preceding_op->Type() << " for "
<< op->Type();
if (op_base_visited.count(std::make_pair(preceding_op, op)) == 0) {
VLOG(10) << "Emplace op: " << preceding_op->Type();
op_queue.emplace(preceding_op, op);
op_base_visited.emplace(preceding_op, op);
}
......@@ -648,6 +661,7 @@ PartialGradTask::PartialGradTask(
platform::errors::Unimplemented(
"only_inputs=False is not supported yet"));
VLOG(10) << "no_grad_vars size: " << no_grad_vars.size();
for (auto &var : no_grad_vars) {
if (var && var->GradVarBase()) {
no_grad_var_grad_.insert(var->GradVarBase()->SharedVar().get());
......@@ -853,6 +867,7 @@ std::vector<std::shared_ptr<VarBase>> PartialGradTask::Run() {
}
for (auto &pending_op : iter->second) {
VLOG(10) << "Find pending op" << pending_op->Type();
auto dep_iter = op_deps_.find(pending_op);
PADDLE_ENFORCE_EQ(
dep_iter != op_deps_.end(),
......@@ -862,6 +877,7 @@ std::vector<std::shared_ptr<VarBase>> PartialGradTask::Run() {
if (--(dep_iter->second) == 0) {
q.push(pending_op);
}
VLOG(10) << "Pending op deps: " << dep_iter->second;
}
}
......
......@@ -82,7 +82,7 @@ std::shared_ptr<NameVarMap<VarType>> PrepareData(
auto& template_var = name_pair.second[i];
SetForwardDataTypeOfGradVar(template_var);
const auto* tensor = GetTensorFromVar(template_var->Var());
if (tensor && tensor->IsInitialized()) {
if (tensor && tensor->IsInitialized() && (tensor->memory_size() != 0)) {
auto kernel_type_for_var = op.GetKernelTypeForVar(
name_pair.first, *tensor, expected_kernel_key);
if (!NeedTransform(kernel_type_for_var, expected_kernel_key)) {
......@@ -91,7 +91,8 @@ std::shared_ptr<NameVarMap<VarType>> PrepareData(
VLOG(3) << "Transform Variable " << GetNameFromVar(template_var)
<< " from " << kernel_type_for_var << " to "
<< expected_kernel_key;
VLOG(3) << GetNameFromVar(template_var)
<< " memory size is: " << tensor->memory_size();
if (CheckCachedKey(template_var, expected_kernel_key)) {
VLOG(3) << "Hit variable_wrapper cache: key="
<< expected_kernel_key;
......@@ -634,7 +635,8 @@ void PreparePhiData(const phi::Kernel& phi_kernel,
for (size_t offset = 0; offset < ins_vector.size(); ++offset) {
auto& var = ins_vector[offset];
const auto* tensor_in = GetTensorFromVar(var->Var());
if (tensor_in && tensor_in->IsInitialized()) {
if (tensor_in && tensor_in->IsInitialized() &&
(tensor_in->memory_size() != 0)) {
if (in_def.backend == phi::Backend::ALL_BACKEND) {
continue;
}
......
......@@ -98,10 +98,11 @@ void EmptyTensorInitializer(TensorObject* self,
}
if (!autograd_meta->GetMutableGradNode()) {
VLOG(3) << "Tensor(" << name
<< ") have not GradNode, add GradNodeAccumulation for it.";
autograd_meta->SetGradNode(
std::make_shared<egr::GradNodeAccumulation>(autograd_meta));
VLOG(3) << "Tensor(" << name
<< ") have not GradNode, add GradNodeAccumulation"
<< autograd_meta->GradNode() << " for it.";
}
}
......
......@@ -95,6 +95,7 @@ PyObject* tensor_properties_get_grad(TensorObject* self, void* closure) {
EAGER_TRY
VLOG(6) << "Get grad for tensor: " << self->tensor.name();
auto meta = egr::EagerUtils::nullable_autograd_meta(self->tensor);
VLOG(6) << meta << " initialized: " << meta->Grad().initialized();
if (meta && meta->Grad().initialized()) {
return ToPyObject(meta->Grad());
} else {
......
......@@ -257,8 +257,8 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp}
${bw_api_source_file}
COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}"
DEPENDS ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} ${bw_api_gen_file}
${api_gen_base}
DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base}
${legacy_bw_api_yaml_file}
VERBATIM)
# generate sparse api
......
......@@ -133,6 +133,17 @@
func : asinh_grad
inplace : (out_grad -> x_grad)
- backward_api : assign_double_grad
forward : assign_grad (Tensor grad_out) -> Tensor(grad_x)
args : (Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
kernel :
func : assign
backward: assign_triple_grad
inplace : (grad_x_grad -> grad_out_grad)
- backward_api : assign_grad
forward : assign (Tensor x) -> Tensor(out)
args : (Tensor out_grad)
......@@ -141,6 +152,7 @@
func : UnchangedInferMeta
kernel :
func : assign
backward: assign_double_grad
inplace : (out_grad -> x_grad)
- backward_api : assign_out__grad
......@@ -153,6 +165,16 @@
func : assign
inplace : (out_grad -> x_grad)
- backward_api : assign_triple_grad
forward : assign_double_grad (Tensor grad_out) -> Tensor(grad_x)
args : (Tensor grad_x_grad)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
kernel :
func : assign
inplace : (grad_x_grad -> grad_out_grad)
- backward_api : atan_grad
forward : atan (Tensor x) -> Tensor(out)
args : (Tensor x, Tensor out_grad)
......@@ -1823,6 +1845,16 @@
func : sinh_grad
inplace : (out_grad -> x_grad)
- backward_api : slice_double_grad
forward : slice_grad (Tensor input, Tensor grad_out, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(grad_input)
args : (Tensor grad_input_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis)
output : Tensor(grad_out_grad)
infer_meta :
func : UnchangedInferMeta
param : [grad_input_grad]
kernel :
func : slice
- backward_api : slice_grad
forward : slice (Tensor input, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis) -> Tensor(out)
args : (Tensor input, Tensor out_grad, int64_t[] axes, IntArray starts, IntArray ends, int64_t[] infer_flags, int64_t[] decrease_axis)
......@@ -1832,6 +1864,7 @@
param : [input]
kernel :
func : slice_grad
backward : slice_double_grad
no_need_buffer : input
- backward_api : soft_shrink_grad
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册