Unverified · Commit 2bee99df, authored by J Jiabin Yang, committed by GitHub

Using small vector for slot and merge edge into grad_slot_meta (#42350)

Parent: 27cf7afb
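The diff below touches many files, but the pattern is the same everywhere: the nested std::vector<std::vector<...>> slot containers become paddle::small_vector<std::vector<...>, kSlotSmallVectorSize>, and the adjacency edges that used to live in GradNodeBase::adj_edges_ now live inside each GradSlotMeta, so traversal code reads node->OutputMeta()[i][j].GetEdge() instead of node->GetEdges()[i][j]. A minimal, self-contained sketch of that access-pattern change follows; EdgeStub, GradSlotMetaStub, and GradNodeStub are simplified stand-ins invented for illustration, not Paddle's actual declarations (those appear in the grad_node_info.h hunks further down).

#include <cstddef>
#include <iostream>
#include <memory>
#include <utility>
#include <vector>

// Simplified stand-ins for the types touched by this commit (illustration only).
struct GradNodeStub;

struct EdgeStub {  // plays the role of egr::Edge
  std::shared_ptr<GradNodeStub> grad_node;
  std::pair<std::size_t, std::size_t> rank_info{0, 0};
};

struct GradSlotMetaStub {  // plays the role of egr::GradSlotMeta
  bool stop_gradient{false};
  EdgeStub adj_edge;  // the edge now lives inside the slot meta
  const EdgeStub& GetEdge() const { return adj_edge; }
};

struct GradNodeStub {  // plays the role of egr::GradNodeBase
  // One inner vector per output slot; the real code uses paddle::small_vector
  // so a small number of slots stays in inline storage.
  std::vector<std::vector<GradSlotMetaStub>> bwd_out_meta;
  const std::vector<std::vector<GradSlotMetaStub>>& OutputMeta() const {
    return bwd_out_meta;
  }
};

int main() {
  GradNodeStub node;
  node.bwd_out_meta.resize(2);     // two output slots
  node.bwd_out_meta[0].resize(1);  // slot 0 holds one tensor
  node.bwd_out_meta[0][0].adj_edge.grad_node = std::make_shared<GradNodeStub>();

  // New traversal pattern: edges are reached through OutputMeta(), replacing
  // the old node->GetEdges()[i][j] access.
  const auto& metas = node.OutputMeta();
  for (std::size_t i = 0; i < metas.size(); ++i) {
    for (std::size_t j = 0; j < metas[i].size(); ++j) {
      const EdgeStub& edge = metas[i][j].GetEdge();
      std::cout << "slot " << i << " rank " << j << " has next node: "
                << (edge.grad_node ? "yes" : "no") << "\n";
    }
  }
  return 0;
}

As the commit title says, keeping the edge next to its slot meta removes one parallel data structure, and the small-vector outer container avoids a heap allocation when a node has only a handful of slots.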
@@ -447,10 +447,12 @@ void EagerReducer::TraverseBackwardGraph(const std::vector<Tensor> &outputs) {
   while (!queue.empty()) {
     egr::GradNodeBase *node = queue.front();
     queue.pop();
-    const std::vector<std::vector<egr::Edge>> &edges = node->GetEdges();
-    for (size_t i = 0; i < edges.size(); i++) {
-      for (size_t j = 0; j < edges[i].size(); j++) {
-        const egr::Edge &edge = edges[i][j];
+    const paddle::small_vector<std::vector<egr::GradSlotMeta>,
+                               egr::kSlotSmallVectorSize> &metas =
+        node->OutputMeta();
+    for (size_t i = 0; i < metas.size(); i++) {
+      for (size_t j = 0; j < metas[i].size(); j++) {
+        const egr::Edge &edge = metas[i][j].GetEdge();
         auto next_node_shared = edge.GetMutableGradNode();
         if (!next_node_shared || !next_node_shared.get()) {
           continue;
...
@@ -38,10 +38,13 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
   }
 }

-std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
-operator()(
-    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph, bool is_new_grad) {
+paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                     kSlotSmallVectorSize>
+GradNodeAccumulation::operator()(
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>& grads,  // NOLINT
+    bool create_graph,
+    bool is_new_grad) {
   VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
   PADDLE_ENFORCE(grads.size() == 1,
                  paddle::platform::errors::Fatal(
@@ -56,8 +59,9 @@ operator()(
   // Apply Gradient Hooks
   paddle::experimental::Tensor grad_out;
   if (GradientHooksRegistered()) {
-    std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
-        ApplyGradientHooks(grads);
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>
+        hooked_grads = ApplyGradientHooks(grads);
     grad_out = hooked_grads[0][0];
   } else {
     grad_out = grads[0][0];
...
@@ -37,9 +37,12 @@ class GradNodeAccumulation : public GradNodeBase {
   }

   // Functor: perform backward computations
-  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false, bool is_new_grad = false) override;
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>
+  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                  kSlotSmallVectorSize>& grads,  // NOLINT
+             bool create_graph = false,
+             bool is_new_grad = false) override;

   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
...
@@ -21,8 +21,8 @@ namespace egr {
 static inline paddle::experimental::DataType GetPromoteType(
     const std::string& op_name,
-    const std::vector<std::vector<paddle::experimental::Tensor>>&
-        amp_tensors_vector,
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& amp_tensors_vector,
     const paddle::experimental::DataType& amp_dtype) {
   auto dst_type = amp_dtype;
   if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() ==
@@ -86,8 +86,8 @@ static inline paddle::experimental::DataType GetPromoteType(

 inline paddle::experimental::DataType GetAmpDestDtype(
     const std::string& op_name,
-    const std::vector<std::vector<paddle::experimental::Tensor>>&
-        amp_tensors_vector) {
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& amp_tensors_vector) {
   auto amp_dtype =
       egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
   auto amp_level = egr::Controller::Instance().GetAMPLevel();
...
@@ -144,11 +144,15 @@ void GradNodeScale::SetTensorWrappers_X(
 void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }

-std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
-operator()(
-    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph, bool is_new_grad) {
+paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                     kSlotSmallVectorSize>
+GradNodeScale::operator()(
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>& grads,  // NOLINT
+    bool create_graph,
+    bool is_new_grad) {
   // 1. Check Output Size
+  VLOG(6) << "grad size is: " << grads.size();
   PADDLE_ENFORCE(
       ((grads.size() == 1) && (grads[0].size() == 1)),
       paddle::platform::errors::Fatal(
@@ -156,15 +160,18 @@ operator()(
           "However received: %d",
           "This indicates an issue with Eager Dygraph Backward logic",
           grads.size()));
-  std::vector<std::vector<paddle::experimental::Tensor>> outs;
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      outs;
   // 2. Create needed out parttern
   paddle::experimental::Tensor out;
   // Apply Gradient Hooks
   if (GradientHooksRegistered()) {
     // TODO(jiabin): Shall we apply hook slot by slot here or accept
     // vector<vector<phi::tensor>> to apply all hooks?
-    std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
-        ApplyGradientHooks(grads);
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>
+        hooked_grads = ApplyGradientHooks(grads);
     ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */,
              true /* bias_after_scale */, &out);
   } else {
...
@@ -38,9 +38,12 @@ class GradNodeScale : public GradNodeBase {
   ~GradNodeScale() override = default;

   // Functor: perform backward computations
-  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false, bool is_new_grad = false) override;
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>
+  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                  kSlotSmallVectorSize>& grads,  // NOLINT
+             bool create_graph = false,
+             bool is_new_grad = false) override;

   void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
@@ -48,7 +51,7 @@ class GradNodeScale : public GradNodeBase {
       const std::vector<paddle::experimental::Tensor>& tensors);
   void SetAttributes_scale(float scale);

-  std::string name() override { return ""; }
+  std::string name() override { return "scale node"; }

   // Members: define fwd input tensors
   // For Scale there is no fwd input tensor needed
...
@@ -79,9 +79,6 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
     // Pass Attributes to GradNode
     scale_node->SetAttributes_scale(scale);

-    // Set Next Edges
-    scale_node->AddEdges(p_autograd_in, /*slot id*/ 0);
-
     // Set TensorWrappers
     scale_node->SetTensorWrappers_X({x});
...
@@ -19,8 +19,9 @@
 #include <memory>

 #include "paddle/fluid/imperative/tracer.h"
 #include "paddle/phi/api/ext/op_meta_info.h"
+#include "paddle/utils/small_vector.h"

 namespace egr {
+constexpr size_t kSlotSmallVectorSize = 15U;
 class UniqueNameGenerator {
  public:
   explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {}
...
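For context on the new constant: paddle/utils/small_vector.h provides paddle::small_vector<T, N>, a SmallVector-style container (in the spirit of llvm::SmallVector) that keeps up to N elements in inline storage and only falls back to heap allocation beyond that, so kSlotSmallVectorSize = 15 is the inline capacity chosen for the per-node slot containers. The stand-in below only illustrates that storage idea under the stated assumption; it is not Paddle's implementation.

#include <array>
#include <cstddef>
#include <iostream>
#include <vector>

// Illustrative stand-in only: keeps up to N elements in an inline buffer and
// spills to a heap-backed std::vector past that point.
template <typename T, std::size_t N>
class TinySmallVector {
 public:
  void push_back(const T& v) {
    if (size_ < N) {
      inline_buf_[size_] = v;
    } else {
      heap_buf_.push_back(v);
    }
    ++size_;
  }
  std::size_t size() const { return size_; }
  const T& operator[](std::size_t i) const {
    return i < N ? inline_buf_[i] : heap_buf_[i - N];
  }
  bool spilled_to_heap() const { return size_ > N; }

 private:
  std::array<T, N> inline_buf_{};
  std::vector<T> heap_buf_;
  std::size_t size_ = 0;
};

int main() {
  TinySmallVector<int, 15> slots;  // 15 mirrors egr::kSlotSmallVectorSize
  for (int i = 0; i < 4; ++i) slots.push_back(i);  // typical op: few slots
  std::cout << "size=" << slots.size() << ", spilled=" << std::boolalpha
            << slots.spilled_to_heap() << "\n";  // prints: size=4, spilled=false
  return 0;
}

With at most 15 grad slots, which should cover typical operators, the slot containers never touch the heap.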
@@ -1187,11 +1187,6 @@ static std::string GenerateGradNodeCreationContent(
         grad_node_creation_str += paddle::string::Sprintf(
             SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);

-        const char* ADD_EDGES_TEMPLATE =
-            "  if(%s) grad_node->AddEdges(%s, %d);\n";
-        grad_node_creation_str +=
-            paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name,
-                                    input_autograd_name, input_position);
       } else {
         compute_require_grad_args += ", &" + input_autograd_name;
         size_t input_position = fwd_inputs_name_pos_map.at(input_name);
@@ -1200,10 +1195,6 @@
             "  grad_node->SetGradOutMeta(%s, %d);\n";
         grad_node_creation_str += paddle::string::Sprintf(
             SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);

-        const char* ADD_EDGES_TEMPLATE = "  grad_node->AddEdges(&%s, %d);\n";
-        grad_node_creation_str += paddle::string::Sprintf(
-            ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
       }
     }
@@ -1649,7 +1640,8 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
     std::string amp_logic_str = "";
     if (in_vars.size() != 0) {
       const char* AMP_TENSORS_VECTOR_TEMPLATE =
-          "  std::vector<std::vector<paddle::experimental::Tensor>> "
+          "  paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+          "egr::kSlotSmallVectorSize> "
          "amp_tensors_vector = { "
          "%s };\n";
       std::string amp_tensors_vector = paddle::string::Sprintf(
@@ -2428,9 +2420,11 @@ static std::string GenerateGradNodeCCContents(
   }

   const char* BWD_RETURN_TEMPLATE =
-      "  std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
+      "  paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize> hooked_grads = "
       "GradNode%s::ApplyGradientHooks(grads);\n"
-      "  std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
+      "  paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize> outputs(%d);\n"
      "  %s\n"
      "  if(NeedComplexToRealConversion()) "
      "HandleComplexGradToRealGrad(&outputs);\n"
@@ -2441,9 +2435,11 @@ static std::string GenerateGradNodeCCContents(
   // [Generation] Get Full Grad Function
   const char* GRAD_FUNCTION_TEMPLATE =
-      "std::vector<std::vector<paddle::experimental::Tensor>> "
+      "paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize> "
       "GradNode%s::operator()("
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
+      "paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize>& grads, bool "
      "create_graph, bool is_new_grad) {\n"
      "%s"
      "%s"
@@ -2487,9 +2483,12 @@ static std::string GenerateGradNodeHeaderContents(
      "Construct GradNode%s \"; }\n"
      "  ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n"
      "\n"
-      "  virtual std::vector<std::vector<paddle::experimental::Tensor>> "
+      "  virtual "
+      "paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize> "
       "operator()("
-      "std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
+      "paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
+      "egr::kSlotSmallVectorSize>& grads, bool "
      "create_graph = false, bool is_new_grad = false) "
      "override;\n"
      "\n"
...
@@ -118,8 +118,8 @@ class {} : public egr::GradNodeBase {{
       egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}}
   ~{}() override = default;

-  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false, bool is_new_grad = false) override;
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> operator()(
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override;

   std::string name() override {{ return \"{}\"; }}

   void ClearTensorWrappers() override {{
@@ -149,7 +149,7 @@ class {} : public egr::GradNodeBase {{

GRAD_FUNCTION_TEMPLATE = \
"""
-std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph, bool is_new_grad) {{
+paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{
   // Fill Zero For GradIn Tensors
 {}
@@ -239,7 +239,6 @@ FORWARD_BODY_TEMPLATE = \
     // Set TensorWrappers for Forward Inputs
 {}
     // SetGradOutMeta & SetEdges
-{}
 {}
     // SetOutRank & SetHistory & SetGradInMeta & RetainGrad
 {}
@@ -356,7 +355,7 @@ AMP_LOGIC_TEMPLATE = \
   if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{
     VLOG(5) << "Check and Prepare For AMP";
     {}
-    std::vector<std::vector<paddle::experimental::Tensor>> amp_tensors_vector = {};
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = {};
     {}
     {}
     {}
@@ -769,15 +768,11 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
             is_optional = (name in self.optional_inputs)
             if is_optional:
                 set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});"
-                set_edges = f"{indent}if({name}.get_ptr() != nullptr) grad_node->AddEdges({input_autograd_meta_name}, {pos});"
             else:
                 set_grad_out_meta = f"{indent}grad_node->SetGradOutMeta({name}, {pos});"
-                set_edges = f"{indent}grad_node->AddEdges({input_autograd_meta_name}, {pos});"

             set_grad_out_meta_list.append(set_grad_out_meta)
-            set_edges_list.append(set_edges)
         set_grad_out_meta_str = "\n".join(set_grad_out_meta_list)
-        set_edges_str = "\n".join(set_edges_list)

         # SetOutRank & SetHistory & SetGradInMeta
         set_out_rank_list = []
@@ -808,7 +803,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
         self.node_creation_str = FORWARD_BODY_TEMPLATE.format(
             node_creation_event_str, pass_stop_gradient_args_str,
             node_construction_str, set_attributes_str,
-            set_input_tensor_wrappers_str, set_grad_out_meta_str, set_edges_str,
+            set_input_tensor_wrappers_str, set_grad_out_meta_str,
             set_out_rank_str, set_history_str, set_grad_in_meta_str,
             set_retain_grad_str, set_output_tensor_wrappers_str)
@@ -1454,7 +1449,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
         # Construct grad_api returns
         slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys())
-        returns_str = f"{indent}std::vector<std::vector<paddle::experimental::Tensor>> returns({slot_num_bwd_outputs});\n"
+        returns_str = f"{indent}paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});\n"
         for name, (ttype, fwd_position,
                    grad_api_position) in backward_grad_outputs_map.items():
             transformed_tensor_name = self.TransformToNextGradName(name)
...
@@ -169,9 +169,12 @@ class GeneralGrad {
           input_target_nodes_inputmeta_map.count(node);

       // Find and append next nodes
-      const std::vector<std::vector<Edge>>& edges = node->GetEdges();
-      for (const auto& edge_list : edges) {
-        for (const Edge& edge : edge_list) {
+      const paddle::small_vector<std::vector<GradSlotMeta>,
+                                 kSlotSmallVectorSize>& metas =
+          node->OutputMeta();
+      for (const auto& meta_list : metas) {
+        for (const GradSlotMeta& meta : meta_list) {
+          const auto& edge = meta.GetEdge();
           GradNodeBase* next_node = edge.GetMutableGradNode().get();

           // Next node could be nullptr if it is leaf tensor with no
@@ -381,13 +384,15 @@ class GeneralGrad {
             "unable to find copied target for certain grad node."));
     GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node].get();

-    const std::vector<std::vector<Edge>>& orig_edges = orig_node->GetEdges();
-    std::vector<std::vector<Edge>>& copied_edges =
-        copied_node->GetMutableEdges();
-    for (size_t i = 0; i < orig_edges.size(); i++) {
-      for (size_t j = 0; j < orig_edges[i].size(); j++) {
-        const Edge& orig_edge = orig_edges[i][j];
-        Edge& copied_edge = copied_edges[i][j];
+    const paddle::small_vector<std::vector<GradSlotMeta>,
+                               kSlotSmallVectorSize>& orig_meta =
+        orig_node->OutputMeta();
+    paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+        copied_edges = copied_node->MutableOutputMeta();
+    for (size_t i = 0; i < orig_meta.size(); i++) {
+      for (size_t j = 0; j < orig_meta[i].size(); j++) {
+        const Edge& orig_edge = orig_meta[i][j].GetEdge();
+        Edge& copied_edge = copied_edges[i][j].GetMutableEdge();

         std::shared_ptr<GradNodeBase> orig_next_node =
             orig_edge.GetMutableGradNode();
@@ -468,9 +473,11 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
             "We got null node when we traverse the backward graph, and this "
             "should not happened please check your code and contact us."));
     // Find and append next nodes
-    const std::vector<std::vector<Edge>>& edges = node->GetEdges();
-    for (const auto& edge_list : edges) {
-      for (const Edge& edge : edge_list) {
+    const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+        metas = node->OutputMeta();
+    for (const auto& meta_list : metas) {
+      for (const GradSlotMeta& meta : meta_list) {
+        const auto& edge = meta.GetEdge();
         GradNodeBase* next_node = edge.GetMutableGradNode().get();
         // Next node could be nullptr if it is leaf tensor with no
         // AccumulationNode attached
@@ -689,8 +696,10 @@ std::vector<paddle::experimental::Tensor> RunBackward(
     VLOG(6) << "Run Backward Kernel with GradTensorHolder.";

     // Run Pre Backward Node and get outputs
-    std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
-        (*node)(node_input_buffer->Buffers(), create_graph, is_general_grad);
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>
+        grad_output_tensors = (*node)(node_input_buffer->Buffers(),
+                                      create_graph, is_general_grad);

     // retain_grad or not
     if (!retain_graph) {
@@ -704,17 +713,18 @@ std::vector<paddle::experimental::Tensor> RunBackward(
       node_input_buffers_dict.erase(node);

     // Prepare GradTensorHolder for next node
-    const std::vector<std::vector<Edge>>& edges = node->GetEdges();
-    PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
+    const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+        metas = node->OutputMeta();
+    PADDLE_ENFORCE(metas.size() == grad_output_tensors.size() || metas.empty(),
                    paddle::platform::errors::Fatal(
                        "Number of edges should be either empty ( for leaf node "
                        ") or the same as number of output grad tensors, but we "
                        "got edges size is: %d, grad_output size is: %d",
-                       edges.size(), grad_output_tensors.size()));
+                       metas.size(), grad_output_tensors.size()));

-    for (size_t i = 0; i < edges.size(); i++) {
-      for (size_t j = 0; j < edges[i].size(); j++) {
-        const Edge& edge = edges[i][j];
+    for (size_t i = 0; i < metas.size(); i++) {
+      for (size_t j = 0; j < metas[i].size(); j++) {
+        const Edge& edge = metas[i][j].GetEdge();
         if (!edge.IsInitialized()) {
           continue;
         }
...
@@ -19,10 +19,12 @@
 #include "paddle/phi/core/dense_tensor.h"

 namespace egr {
-std::vector<std::vector<paddle::experimental::Tensor>> RunCustomOpNode::
-operator()(
-    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph, bool is_new_grad) {
+paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                     kSlotSmallVectorSize>
+RunCustomOpNode::operator()(
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>& grads,
+    bool create_graph, bool is_new_grad) {  // NOLINT
   paddle::CustomOpKernelContext ctx;
   auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
       egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
@@ -31,8 +33,9 @@ operator()(
   auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
   auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap();

-  std::vector<std::vector<paddle::experimental::Tensor>> tmp_ins(
-      grad_inputs_name.size());
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      tmp_ins(grad_inputs_name.size());
   VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
           << ", whose grad_inputs_name size is: " << grad_inputs_name.size();
   for (size_t i = 0; i < grads.size(); i++) {
@@ -58,17 +61,19 @@ operator()(
   }
   VLOG(6) << "Prepare Grad attrs";
   ctx.EmplaceBackAttrs(attrs_);
-  std::vector<std::vector<paddle::experimental::Tensor>> outs(
-      GetEdges().size());
-  std::vector<std::vector<paddle::experimental::Tensor>> tmp_outs(
-      grad_outputs_names.size());
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      outs(OutputMeta().size());
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      tmp_outs(grad_outputs_names.size());
   VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
-  for (size_t i = 0; i < GetEdges().size(); i++) {
+  for (size_t i = 0; i < OutputMeta().size(); i++) {
     if (map[0].find(i) != map[0].end()) {
       VLOG(7) << "Insert grad outputs: " << i
-              << " with size: " << GetEdges()[i].size()
+              << " with size: " << OutputMeta()[i].size()
               << " to tmp_outputs: " << map[0][i];
-      for (size_t j = 0; j < GetEdges()[i].size(); j++) {
+      for (size_t j = 0; j < OutputMeta()[i].size(); j++) {
         outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */
                              std::make_shared<phi::DenseTensor>(
                                  phi::DataType::UNDEFINED),
...
@@ -36,10 +36,13 @@ class RunCustomOpNode : public GradNodeBase {
   }

   // Functor: perform backward computations
-  virtual std::vector<std::vector<paddle::experimental::Tensor>>
-  operator()(  // NOLINT
-      std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false, bool is_new_grad = false)  // NOLINT
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>
+  operator()(  // NOLINT
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                           kSlotSmallVectorSize>& grads,  // NOLINT
+      bool create_graph = false,
+      bool is_new_grad = false)  // NOLINT
       override;

   std::string name() {
...
@@ -40,70 +40,20 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
   VLOG(6) << "Construct GradNodeBase";
   bwd_in_meta_.resize(bwd_in_slot_num);
   bwd_out_meta_.resize(bwd_out_slot_num);
-  adj_edges_.resize(bwd_out_slot_num);
 }

-void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
-  PADDLE_ENFORCE_LT(
-      slot_id, adj_edges_.size(),
-      paddle::platform::errors::InvalidArgument(
-          "Given slot id is out of range of adj_edges outter size, "
-          "adj_edges is designed to has the same size of grad "
-          "inputs's slot num."));
-
-  for (size_t i = 0; i < metas->size(); i++) {
-    const auto& meta = (*metas)[i];
-    // adj_edges has as same rank as fwd inputs, and record it's output rank
-    // from
-    // its pre-ops
-    if (meta && !meta->StopGradient()) {
-      auto node = meta->GetMutableGradNode();
-      if (!node || !node.get()) {
-        meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
-      }
-      VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
-              << this->name() << " (addr: " << this << ") "
-              << " to " << meta->GetMutableGradNode()->name()
-              << " (addr: " << meta->GetMutableGradNode().get() << ")";
-      adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                       meta->OutRankInfo());
-    } else {
-      adj_edges_[slot_id].emplace_back();
-    }
-  }
-}
-
-void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
-  PADDLE_ENFORCE_LT(
-      slot_id, adj_edges_.size(),
-      paddle::platform::errors::InvalidArgument(
-          "Given slot id is out of range of adj_edges outter size, "
-          "adj_edges is designed to has the same size of grad "
-          "inputs's slot num."));
-
-  if (meta && !meta->StopGradient()) {
-    auto node = meta->GetMutableGradNode();
-    if (!node || !node.get()) {
-      meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
-    }
-    VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
-            << this->name() << " (addr: " << this << ") "
-            << " to " << meta->GetMutableGradNode()->name()
-            << " (addr: " << meta->GetMutableGradNode().get() << ")";
-    adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
-                                     meta->OutRankInfo());
-  } else {
-    adj_edges_[slot_id].emplace_back();
-  }
+const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+GradNodeBase::InputMeta() const {
+  return bwd_in_meta_;
 }

-const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::InputMeta() const {
-  return bwd_in_meta_;
+const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+GradNodeBase::OutputMeta() const {
+  return bwd_out_meta_;
 }

-const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::OutputMeta() const {
+paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+GradNodeBase::MutableOutputMeta() {
   return bwd_out_meta_;
 }
@@ -123,7 +73,9 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
   }

   auto& meta = metas[0];
-  meta.SetStopGradient(fwd_out_meta->StopGradient());
+  if (fwd_out_meta && fwd_out_meta->StopGradient()) {
+    meta.SetStopGradient(fwd_out_meta->StopGradient());
+  }

   if (!fwd_out.initialized()) {
     VLOG(6)
@@ -153,8 +105,8 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
     meta.SetTensorMeta(dense_tensor->meta());
     meta.SetPlace(fwd_out.place());

-    if (paddle::framework::IsComplexType(
-            paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
+    if (dense_tensor->type() == paddle::experimental::DataType::COMPLEX64 ||
+        dense_tensor->type() == paddle::experimental::DataType::COMPLEX128) {
       need_complex_to_real_ = true;
     }
   }
@@ -186,7 +138,7 @@ void GradNodeBase::SetGradInMeta(
             "Bwd_in_meta should only be called while "
             "autograd_meta is not null. If you got this "
             "error, it indicates bugs in framework."));
-    if (fwd_out_meta->StopGradient()) {
+    if (fwd_out_meta && fwd_out_meta->StopGradient()) {
       // Set Stop Gradient only when its true or non-initialized autograd_meta,
       // since all default value is false.
       meta.SetStopGradient(fwd_out_meta->StopGradient());
@@ -212,8 +164,8 @@ void GradNodeBase::SetGradInMeta(
       meta.SetTensorMeta(dense_tensor->meta());
       meta.SetPlace(fwd_out_tensor.place());

-      if (paddle::framework::IsComplexType(
-              paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
+      if (dense_tensor->type() == paddle::experimental::DataType::COMPLEX64 ||
+          dense_tensor->type() == paddle::experimental::DataType::COMPLEX128) {
         need_complex_to_real_ = true;
       }
     } else {
@@ -238,12 +190,24 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
     metas.resize(1);
   }
   auto& meta = metas[0];
+  // Set Stop_gradient
   if (fwd_in_meta) {
     meta.SetStopGradient(fwd_in_meta->StopGradient());
-  } else {
-    meta.SetStopGradient(true);
   }
+  // Set Adj Edges
+  if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
+    auto node = fwd_in_meta->GetMutableGradNode();
+    if (!node || !node.get()) {
+      fwd_in_meta->SetGradNode(
+          std::make_shared<egr::GradNodeAccumulation>(fwd_in_meta));
+    }
+    VLOG(6) << "Add Edges for slot: " << slot_rank << ", the Edge is from "
+            << this->name() << " (addr: " << this << ") "
+            << " to " << fwd_in_meta->GetMutableGradNode()->name()
+            << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")";
+    meta.SetEdge(fwd_in_meta->GetMutableGradNode(), fwd_in_meta->OutRankInfo());
+  }
   // Record TensorMeta
   if (fwd_in.impl() && fwd_in.impl().get()) {
     if (phi::DenseTensor::classof(fwd_in.impl().get())) {
@@ -282,30 +246,43 @@ void GradNodeBase::SetGradOutMeta(
     const auto& fwd_in_tensor = fwd_in[i];
     auto& meta = metas[i];
     auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor);
+    // Set Stop_gradient
     if (fwd_in_meta) {
-      // Set Stop Gradient only when its true or non-initialized autograd_meta,
-      // since all default value is false.
       meta.SetStopGradient(fwd_in_meta->StopGradient());
     }
+    // Set Adj Edges
+    if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
+      auto node = fwd_in_meta->GetMutableGradNode();
+      if (!node || !node.get()) {
+        fwd_in_meta->SetGradNode(
+            std::make_shared<egr::GradNodeAccumulation>(fwd_in_meta));
+      }
+      VLOG(6) << "Add Edges for slot: " << slot_rank << ", the Edge is from "
+              << this->name() << " (addr: " << this << ") "
+              << " to " << fwd_in_meta->GetMutableGradNode()->name()
+              << " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")";
+      meta.SetEdge(fwd_in_meta->GetMutableGradNode(),
+                   fwd_in_meta->OutRankInfo());
+    }
     // Record TensorMeta
     if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) {
       if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) {
         // Only Copy Meta
         phi::DenseTensor* dense_tensor =
             static_cast<phi::DenseTensor*>(fwd_in_tensor.impl().get());
         PADDLE_ENFORCE_NE(dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
                           paddle::platform::errors::Fatal(
-                              "Attempting to copy DenseTensorMeta with "
-                              "phi::DataType::UNDEFINED,"
+                              "Attempting to copy DenseTensorMeta "
+                              "with phi::DataType::UNDEFINED,"
                               "which is illegal."));
         meta.SetTensorMeta(dense_tensor->meta());
         meta.SetPlace(fwd_in_tensor.place());
       }
     } else {
-      VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
-                 "with non-DenseTensor argument.";
+      VLOG(6)
+          << "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
+             "non-DenseTensor argument.";
     }
   }
 }
@@ -328,18 +305,14 @@ int64_t GradNodeBase::RegisterGradientHook(
   return next_hook_id_++;
 }

-const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
-  return adj_edges_;
-}
-
-std::vector<std::vector<Edge>>& GradNodeBase::GetMutableEdges() {
-  return adj_edges_;
-}
-
-std::vector<std::vector<paddle::experimental::Tensor>>
+paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                     kSlotSmallVectorSize>
 GradNodeBase::ApplyGradientHooks(
-    const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
-  std::vector<std::vector<paddle::experimental::Tensor>> outs(tensors.size());
+    const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>& tensors) {
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      outs(tensors.size());
   for (auto& hook_pair : gradient_hooks_) {
     size_t slot_id = std::get<0>(hook_pair.second);
     size_t rank = std::get<1>(hook_pair.second);
@@ -386,7 +359,8 @@ GradNodeBase::ApplyGradientHooks(
 }

 void GradNodeBase::HandleComplexGradToRealGrad(
-    std::vector<std::vector<paddle::experimental::Tensor>>* out_grads) {
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>* out_grads) {
   for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) {
     const std::vector<paddle::experimental::Tensor>& slot_out_grads =
         (*out_grads)[slot_id];
...
@@ -16,6 +16,7 @@
 #include <memory>

+#include "paddle/fluid/eager/api/utils/global_utils.h"
 #include "paddle/fluid/eager/eager_tensor.h"
 #include "paddle/fluid/eager/hooks.h"
 #include "paddle/phi/api/all.h"
@@ -46,9 +47,8 @@ namespace egr {
  * indicate which
  * input of grad this edge belong).
  * */
-class Edge;
 class AutogradMeta;
+class GradNodeBase;

 /**
  * GradSlotMeta is used to Record Forward Tensor info to backward, since paddle
  * has lots of operators
@@ -56,6 +56,70 @@ class AutogradMeta;
  * So, we need a meta info
  * to record it's needs.
  * **/
+class Edge {
+ public:
+  // Default constructor for Edges in order to construct it for AutogradMeta
+  Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {}
+
+  // In real use cases we should create Edge from grad node and input rank which
+  // indicate which edge it is.
+  // Since we have slot design in operators we will have to locate an edge with
+  // slot
+  // and rank.
+  Edge(const std::shared_ptr<GradNodeBase>& grad_node, size_t in_slot_id,
+       size_t in_rank)
+      : in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {}
+
+  Edge(const std::shared_ptr<GradNodeBase>& grad_node,
+       const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info)
+      : in_slot_id_(rank_info.first),
+        in_rank_(rank_info.second),
+        grad_node_(grad_node) {}
+
+  GradNodeBase* GetGradNode() const { return grad_node_.get(); }
+
+  std::shared_ptr<GradNodeBase> GetMutableGradNode() const {
+    return grad_node_;
+  }
+
+  void SetGradNode(const std::shared_ptr<GradNodeBase>& node) {
+    VLOG(6) << "Reseting Edge's Grad Node";
+    grad_node_ = node;
+  }
+
+  std::pair<size_t, size_t> GetEdgeRankInfo() const {
+    return std::make_pair(in_slot_id_, in_rank_);
+  }
+
+  void SetEdgeRankInfo(size_t slot_id, size_t in_rank) {
+    in_slot_id_ = slot_id;
+    in_rank_ = in_rank;
+  }
+
+  void SetEdgeRankInfo(
+      const std::pair</* slot_id */ size_t, /* rank */ size_t>& edge_rank) {
+    in_slot_id_ = edge_rank.first;
+    in_rank_ = edge_rank.second;
+  }
+
+  // Currently we use grad_node_ to identify if a edge is initialized.
+  bool IsInitialized() const {
+    if (!grad_node_) {
+      return false;
+    } else {
+      if (!(grad_node_.get())) {
+        return false;
+      } else {
+        return true;
+      }
+    }
+  }
+
+ private:
+  size_t in_slot_id_;
+  size_t in_rank_;
+  std::shared_ptr<GradNodeBase> grad_node_{nullptr};
+};
+
 class GradSlotMeta {
  public:
   GradSlotMeta() = default;
@@ -81,10 +145,21 @@ class GradSlotMeta {
   void SetPlace(const phi::Place& place) { place_ = place; }
   const phi::Place& GetPlace() const { return place_; }

+  void SetEdge(const Edge& edge) { adj_edge_ = edge; }
+  void SetEdge(
+      const std::shared_ptr<GradNodeBase>& grad_node,
+      const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info) {
+    adj_edge_.SetGradNode(grad_node);
+    adj_edge_.SetEdgeRankInfo(rank_info);
+  }
+  Edge& GetMutableEdge() { return adj_edge_; }
+  const Edge& GetEdge() const { return adj_edge_; }
+
  private:
   bool stop_gradient_{false};
   phi::Place place_;
   std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
+  Edge adj_edge_;
 };

 class GradNodeBase {
@@ -107,9 +182,12 @@ class GradNodeBase {
    * so, vector of vector
    * is better choice to fit this format.
    * **/
-  virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
-      std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-      bool create_graph = false, bool is_new_grad = false) = 0;
+  virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                               kSlotSmallVectorSize>
+  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                  kSlotSmallVectorSize>& grads,  // NOLINT
+             bool create_graph = false,
+             bool is_new_grad = false) = 0;

   virtual void ClearTensorWrappers() = 0;
@@ -118,17 +196,6 @@ class GradNodeBase {
    * **/
   virtual std::shared_ptr<GradNodeBase> Copy() const = 0;

-  /**
-   * AddEdges is designed to set input tensors' backward Node as current
-   * node's Edges.
-   * This method should be call in forward code and for double backward depends
-   * computation.
-   *
-   * This one is called slot by slot
-   * **/
-  void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
-  void AddEdges(AutogradMeta* meta, size_t slot_id);
-
+  // adj_edges were moved inside OutputMeta(), so no available direct access
+  // from GradNodeBase.
+  // To access Edges, get GradSlotMeta by calling OutputMeta(), then use
@@ -136,10 +203,15 @@
   /**
    * Get Input Meta of current Grad node**/
-  const std::vector<std::vector<GradSlotMeta>>& InputMeta() const;
+  const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+  InputMeta() const;
   /**
    * Get Output Meta of current Grad node**/
-  const std::vector<std::vector<GradSlotMeta>>& OutputMeta() const;
+  const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+  OutputMeta() const;
+
+  paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
+  MutableOutputMeta();
   /**
    * Set bwd ins and outs info with forward vars
    * **/
@@ -180,23 +252,22 @@ class GradNodeBase {
    * **/
   inline bool GradientHooksRegistered() { return !gradient_hooks_.empty(); }

-  std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
-      const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+  ApplyGradientHooks(
+      const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                                 kSlotSmallVectorSize>& tensors);

   /**
    * Handle Complex - Real Type Promotion
    * **/
   void HandleComplexGradToRealGrad(
-      std::vector<std::vector<paddle::experimental::Tensor>>* out_grads);
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                           kSlotSmallVectorSize>* out_grads);
   bool NeedComplexToRealConversion() { return need_complex_to_real_; }

   virtual std::string name() { return "GradNodeBase"; }

-  /**
-   * GetEdges is designed to get all edges of current node**/
-  const std::vector<std::vector<Edge>>& GetEdges() const;
-  std::vector<std::vector<Edge>>& GetMutableEdges();
-
   /**
    * The following interfaces are designed for no_need_buffer
    * **/
@@ -207,18 +278,13 @@ class GradNodeBase {
   }

  private:
-  // TODO(zhanlve): Merge adj_edges_ into GradOutMeta
-  // Edges recorded the backward related node info, which indicate all edges
-  // linked
-  // by this Grad Node.
-  // Why we need vector<vector<Edge>>: Edges is as same rank as bwd output.
-  std::vector<std::vector<Edge>> adj_edges_;
-
   // bwd_out_meta_ is used to record Grad output info for backward
-  std::vector<std::vector<GradSlotMeta>> bwd_out_meta_;
+  paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>
+      bwd_out_meta_;

   // bwd_in_meta_ used to record Grad input info for backward
-  std::vector<std::vector<GradSlotMeta>> bwd_in_meta_;
+  paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>
+      bwd_in_meta_;

   // Gradient Hooks
   // Customer may register a list of hooks which will be called in order during
   // backward
@@ -235,71 +301,6 @@ class GradNodeBase {
   bool is_tensor_wrappers_cleared_ = false;
 };

-class Edge {
- public:
-  // Default constructor for Edges in order to construct it for AutogradMeta
-  Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {}
-
-  // In real use cases we should create Edge from grad node and input rank which
-  // indicate which edge it is.
-  // Since we have slot design in operators we will have to locate an edge with
-  // slot
-  // and rank.
-  Edge(const std::shared_ptr<GradNodeBase>& grad_node, size_t in_slot_id,
-       size_t in_rank)
-      : in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {}
-
-  Edge(const std::shared_ptr<GradNodeBase>& grad_node,
-       const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info)
-      : in_slot_id_(rank_info.first),
-        in_rank_(rank_info.second),
-        grad_node_(grad_node) {}
-
-  GradNodeBase* GetGradNode() const { return grad_node_.get(); }
-
-  std::shared_ptr<GradNodeBase> GetMutableGradNode() const {
-    return grad_node_;
-  }
-
-  void SetGradNode(const std::shared_ptr<GradNodeBase>& node) {
-    VLOG(6) << "Reseting Edge's Grad Node";
-    grad_node_ = node;
-  }
-
-  std::pair<size_t, size_t> GetEdgeRankInfo() const {
-    return std::make_pair(in_slot_id_, in_rank_);
-  }
-
-  void SetEdgeRankInfo(size_t slot_id, size_t in_rank) {
-    in_slot_id_ = slot_id;
-    in_rank_ = in_rank;
-  }
-
-  void SetEdgeRankInfo(
-      const std::pair</* slot_id */ size_t, /* rank */ size_t>& edge_rank) {
-    in_slot_id_ = edge_rank.first;
-    in_rank_ = edge_rank.second;
-  }
-
-  // Currently we use grad_node_ to identify if a edge is initialized.
-  bool IsInitialized() const {
-    if (!grad_node_) {
-      return false;
-    } else {
-      if (!(grad_node_.get())) {
-        return false;
-      } else {
-        return true;
-      }
-    }
-  }
-
- private:
-  size_t in_slot_id_;
-  size_t in_rank_;
-  std::shared_ptr<GradNodeBase> grad_node_{nullptr};
-};
-
 inline void CheckTensor(const paddle::experimental::Tensor& pre,
                         const paddle::experimental::Tensor& post) {
   if (!pre.initialized() && post.initialized()) {
...
@@ -27,7 +27,8 @@ namespace egr {
 class GradTensorHolder {
  public:
   explicit GradTensorHolder(
-      const std::vector<std::vector<GradSlotMeta>>& metas) {
+      const paddle::small_vector<std::vector<GradSlotMeta>,
+                                 kSlotSmallVectorSize>& metas) {
     VLOG(7) << "Init GradTensorHolder with meta size: " << metas.size();
     buffer_.resize(metas.size());
     for (size_t i = 0; i < buffer_.size(); i++) {
@@ -39,7 +40,8 @@ class GradTensorHolder {
   GradTensorHolder(const GradTensorHolder& other) = default;

   explicit GradTensorHolder(
-      std::vector<std::vector<paddle::experimental::Tensor>>&& inputs)
+      paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                           kSlotSmallVectorSize>&& inputs)
       : buffer_(std::move(inputs)) {}

   GradTensorHolder& operator=(const GradTensorHolder& other) = default;
@@ -56,14 +58,18 @@ class GradTensorHolder {
     return buffer_[pos];
   }

-  std::vector<std::vector<paddle::experimental::Tensor>>& Buffers() {
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>&
+  Buffers() {
     return buffer_;
   }

   void SetBufferSlotRankZeros(size_t slot_id, size_t rank);

  private:
-  std::vector<std::vector<paddle::experimental::Tensor>> buffer_;
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      buffer_;
 };

 }  // namespace egr
@@ -29,14 +29,18 @@
 #include "pybind11/pytypes.h"

 namespace egr {
-std::vector<std::vector<paddle::experimental::Tensor>> GradNodePyLayer::
-operator()(
-    std::vector<std::vector<paddle::experimental::Tensor>>& grads,  // NOLINT
-    bool create_graph, bool is_new_grad) {
+paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                     kSlotSmallVectorSize>
+GradNodePyLayer::operator()(
+    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                         kSlotSmallVectorSize>& grads,  // NOLINT
+    bool create_graph,
+    bool is_new_grad) {
   VLOG(3) << "Running Eager Backward Node: " << name();

-  std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
-      GradNodePyLayer::ApplyGradientHooks(grads);
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      hooked_grads = GradNodePyLayer::ApplyGradientHooks(grads);

   paddle::pybind::PyLayerObject* ctx =
       reinterpret_cast<paddle::pybind::PyLayerObject*>(ctx_);
@@ -124,7 +128,9 @@ operator()(
             ctx->forward_input_tensor_is_duplicable.size(), outputs_size));
   }

-  std::vector<std::vector<paddle::experimental::Tensor>> grad_out;
+  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
+                       kSlotSmallVectorSize>
+      grad_out;
   grad_out.reserve(ctx->forward_input_tensor_is_duplicable.size());
   for (size_t i = 0; i < ctx->forward_input_tensor_is_duplicable.size(); i++) {
     if (i < outputs_size) {
...
...@@ -34,9 +34,12 @@ class GradNodePyLayer : public GradNodeBase { ...@@ -34,9 +34,12 @@ class GradNodePyLayer : public GradNodeBase {
~GradNodePyLayer() override { Py_DECREF(ctx_); }; ~GradNodePyLayer() override { Py_DECREF(ctx_); };
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()( virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT kSlotSmallVectorSize>
bool create_graph = false, bool is_new_grad = false) override; operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;
void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
......
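
Condensed from the signature changes above and below (GradNodePyLayer here, GradNodeAccumulation, GradTestNode and GradNodeRunProgram elsewhere in the diff): every GradNodeBase subclass now implements the backward functor with small_vector-based slots. A stripped-down sketch with a placeholder class name; the remaining virtual members a concrete node must provide are omitted.

class MyGradNode : public egr::GradNodeBase {  // placeholder name, not from the patch
 public:
  MyGradNode() : GradNodeBase() {}

  paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                       egr::kSlotSmallVectorSize>
  operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                                  egr::kSlotSmallVectorSize>& grads,  // NOLINT
             bool create_graph = false,
             bool is_new_grad = false) override {
    // Run registered gradient hooks first, as the PyLayer/RunProgram nodes do;
    // a real node would compute its backward outputs here before returning.
    auto hooked_grads = ApplyGradientHooks(grads);
    return hooked_grads;
  }
};
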
...@@ -88,7 +88,7 @@ class TensorWrapper { ...@@ -88,7 +88,7 @@ class TensorWrapper {
} else { } else {
intermidiate_tensor_.set_impl(tensor.impl()); intermidiate_tensor_.set_impl(tensor.impl());
} }
// TODO(jiabin): This may have severe performance issues
intermidiate_tensor_.set_name(tensor.name() + "@Saved"); intermidiate_tensor_.set_name(tensor.name() + "@Saved");
auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor); auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor);
......
...@@ -80,14 +80,18 @@ TEST(AccumulationNode, Tensor) { ...@@ -80,14 +80,18 @@ TEST(AccumulationNode, Tensor) {
grad_meta->SetStopGradient(false); grad_meta->SetStopGradient(false);
// operator() // operator()
std::vector<std::vector<paddle::experimental::Tensor>> et0_vec = {{et0}}; paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0]; paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr = auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl()) std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl())
->data<paddle::platform::float16>(); ->data<paddle::platform::float16>();
CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f)); CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f));
std::vector<std::vector<paddle::experimental::Tensor>> et1_vec = {{et1}}; paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0]; paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr = auto* ret_et1_ptr =
......
...@@ -34,7 +34,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { ...@@ -34,7 +34,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>( auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(
/* val */ 5.0, /* in_num */ 2, /* out_num */ 2); /* val */ 5.0, /* in_num */ 2, /* out_num */ 2);
auto grad_test_node1 = std::make_shared<eager_test::GradTestNode>(); auto grad_test_node1 = std::make_shared<eager_test::GradTestNode>();
std::vector<std::vector<paddle::experimental::Tensor>> grads; paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
grads;
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1})); phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>( std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
...@@ -51,28 +53,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) { ...@@ -51,28 +53,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(res[0][0].impl()) CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(res[0][0].impl())
->data<float>()[0], ->data<float>()[0],
6.0f); 6.0f);
VLOG(6) << "Test Add Edges";
egr::Edge tmp_edge0(grad_test_node1, 1, 2);
auto auto_grad0 = std::make_shared<egr::AutogradMeta>(tmp_edge0);
auto_grad0->SetStopGradient(false);
egr::Edge tmp_edge1(grad_test_node1, 3, 4); egr::Edge tmp_edge1(grad_test_node1, 3, 4);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(tmp_edge1); auto auto_grad1 = std::make_shared<egr::AutogradMeta>(tmp_edge1);
et1.set_autograd_meta(auto_grad1); et1.set_autograd_meta(auto_grad1);
auto_grad1->SetStopGradient(false);
grad_test_node0->AddEdges(auto_grad0.get(), 0);
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
size_t(1));
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
size_t(2));
std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
grad_test_node0->AddEdges(&metas, 1);
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
size_t(3));
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second,
size_t(4));
VLOG(6) << "Test Set Meta and Get Meta"; VLOG(6) << "Test Set Meta and Get Meta";
auto_grad1->SetStopGradient(true); auto_grad1->SetStopGradient(true);
......
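
The AddEdges/GetEdges assertions deleted in the hunk above have no one-to-one replacement here; with edges merged into GradSlotMeta, the equivalent check wires the edge through SetGradOutMeta and reads it back through OutputMeta(), as the forward-node tests further down do. A rough sketch under that assumption; fixture names follow the surrounding test, and the code is not part of the patch.

// Wire grad_test_node0 -> grad_test_node1 through the tensor-based API ...
auto tmp_tensor = paddle::experimental::Tensor();
auto* tmp_meta = egr::EagerUtils::autograd_meta(&tmp_tensor);
tmp_meta->SetStopGradient(false);
tmp_meta->SetSingleOutRankWithSlot(0, 0);
tmp_meta->SetGradNode(grad_test_node1);
grad_test_node0->SetGradOutMeta(tmp_tensor, /*slot id*/ 0);

// ... and read the edge back from the merged slot meta.
const egr::Edge& edge = grad_test_node0->OutputMeta()[0][0].GetEdge();
CHECK_EQ(static_cast<int>(edge.GetEdgeRankInfo().first), 0);
CHECK_EQ(static_cast<int>(edge.GetEdgeRankInfo().second), 0);
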
...@@ -31,9 +31,12 @@ class GradTestNode : public egr::GradNodeBase { ...@@ -31,9 +31,12 @@ class GradTestNode : public egr::GradNodeBase {
: GradNodeBase(in_num, out_num), val_(val) {} : GradNodeBase(in_num, out_num), val_(val) {}
GradTestNode() : GradNodeBase() { val_ = 1.0; } GradTestNode() : GradNodeBase() { val_ = 1.0; }
std::string name() override { return "GradTestNode"; } std::string name() override { return "GradTestNode"; }
std::vector<std::vector<paddle::experimental::Tensor>> operator()( paddle::small_vector<std::vector<paddle::experimental::Tensor>,
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT egr::kSlotSmallVectorSize>
bool create_graph = false, bool is_new_grad = false) override { operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override {
val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl()) val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl())
->data<float>()[0]; ->data<float>()[0];
phi::DenseTensorMeta meta = phi::DenseTensorMeta meta =
...@@ -46,7 +49,9 @@ class GradTestNode : public egr::GradNodeBase { ...@@ -46,7 +49,9 @@ class GradTestNode : public egr::GradNodeBase {
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace()); auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 6.0f; dt_ptr[0] = 6.0f;
paddle::experimental::Tensor et1(dt); paddle::experimental::Tensor et1(dt);
std::vector<std::vector<paddle::experimental::Tensor>> res = {{et1}}; paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
res = {{et1}};
return res; return res;
} }
void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; } void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
......
...@@ -45,7 +45,9 @@ TEST(GradTensorHolder, Constructor) { ...@@ -45,7 +45,9 @@ TEST(GradTensorHolder, Constructor) {
meta); meta);
paddle::experimental::Tensor et = paddle::experimental::Tensor(dt); paddle::experimental::Tensor et = paddle::experimental::Tensor(dt);
std::vector<std::vector<paddle::experimental::Tensor>> inputs; paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
inputs;
inputs.push_back({et}); inputs.push_back({et});
GradTensorHolder grad_tensor_holder4 = GradTensorHolder(std::move(inputs)); GradTensorHolder grad_tensor_holder4 = GradTensorHolder(std::move(inputs));
......
...@@ -76,8 +76,7 @@ TEST(Backward, SingleNodeEmptyGrad) { ...@@ -76,8 +76,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1}; node0_ptr->SetGradOutMeta({leaf_tensor}, 0);
node0_ptr->AddEdges(&res, 0);
} }
std::vector<paddle::experimental::Tensor> outs = {target_tensor}; std::vector<paddle::experimental::Tensor> outs = {target_tensor};
// Run Backward // Run Backward
...@@ -135,8 +134,7 @@ TEST(Backward, SingleNodeCustomGrad) { ...@@ -135,8 +134,7 @@ TEST(Backward, SingleNodeCustomGrad) {
std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr)); std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1}; node0_ptr->SetGradOutMeta({leaf_tensor}, 0);
node0_ptr->AddEdges(&res, 0);
} }
// Run Backward // Run Backward
...@@ -191,12 +189,12 @@ TEST(Backward, LinearNodes) { ...@@ -191,12 +189,12 @@ TEST(Backward, LinearNodes) {
auto_grad_meta->SetSingleOutRankWithSlot(0, 0); auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false); auto_grad_meta->SetStopGradient(false);
// Connect Node0 -> Node1 via Edge // Connect Node0 -> Node1 via Edge
auto meta0 = egr::AutogradMeta(); auto tmp_tensor = paddle::experimental::Tensor();
meta0.SetStopGradient(false); auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor);
meta0.SetSingleOutRankWithSlot(0, 0); meta0->SetStopGradient(false);
meta0.SetGradNode(node1_ptr); meta0->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res0 = {&meta0}; meta0->SetGradNode(node1_ptr);
node0_ptr->AddEdges(&res0, 0); node0_ptr->SetGradOutMeta(tmp_tensor, 0);
AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta // Connect Tensor and AccumulationNode via AutoGradMeta
...@@ -208,8 +206,7 @@ TEST(Backward, LinearNodes) { ...@@ -208,8 +206,7 @@ TEST(Backward, LinearNodes) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1}; node1_ptr->SetGradOutMeta(leaf_tensor, 0);
node1_ptr->AddEdges(&res1, 0);
} }
// Use Empty Grad Tensor // Use Empty Grad Tensor
...@@ -288,20 +285,20 @@ TEST(Backward, WithAccumulation) { ...@@ -288,20 +285,20 @@ TEST(Backward, WithAccumulation) {
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
// Connect Node0 -> Node2 via Edge // Connect Node0 -> Node2 via Edge
auto meta0 = egr::AutogradMeta(); auto tmp_tensor0 = paddle::experimental::Tensor();
meta0.SetStopGradient(false); auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0);
meta0.SetSingleOutRankWithSlot(0, 0); meta0->SetStopGradient(false);
meta0.SetGradNode(node2_ptr); meta0->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res0 = {&meta0}; meta0->SetGradNode(node2_ptr);
node0_ptr->AddEdges(&res0, 0); node0_ptr->SetGradOutMeta(tmp_tensor0, 0);
// Connect Node1 -> Node2 via Edge // Connect Node1 -> Node2 via Edge
auto meta1 = egr::AutogradMeta(); auto tmp_tensor1 = paddle::experimental::Tensor();
meta1.SetStopGradient(false); auto* meta1 = EagerUtils::autograd_meta(&tmp_tensor1);
meta1.SetSingleOutRankWithSlot(0, 0); meta1->SetStopGradient(false);
meta1.SetGradNode(node2_ptr); meta1->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res1 = {&meta1}; meta1->SetGradNode(node2_ptr);
node1_ptr->AddEdges(&res1, 0); node1_ptr->SetGradOutMeta(tmp_tensor1, 0);
AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor); AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta // Connect Tensor and AccumulationNode via AutoGradMeta
...@@ -314,7 +311,7 @@ TEST(Backward, WithAccumulation) { ...@@ -314,7 +311,7 @@ TEST(Backward, WithAccumulation) {
auto_grad_meta2->SetStopGradient(false); auto_grad_meta2->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2}; std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2};
node2_ptr->AddEdges(&res2, 0); node2_ptr->SetGradOutMeta(leaf_tensor, 0);
} }
Backward(target_tensors, grad_tensors); Backward(target_tensors, grad_tensors);
......
...@@ -69,7 +69,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) { ...@@ -69,7 +69,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
meta->SetSingleOutRankWithSlot(0, 0); meta->SetSingleOutRankWithSlot(0, 0);
meta->SetGradNode(acc_node_ptr); meta->SetGradNode(acc_node_ptr);
std::vector<egr::AutogradMeta*> res = {meta}; std::vector<egr::AutogradMeta*> res = {meta};
scale_node_ptr->AddEdges(&res, 0); scale_node_ptr->SetGradOutMeta(leaf_tensor, 0);
Backward(target_tensors, {}); Backward(target_tensors, {});
......
...@@ -251,10 +251,11 @@ TEST(EagerUtils, GetGradAccumulationNode) { ...@@ -251,10 +251,11 @@ TEST(EagerUtils, GetGradAccumulationNode) {
} }
TEST(EagerUtils, FillZeroForEmptyGradInputs) { TEST(EagerUtils, FillZeroForEmptyGradInputs) {
std::vector<std::vector<paddle::experimental::Tensor>> grads = { paddle::small_vector<std::vector<paddle::experimental::Tensor>,
std::vector<paddle::experimental::Tensor>(1)}; egr::kSlotSmallVectorSize>
std::vector<std::vector<GradSlotMeta>> slot_metas = { grads = {std::vector<paddle::experimental::Tensor>(1)};
std::vector<GradSlotMeta>(1)}; paddle::small_vector<std::vector<GradSlotMeta>, egr::kSlotSmallVectorSize>
slot_metas = {std::vector<GradSlotMeta>(1)};
phi::DenseTensorMeta tensor_meta; phi::DenseTensorMeta tensor_meta;
tensor_meta.dtype = paddle::experimental::DataType::FLOAT32; tensor_meta.dtype = paddle::experimental::DataType::FLOAT32;
......
...@@ -137,12 +137,16 @@ TEST(Forward, LinearNodes) { ...@@ -137,12 +137,16 @@ TEST(Forward, LinearNodes) {
// 2. TensorWrapper: No TensorWrapper for ScaleNode // 2. TensorWrapper: No TensorWrapper for ScaleNode
// 3. NextEdges: Node 1 -> Node 0 // 3. NextEdges: Node 1 -> Node 0
const std::vector<std::vector<Edge>>& node1_edges = grad_node1->GetEdges(); const paddle::small_vector<std::vector<GradSlotMeta>,
const auto& node1_edge = node1_edges[0]; egr::kSlotSmallVectorSize>& node1_metas =
grad_node1->OutputMeta();
CHECK_EQ(static_cast<int>(node1_edge[0].GetEdgeRankInfo().first), 0); const auto& node1_meta = node1_metas[0];
CHECK_EQ(static_cast<int>(node1_edge[0].GetEdgeRankInfo().second), 0);
CHECK_EQ(node1_edge[0].GetGradNode(), grad_node0); CHECK_EQ(static_cast<int>(node1_meta[0].GetEdge().GetEdgeRankInfo().first),
0);
CHECK_EQ(static_cast<int>(node1_meta[0].GetEdge().GetEdgeRankInfo().second),
0);
CHECK_EQ(node1_meta[0].GetEdge().GetGradNode(), grad_node0);
} }
} }
...@@ -232,16 +236,19 @@ TEST(Forward, BranchedNodes) { ...@@ -232,16 +236,19 @@ TEST(Forward, BranchedNodes) {
// 2. TensorWrapper: No TensorWrapper for ScaleNode // 2. TensorWrapper: No TensorWrapper for ScaleNode
// 3. NextEdges // 3. NextEdges
// Node 1 -> Node 0 // Node 1 -> Node 0
const std::vector<std::vector<Edge>>& node1_edges = grad_node1->GetEdges(); const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
const Edge& node1_edge = node1_edges[0][0]; node1_metas = grad_node1->OutputMeta();
const Edge& node1_edge = node1_metas[0][0].GetEdge();
CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().first), 0); CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().first), 0);
CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().second), 0); CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().second), 0);
CHECK_EQ(node1_edge.GetGradNode(), grad_node0); CHECK_EQ(node1_edge.GetGradNode(), grad_node0);
// Node 2 -> Node 0 // Node 2 -> Node 0
const std::vector<std::vector<Edge>>& node2_edges = grad_node2->GetEdges(); const paddle::small_vector<std::vector<egr::GradSlotMeta>,
const Edge& node2_edge = node2_edges[0][0]; egr::kSlotSmallVectorSize>& node2_metas =
grad_node2->OutputMeta();
const Edge& node2_edge = node2_metas[0][0].GetEdge();
CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().first), 0); CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().first), 0);
CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().second), 0); CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().second), 0);
......
...@@ -87,7 +87,7 @@ TEST(Grad, SingleNodeEmptyGrad) { ...@@ -87,7 +87,7 @@ TEST(Grad, SingleNodeEmptyGrad) {
// grad_node Add Edges // grad_node Add Edges
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1}; std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
node0_ptr->AddEdges(&res, 0); node0_ptr->SetGradOutMeta(leaf_tensor, 0);
} }
std::vector<paddle::experimental::Tensor> outs = {output_tensor}; std::vector<paddle::experimental::Tensor> outs = {output_tensor};
...@@ -150,7 +150,7 @@ TEST(Grad, SingleNodeCustomGrad) { ...@@ -150,7 +150,7 @@ TEST(Grad, SingleNodeCustomGrad) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1}; std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
node0_ptr->AddEdges(&res, 0); node0_ptr->SetGradOutMeta(leaf_tensor, 0);
} }
auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors); auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
...@@ -207,12 +207,12 @@ TEST(Grad, LinearNodes) { ...@@ -207,12 +207,12 @@ TEST(Grad, LinearNodes) {
auto_grad_meta->SetSingleOutRankWithSlot(0, 0); auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false); auto_grad_meta->SetStopGradient(false);
// Connect Node0 -> Node1 via Edge // Connect Node0 -> Node1 via Edge
auto meta0 = egr::AutogradMeta(); auto tmp_tensor = paddle::experimental::Tensor();
meta0.SetStopGradient(false); auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor);
meta0.SetSingleOutRankWithSlot(0, 0); meta0->SetStopGradient(false);
meta0.SetGradNode(node1_ptr); meta0->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res0 = {&meta0}; meta0->SetGradNode(node1_ptr);
node0_ptr->AddEdges(&res0, 0); node0_ptr->SetGradOutMeta(tmp_tensor, 0);
AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor); AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta // Connect Tensor and AccumulationNode via AutoGradMeta
...@@ -224,8 +224,7 @@ TEST(Grad, LinearNodes) { ...@@ -224,8 +224,7 @@ TEST(Grad, LinearNodes) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0); auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1}; node1_ptr->SetGradOutMeta(leaf_tensor, 0);
node1_ptr->AddEdges(&res1, 0);
} }
// Use Empty Grad Tensor // Use Empty Grad Tensor
...@@ -304,20 +303,20 @@ TEST(Grad, WithAccumulation) { ...@@ -304,20 +303,20 @@ TEST(Grad, WithAccumulation) {
auto_grad_meta1->SetStopGradient(false); auto_grad_meta1->SetStopGradient(false);
// Connect Node0 -> Node2 via Edge // Connect Node0 -> Node2 via Edge
auto meta0 = egr::AutogradMeta(); auto tmp_tensor0 = paddle::experimental::Tensor();
meta0.SetStopGradient(false); auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0);
meta0.SetSingleOutRankWithSlot(0, 0); meta0->SetStopGradient(false);
meta0.SetGradNode(node2_ptr); meta0->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res0 = {&meta0}; meta0->SetGradNode(node2_ptr);
node0_ptr->AddEdges(&res0, 0); node0_ptr->SetGradOutMeta(tmp_tensor0, 0);
// Connect Node1 -> Node2 via Edge // Connect Node1 -> Node2 via Edge
auto meta1 = egr::AutogradMeta(); auto tmp_tensor1 = paddle::experimental::Tensor();
meta1.SetStopGradient(false); auto meta1 = EagerUtils::autograd_meta(&tmp_tensor1);
meta1.SetSingleOutRankWithSlot(0, 0); meta1->SetStopGradient(false);
meta1.SetGradNode(node2_ptr); meta1->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res1 = {&meta1}; meta1->SetGradNode(node2_ptr);
node1_ptr->AddEdges(&res1, 0); node1_ptr->SetGradOutMeta(tmp_tensor1, 0);
AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor); AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta // Connect Tensor and AccumulationNode via AutoGradMeta
...@@ -329,8 +328,7 @@ TEST(Grad, WithAccumulation) { ...@@ -329,8 +328,7 @@ TEST(Grad, WithAccumulation) {
auto_grad_meta2->SetSingleOutRankWithSlot(0, 0); auto_grad_meta2->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta2->SetStopGradient(false); auto_grad_meta2->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2}; node2_ptr->SetGradOutMeta(leaf_tensor, 0);
node2_ptr->AddEdges(&res2, 0);
} }
auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors); auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
......
...@@ -110,21 +110,20 @@ TEST(RetainGrad, HookBeforeRetainGrad) { ...@@ -110,21 +110,20 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor(); paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
{ {
// AccumulationNode Hook: +3 // AccumulationNode Hook: +3
auto tmp_tensor0 = paddle::experimental::Tensor();
auto auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
auto auto_grad_meta = std::make_shared<AutogradMeta>(); auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
auto acc_node_ptr =
std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
auto_grad_meta->SetStopGradient(false); auto_grad_meta->SetStopGradient(false);
auto_grad_meta->SetGradNode(acc_node_ptr); auto_grad_meta->SetGradNode(acc_node_ptr);
auto_grad_meta->SetSingleOutRankWithSlot(0, 0); auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta.get()}; std::vector<egr::AutogradMeta*> res = {auto_grad_meta};
scale_node_ptr->AddEdges(&res, 0); scale_node_ptr->SetGradOutMeta(tmp_tensor0, 0);
leaf_tensor.set_autograd_meta( leaf_tensor.set_autograd_meta(
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>( std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta)); tmp_tensor0.mutable_autograd_meta()));
egr_utils_api::RegisterGradientHookForTensor( egr_utils_api::RegisterGradientHookForTensor(
leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function)); leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
...@@ -181,19 +180,17 @@ TEST(RetainGrad, HookAfterRetainGrad) { ...@@ -181,19 +180,17 @@ TEST(RetainGrad, HookAfterRetainGrad) {
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor(); paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
{ {
// AccumulationNode Hook: +3 // AccumulationNode Hook: +3
auto tmp_tensor0 = paddle::experimental::Tensor();
auto auto_grad_meta = std::make_shared<AutogradMeta>(); auto auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
auto acc_node_ptr = auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
auto_grad_meta->SetGradNode(acc_node_ptr); auto_grad_meta->SetGradNode(acc_node_ptr);
auto_grad_meta->SetStopGradient(false); auto_grad_meta->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta.get()}; scale_node_ptr->SetGradOutMeta(tmp_tensor0, 0);
scale_node_ptr->AddEdges(&res, 0);
auto_grad_meta->SetSingleOutRankWithSlot(0, 0); auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
leaf_tensor.set_autograd_meta( leaf_tensor.set_autograd_meta(
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>( std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta)); tmp_tensor0.mutable_autograd_meta()));
egr_utils_api::RegisterGradientHookForTensor( egr_utils_api::RegisterGradientHookForTensor(
leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function)); leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
......
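
Because the side-by-side rendering interleaves old and new lines, the leaf wiring used by both retain-grad tests above is easier to follow re-linearized; the sketch below reproduces the new-side code of the hook test, with names matching the fixture.

auto tmp_tensor0 = paddle::experimental::Tensor();
auto* auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
auto_grad_meta->SetStopGradient(false);
auto_grad_meta->SetGradNode(acc_node_ptr);
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
scale_node_ptr->SetGradOutMeta(tmp_tensor0, /*slot id*/ 0);
// The leaf tensor shares the autograd meta of the temporary tensor, so the
// registered hook and the retained grad attach to the same accumulation node.
leaf_tensor.set_autograd_meta(
    std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
        tmp_tensor0.mutable_autograd_meta()));
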
...@@ -69,9 +69,6 @@ inline void run_program_dygraph_function( ...@@ -69,9 +69,6 @@ inline void run_program_dygraph_function(
grad_node->SetGradOutMeta(params, /*slot id*/ 1); grad_node->SetGradOutMeta(params, /*slot id*/ 1);
grad_node->SetGradInMeta(deref_out, 0); grad_node->SetGradInMeta(deref_out, 0);
// Set Next Edges
grad_node->AddEdges(&p_autograd_x, /*slot id*/ 0);
grad_node->AddEdges(&p_autograd_params, /*slot id*/ 1);
egr::EagerUtils::SetOutRankWithSlot(&p_autograd_outs, 0); egr::EagerUtils::SetOutRankWithSlot(&p_autograd_outs, 0);
......
...@@ -364,12 +364,16 @@ class GradNodeRunProgram : public egr::GradNodeBase { ...@@ -364,12 +364,16 @@ class GradNodeRunProgram : public egr::GradNodeBase {
~GradNodeRunProgram() override = default; ~GradNodeRunProgram() override = default;
// Functor: perform backward computations // Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()( virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
std::vector<std::vector<paddle::experimental::Tensor>> &grads, // NOLINT egr::kSlotSmallVectorSize>
bool create_graph, bool is_new_grad) override { operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize> &grads, // NOLINT
bool create_graph,
bool is_new_grad) override {
VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram"; VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram";
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = paddle::small_vector<std::vector<paddle::experimental::Tensor>,
GradNodeRunProgram::ApplyGradientHooks(grads); egr::kSlotSmallVectorSize>
hooked_grads = GradNodeRunProgram::ApplyGradientHooks(grads);
PADDLE_ENFORCE_EQ(hooked_grads.size(), 1, PADDLE_ENFORCE_EQ(hooked_grads.size(), 1,
paddle::platform::errors::InvalidArgument( paddle::platform::errors::InvalidArgument(
"The hooked_grads.size() of RunProgramGradOp should " "The hooked_grads.size() of RunProgramGradOp should "
......
...@@ -441,8 +441,10 @@ std::shared_ptr<egr::GradNodeBase> EagerUtils::GetGradAccumulationNode( ...@@ -441,8 +441,10 @@ std::shared_ptr<egr::GradNodeBase> EagerUtils::GetGradAccumulationNode(
} }
void EagerUtils::FillZeroForEmptyGradInputs( void EagerUtils::FillZeroForEmptyGradInputs(
std::vector<std::vector<paddle::experimental::Tensor>>* in_grads, paddle::small_vector<std::vector<paddle::experimental::Tensor>,
const std::vector<std::vector<GradSlotMeta>>& grad_in_metas) { kSlotSmallVectorSize>* in_grads,
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
grad_in_metas) {
for (size_t i = 0; i < in_grads->size(); i++) { for (size_t i = 0; i < in_grads->size(); i++) {
for (size_t j = 0; j < (*in_grads)[i].size(); j++) { for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
paddle::experimental::Tensor& grad = (*in_grads)[i][j]; paddle::experimental::Tensor& grad = (*in_grads)[i][j];
......
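
The updated FillZeroForEmptyGradInputs (implementation hunk above, declaration hunk below) now takes both the incoming grads and the grad-in metas as slot-major small_vectors. A minimal call sketch mirroring the unit test earlier in this diff; the SetTensorMeta call and the dims value are assumptions about the fixture, not taken verbatim from the patch.

paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     egr::kSlotSmallVectorSize>
    grads = {std::vector<paddle::experimental::Tensor>(1)};
paddle::small_vector<std::vector<egr::GradSlotMeta>, egr::kSlotSmallVectorSize>
    slot_metas = {std::vector<egr::GradSlotMeta>(1)};

// Describe the expected gradient so an empty slot can be materialized as zeros.
phi::DenseTensorMeta tensor_meta;
tensor_meta.dtype = paddle::experimental::DataType::FLOAT32;
tensor_meta.dims = phi::make_ddim({1});
slot_metas[0][0].SetTensorMeta(tensor_meta);  // assumed GradSlotMeta setter

// grads[0][0] goes in uninitialized and comes back as a zero tensor.
egr::EagerUtils::FillZeroForEmptyGradInputs(&grads, slot_metas);
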
...@@ -234,8 +234,10 @@ class EagerUtils { ...@@ -234,8 +234,10 @@ class EagerUtils {
* Fill Zero * Fill Zero
* **/ * **/
static void FillZeroForEmptyGradInputs( static void FillZeroForEmptyGradInputs(
std::vector<std::vector<paddle::experimental::Tensor>>* out_grads, paddle::small_vector<std::vector<paddle::experimental::Tensor>,
const std::vector<std::vector<GradSlotMeta>>& grad_out_metas); kSlotSmallVectorSize>* out_grads,
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& grad_out_metas);
}; };
} // namespace egr } // namespace egr
...@@ -406,12 +406,9 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args, ...@@ -406,12 +406,9 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
if (slot_map[0].find(i) != slot_map[0].end()) { if (slot_map[0].find(i) != slot_map[0].end()) {
grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]); grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]);
grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0][i]);
} else { } else {
grad_node->SetGradOutMeta(in_tensors, grad_node->SetGradOutMeta(in_tensors,
ins_auto_grad_metas.size() - 1 - no_grad_cnt); ins_auto_grad_metas.size() - 1 - no_grad_cnt);
grad_node->AddEdges(&ins_auto_grad_metas[i],
ins_auto_grad_metas.size() - 1 - no_grad_cnt);
no_grad_cnt++; no_grad_cnt++;
} }
} }
......
...@@ -346,10 +346,8 @@ PyObject* pylayer_method_apply(PyObject* cls, PyObject* args, ...@@ -346,10 +346,8 @@ PyObject* pylayer_method_apply(PyObject* cls, PyObject* args,
for (auto t : inputs_tensor[i]) { for (auto t : inputs_tensor[i]) {
grad_node->SetGradOutMeta(*t, i); grad_node->SetGradOutMeta(*t, i);
} }
grad_node->AddEdges(&inputs_autograd_meta[i], i);
} else { } else {
grad_node->SetGradOutMeta(*inputs_tensor[i][0], i); grad_node->SetGradOutMeta(*inputs_tensor[i][0], i);
grad_node->AddEdges(inputs_autograd_meta[i][0], i);
} }
} }
......