Unverified · Commit 2bee99df authored by Jiabin Yang, committed by GitHub

Using small vector for slot and merge edge into grad_slot_meta (#42350)

Parent 27cf7afb
......@@ -447,10 +447,12 @@ void EagerReducer::TraverseBackwardGraph(const std::vector<Tensor> &outputs) {
while (!queue.empty()) {
egr::GradNodeBase *node = queue.front();
queue.pop();
const std::vector<std::vector<egr::Edge>> &edges = node->GetEdges();
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const egr::Edge &edge = edges[i][j];
const paddle::small_vector<std::vector<egr::GradSlotMeta>,
egr::kSlotSmallVectorSize> &metas =
node->OutputMeta();
for (size_t i = 0; i < metas.size(); i++) {
for (size_t j = 0; j < metas[i].size(); j++) {
const egr::Edge &edge = metas[i][j].GetEdge();
auto next_node_shared = edge.GetMutableGradNode();
if (!next_node_shared || !next_node_shared.get()) {
continue;
......
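The hunk above shows the pattern used throughout this change: instead of walking node->GetEdges(), callers walk node->OutputMeta() and read each slot's Edge out of its GradSlotMeta. A minimal sketch of the new traversal, assuming a valid egr::GradNodeBase* node (variable names are illustrative):

    const paddle::small_vector<std::vector<egr::GradSlotMeta>,
                               egr::kSlotSmallVectorSize>& metas = node->OutputMeta();
    for (size_t i = 0; i < metas.size(); i++) {      // one entry per backward output slot
      for (size_t j = 0; j < metas[i].size(); j++) { // one entry per rank inside the slot
        const egr::Edge& edge = metas[i][j].GetEdge();
        if (!edge.IsInitialized()) continue;         // leaf tensors may carry empty edges
        egr::GradNodeBase* next_node = edge.GetMutableGradNode().get();
        // visit next_node ...
      }
    }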
......@@ -38,10 +38,13 @@ static void CopyOrAddTensor(paddle::experimental::Tensor* tensor,
}
}
std::vector<std::vector<paddle::experimental::Tensor>> GradNodeAccumulation::
operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph, bool is_new_grad) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
GradNodeAccumulation::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph,
bool is_new_grad) {
VLOG(3) << "Running Eager Backward Node: GradNodeAccumulation";
PADDLE_ENFORCE(grads.size() == 1,
paddle::platform::errors::Fatal(
......@@ -56,8 +59,9 @@ operator()(
// Apply Gradient Hooks
paddle::experimental::Tensor grad_out;
if (GradientHooksRegistered()) {
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
ApplyGradientHooks(grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
hooked_grads = ApplyGradientHooks(grads);
grad_out = hooked_grads[0][0];
} else {
grad_out = grads[0][0];
......
......@@ -37,9 +37,12 @@ class GradNodeAccumulation : public GradNodeBase {
}
// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;
void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
......
......@@ -21,8 +21,8 @@ namespace egr {
static inline paddle::experimental::DataType GetPromoteType(
const std::string& op_name,
const std::vector<std::vector<paddle::experimental::Tensor>>&
amp_tensors_vector,
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector,
const paddle::experimental::DataType& amp_dtype) {
auto dst_type = amp_dtype;
if (egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype() ==
......@@ -86,8 +86,8 @@ static inline paddle::experimental::DataType GetPromoteType(
inline paddle::experimental::DataType GetAmpDestDtype(
const std::string& op_name,
const std::vector<std::vector<paddle::experimental::Tensor>>&
amp_tensors_vector) {
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& amp_tensors_vector) {
auto amp_dtype =
egr::Controller::Instance().GetCurrentTracer()->GetAmpDtype();
auto amp_level = egr::Controller::Instance().GetAMPLevel();
......
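GetPromoteType and GetAmpDestDtype now take the per-slot container as a paddle::small_vector as well, so the generated forward functions build the AMP input list with the same type. A minimal caller sketch, where the op name "matmul_v2" and the input tensors x and y are hypothetical:

    paddle::experimental::Tensor x, y;  // hypothetical forward inputs
    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        amp_tensors_vector = {{x}, {y}};  // one inner vector per forward input slot
    auto amp_dst_dtype = egr::GetAmpDestDtype("matmul_v2", amp_tensors_vector);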
......@@ -144,11 +144,15 @@ void GradNodeScale::SetTensorWrappers_X(
void GradNodeScale::SetAttributes_scale(float scale) { scale_ = scale; }
std::vector<std::vector<paddle::experimental::Tensor>> GradNodeScale::
operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph, bool is_new_grad) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
GradNodeScale::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph,
bool is_new_grad) {
// 1. Check Output Size
VLOG(6) << "grad size is: " << grads.size();
PADDLE_ENFORCE(
((grads.size() == 1) && (grads[0].size() == 1)),
paddle::platform::errors::Fatal(
......@@ -156,15 +160,18 @@ operator()(
"However received: %d",
"This indicates an issue with Eager Dygraph Backward logic",
grads.size()));
std::vector<std::vector<paddle::experimental::Tensor>> outs;
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
outs;
// 2. Create needed out parttern
paddle::experimental::Tensor out;
// Apply Gradient Hooks
if (GradientHooksRegistered()) {
// TODO(jiabin): Shall we apply hook slot by slot here or accept
// vector<vector<phi::tensor>> to apply all hooks?
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
ApplyGradientHooks(grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
hooked_grads = ApplyGradientHooks(grads);
ScaleAPI(/* slot by slot set */ hooked_grads[0][0], scale_, 0.0 /* bias */,
true /* bias_after_scale */, &out);
} else {
......
......@@ -38,9 +38,12 @@ class GradNodeScale : public GradNodeBase {
~GradNodeScale() override = default;
// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;
void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
......@@ -48,7 +51,7 @@ class GradNodeScale : public GradNodeBase {
const std::vector<paddle::experimental::Tensor>& tensors);
void SetAttributes_scale(float scale);
std::string name() override { return ""; }
std::string name() override { return "scale node"; }
// Members: define fwd input tensors
// For Scale there is no fwd input tensor needed
......
......@@ -79,9 +79,6 @@ paddle::experimental::Tensor scale(const paddle::experimental::Tensor& x,
// Pass Attributes to GradNode
scale_node->SetAttributes_scale(scale);
// Set Next Edges
scale_node->AddEdges(p_autograd_in, /*slot id*/ 0);
// Set TensorWrappers
scale_node->SetTensorWrappers_X({x});
......
......@@ -19,8 +19,9 @@
#include <memory>
#include "paddle/fluid/imperative/tracer.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/utils/small_vector.h"
namespace egr {
constexpr size_t kSlotSmallVectorSize = 15U;
class UniqueNameGenerator {
public:
explicit UniqueNameGenerator(std::string prefix = "") : prefix_(prefix) {}
......
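kSlotSmallVectorSize bounds the inline storage of every slot container introduced in this change: a paddle::small_vector keeps up to that many elements in place and only falls back to heap allocation beyond it, which is presumably what the migration away from std::vector<std::vector<...>> is after. A minimal usage sketch, assuming the LLVM-style SmallVector interface (resize, emplace_back, operator[]) relied on elsewhere in this diff; the variable name slots is illustrative:

    #include "paddle/fluid/eager/api/utils/global_utils.h"  // egr::kSlotSmallVectorSize
    #include "paddle/phi/api/all.h"                         // paddle::experimental::Tensor
    #include "paddle/utils/small_vector.h"

    paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                         egr::kSlotSmallVectorSize>
        slots;
    slots.resize(2);  // two grad slots; 2 <= 15, so storage stays inline
    slots[0].emplace_back(paddle::experimental::Tensor());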
......@@ -1187,11 +1187,6 @@ static std::string GenerateGradNodeCreationContent(
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
const char* ADD_EDGES_TEMPLATE =
" if(%s) grad_node->AddEdges(%s, %d);\n";
grad_node_creation_str +=
paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name,
input_autograd_name, input_position);
} else {
compute_require_grad_args += ", &" + input_autograd_name;
size_t input_position = fwd_inputs_name_pos_map.at(input_name);
......@@ -1200,10 +1195,6 @@ static std::string GenerateGradNodeCreationContent(
" grad_node->SetGradOutMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
}
}
......@@ -1649,7 +1640,8 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
std::string amp_logic_str = "";
if (in_vars.size() != 0) {
const char* AMP_TENSORS_VECTOR_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> "
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> "
"amp_tensors_vector = { "
"%s };\n";
std::string amp_tensors_vector = paddle::string::Sprintf(
......@@ -2428,9 +2420,11 @@ static std::string GenerateGradNodeCCContents(
}
const char* BWD_RETURN_TEMPLATE =
" std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads = "
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> hooked_grads = "
"GradNode%s::ApplyGradientHooks(grads);\n"
" std::vector<std::vector<paddle::experimental::Tensor>> outputs(%d);\n"
" paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> outputs(%d);\n"
" %s\n"
" if(NeedComplexToRealConversion()) "
"HandleComplexGradToRealGrad(&outputs);\n"
......@@ -2441,9 +2435,11 @@ static std::string GenerateGradNodeCCContents(
// [Generation] Get Full Grad Function
const char* GRAD_FUNCTION_TEMPLATE =
"std::vector<std::vector<paddle::experimental::Tensor>> "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> "
"GradNode%s::operator()("
"std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize>& grads, bool "
"create_graph, bool is_new_grad) {\n"
"%s"
"%s"
......@@ -2487,9 +2483,12 @@ static std::string GenerateGradNodeHeaderContents(
"Construct GradNode%s \"; }\n"
" ~GradNode%s() override { VLOG(6) << \" Destruct GradNode%s \"; }\n"
"\n"
" virtual std::vector<std::vector<paddle::experimental::Tensor>> "
" virtual "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize> "
"operator()("
"std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool "
"paddle::small_vector<std::vector<paddle::experimental::Tensor>, "
"egr::kSlotSmallVectorSize>& grads, bool "
"create_graph = false, bool is_new_grad = false) "
"override;\n"
"\n"
......
......@@ -118,8 +118,8 @@ class {} : public egr::GradNodeBase {{
egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {{}}
~{}() override = default;
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override;
std::string name() override {{ return \"{}\"; }}
void ClearTensorWrappers() override {{
......@@ -149,7 +149,7 @@ class {} : public egr::GradNodeBase {{
GRAD_FUNCTION_TEMPLATE = \
"""
std::vector<std::vector<paddle::experimental::Tensor>> {}::operator()(std::vector<std::vector<paddle::experimental::Tensor>>& grads, bool create_graph, bool is_new_grad) {{
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> {}::operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph, bool is_new_grad) {{
// Fill Zero For GradIn Tensors
{}
......@@ -239,7 +239,6 @@ FORWARD_BODY_TEMPLATE = \
// Set TensorWrappers for Forward Inputs
{}
// SetGradOutMeta & SetEdges
{}
{}
// SetOutRank & SetHistory & SetGradInMeta & RetainGrad
{}
......@@ -356,7 +355,7 @@ AMP_LOGIC_TEMPLATE = \
if (egr::Controller::Instance().GetAMPLevel() != paddle::imperative::AmpLevel::O0) {{
VLOG(5) << "Check and Prepare For AMP";
{}
std::vector<std::vector<paddle::experimental::Tensor>> amp_tensors_vector = {};
paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> amp_tensors_vector = {};
{}
{}
{}
......@@ -769,15 +768,11 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
is_optional = (name in self.optional_inputs)
if is_optional:
set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});"
set_edges = f"{indent}if({name}.get_ptr() != nullptr) grad_node->AddEdges({input_autograd_meta_name}, {pos});"
else:
set_grad_out_meta = f"{indent}grad_node->SetGradOutMeta({name}, {pos});"
set_edges = f"{indent}grad_node->AddEdges({input_autograd_meta_name}, {pos});"
set_grad_out_meta_list.append(set_grad_out_meta)
set_edges_list.append(set_edges)
set_grad_out_meta_str = "\n".join(set_grad_out_meta_list)
set_edges_str = "\n".join(set_edges_list)
# SetOutRank & SetHistory & SetGradInMeta
set_out_rank_list = []
......@@ -808,7 +803,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase):
self.node_creation_str = FORWARD_BODY_TEMPLATE.format(
node_creation_event_str, pass_stop_gradient_args_str,
node_construction_str, set_attributes_str,
set_input_tensor_wrappers_str, set_grad_out_meta_str, set_edges_str,
set_input_tensor_wrappers_str, set_grad_out_meta_str,
set_out_rank_str, set_history_str, set_grad_in_meta_str,
set_retain_grad_str, set_output_tensor_wrappers_str)
......@@ -1454,7 +1449,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase):
# Construct grad_api returns
slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys())
returns_str = f"{indent}std::vector<std::vector<paddle::experimental::Tensor>> returns({slot_num_bwd_outputs});\n"
returns_str = f"{indent}paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});\n"
for name, (ttype, fwd_position,
grad_api_position) in backward_grad_outputs_map.items():
transformed_tensor_name = self.TransformToNextGradName(name)
......
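The node class template above (with {} placeholders) now emits the small_vector signature for every generated grad node. A hedged sketch of what the expansion could look like for a hypothetical MatmulGradNode, keeping only the members visible in this hunk:

    class MatmulGradNode : public egr::GradNodeBase {
     public:
      MatmulGradNode(size_t bwd_in_slot_num, size_t bwd_out_slot_num)
          : egr::GradNodeBase(bwd_in_slot_num, bwd_out_slot_num) {}
      ~MatmulGradNode() override = default;

      virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize> operator()(
          paddle::small_vector<std::vector<paddle::experimental::Tensor>, egr::kSlotSmallVectorSize>& grads, bool create_graph = false, bool is_new_grad = false) override;

      std::string name() override { return "MatmulGradNode"; }
    };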
......@@ -169,9 +169,12 @@ class GeneralGrad {
input_target_nodes_inputmeta_map.count(node);
// Find and append next nodes
const std::vector<std::vector<Edge>>& edges = node->GetEdges();
for (const auto& edge_list : edges) {
for (const Edge& edge : edge_list) {
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& metas =
node->OutputMeta();
for (const auto& meta_list : metas) {
for (const GradSlotMeta& meta : meta_list) {
const auto& edge = meta.GetEdge();
GradNodeBase* next_node = edge.GetMutableGradNode().get();
// Next node could be nullptr if it is leaf tensor with no
......@@ -381,13 +384,15 @@ class GeneralGrad {
"unable to find copied target for certain grad node."));
GradNodeBase* copied_node = orig_to_copied_node_mapping_[orig_node].get();
const std::vector<std::vector<Edge>>& orig_edges = orig_node->GetEdges();
std::vector<std::vector<Edge>>& copied_edges =
copied_node->GetMutableEdges();
for (size_t i = 0; i < orig_edges.size(); i++) {
for (size_t j = 0; j < orig_edges[i].size(); j++) {
const Edge& orig_edge = orig_edges[i][j];
Edge& copied_edge = copied_edges[i][j];
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& orig_meta =
orig_node->OutputMeta();
paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
copied_edges = copied_node->MutableOutputMeta();
for (size_t i = 0; i < orig_meta.size(); i++) {
for (size_t j = 0; j < orig_meta[i].size(); j++) {
const Edge& orig_edge = orig_meta[i][j].GetEdge();
Edge& copied_edge = copied_edges[i][j].GetMutableEdge();
std::shared_ptr<GradNodeBase> orig_next_node =
orig_edge.GetMutableGradNode();
......@@ -468,9 +473,11 @@ std::unordered_map<GradNodeBase*, int> getInDegreeMap(
"We got null node when we traverse the backward graph, and this "
"should not happened please check your code and contact us."));
// Find and append next nodes
const std::vector<std::vector<Edge>>& edges = node->GetEdges();
for (const auto& edge_list : edges) {
for (const Edge& edge : edge_list) {
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
metas = node->OutputMeta();
for (const auto& meta_list : metas) {
for (const GradSlotMeta& meta : meta_list) {
const auto& edge = meta.GetEdge();
GradNodeBase* next_node = edge.GetMutableGradNode().get();
// Next node could be nullptr if it is leaf tensor with no
// AccumulationNode attached
......@@ -689,8 +696,10 @@ std::vector<paddle::experimental::Tensor> RunBackward(
VLOG(6) << "Run Backward Kernel with GradTensorHolder.";
// Run Pre Backward Node and get outputs
std::vector<std::vector<paddle::experimental::Tensor>> grad_output_tensors =
(*node)(node_input_buffer->Buffers(), create_graph, is_general_grad);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
grad_output_tensors = (*node)(node_input_buffer->Buffers(),
create_graph, is_general_grad);
// retain_grad or not
if (!retain_graph) {
......@@ -704,17 +713,18 @@ std::vector<paddle::experimental::Tensor> RunBackward(
node_input_buffers_dict.erase(node);
// Prepare GradTensorHolder for next node
const std::vector<std::vector<Edge>>& edges = node->GetEdges();
PADDLE_ENFORCE(edges.size() == grad_output_tensors.size() || edges.empty(),
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
metas = node->OutputMeta();
PADDLE_ENFORCE(metas.size() == grad_output_tensors.size() || metas.empty(),
paddle::platform::errors::Fatal(
"Number of edges should be either empty ( for leaf node "
") or the same as number of output grad tensors, but we "
"got edges size is: %d, grad_output size is: %d",
edges.size(), grad_output_tensors.size()));
metas.size(), grad_output_tensors.size()));
for (size_t i = 0; i < edges.size(); i++) {
for (size_t j = 0; j < edges[i].size(); j++) {
const Edge& edge = edges[i][j];
for (size_t i = 0; i < metas.size(); i++) {
for (size_t j = 0; j < metas[i].size(); j++) {
const Edge& edge = metas[i][j].GetEdge();
if (!edge.IsInitialized()) {
continue;
}
......
......@@ -19,10 +19,12 @@
#include "paddle/phi/core/dense_tensor.h"
namespace egr {
std::vector<std::vector<paddle::experimental::Tensor>> RunCustomOpNode::
operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph, bool is_new_grad) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
RunCustomOpNode::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads,
bool create_graph, bool is_new_grad) { // NOLINT
paddle::CustomOpKernelContext ctx;
auto grad_inputs_name = paddle::framework::OpMetaInfoHelper::GetInputs(
egr::Controller::Instance().GetOpMetaInfoMap().at(op_type_)[1]);
......@@ -31,8 +33,9 @@ operator()(
auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap();
std::vector<std::vector<paddle::experimental::Tensor>> tmp_ins(
grad_inputs_name.size());
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
tmp_ins(grad_inputs_name.size());
VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
<< ", whose grad_inputs_name size is: " << grad_inputs_name.size();
for (size_t i = 0; i < grads.size(); i++) {
......@@ -58,17 +61,19 @@ operator()(
}
VLOG(6) << "Prepare Grad attrs";
ctx.EmplaceBackAttrs(attrs_);
std::vector<std::vector<paddle::experimental::Tensor>> outs(
GetEdges().size());
std::vector<std::vector<paddle::experimental::Tensor>> tmp_outs(
grad_outputs_names.size());
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
outs(OutputMeta().size());
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
tmp_outs(grad_outputs_names.size());
VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
for (size_t i = 0; i < GetEdges().size(); i++) {
for (size_t i = 0; i < OutputMeta().size(); i++) {
if (map[0].find(i) != map[0].end()) {
VLOG(7) << "Insert grad outputs: " << i
<< " with size: " << GetEdges()[i].size()
<< " with size: " << OutputMeta()[i].size()
<< " to tmp_outputs: " << map[0][i];
for (size_t j = 0; j < GetEdges()[i].size(); j++) {
for (size_t j = 0; j < OutputMeta()[i].size(); j++) {
outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */
std::make_shared<phi::DenseTensor>(
phi::DataType::UNDEFINED),
......
......@@ -36,10 +36,13 @@ class RunCustomOpNode : public GradNodeBase {
}
// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>>
operator()( // NOLINT
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) // NOLINT
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()( // NOLINT
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) // NOLINT
override;
std::string name() {
......
......@@ -40,70 +40,20 @@ GradNodeBase::GradNodeBase(size_t bwd_in_slot_num, size_t bwd_out_slot_num) {
VLOG(6) << "Construct GradNodeBase";
bwd_in_meta_.resize(bwd_in_slot_num);
bwd_out_meta_.resize(bwd_out_slot_num);
adj_edges_.resize(bwd_out_slot_num);
}
void GradNodeBase::AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id) {
PADDLE_ENFORCE_LT(
slot_id, adj_edges_.size(),
paddle::platform::errors::InvalidArgument(
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
for (size_t i = 0; i < metas->size(); i++) {
const auto& meta = (*metas)[i];
// adj_edges has as same rank as fwd inputs, and record it's output rank
// from
// its pre-ops
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (!node || !node.get()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
}
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " (addr: " << this << ") "
<< " to " << meta->GetMutableGradNode()->name()
<< " (addr: " << meta->GetMutableGradNode().get() << ")";
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
adj_edges_[slot_id].emplace_back();
}
}
}
void GradNodeBase::AddEdges(AutogradMeta* meta, size_t slot_id) {
PADDLE_ENFORCE_LT(
slot_id, adj_edges_.size(),
paddle::platform::errors::InvalidArgument(
"Given slot id is out of range of adj_edges outter size, "
"adj_edges is designed to has the same size of grad "
"inputs's slot num."));
if (meta && !meta->StopGradient()) {
auto node = meta->GetMutableGradNode();
if (!node || !node.get()) {
meta->SetGradNode(std::make_shared<egr::GradNodeAccumulation>(meta));
}
VLOG(6) << "Add Edges for slot: " << slot_id << ", the Edge is from "
<< this->name() << " (addr: " << this << ") "
<< " to " << meta->GetMutableGradNode()->name()
<< " (addr: " << meta->GetMutableGradNode().get() << ")";
adj_edges_[slot_id].emplace_back(meta->GetMutableGradNode(),
meta->OutRankInfo());
} else {
adj_edges_[slot_id].emplace_back();
}
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
GradNodeBase::InputMeta() const {
return bwd_in_meta_;
}
const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::InputMeta() const {
return bwd_in_meta_;
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
GradNodeBase::OutputMeta() const {
return bwd_out_meta_;
}
const std::vector<std::vector<GradSlotMeta>>& GradNodeBase::OutputMeta() const {
paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
GradNodeBase::MutableOutputMeta() {
return bwd_out_meta_;
}
......@@ -123,7 +73,9 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
}
auto& meta = metas[0];
meta.SetStopGradient(fwd_out_meta->StopGradient());
if (fwd_out_meta && fwd_out_meta->StopGradient()) {
meta.SetStopGradient(fwd_out_meta->StopGradient());
}
if (!fwd_out.initialized()) {
VLOG(6)
......@@ -153,8 +105,8 @@ void GradNodeBase::SetGradInMeta(const paddle::experimental::Tensor& fwd_out,
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_out.place());
if (paddle::framework::IsComplexType(
paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
if (dense_tensor->type() == paddle::experimental::DataType::COMPLEX64 ||
dense_tensor->type() == paddle::experimental::DataType::COMPLEX128) {
need_complex_to_real_ = true;
}
}
......@@ -186,7 +138,7 @@ void GradNodeBase::SetGradInMeta(
"Bwd_in_meta should only be called while "
"autograd_meta is not null. If you got this "
"error, it indicates bugs in framework."));
if (fwd_out_meta->StopGradient()) {
if (fwd_out_meta && fwd_out_meta->StopGradient()) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
meta.SetStopGradient(fwd_out_meta->StopGradient());
......@@ -212,8 +164,8 @@ void GradNodeBase::SetGradInMeta(
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_out_tensor.place());
if (paddle::framework::IsComplexType(
paddle::framework::TransToProtoVarType(dense_tensor->type()))) {
if (dense_tensor->type() == paddle::experimental::DataType::COMPLEX64 ||
dense_tensor->type() == paddle::experimental::DataType::COMPLEX128) {
need_complex_to_real_ = true;
}
} else {
......@@ -238,12 +190,24 @@ void GradNodeBase::SetGradOutMeta(const paddle::experimental::Tensor& fwd_in,
metas.resize(1);
}
auto& meta = metas[0];
// Set Stop_gradient
if (fwd_in_meta) {
meta.SetStopGradient(fwd_in_meta->StopGradient());
} else {
meta.SetStopGradient(true);
}
// Set Adj Edges
if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
auto node = fwd_in_meta->GetMutableGradNode();
if (!node || !node.get()) {
fwd_in_meta->SetGradNode(
std::make_shared<egr::GradNodeAccumulation>(fwd_in_meta));
}
VLOG(6) << "Add Edges for slot: " << slot_rank << ", the Edge is from "
<< this->name() << " (addr: " << this << ") "
<< " to " << fwd_in_meta->GetMutableGradNode()->name()
<< " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")";
meta.SetEdge(fwd_in_meta->GetMutableGradNode(), fwd_in_meta->OutRankInfo());
}
// Record TensorMeta
if (fwd_in.impl() && fwd_in.impl().get()) {
if (phi::DenseTensor::classof(fwd_in.impl().get())) {
......@@ -282,30 +246,43 @@ void GradNodeBase::SetGradOutMeta(
const auto& fwd_in_tensor = fwd_in[i];
auto& meta = metas[i];
auto* fwd_in_meta = egr::EagerUtils::nullable_autograd_meta(fwd_in_tensor);
// Set Stop_gradient
if (fwd_in_meta) {
// Set Stop Gradient only when its true or non-initialized autograd_meta,
// since all default value is false.
meta.SetStopGradient(fwd_in_meta->StopGradient());
}
// Set Adj Edges
if (fwd_in_meta && !fwd_in_meta->StopGradient()) {
auto node = fwd_in_meta->GetMutableGradNode();
if (!node || !node.get()) {
fwd_in_meta->SetGradNode(
std::make_shared<egr::GradNodeAccumulation>(fwd_in_meta));
}
VLOG(6) << "Add Edges for slot: " << slot_rank << ", the Edge is from "
<< this->name() << " (addr: " << this << ") "
<< " to " << fwd_in_meta->GetMutableGradNode()->name()
<< " (addr: " << fwd_in_meta->GetMutableGradNode().get() << ")";
meta.SetEdge(fwd_in_meta->GetMutableGradNode(),
fwd_in_meta->OutRankInfo());
}
// Record TensorMeta
if (fwd_in_tensor.impl() && fwd_in_tensor.impl().get()) {
if (phi::DenseTensor::classof(fwd_in_tensor.impl().get())) {
// Only Copy Meta
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(fwd_in_tensor.impl().get());
PADDLE_ENFORCE_NE(dense_tensor->meta().dtype, phi::DataType::UNDEFINED,
paddle::platform::errors::Fatal(
"Attempting to copy DenseTensorMeta with "
"phi::DataType::UNDEFINED,"
"Attempting to copy DenseTensorMeta "
"with phi::DataType::UNDEFINED,"
"which is illegal."));
meta.SetTensorMeta(dense_tensor->meta());
meta.SetPlace(fwd_in_tensor.place());
}
} else {
VLOG(6) << "Unable to initialize the DenseTensorMeta of GradSlotMeta "
"with non-DenseTensor argument.";
VLOG(6)
<< "Unable to initialize the DenseTensorMeta of GradSlotMeta with "
"non-DenseTensor argument.";
}
}
}
......@@ -328,18 +305,14 @@ int64_t GradNodeBase::RegisterGradientHook(
return next_hook_id_++;
}
const std::vector<std::vector<Edge>>& GradNodeBase::GetEdges() const {
return adj_edges_;
}
std::vector<std::vector<Edge>>& GradNodeBase::GetMutableEdges() {
return adj_edges_;
}
std::vector<std::vector<paddle::experimental::Tensor>>
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
GradNodeBase::ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors) {
std::vector<std::vector<paddle::experimental::Tensor>> outs(tensors.size());
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
outs(tensors.size());
for (auto& hook_pair : gradient_hooks_) {
size_t slot_id = std::get<0>(hook_pair.second);
size_t rank = std::get<1>(hook_pair.second);
......@@ -386,7 +359,8 @@ GradNodeBase::ApplyGradientHooks(
}
void GradNodeBase::HandleComplexGradToRealGrad(
std::vector<std::vector<paddle::experimental::Tensor>>* out_grads) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>* out_grads) {
for (size_t slot_id = 0; slot_id < out_grads->size(); slot_id++) {
const std::vector<paddle::experimental::Tensor>& slot_out_grads =
(*out_grads)[slot_id];
......
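With AddEdges removed, SetGradOutMeta is now the single call that records a forward input's slot meta and, when the input requires grad, installs the corresponding Edge (creating a GradNodeAccumulation for leaf tensors on demand). A minimal caller-side sketch, assuming a grad_node and a forward input tensor fwd_in whose autograd meta does not stop gradient (names are illustrative):

    grad_node->SetGradOutMeta(fwd_in, /*slot_rank=*/0);  // also installs the Edge that AddEdges used to set
    // the edge is now reachable through the merged output metadata:
    const egr::Edge& edge = grad_node->OutputMeta()[0][0].GetEdge();
    if (edge.IsInitialized()) {
      auto next_node = edge.GetMutableGradNode();  // e.g. the accumulation node of a leaf tensor
    }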
......@@ -16,6 +16,7 @@
#include <memory>
#include "paddle/fluid/eager/api/utils/global_utils.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/hooks.h"
#include "paddle/phi/api/all.h"
......@@ -46,9 +47,8 @@ namespace egr {
* indicate which
* input of grad this edge belong).
* */
class Edge;
class AutogradMeta;
class GradNodeBase;
/**
* GradSlotMeta is used to Record Forward Tensor info to backward, since paddle
* has lots of operators
......@@ -56,6 +56,70 @@ class AutogradMeta;
* So, we need a meta info
* to record it's needs.
* **/
class Edge {
public:
// Default constructor for Edges in order to construct it for AutogradMeta
Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {}
// In real use cases we should create Edge from grad node and input rank which
// indicate which edge it is.
// Since we have slot design in operators we will have to locate an edge with
// slot
// and rank.
Edge(const std::shared_ptr<GradNodeBase>& grad_node, size_t in_slot_id,
size_t in_rank)
: in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {}
Edge(const std::shared_ptr<GradNodeBase>& grad_node,
const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info)
: in_slot_id_(rank_info.first),
in_rank_(rank_info.second),
grad_node_(grad_node) {}
GradNodeBase* GetGradNode() const { return grad_node_.get(); }
std::shared_ptr<GradNodeBase> GetMutableGradNode() const {
return grad_node_;
}
void SetGradNode(const std::shared_ptr<GradNodeBase>& node) {
VLOG(6) << "Reseting Edge's Grad Node";
grad_node_ = node;
}
std::pair<size_t, size_t> GetEdgeRankInfo() const {
return std::make_pair(in_slot_id_, in_rank_);
}
void SetEdgeRankInfo(size_t slot_id, size_t in_rank) {
in_slot_id_ = slot_id;
in_rank_ = in_rank;
}
void SetEdgeRankInfo(
const std::pair</* slot_id */ size_t, /* rank */ size_t>& edge_rank) {
in_slot_id_ = edge_rank.first;
in_rank_ = edge_rank.second;
}
// Currently we use grad_node_ to identify if a edge is initialized.
bool IsInitialized() const {
if (!grad_node_) {
return false;
} else {
if (!(grad_node_.get())) {
return false;
} else {
return true;
}
}
}
private:
size_t in_slot_id_;
size_t in_rank_;
std::shared_ptr<GradNodeBase> grad_node_{nullptr};
};
class GradSlotMeta {
public:
GradSlotMeta() = default;
......@@ -81,10 +145,21 @@ class GradSlotMeta {
void SetPlace(const phi::Place& place) { place_ = place; }
const phi::Place& GetPlace() const { return place_; }
void SetEdge(const Edge& edge) { adj_edge_ = edge; }
void SetEdge(
const std::shared_ptr<GradNodeBase>& grad_node,
const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info) {
adj_edge_.SetGradNode(grad_node);
adj_edge_.SetEdgeRankInfo(rank_info);
}
Edge& GetMutableEdge() { return adj_edge_; }
const Edge& GetEdge() const { return adj_edge_; }
private:
bool stop_gradient_{false};
phi::Place place_;
std::shared_ptr<phi::DenseTensorMeta> meta_ = nullptr;
Edge adj_edge_;
};
class GradNodeBase {
......@@ -107,9 +182,12 @@ class GradNodeBase {
* so, vector of vector
* is better choice to fit this format.
* **/
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) = 0;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) = 0;
virtual void ClearTensorWrappers() = 0;
......@@ -118,17 +196,6 @@ class GradNodeBase {
* **/
virtual std::shared_ptr<GradNodeBase> Copy() const = 0;
/**
* AddEdges is designed to set input tensors' backward Node as current
* node's Edges.
* This method should be call in forward code and for double backward depends
* computation.
*
* This one is called slot by slot
* **/
void AddEdges(std::vector<AutogradMeta*>* metas, size_t slot_id);
void AddEdges(AutogradMeta* meta, size_t slot_id);
// adj_edges were moved inside OutputMeta(), so no available direct access
// from GradNodeBase.
// To access Edges, get GradSlotMeta by calling OutputMeta(), then use
......@@ -136,10 +203,15 @@ class GradNodeBase {
/**
* Get Input Meta of current Grad node**/
const std::vector<std::vector<GradSlotMeta>>& InputMeta() const;
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
InputMeta() const;
/**
* Get Output Meta of current Grad node**/
const std::vector<std::vector<GradSlotMeta>>& OutputMeta() const;
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
OutputMeta() const;
paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
MutableOutputMeta();
/**
* Set bwd ins and outs info with forward vars
* **/
......@@ -180,23 +252,22 @@ class GradNodeBase {
* **/
inline bool GradientHooksRegistered() { return !gradient_hooks_.empty(); }
std::vector<std::vector<paddle::experimental::Tensor>> ApplyGradientHooks(
const std::vector<std::vector<paddle::experimental::Tensor>>& tensors);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
ApplyGradientHooks(
const paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& tensors);
/**
* Handle Complex - Real Type Promotion
* **/
void HandleComplexGradToRealGrad(
std::vector<std::vector<paddle::experimental::Tensor>>* out_grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>* out_grads);
bool NeedComplexToRealConversion() { return need_complex_to_real_; }
virtual std::string name() { return "GradNodeBase"; }
/**
* GetEdges is designed to get all edges of current node**/
const std::vector<std::vector<Edge>>& GetEdges() const;
std::vector<std::vector<Edge>>& GetMutableEdges();
/**
* The following interfaces are designed for no_need_buffer
* **/
......@@ -207,18 +278,13 @@ class GradNodeBase {
}
private:
// TODO(zhanlve): Merge adj_edges_ into GradOutMeta
// Edges recorded the backward related node info, which indicate all edges
// linked
// by this Grad Node.
// Why we need vector<vector<Edge>>: Edges is as same rank as bwd output.
std::vector<std::vector<Edge>> adj_edges_;
// bwd_out_meta_ is used to record Grad output info for backward
std::vector<std::vector<GradSlotMeta>> bwd_out_meta_;
paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>
bwd_out_meta_;
// bwd_in_meta_ used to record Grad input info for backward
std::vector<std::vector<GradSlotMeta>> bwd_in_meta_;
paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>
bwd_in_meta_;
// Gradient Hooks
// Customer may register a list of hooks which will be called in order during
// backward
......@@ -235,71 +301,6 @@ class GradNodeBase {
bool is_tensor_wrappers_cleared_ = false;
};
class Edge {
public:
// Default constructor for Edges in order to construct it for AutogradMeta
Edge() : in_slot_id_(0), in_rank_(0), grad_node_(nullptr) {}
// In real use cases we should create Edge from grad node and input rank which
// indicate which edge it is.
// Since we have slot design in operators we will have to locate an edge with
// slot
// and rank.
Edge(const std::shared_ptr<GradNodeBase>& grad_node, size_t in_slot_id,
size_t in_rank)
: in_slot_id_(in_slot_id), in_rank_(in_rank), grad_node_(grad_node) {}
Edge(const std::shared_ptr<GradNodeBase>& grad_node,
const std::pair</* slot_id */ size_t, /* rank */ size_t>& rank_info)
: in_slot_id_(rank_info.first),
in_rank_(rank_info.second),
grad_node_(grad_node) {}
GradNodeBase* GetGradNode() const { return grad_node_.get(); }
std::shared_ptr<GradNodeBase> GetMutableGradNode() const {
return grad_node_;
}
void SetGradNode(const std::shared_ptr<GradNodeBase>& node) {
VLOG(6) << "Reseting Edge's Grad Node";
grad_node_ = node;
}
std::pair<size_t, size_t> GetEdgeRankInfo() const {
return std::make_pair(in_slot_id_, in_rank_);
}
void SetEdgeRankInfo(size_t slot_id, size_t in_rank) {
in_slot_id_ = slot_id;
in_rank_ = in_rank;
}
void SetEdgeRankInfo(
const std::pair</* slot_id */ size_t, /* rank */ size_t>& edge_rank) {
in_slot_id_ = edge_rank.first;
in_rank_ = edge_rank.second;
}
// Currently we use grad_node_ to identify if a edge is initialized.
bool IsInitialized() const {
if (!grad_node_) {
return false;
} else {
if (!(grad_node_.get())) {
return false;
} else {
return true;
}
}
}
private:
size_t in_slot_id_;
size_t in_rank_;
std::shared_ptr<GradNodeBase> grad_node_{nullptr};
};
inline void CheckTensor(const paddle::experimental::Tensor& pre,
const paddle::experimental::Tensor& post) {
if (!pre.initialized() && post.initialized()) {
......
......@@ -27,7 +27,8 @@ namespace egr {
class GradTensorHolder {
public:
explicit GradTensorHolder(
const std::vector<std::vector<GradSlotMeta>>& metas) {
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& metas) {
VLOG(7) << "Init GradTensorHolder with meta size: " << metas.size();
buffer_.resize(metas.size());
for (size_t i = 0; i < buffer_.size(); i++) {
......@@ -39,7 +40,8 @@ class GradTensorHolder {
GradTensorHolder(const GradTensorHolder& other) = default;
explicit GradTensorHolder(
std::vector<std::vector<paddle::experimental::Tensor>>&& inputs)
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>&& inputs)
: buffer_(std::move(inputs)) {}
GradTensorHolder& operator=(const GradTensorHolder& other) = default;
......@@ -56,14 +58,18 @@ class GradTensorHolder {
return buffer_[pos];
}
std::vector<std::vector<paddle::experimental::Tensor>>& Buffers() {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>&
Buffers() {
return buffer_;
}
void SetBufferSlotRankZeros(size_t slot_id, size_t rank);
private:
std::vector<std::vector<paddle::experimental::Tensor>> buffer_;
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
buffer_;
};
} // namespace egr
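GradTensorHolder now mirrors the node metadata with the same small_vector layout, so a holder can be constructed from a node's InputMeta() and its Buffers() fed to the node's operator(), matching the call in RunBackward above. A minimal sketch, assuming a valid egr::GradNodeBase* node and that the holder is sized from InputMeta() as in typical use:

    egr::GradTensorHolder holder(node->InputMeta());  // one buffer slot per backward input slot
    auto grad_outs = (*node)(holder.Buffers(),
                             /*create_graph=*/false, /*is_new_grad=*/false);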
......@@ -29,14 +29,18 @@
#include "pybind11/pytypes.h"
namespace egr {
std::vector<std::vector<paddle::experimental::Tensor>> GradNodePyLayer::
operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph, bool is_new_grad) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
GradNodePyLayer::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph,
bool is_new_grad) {
VLOG(3) << "Running Eager Backward Node: " << name();
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
GradNodePyLayer::ApplyGradientHooks(grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
hooked_grads = GradNodePyLayer::ApplyGradientHooks(grads);
paddle::pybind::PyLayerObject* ctx =
reinterpret_cast<paddle::pybind::PyLayerObject*>(ctx_);
......@@ -124,7 +128,9 @@ operator()(
ctx->forward_input_tensor_is_duplicable.size(), outputs_size));
}
std::vector<std::vector<paddle::experimental::Tensor>> grad_out;
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
grad_out;
grad_out.reserve(ctx->forward_input_tensor_is_duplicable.size());
for (size_t i = 0; i < ctx->forward_input_tensor_is_duplicable.size(); i++) {
if (i < outputs_size) {
......
......@@ -34,9 +34,12 @@ class GradNodePyLayer : public GradNodeBase {
~GradNodePyLayer() override { Py_DECREF(ctx_); };
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) override;
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override;
void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
......
......@@ -88,7 +88,7 @@ class TensorWrapper {
} else {
intermidiate_tensor_.set_impl(tensor.impl());
}
// TODO(jiabin): This may has server performance issue
intermidiate_tensor_.set_name(tensor.name() + "@Saved");
auto* tensor_autograd_meta = EagerUtils::nullable_autograd_meta(tensor);
......
......@@ -80,14 +80,18 @@ TEST(AccumulationNode, Tensor) {
grad_meta->SetStopGradient(false);
// operator()
std::vector<std::vector<paddle::experimental::Tensor>> et0_vec = {{et0}};
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et0_vec = {{et0}};
paddle::experimental::Tensor ret_et0 = node->operator()(et0_vec)[0][0];
auto* ret_et0_ptr =
std::dynamic_pointer_cast<phi::DenseTensor>(ret_et0.impl())
->data<paddle::platform::float16>();
CHECK_EQ(ret_et0_ptr[0], paddle::platform::float16(10.0f));
std::vector<std::vector<paddle::experimental::Tensor>> et1_vec = {{et1}};
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
et1_vec = {{et1}};
paddle::experimental::Tensor ret_et1 = node->operator()(et1_vec)[0][0];
auto* ret_et1_ptr =
......
......@@ -34,7 +34,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
auto grad_test_node0 = std::make_shared<eager_test::GradTestNode>(
/* val */ 5.0, /* in_num */ 2, /* out_num */ 2);
auto grad_test_node1 = std::make_shared<eager_test::GradTestNode>();
std::vector<std::vector<paddle::experimental::Tensor>> grads;
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
grads;
phi::DenseTensorMeta meta =
phi::DenseTensorMeta(phi::DataType::FLOAT32, phi::make_ddim({1, 1}));
std::shared_ptr<phi::DenseTensor> dt = std::make_shared<phi::DenseTensor>(
......@@ -51,28 +53,9 @@ void TestGradNodeBase(bool is_remove_gradient_hook) {
CHECK_EQ(std::dynamic_pointer_cast<phi::DenseTensor>(res[0][0].impl())
->data<float>()[0],
6.0f);
VLOG(6) << "Test Add Edges";
egr::Edge tmp_edge0(grad_test_node1, 1, 2);
auto auto_grad0 = std::make_shared<egr::AutogradMeta>(tmp_edge0);
auto_grad0->SetStopGradient(false);
egr::Edge tmp_edge1(grad_test_node1, 3, 4);
auto auto_grad1 = std::make_shared<egr::AutogradMeta>(tmp_edge1);
et1.set_autograd_meta(auto_grad1);
auto_grad1->SetStopGradient(false);
grad_test_node0->AddEdges(auto_grad0.get(), 0);
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().first,
size_t(1));
CHECK_EQ(grad_test_node0->GetEdges()[0][0].GetEdgeRankInfo().second,
size_t(2));
std::vector<egr::AutogradMeta*> metas = {auto_grad1.get()};
grad_test_node0->AddEdges(&metas, 1);
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().first,
size_t(3));
CHECK_EQ(grad_test_node0->GetEdges()[1][0].GetEdgeRankInfo().second,
size_t(4));
VLOG(6) << "Test Set Meta and Get Meta";
auto_grad1->SetStopGradient(true);
......
......@@ -31,9 +31,12 @@ class GradTestNode : public egr::GradNodeBase {
: GradNodeBase(in_num, out_num), val_(val) {}
GradTestNode() : GradNodeBase() { val_ = 1.0; }
std::string name() override { return "GradTestNode"; }
std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>>& grads, // NOLINT
bool create_graph = false, bool is_new_grad = false) override {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) override {
val_ = std::dynamic_pointer_cast<phi::DenseTensor>(grads[0][0].impl())
->data<float>()[0];
phi::DenseTensorMeta meta =
......@@ -46,7 +49,9 @@ class GradTestNode : public egr::GradNodeBase {
auto* dt_ptr = dt->mutable_data<float>(paddle::platform::CPUPlace());
dt_ptr[0] = 6.0f;
paddle::experimental::Tensor et1(dt);
std::vector<std::vector<paddle::experimental::Tensor>> res = {{et1}};
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
res = {{et1}};
return res;
}
void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
......
......@@ -45,7 +45,9 @@ TEST(GradTensorHolder, Constructor) {
meta);
paddle::experimental::Tensor et = paddle::experimental::Tensor(dt);
std::vector<std::vector<paddle::experimental::Tensor>> inputs;
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
inputs;
inputs.push_back({et});
GradTensorHolder grad_tensor_holder4 = GradTensorHolder(std::move(inputs));
......
......@@ -76,8 +76,7 @@ TEST(Backward, SingleNodeEmptyGrad) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
node0_ptr->AddEdges(&res, 0);
node0_ptr->SetGradOutMeta({leaf_tensor}, 0);
}
std::vector<paddle::experimental::Tensor> outs = {target_tensor};
// Run Backward
......@@ -135,8 +134,7 @@ TEST(Backward, SingleNodeCustomGrad) {
std::dynamic_pointer_cast<GradNodeBase>(acc_node_ptr));
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
node0_ptr->AddEdges(&res, 0);
node0_ptr->SetGradOutMeta({leaf_tensor}, 0);
}
// Run Backward
......@@ -191,12 +189,12 @@ TEST(Backward, LinearNodes) {
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
// Connect Node0 -> Node1 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
meta0.SetSingleOutRankWithSlot(0, 0);
meta0.SetGradNode(node1_ptr);
std::vector<egr::AutogradMeta*> res0 = {&meta0};
node0_ptr->AddEdges(&res0, 0);
auto tmp_tensor = paddle::experimental::Tensor();
auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor);
meta0->SetStopGradient(false);
meta0->SetSingleOutRankWithSlot(0, 0);
meta0->SetGradNode(node1_ptr);
node0_ptr->SetGradOutMeta(tmp_tensor, 0);
AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta
......@@ -208,8 +206,7 @@ TEST(Backward, LinearNodes) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1};
node1_ptr->AddEdges(&res1, 0);
node1_ptr->SetGradOutMeta(leaf_tensor, 0);
}
// Use Empty Grad Tensor
......@@ -288,20 +285,20 @@ TEST(Backward, WithAccumulation) {
auto_grad_meta1->SetStopGradient(false);
// Connect Node0 -> Node2 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
meta0.SetSingleOutRankWithSlot(0, 0);
meta0.SetGradNode(node2_ptr);
std::vector<egr::AutogradMeta*> res0 = {&meta0};
node0_ptr->AddEdges(&res0, 0);
auto tmp_tensor0 = paddle::experimental::Tensor();
auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0);
meta0->SetStopGradient(false);
meta0->SetSingleOutRankWithSlot(0, 0);
meta0->SetGradNode(node2_ptr);
node0_ptr->SetGradOutMeta(tmp_tensor0, 0);
// Connect Node1 -> Node2 via Edge
auto meta1 = egr::AutogradMeta();
meta1.SetStopGradient(false);
meta1.SetSingleOutRankWithSlot(0, 0);
meta1.SetGradNode(node2_ptr);
std::vector<egr::AutogradMeta*> res1 = {&meta1};
node1_ptr->AddEdges(&res1, 0);
auto tmp_tensor1 = paddle::experimental::Tensor();
auto* meta1 = EagerUtils::autograd_meta(&tmp_tensor1);
meta1->SetStopGradient(false);
meta1->SetSingleOutRankWithSlot(0, 0);
meta1->SetGradNode(node2_ptr);
node1_ptr->SetGradOutMeta(tmp_tensor1, 0);
AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta
......@@ -314,7 +311,7 @@ TEST(Backward, WithAccumulation) {
auto_grad_meta2->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2};
node2_ptr->AddEdges(&res2, 0);
node2_ptr->SetGradOutMeta(leaf_tensor, 0);
}
Backward(target_tensors, grad_tensors);
......
......@@ -69,7 +69,7 @@ TEST(CrossBatchAccumulation, SingleScaleNode) {
meta->SetSingleOutRankWithSlot(0, 0);
meta->SetGradNode(acc_node_ptr);
std::vector<egr::AutogradMeta*> res = {meta};
scale_node_ptr->AddEdges(&res, 0);
scale_node_ptr->SetGradOutMeta(leaf_tensor, 0);
Backward(target_tensors, {});
......
......@@ -251,10 +251,11 @@ TEST(EagerUtils, GetGradAccumulationNode) {
}
TEST(EagerUtils, FillZeroForEmptyGradInputs) {
std::vector<std::vector<paddle::experimental::Tensor>> grads = {
std::vector<paddle::experimental::Tensor>(1)};
std::vector<std::vector<GradSlotMeta>> slot_metas = {
std::vector<GradSlotMeta>(1)};
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
grads = {std::vector<paddle::experimental::Tensor>(1)};
paddle::small_vector<std::vector<GradSlotMeta>, egr::kSlotSmallVectorSize>
slot_metas = {std::vector<GradSlotMeta>(1)};
phi::DenseTensorMeta tensor_meta;
tensor_meta.dtype = paddle::experimental::DataType::FLOAT32;
......
......@@ -137,12 +137,16 @@ TEST(Forward, LinearNodes) {
// 2. TensorWrapper: No TensorWrapper for ScaleNode
// 3. NextEdges: Node 1 -> Node 0
const std::vector<std::vector<Edge>>& node1_edges = grad_node1->GetEdges();
const auto& node1_edge = node1_edges[0];
CHECK_EQ(static_cast<int>(node1_edge[0].GetEdgeRankInfo().first), 0);
CHECK_EQ(static_cast<int>(node1_edge[0].GetEdgeRankInfo().second), 0);
CHECK_EQ(node1_edge[0].GetGradNode(), grad_node0);
const paddle::small_vector<std::vector<GradSlotMeta>,
egr::kSlotSmallVectorSize>& node1_metas =
grad_node1->OutputMeta();
const auto& node1_meta = node1_metas[0];
CHECK_EQ(static_cast<int>(node1_meta[0].GetEdge().GetEdgeRankInfo().first),
0);
CHECK_EQ(static_cast<int>(node1_meta[0].GetEdge().GetEdgeRankInfo().second),
0);
CHECK_EQ(node1_meta[0].GetEdge().GetGradNode(), grad_node0);
}
}
......@@ -232,16 +236,19 @@ TEST(Forward, BranchedNodes) {
// 2. TensorWrapper: No TensorWrapper for ScaleNode
// 3. NextEdges
// Node 1 -> Node 0
const std::vector<std::vector<Edge>>& node1_edges = grad_node1->GetEdges();
const Edge& node1_edge = node1_edges[0][0];
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
node1_metas = grad_node1->OutputMeta();
const Edge& node1_edge = node1_metas[0][0].GetEdge();
CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().first), 0);
CHECK_EQ(static_cast<int>(node1_edge.GetEdgeRankInfo().second), 0);
CHECK_EQ(node1_edge.GetGradNode(), grad_node0);
// Node 2 -> Node 0
const std::vector<std::vector<Edge>>& node2_edges = grad_node2->GetEdges();
const Edge& node2_edge = node2_edges[0][0];
const paddle::small_vector<std::vector<egr::GradSlotMeta>,
egr::kSlotSmallVectorSize>& node2_metas =
grad_node2->OutputMeta();
const Edge& node2_edge = node2_metas[0][0].GetEdge();
CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().first), 0);
CHECK_EQ(static_cast<int>(node2_edge.GetEdgeRankInfo().second), 0);
......
......@@ -87,7 +87,7 @@ TEST(Grad, SingleNodeEmptyGrad) {
// grad_node Add Edges
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
node0_ptr->AddEdges(&res, 0);
node0_ptr->SetGradOutMeta(leaf_tensor, 0);
}
std::vector<paddle::experimental::Tensor> outs = {output_tensor};
......@@ -150,7 +150,7 @@ TEST(Grad, SingleNodeCustomGrad) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta1};
node0_ptr->AddEdges(&res, 0);
node0_ptr->SetGradOutMeta(leaf_tensor, 0);
}
auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
......@@ -207,12 +207,12 @@ TEST(Grad, LinearNodes) {
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta->SetStopGradient(false);
// Connect Node0 -> Node1 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
meta0.SetSingleOutRankWithSlot(0, 0);
meta0.SetGradNode(node1_ptr);
std::vector<egr::AutogradMeta*> res0 = {&meta0};
node0_ptr->AddEdges(&res0, 0);
auto tmp_tensor = paddle::experimental::Tensor();
auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor);
meta0->SetStopGradient(false);
meta0->SetSingleOutRankWithSlot(0, 0);
meta0->SetGradNode(node1_ptr);
node0_ptr->SetGradOutMeta(tmp_tensor, 0);
AutogradMeta* auto_grad_meta1 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta
......@@ -224,8 +224,7 @@ TEST(Grad, LinearNodes) {
auto_grad_meta1->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta1->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res1 = {auto_grad_meta1};
node1_ptr->AddEdges(&res1, 0);
node1_ptr->SetGradOutMeta(leaf_tensor, 0);
}
// Use Empty Grad Tensor
......@@ -304,20 +303,20 @@ TEST(Grad, WithAccumulation) {
auto_grad_meta1->SetStopGradient(false);
// Connect Node0 -> Node2 via Edge
auto meta0 = egr::AutogradMeta();
meta0.SetStopGradient(false);
meta0.SetSingleOutRankWithSlot(0, 0);
meta0.SetGradNode(node2_ptr);
std::vector<egr::AutogradMeta*> res0 = {&meta0};
node0_ptr->AddEdges(&res0, 0);
auto tmp_tensor0 = paddle::experimental::Tensor();
auto* meta0 = EagerUtils::autograd_meta(&tmp_tensor0);
meta0->SetStopGradient(false);
meta0->SetSingleOutRankWithSlot(0, 0);
meta0->SetGradNode(node2_ptr);
node0_ptr->SetGradOutMeta(tmp_tensor0, 0);
// Connect Node1 -> Node2 via Edge
auto meta1 = egr::AutogradMeta();
meta1.SetStopGradient(false);
meta1.SetSingleOutRankWithSlot(0, 0);
meta1.SetGradNode(node2_ptr);
std::vector<egr::AutogradMeta*> res1 = {&meta1};
node1_ptr->AddEdges(&res1, 0);
auto tmp_tensor1 = paddle::experimental::Tensor();
auto meta1 = EagerUtils::autograd_meta(&tmp_tensor1);
meta1->SetStopGradient(false);
meta1->SetSingleOutRankWithSlot(0, 0);
meta1->SetGradNode(node2_ptr);
node1_ptr->SetGradOutMeta(tmp_tensor1, 0);
AutogradMeta* auto_grad_meta2 = EagerUtils::autograd_meta(&leaf_tensor);
// Connect Tensor and AccumulationNode via AutoGradMeta
......@@ -329,8 +328,7 @@ TEST(Grad, WithAccumulation) {
auto_grad_meta2->SetSingleOutRankWithSlot(0, 0);
auto_grad_meta2->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res2 = {auto_grad_meta2};
node2_ptr->AddEdges(&res2, 0);
node2_ptr->SetGradOutMeta(leaf_tensor, 0);
}
auto result = Grad(target_tensors, {leaf_tensor}, grad_tensors);
......
......@@ -110,21 +110,20 @@ TEST(RetainGrad, HookBeforeRetainGrad) {
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
{
// AccumulationNode Hook: +3
auto tmp_tensor0 = paddle::experimental::Tensor();
auto auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
auto auto_grad_meta = std::make_shared<AutogradMeta>();
auto acc_node_ptr =
std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
auto_grad_meta->SetStopGradient(false);
auto_grad_meta->SetGradNode(acc_node_ptr);
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta.get()};
scale_node_ptr->AddEdges(&res, 0);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta};
scale_node_ptr->SetGradOutMeta(tmp_tensor0, 0);
leaf_tensor.set_autograd_meta(
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
tmp_tensor0.mutable_autograd_meta()));
egr_utils_api::RegisterGradientHookForTensor(
leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
......@@ -181,19 +180,17 @@ TEST(RetainGrad, HookAfterRetainGrad) {
paddle::experimental::Tensor leaf_tensor = paddle::experimental::Tensor();
{
// AccumulationNode Hook: +3
auto auto_grad_meta = std::make_shared<AutogradMeta>();
auto acc_node_ptr =
std::make_shared<GradNodeAccumulation>(auto_grad_meta.get());
auto tmp_tensor0 = paddle::experimental::Tensor();
auto auto_grad_meta = EagerUtils::autograd_meta(&tmp_tensor0);
auto acc_node_ptr = std::make_shared<GradNodeAccumulation>(auto_grad_meta);
auto_grad_meta->SetGradNode(acc_node_ptr);
auto_grad_meta->SetStopGradient(false);
std::vector<egr::AutogradMeta*> res = {auto_grad_meta.get()};
scale_node_ptr->AddEdges(&res, 0);
scale_node_ptr->SetGradOutMeta(tmp_tensor0, 0);
auto_grad_meta->SetSingleOutRankWithSlot(0, 0);
leaf_tensor.set_autograd_meta(
std::dynamic_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
auto_grad_meta));
tmp_tensor0.mutable_autograd_meta()));
egr_utils_api::RegisterGradientHookForTensor(
leaf_tensor, std::make_shared<egr::CppTensorHook>(hook_function));
......
......@@ -69,9 +69,6 @@ inline void run_program_dygraph_function(
grad_node->SetGradOutMeta(params, /*slot id*/ 1);
grad_node->SetGradInMeta(deref_out, 0);
// Set Next Edges
grad_node->AddEdges(&p_autograd_x, /*slot id*/ 0);
grad_node->AddEdges(&p_autograd_params, /*slot id*/ 1);
egr::EagerUtils::SetOutRankWithSlot(&p_autograd_outs, 0);
......
......@@ -364,12 +364,16 @@ class GradNodeRunProgram : public egr::GradNodeBase {
~GradNodeRunProgram() override = default;
// Functor: perform backward computations
virtual std::vector<std::vector<paddle::experimental::Tensor>> operator()(
std::vector<std::vector<paddle::experimental::Tensor>> &grads, // NOLINT
bool create_graph, bool is_new_grad) override {
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
operator()(paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize> &grads, // NOLINT
bool create_graph,
bool is_new_grad) override {
VLOG(3) << "Running Eager Backward Node: GradNodeRunProgram";
std::vector<std::vector<paddle::experimental::Tensor>> hooked_grads =
GradNodeRunProgram::ApplyGradientHooks(grads);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
egr::kSlotSmallVectorSize>
hooked_grads = GradNodeRunProgram::ApplyGradientHooks(grads);
PADDLE_ENFORCE_EQ(hooked_grads.size(), 1,
paddle::platform::errors::InvalidArgument(
"The hooked_grads.size() of RunProgramGradOp should "
......
......@@ -441,8 +441,10 @@ std::shared_ptr<egr::GradNodeBase> EagerUtils::GetGradAccumulationNode(
}
void EagerUtils::FillZeroForEmptyGradInputs(
std::vector<std::vector<paddle::experimental::Tensor>>* in_grads,
const std::vector<std::vector<GradSlotMeta>>& grad_in_metas) {
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>* in_grads,
const paddle::small_vector<std::vector<GradSlotMeta>, kSlotSmallVectorSize>&
grad_in_metas) {
for (size_t i = 0; i < in_grads->size(); i++) {
for (size_t j = 0; j < (*in_grads)[i].size(); j++) {
paddle::experimental::Tensor& grad = (*in_grads)[i][j];
......
......@@ -234,8 +234,10 @@ class EagerUtils {
* Fill Zero
* **/
static void FillZeroForEmptyGradInputs(
std::vector<std::vector<paddle::experimental::Tensor>>* out_grads,
const std::vector<std::vector<GradSlotMeta>>& grad_out_metas);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>* out_grads,
const paddle::small_vector<std::vector<GradSlotMeta>,
kSlotSmallVectorSize>& grad_out_metas);
};
} // namespace egr
......@@ -406,12 +406,9 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
if (slot_map[0].find(i) != slot_map[0].end()) {
grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]);
grad_node->AddEdges(&ins_auto_grad_metas[i], slot_map[0][i]);
} else {
grad_node->SetGradOutMeta(in_tensors,
ins_auto_grad_metas.size() - 1 - no_grad_cnt);
grad_node->AddEdges(&ins_auto_grad_metas[i],
ins_auto_grad_metas.size() - 1 - no_grad_cnt);
no_grad_cnt++;
}
}
......
......@@ -346,10 +346,8 @@ PyObject* pylayer_method_apply(PyObject* cls, PyObject* args,
for (auto t : inputs_tensor[i]) {
grad_node->SetGradOutMeta(*t, i);
}
grad_node->AddEdges(&inputs_autograd_meta[i], i);
} else {
grad_node->SetGradOutMeta(*inputs_tensor[i][0], i);
grad_node->AddEdges(inputs_autograd_meta[i][0], i);
}
}
......