Unverified commit 00ecb98f, authored by Jiabin Yang, committed by GitHub

support custom operator run in double grad mode (#42653)

Parent 6c696db1
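In short, this change lets the eager-mode autograd engine build and run a second-order (double) grad node for custom operators. A minimal usage sketch of what that enables, assuming a compiled extension exposing a custom_tanh op registered with a double-grad kernel (the op name, shapes, and helper are illustrative, not part of this diff):

# Minimal sketch (not part of the commit): double grad through a custom
# operator in eager mode. Assumes `custom_tanh` was built with
# paddle.utils.cpp_extension.load, as the updated test at the end of this
# diff does; names and shapes are illustrative.
import numpy as np
import paddle


def double_grad_dynamic(custom_tanh, device, dtype):
    paddle.set_device(device)
    x = paddle.to_tensor(np.random.uniform(-1, 1, [4, 8]).astype(dtype),
                         stop_gradient=False)
    out = custom_tanh(x)
    # First-order grad; create_graph=True records the custom grad node so
    # that it can itself be differentiated.
    dx = paddle.grad(outputs=[out], inputs=[x], create_graph=True,
                     retain_graph=True)
    # Second-order grad: this call reaches the new RunCustomOpDoubleGradNode
    # introduced below.
    ddx = paddle.grad(outputs=dx, inputs=[x])
    return dx[0].numpy(), ddx[0].numpy()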
@@ -77,7 +77,8 @@ class Controller {
    op_meta_info_map_.insert(map.begin(), map.end());
  }
-  std::unordered_map<std::string, std::vector<std::unordered_map<int, int>>>&
  std::unordered_map<std::string,
                     std::vector<std::vector<std::unordered_map<int, int>>>>&
  GetCustomEdgesSlotMap() {
    return custom_edges_slot_map_;
  }
@@ -89,8 +90,10 @@ class Controller {
      new paddle::imperative::Tracer()};
  std::unordered_map<std::string, std::vector<paddle::OpMetaInfo>>
      op_meta_info_map_;
-  /* op_type : {{grad_outputs}, {grad_inputs}, {input}, {output}, {attrs}}*/
-  std::unordered_map<std::string, std::vector<std::unordered_map<int, int>>>
  /* op_type : {{{grad_outputs}, {grad_inputs}, {input}, {output}, {attrs}},
   *            {{grad_outputs}, {grad_inputs}, {input}, {output}, {attrs}}}*/
  std::unordered_map<std::string,
                     std::vector<std::vector<std::unordered_map<int, int>>>>
      custom_edges_slot_map_;
  DISABLE_COPY_AND_ASSIGN(Controller);
};
......
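To make the new two-level layout easier to read: the outer vector is now indexed by grad order, so slot_map[0][k] holds the five position maps used when creating the first-order grad node and slot_map[1][k] the ones for the double-grad node; this is why the pybind code further down changes slot_map[2] into slot_map[0][2] and so on. The double-grad entry is filled by matching output-name suffixes: a double-grad output named "X@GRAD@GRAD" is paired with the grad-op input "X@GRAD", while "X@GRAD@NEW" and "X@GRAD" are paired with "X". A hypothetical Python mock of the structure (plain dicts, not Paddle code; the op name and every slot number are made up for illustration):

# Hypothetical mock of custom_edges_slot_map_ after this change.
custom_edges_slot_map = {
    "custom_tanh": [
        # Entry 0: five position maps used when building the grad node
        # (filled by ConstructFwdAndBwdMap in the pybind layer).
        [
            {0: 0},  # [0] forward-input slot  -> grad-op output slot
            {0: 0},  # [1] forward-output slot -> slot of its grad among grad-op inputs
            {0: 1},  # [2] forward-output slot -> grad-op input slot (output reused)
            {},      # [3] forward-input slot  -> grad-op input slot (input reused)
            {},      # [4] forward attr index  -> grad-op attr index
        ],
        # Entry 1: the same five maps for the double-grad node, built lazily
        # by the new ConstructFwdAndBwdMap in custom_operator_node.cc, with
        # the grad op playing the "forward" role.
        [
            {0: 0},
            {0: 0},
            {},
            {},
            {},
        ],
    ],
}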
@@ -698,8 +698,6 @@ std::vector<paddle::experimental::Tensor> RunBackward(
      }
    }
-    VLOG(6) << "Running GradNode:" << node->name();
    // Check input
    EnforceGradNodeHasInput(node);
......
@@ -15,10 +15,151 @@
#include "paddle/fluid/eager/custom_operator/custom_operator_node.h"
#include "paddle/fluid/framework/custom_operator.h"
#include "paddle/fluid/framework/op_meta_info_helper.h"
#include "paddle/fluid/platform/profiler/event_tracing.h"
#include "paddle/phi/api/ext/op_meta_info.h"
#include "paddle/phi/core/dense_tensor.h"

namespace egr {
static void ConstructFwdAndBwdMap(
const std::vector<paddle::OpMetaInfo>& vec_map,
const std::string& op_type) {
auto& in_out_map = egr::Controller::Instance().GetCustomEdgesSlotMap();
if (in_out_map.find(op_type) != in_out_map.end()) {
if (in_out_map[op_type].size() == 2) {
VLOG(7) << "Find Exist CustomEdgesSlotMap Skip >>>> ";
return;
}
}
VLOG(7) << "Construct DoubleGrad's CustomEdgesSlotMap ";
auto inputs_names =
paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[1]);
auto outputs_names =
paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[1]);
auto attrs_names = paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[1]);
auto grad_outputs_names =
paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[2]);
auto grad_inputs_names =
paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[2]);
auto grad_attrs_names =
paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[2]);
std::vector<std::unordered_map<int, int>> res(5);
in_out_map[op_type].push_back(res);
// Prepare pos map for grad_outputs
VLOG(7) << "Prepare pos map for grad_outputs";
PADDLE_ENFORCE_LE(
grad_outputs_names.size(), inputs_names.size(),
paddle::platform::errors::InvalidArgument(
"Grad outputs num should be less equal than forward inputs num."));
for (size_t i = 0; i < grad_outputs_names.size(); i++) {
auto end = grad_outputs_names[i].find("@GRAD@GRAD");
if (end != std::string::npos) {
for (size_t j = 0; j < inputs_names.size(); j++) {
if (grad_outputs_names[i].substr(0, end + 5) == inputs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " inputs: " << inputs_names[j]
<< " related to No." << i
<< " grad_outputs: " << grad_outputs_names[i];
in_out_map[op_type][1][0][j] = i;
}
}
} else {
size_t end_n = grad_outputs_names[i].find("@GRAD@NEW");
if (end_n != std::string::npos) {
for (size_t j = 0; j < inputs_names.size(); j++) {
if (grad_outputs_names[i].substr(0, end_n) == inputs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " inputs: " << inputs_names[j]
<< " related to No." << i
<< " grad_outputs: " << grad_outputs_names[i];
in_out_map[op_type][1][0][j] = i;
}
}
} else {
size_t end_one_grad = grad_outputs_names[i].find("@GRAD");
if (end_one_grad != std::string::npos) {
for (size_t j = 0; j < inputs_names.size(); j++) {
if (grad_outputs_names[i].substr(0, end_one_grad) ==
inputs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " inputs: " << inputs_names[j]
<< " related to No." << i
<< " grad_outputs: " << grad_outputs_names[i];
in_out_map[op_type][1][0][j] = i;
}
}
} else {
PADDLE_THROW(paddle::platform::errors::NotFound(
"All Grad outputs should be end of @GRAD@GRAD or @GRAD@NEW or "
"@GRAD and we got %s is not one of them, "
"please check your op and change to fit the rule.",
grad_outputs_names[i]));
}
}
}
}
// Prepare pos map for grad_inputs
for (size_t i = 0; i < grad_inputs_names.size(); i++) {
size_t end = grad_inputs_names[i].find("@GRAD@GRAD");
if (end != std::string::npos) {
for (size_t j = 0; j < outputs_names.size(); j++) {
if (grad_inputs_names[i].substr(0, end + 5) == outputs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " outputs: " << outputs_names[j]
<< " related to No." << i
<< " grad_inputs's grad: " << grad_inputs_names[i];
in_out_map[op_type][1][1][j] = i;
}
}
} else {
if (std::find(outputs_names.begin(), outputs_names.end(),
grad_inputs_names[i]) != outputs_names.end()) {
for (size_t j = 0; j < outputs_names.size(); j++) {
if (grad_inputs_names[i] == outputs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " outputs: " << outputs_names[j]
<< " related to No." << i
<< " grad_inputs fwd outputs: " << grad_inputs_names[i];
in_out_map[op_type][1][2][j] = i;
}
}
} else {
for (size_t j = 0; j < inputs_names.size(); j++) {
if (grad_inputs_names[i] == inputs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " inputs: " << inputs_names[j]
<< " related to No." << i
<< " grad_inputs fwd inputs: " << grad_inputs_names[i];
in_out_map[op_type][1][3][j] = i;
}
}
}
}
}
// Prepare pos map for grad attrs_
for (size_t i = 0; i < grad_attrs_names.size(); i++) {
auto end =
std::find(attrs_names.begin(), attrs_names.end(), grad_attrs_names[i]);
PADDLE_ENFORCE_NE(end, attrs_names.end(),
paddle::platform::errors::NotFound(
"All Grad attrs should be one of forward attrs and "
"we got %s is not one of them, please check your "
"op and change to fit the rule.",
grad_attrs_names[i]));
for (size_t j = 0; j < attrs_names.size(); j++) {
if (grad_attrs_names[i] == attrs_names[j]) {
VLOG(7) << " ==== Custom Operator: " << op_type << "_grad "
<< "'s No." << j << " attrs: " << attrs_names[j]
<< " related to No." << i
<< " grad_attrs: " << grad_attrs_names[i];
in_out_map[op_type][1][4][j] = i;
}
}
}
}
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
                     kSlotSmallVectorSize>
RunCustomOpNode::operator()(
@@ -38,10 +179,11 @@ RunCustomOpNode::operator()(
      tmp_ins(grad_inputs_name.size());
  VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
          << ", whose grad_inputs_name size is: " << grad_inputs_name.size();
-  for (size_t i = 0; i < grads.size(); i++) {
-    if (map[1].find(i) != map[1].end()) {
-      VLOG(7) << "Insert grad: " << i << " to grad_inputs: " << map[1][i];
-      tmp_ins[map[1][i]] = grads[i];
  auto hooked_grads = ApplyGradientHooks(grads);
  for (size_t i = 0; i < hooked_grads.size(); i++) {
    if (map[0][1].find(i) != map[0][1].end()) {
      VLOG(7) << "Insert grad: " << i << " to grad_inputs: " << map[0][1][i];
      tmp_ins[map[0][1][i]] = hooked_grads[i];
    }
  }
@@ -69,28 +211,218 @@ RunCustomOpNode::operator()(
      tmp_outs(grad_outputs_names.size());
  VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
  for (size_t i = 0; i < OutputMeta().size(); i++) {
-    if (map[0].find(i) != map[0].end()) {
    if (map[0][0].find(i) != map[0][0].end()) {
      VLOG(7) << "Insert grad outputs: " << i
              << " with size: " << OutputMeta()[i].size()
-              << " to tmp_outputs: " << map[0][i];
              << " to tmp_outputs: " << map[0][0][i];
      for (size_t j = 0; j < OutputMeta()[i].size(); j++) {
        outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */
                             std::make_shared<phi::DenseTensor>(
                                 phi::DataType::UNDEFINED),
                             egr::Controller::Instance().GenerateUniqueName(
                                 "custom_tmp_grad"));
        egr::EagerUtils::autograd_meta(&(outs[i][j]));
      }
-      tmp_outs[map[0][i]] = outs[i];
      tmp_outs[map[0][0][i]] = outs[i];
    }
  }
  for (size_t i = 0; i < tmp_outs.size(); i++) {
    VLOG(7) << "Prepare grad outputs size: " << tmp_outs[i].size();
    ctx.EmplaceBackOutputs(tmp_outs[i]);
  }
-  VLOG(7) << "Run Kernel of Grad Custom Op: " << op_type_;
  VLOG(7) << "Run Kernel of Grad Custom Op: " << op_type_ << "_grad";
  (*paddle::framework::OpMetaInfoHelper::GetKernelFn(
      kernel_map.at(op_type_)[1]))(&ctx);
VLOG(7) << "Get AutogradMeta for inputs and outputs for Custom Op";
std::vector<std::vector<egr::AutogradMeta*>> ins_auto_grad_metas;
std::vector<std::vector<egr::AutogradMeta*>> outs_auto_grad_metas;
VLOG(7) << "We got slot num of ins is: " << ctx.InputRange().size();
ins_auto_grad_metas.resize(ctx.InputRange().size());
VLOG(7) << "We got slot num of outs is: " << ctx.OutputRange().size();
outs_auto_grad_metas.resize(ctx.OutputRange().size());
for (size_t i = 0; i < ctx.InputRange().size(); i++) {
ins_auto_grad_metas[i] =
egr::EagerUtils::nullable_autograd_meta(ctx.InputsBetween(
ctx.InputRangeAt(i).first, ctx.InputRangeAt(i).second));
}
for (size_t i = 0; i < ctx.OutputRange().size(); i++) {
outs_auto_grad_metas[i] =
egr::EagerUtils::unsafe_autograd_meta(ctx.OutputsBetweeen(
ctx.OutputRangeAt(i).first, ctx.OutputRangeAt(i).second));
}
bool require_any_grad = false;
bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;
for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
require_any_grad =
require_any_grad || egr::EagerUtils::ComputeRequireGrad(
trace_backward, &(ins_auto_grad_metas[i]));
}
if (require_any_grad) {
auto meta_info_map = egr::Controller::Instance().GetOpMetaInfoMap();
const auto& vec_map = meta_info_map.at(op_type_);
paddle::platform::RecordEvent node_creation_record_event(
"Custom Op " + op_type_ + " double_grad node_creation",
paddle::platform::TracerEventType::OperatorInner, 1);
VLOG(6) << " Construct Grad for Custom Op: " << op_type_;
ConstructFwdAndBwdMap(vec_map, op_type_);
for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
egr::EagerUtils::PassStopGradient(false, &(outs_auto_grad_metas[i]));
}
auto grad_node = std::make_shared<egr::RunCustomOpDoubleGradNode>(
outs_auto_grad_metas.size(), ins_auto_grad_metas.size(), op_type_);
auto slot_map =
egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
// Prepare Grad outputs
size_t no_grad_cnt = 0;
for (size_t i = 0; i < ins_auto_grad_metas.size(); i++) {
const std::vector<paddle::experimental::Tensor>& in_tensors =
ctx.InputsBetween(ctx.InputRangeAt(i).first,
ctx.InputRangeAt(i).second);
if (slot_map[1][0].find(i) != slot_map[1][0].end()) {
grad_node->SetGradOutMeta(in_tensors, slot_map[1][0][i]);
} else {
grad_node->SetGradOutMeta(in_tensors,
ins_auto_grad_metas.size() - 1 - no_grad_cnt);
no_grad_cnt++;
}
}
// Prepare Grad inputs with grad of fwd outputs
for (size_t i = 0; i < outs_auto_grad_metas.size(); i++) {
const std::vector<paddle::experimental::Tensor>& out_tensors =
ctx.OutputsBetweeen(ctx.OutputRangeAt(i).first,
ctx.OutputRangeAt(i).second);
egr::EagerUtils::SetOutRankWithSlot(&(outs_auto_grad_metas[i]), i);
egr::EagerUtils::SetHistory(&(outs_auto_grad_metas[i]), grad_node);
grad_node->SetGradInMeta(out_tensors, i);
egr::EagerUtils::CheckAndRetainGrad(out_tensors);
}
// Prepare Grad inputs with fwd outputs
for (auto it = slot_map[1][2].begin(); it != slot_map[1][2].end(); it++) {
VLOG(7) << "Prepare fwd_outs: " << it->first
<< " to grad_inputs: " << it->second;
grad_node->fwd_outs[it->second] =
egr::RunCustomOpNode::ConstructTensorWrapper(
ctx.OutputsBetweeen(ctx.OutputRangeAt(it->first).first,
ctx.OutputRangeAt(it->first).second));
}
// Prepare Grad inputs with fwd inputs
for (auto it = slot_map[1][3].begin(); it != slot_map[1][3].end(); it++) {
VLOG(7) << "Prepare fwd_ins: " << it->first
<< " to grad_inputs: " << it->second;
grad_node->fwd_ins[it->second] =
egr::RunCustomOpNode::ConstructTensorWrapper(
ctx.InputsBetween(ctx.InputRangeAt(it->first).first,
ctx.InputRangeAt(it->first).second));
}
auto attrs_names = paddle::framework::OpMetaInfoHelper::GetAttrs(
meta_info_map.at(op_type_)[2]);
std::vector<paddle::any> attrs(attrs_names.size());
// Prepare attrs for Grad node
for (auto it = slot_map[1][4].begin(); it != slot_map[1][4].end(); it++) {
VLOG(7) << "Prepare fwd attrs: " << it->first
<< " to grad_attrs: " << it->second;
attrs[it->second] = attrs_[it->first];
}
grad_node->SetAttrs(attrs);
}
return outs;
}
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
RunCustomOpDoubleGradNode::operator()(
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads,
bool create_graph, bool is_new_grad) { // NOLINT
paddle::CustomOpKernelContext ctx;
auto meta_info_map = egr::Controller::Instance().GetOpMetaInfoMap();
const auto& vec_map = meta_info_map.at(op_type_);
auto grad_inputs_name =
paddle::framework::OpMetaInfoHelper::GetInputs(vec_map[2]);
auto grad_outputs_names =
paddle::framework::OpMetaInfoHelper::GetOutputs(vec_map[2]);
auto map = egr::Controller::Instance().GetCustomEdgesSlotMap().at(op_type_);
auto kernel_map = egr::Controller::Instance().GetOpMetaInfoMap();
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
tmp_ins(grad_inputs_name.size());
VLOG(7) << " Prepare Backward inputs of grads with size: " << grads.size()
<< ", whose grad_inputs_name size is: " << grad_inputs_name.size();
auto hooked_grads = ApplyGradientHooks(grads);
for (size_t i = 0; i < hooked_grads.size(); i++) {
if (map[1][1].find(i) != map[1][1].end()) {
VLOG(7) << "Insert grad: " << i << " to grad_inputs: " << map[1][1][i];
tmp_ins[map[1][1][i]] = hooked_grads[i];
}
}
for (auto it : fwd_outs) {
VLOG(7) << "Insert fwd_outs to grad_inputs: " << it.first;
tmp_ins[it.first] = RunCustomOpDoubleGradNode::Recover(&(it.second));
}
for (auto it : fwd_ins) {
VLOG(7) << "Insert fwd_ins to grad_inputs: " << it.first;
tmp_ins[it.first] = RunCustomOpDoubleGradNode::Recover(&(it.second));
}
VLOG(6) << "Prepare Grad inputs";
for (const auto& in : tmp_ins) {
ctx.EmplaceBackInputs(in);
}
VLOG(6) << "Prepare Grad attrs";
ctx.EmplaceBackAttrs(attrs_);
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
outs(OutputMeta().size());
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
tmp_outs(grad_outputs_names.size());
VLOG(6) << "Prepare Grad outputs for size: " << grad_outputs_names.size();
for (const auto& name : grad_outputs_names) {
VLOG(6) << "Prepare Grad outputs name is: " << name;
}
for (size_t i = 0; i < OutputMeta().size(); i++) {
if (map[1][0].find(i) != map[1][0].end()) {
VLOG(7) << "Insert grad outputs: " << i
<< " with size: " << OutputMeta()[i].size()
<< " to tmp_outputs: " << map[1][0][i];
for (size_t j = 0; j < OutputMeta()[i].size(); j++) {
outs[i].emplace_back(/* init it incase of copy nullptr of shared_ptr */
std::make_shared<phi::DenseTensor>(
phi::DataType::UNDEFINED),
egr::Controller::Instance().GenerateUniqueName(
"custom_tmp_grad"));
}
tmp_outs[map[1][0][i]] = outs[i];
}
}
for (size_t i = 0; i < tmp_outs.size(); i++) {
VLOG(7) << "Prepare grad outputs size: " << tmp_outs[i].size();
ctx.EmplaceBackOutputs(tmp_outs[i]);
}
VLOG(7) << "Run Kernel of Grad Custom Op: " << name();
(*paddle::framework::OpMetaInfoHelper::GetKernelFn(
kernel_map.at(op_type_)[2]))(&ctx);
  return outs;
}
} // namespace egr
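The slot arithmetic in RunCustomOpDoubleGradNode::operator() above is easier to follow in pseudocode. A rough Python rendering of how the double-grad kernel inputs are assembled (a sketch of the logic, not a Paddle API; function and parameter names are invented):

# Rough rendering of how RunCustomOpDoubleGradNode::operator() fills tmp_ins
# before calling the double-grad kernel. `slot_map` is the per-op entry of
# custom_edges_slot_map_; all names here are illustrative.
def assemble_double_grad_inputs(hooked_grads, fwd_ins, fwd_outs, slot_map,
                                num_grad_inputs):
    tmp_ins = [None] * num_grad_inputs
    # Incoming grads (grads of the grad op's outputs) are routed through
    # slot_map[1][1], the grad-input map of the double-grad entry.
    for i, grad in enumerate(hooked_grads):
        if i in slot_map[1][1]:
            tmp_ins[slot_map[1][1][i]] = grad
    # Tensors wrapped at node-creation time are recovered straight into the
    # slots they were recorded under (fwd_outs / fwd_ins are keyed by slot).
    for slot, tensors in fwd_outs.items():
        tmp_ins[slot] = tensors
    for slot, tensors in fwd_ins.items():
        tmp_ins[slot] = tensors
    return tmp_ins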
@@ -67,7 +67,11 @@ class RunCustomOpNode : public GradNodeBase {
    return res;
  }

-  void ClearTensorWrappers() override { VLOG(6) << "Do nothing here now"; }
  void ClearTensorWrappers() override {
    fwd_outs.clear();
    fwd_ins.clear();
    grads2grad_in_map.clear();
  }

  void SetAttrs(const std::vector<paddle::any>& attr) { attrs_ = attr; }

@@ -87,4 +91,75 @@ class RunCustomOpNode : public GradNodeBase {
  std::string op_type_{""};
};
class RunCustomOpDoubleGradNode : public GradNodeBase {
public:
// Constructor: configure fwd input tensors to grad node
explicit RunCustomOpDoubleGradNode(size_t bwd_in_slot_num,
size_t bwd_out_slot_num,
const std::string& op_type)
: GradNodeBase(bwd_in_slot_num, bwd_out_slot_num), op_type_(op_type) {
VLOG(6) << "Construct RunCustomOpDoubleGradNode for op: " << op_type;
}
~RunCustomOpDoubleGradNode() override {
VLOG(6) << "Destruct RunCustomOpDoubleGradNode for op: " << op_type_;
}
// Functor: perform backward computations
virtual paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>
operator()( // NOLINT
paddle::small_vector<std::vector<paddle::experimental::Tensor>,
kSlotSmallVectorSize>& grads, // NOLINT
bool create_graph = false,
bool is_new_grad = false) // NOLINT
override;
std::string name() {
return paddle::string::Sprintf("RunCustomOpDoubleGradNode: %s_grad_grad",
op_type_);
}
static std::vector<egr::TensorWrapper> ConstructTensorWrapper(
const std::vector<paddle::experimental::Tensor>& fwd_var) {
std::vector<egr::TensorWrapper> res;
for (auto const& var : fwd_var) {
res.emplace_back(var);
}
return res;
}
static std::vector<paddle::experimental::Tensor> Recover(
std::vector<egr::TensorWrapper>* fwd_var) {
std::vector<paddle::experimental::Tensor> res;
for (size_t i = 0; i < fwd_var->size(); i++) {
res.emplace_back(fwd_var->at(i).recover());
}
return res;
}
void ClearTensorWrappers() override {
fwd_outs.clear();
fwd_ins.clear();
grads2grad_in_map.clear();
}
void SetAttrs(const std::vector<paddle::any>& attr) { attrs_ = attr; }
std::shared_ptr<GradNodeBase> Copy() const override {
auto copied_node = std::shared_ptr<RunCustomOpDoubleGradNode>(
new RunCustomOpDoubleGradNode(*this));
return copied_node;
}
public:
std::unordered_map<int, std::vector<egr::TensorWrapper>> fwd_outs;
std::unordered_map<int, std::vector<egr::TensorWrapper>> fwd_ins;
std::unordered_map<int, int> grads2grad_in_map;
private:
std::vector<paddle::any> attrs_;
std::string op_type_{""};
};
} // namespace egr
@@ -119,18 +119,24 @@ class TensorWrapper {
    paddle::experimental::Tensor recovered_tensor = intermidiate_tensor_;
    std::shared_ptr<GradNodeBase> new_grad_node = weak_grad_node_.lock();
-    auto* intermediate_autograd_meta =
-        EagerUtils::unsafe_autograd_meta(intermidiate_tensor_);
-    auto p_ab_autograd_meta =
-        std::make_shared<AutogradMeta>(*intermediate_autograd_meta);
    if (new_grad_node) {
      VLOG(3) << "Recovered TensorWrapper with GradNode "
              << new_grad_node->name() << " addr: " << new_grad_node.get();
-      p_ab_autograd_meta->SetGradNode(new_grad_node);
    } else {
-      VLOG(3) << "Recovered TensorWrapper with Empth GradNode";
      VLOG(3) << "Recovered TensorWrapper with Empty GradNode";
    }
    auto* intermediate_autograd_meta =
        EagerUtils::nullable_autograd_meta(intermidiate_tensor_);
    if (intermediate_autograd_meta) {
      auto p_ab_autograd_meta =
          std::make_shared<AutogradMeta>(*intermediate_autograd_meta);
      if (new_grad_node) {
        p_ab_autograd_meta->SetGradNode(new_grad_node);
      }
      recovered_tensor.set_autograd_meta(p_ab_autograd_meta);
    }
-    recovered_tensor.set_autograd_meta(p_ab_autograd_meta);
    return recovered_tensor;
  }
}
......
@@ -157,7 +157,7 @@ void EagerUtils::SetHistory(std::vector<AutogradMeta*>* autograd_metas,
    if (autograd_meta->GradNode()) {
      VLOG(7) << "Should not set grad node twice, original node is:"
              << autograd_meta->GradNode()->name()
-              << "current is: " << grad_node->name();
              << " current is: " << grad_node->name();
    }
    autograd_meta->SetGradNode(grad_node);
  }
......
@@ -207,7 +207,8 @@ static void ConstructFwdAndBwdMap(
  auto grad_attrs_names =
      paddle::framework::OpMetaInfoHelper::GetAttrs(vec_map[1]);
  std::vector<std::unordered_map<int, int>> res(5);
-  in_out_map.insert({op_type, res});
  in_out_map.insert({op_type, {res}});
  // Prepare pos map for grad_outputs
  VLOG(7) << "Prepare pos map for grad_outputs";
  PADDLE_ENFORCE_LE(
@@ -227,7 +228,7 @@ static void ConstructFwdAndBwdMap(
          VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                  << " inputs: " << inputs_names[j] << " related to No." << i
                  << " grad_outputs: " << grad_outputs_names[i];
-          in_out_map[op_type][0][j] = i;
          in_out_map[op_type][0][0][j] = i;
        }
      }
    }
@@ -240,7 +241,7 @@ static void ConstructFwdAndBwdMap(
          VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                  << " outputs: " << outputs_names[j] << " related to No."
                  << i << " grad_inputs's grad: " << grad_inputs_names[i];
-          in_out_map[op_type][1][j] = i;
          in_out_map[op_type][0][1][j] = i;
        }
      }
    } else {
@@ -252,7 +253,7 @@ static void ConstructFwdAndBwdMap(
                    << " outputs: " << outputs_names[j] << " related to No."
                    << i
                    << " grad_inputs fwd outputs: " << grad_inputs_names[i];
-            in_out_map[op_type][2][j] = i;
            in_out_map[op_type][0][2][j] = i;
          }
        }
      } else {
@@ -262,7 +263,7 @@ static void ConstructFwdAndBwdMap(
                    << " inputs: " << inputs_names[j] << " related to No."
                    << i
                    << " grad_inputs fwd inputs: " << grad_inputs_names[i];
-            in_out_map[op_type][3][j] = i;
            in_out_map[op_type][0][3][j] = i;
          }
        }
      }
@@ -284,7 +285,7 @@ static void ConstructFwdAndBwdMap(
        VLOG(7) << " ==== Custom Operator: " << op_type << "'s No." << j
                << " attrs: " << attrs_names[j] << " related to No." << i
                << " grad_attrs: " << grad_attrs_names[i];
-        in_out_map[op_type][4][j] = i;
        in_out_map[op_type][0][4][j] = i;
      }
    }
  }
@@ -402,8 +403,8 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
          ctx.InputsBetween(ctx.InputRangeAt(i).first,
                            ctx.InputRangeAt(i).second);
-      if (slot_map[0].find(i) != slot_map[0].end()) {
-        grad_node->SetGradOutMeta(in_tensors, slot_map[0][i]);
      if (slot_map[0][0].find(i) != slot_map[0][0].end()) {
        grad_node->SetGradOutMeta(in_tensors, slot_map[0][0][i]);
      } else {
        grad_node->SetGradOutMeta(in_tensors,
                                  ins_auto_grad_metas.size() - 1 - no_grad_cnt);
@@ -423,7 +424,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
    }
    // Prepare Grad inputs with fwd outputs
-    for (auto it = slot_map[2].begin(); it != slot_map[2].end(); it++) {
    for (auto it = slot_map[0][2].begin(); it != slot_map[0][2].end(); it++) {
      VLOG(7) << "Prepare fwd_outs: " << it->first
              << " to grad_inputs: " << it->second;
      grad_node->fwd_outs[it->second] =
@@ -433,7 +434,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
    }
    // Prepare Grad inputs with fwd inputs
-    for (auto it = slot_map[3].begin(); it != slot_map[3].end(); it++) {
    for (auto it = slot_map[0][3].begin(); it != slot_map[0][3].end(); it++) {
      VLOG(7) << "Prepare fwd_ins: " << it->first
              << " to grad_inputs: " << it->second;
      grad_node->fwd_ins[it->second] =
@@ -446,7 +447,7 @@ static PyObject* eager_api_run_costum_op(PyObject* self, PyObject* args,
        meta_info_map.at(op_type)[1]);
    std::vector<paddle::any> attrs(attrs_names.size());
    // Prepare attrs for Grad node
-    for (auto it = slot_map[4].begin(); it != slot_map[4].end(); it++) {
    for (auto it = slot_map[0][4].begin(); it != slot_map[0][4].end(); it++) {
      VLOG(7) << "Prepare fwd attrs: " << it->first
              << " to grad_attrs: " << it->second;
      attrs[it->second] = res_attrs[it->first];
......
@@ -21,8 +21,7 @@ import paddle.static as static
from paddle.utils.cpp_extension import load, get_build_directory
from paddle.utils.cpp_extension.extension_utils import run_cmd
from utils import paddle_includes, extra_cc_args, extra_nvcc_args
-from paddle.fluid.framework import _test_eager_guard, _enable_legacy_dygraph
-_enable_legacy_dygraph()
from paddle.fluid.framework import _test_eager_guard

# Because Windows don't use docker, the shared lib already exists in the
# cache dir, it will not be compiled again unless the shared lib is removed.
@@ -64,7 +63,7 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
        self.dtypes = ['float32', 'float64']
        self.devices = ['cpu']

-    def test_func_double_grad_dynamic(self):
    def func_double_grad_dynamic(self):
        for device in self.devices:
            for dtype in self.dtypes:
                x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
@@ -85,6 +84,11 @@ class TestCustomTanhDoubleGradJit(unittest.TestCase):
                "custom op out grad: {},\n paddle api out grad: {}".format(
                    dout, pd_dout))

    def test_func_double_grad_dynamic(self):
        with _test_eager_guard():
            self.func_double_grad_dynamic()
        self.func_double_grad_dynamic()


if __name__ == "__main__":
    unittest.main()