Unverified commit 8e612903, authored by pangyoki, committed by GitHub

support inplace in dygraph eager_fluid state (#40400)

* [Eager] Support eager grad interface, draft version

* Support eager grad interface with allow_unused and multi startup_op

* Fix code format

* Fix allow_unused case, return PyNone if tensor not initialize

* Support output's stop_gradient related to create_graph

* Support grad exception case in eager mode, fix coverage CI

* Update ToPyObject, return PyNone if not initialize

* AccumulationNode add FLAGS_retain_grad_for_all_tensor

* Fix ci issue

* Fix CI issue

* fix, use core.eager.Tensor

* Add func SetBufferSlotRankZeros for GradTensorHolder

* Support retain_graph by using ClearTensorWrappers

* Support retain_graph by using ClearTensorWrappers

* Update retain_graph and no_grad_vars related test case

* Update code gen logic for ClearTensorWrappers

* Fix by override statement

* fix override func args

* Support retain_graph, update unit tests

* Updated ClearTensorWrappers logic

* fix grad python interface

* Use deep copy and update unit tests

* Polish code

* Polish code

* Fix CI issue, Deep copy only use when user set grad_tensors

* Fix CI, use Backward instead RunBackward

* Fix CI, Declare kernel explicitly in test file

* Polish, remove vector of TensorWrapper

* Refactor the logic of grad/backward, polish codes

* Update code after merge upstream develop

* Polish after merge upstream develop

* Update to adapt new GradNodeBase superclass

* Fix error introduced during conflict resolution

* support inplace strategy in eager_fluid state

* solve conflict

* nothing

* Update purify potential_startup_nodes logic

* Fix errors

* Polish code

* Remove useless args for ToPyObject

* Remove useless TensorWrappersSet

* fix record conflict

* Fix code-format, re-install pre-commit

* fix tensor_wrapper bug

* Fix pre-process logic for potential_startup_ops

* Update unit tests, use eager mode

* Fix conflicts

* fix unittest timeout

* little change
Co-authored-by: Weilong Wu <veyron_wu@163.com>
Parent 50fad3ed
......@@ -30,7 +30,8 @@ namespace egr_utils_api {
bool IsLeafTensor(const paddle::experimental::Tensor& target) {
std::shared_ptr<GradNodeBase> grad_node = EagerUtils::grad_node(target);
if (std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node)) {
if (!grad_node ||
std::dynamic_pointer_cast<GradNodeAccumulation>(grad_node)) {
return true;
}
......
......@@ -979,7 +979,9 @@ static bool CollectGradInformationFromOpInfo(
/* --------------------------------------------------- */
static std::string GenerateGradNodeCreationContent(
const ForwardGenerationInfo& fwd_info,
const GradNodeGenerationInfo& bwd_info) {
const GradNodeGenerationInfo& bwd_info,
const std::string& trace_op_body_str,
std::map<std::string, std::string> inplace_map = {}) {
VLOG(6) << "Generating GradNode Creation codes";
const std::string& op_type = fwd_info.GetOpType();
......@@ -998,7 +1000,8 @@ static std::string GenerateGradNodeCreationContent(
// If single output slotname and not duplicable,
// then generate: "egr::AutogradMeta* p_autograd_out =
// egr::EagerUtils::autograd_meta("op_proto->outputs()[0].name()")"
std::string get_autograd_meta_str = " // Prepare Autograd Meta \n";
std::string get_input_autograd_meta_str = " // Prepare Autograd Meta \n";
std::string get_output_autograd_meta_str = "";
// If single output slotname and not duplicable,
// then generate: "egr::AutogradMeta* p_autograd_out =
// egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")"
......@@ -1006,22 +1009,39 @@ static std::string GenerateGradNodeCreationContent(
const std::string& output_name = output.name();
const std::string& output_autograd_name = "p_autograd_" + output_name;
// output autograd_meta should be got after running TraceOP.
if (output.duplicable()) {
const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
" std::vector<egr::AutogradMeta*> %s = "
" std::vector<egr::AutogradMeta*> %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
get_output_autograd_meta_str += paddle::string::Sprintf(
GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
} else {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name);
// In inplace op, the case where output is duplicable is not considered.
// Replace output directly with input in inplace op.
if (!inplace_map.empty() && inplace_map.count(output_name)) {
auto inplace_input_name = inplace_map[output_name];
const std::string& inplace_input_autograd_name =
"p_autograd_" + inplace_input_name;
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" %s = egr::EagerUtils::autograd_meta(&%s);\n";
get_output_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, inplace_input_autograd_name,
inplace_input_name);
} else {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::autograd_meta(&%s);\n";
get_output_autograd_meta_str +=
paddle::string::Sprintf(GET_SINGLE_AUTOGRAD_META_TEMPLATE,
output_autograd_name, output_name);
}
}
}
VLOG(6) << "Generated outputs autograd_meta";
// input autograd_meta should be got before running TraceOP (for checking
// inplace).
for (const proto::OpProto::Var& input : in_vars) {
const std::string& input_name = input.name();
const std::string& input_autograd_name = "p_autograd_" + input_name;
......@@ -1030,28 +1050,46 @@ static std::string GenerateGradNodeCreationContent(
const char* GET_MULTI_AUTOGRAD_META_TEMPLATE =
" std::vector<egr::AutogradMeta*> %s = "
"egr::EagerUtils::nullable_autograd_meta(%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
get_input_autograd_meta_str += paddle::string::Sprintf(
GET_MULTI_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name);
} else if (input.dispensable()) {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::nullable_autograd_meta(%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
get_input_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name);
} else {
const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE =
" egr::AutogradMeta* %s = "
"egr::EagerUtils::nullable_autograd_meta(%s);\n";
get_autograd_meta_str += paddle::string::Sprintf(
get_input_autograd_meta_str += paddle::string::Sprintf(
GET_SINGLE_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name);
}
}
VLOG(6) << "Generated inputs autograd_meta";
// check inplace input to avoid inplace operations on leaf nodes with
// stop_gradient=False.
std::string check_inplace_str = "";
if (!inplace_map.empty()) {
const char* CHECKING_INPLACE_TEMPLATE =
" // Check Inplace\n"
" egr::EagerUtils::CheckInplace(%s, p_autograd_%s, "
"require_any_grad);\n";
for (auto& inplace_pair : inplace_map) {
std::string inplace_name = inplace_pair.second;
check_inplace_str += paddle::string::Sprintf(CHECKING_INPLACE_TEMPLATE,
inplace_name, inplace_name);
}
VLOG(6) << "Check Inplace Input";
}
std::string prepare_autograd_meta_str = "";
prepare_autograd_meta_str += get_autograd_meta_str;
// For now, only generate input autograd_meta here.
// output autograd_meta will be generated after running TraceOP.
prepare_autograd_meta_str += get_input_autograd_meta_str;
prepare_autograd_meta_str += "\n";
// [GradOpNode] GetTraceBackward
......@@ -1066,7 +1104,7 @@ static std::string GenerateGradNodeCreationContent(
size_t bwd_in_slot_num = out_vars.size();
size_t bwd_out_slot_num = in_vars.size();
const char* GRAD_OP_NODE_TEMPLATE =
" auto grad_node = std::make_shared<GradNode%s>(%d, %d);\n";
" auto grad_node = std::make_shared<GradNode%s>(%d, %d);\n";
grad_node_creation_str += " // Create GradOpNode\n";
grad_node_creation_str += paddle::string::Sprintf(
GRAD_OP_NODE_TEMPLATE, op_type, bwd_in_slot_num, bwd_out_slot_num);
......@@ -1075,14 +1113,14 @@ static std::string GenerateGradNodeCreationContent(
VLOG(6) << "Generated GradOpNode construction";
// [GradOpNode] Set Attrs
grad_node_creation_str += " // Set Attributes\n";
grad_node_creation_str += " grad_node->SetAttrMap(std::move(attrs));\n";
grad_node_creation_str += " // Set Attributes\n";
grad_node_creation_str += " grad_node->SetAttrMap(std::move(attrs));\n";
grad_node_creation_str +=
" grad_node->SetDefaultAttrMap(std::move(default_attrs));\n";
" grad_node->SetDefaultAttrMap(std::move(default_attrs));\n";
grad_node_creation_str += "\n";
// [GradOpNode] Set TensorWrappers
grad_node_creation_str += " // Set Tensor Wrappers\n";
grad_node_creation_str += " // Set Tensor Wrappers\n";
for (const auto& iter : op_base_infos) {
const std::map<std::string, std::string>& grad_ins_fwd_slotname_map =
iter.GetGradInsFwdSlotnameMap();
......@@ -1094,10 +1132,18 @@ static std::string GenerateGradNodeCreationContent(
full_reserved = "true";
}
const char* SET_TENSOR_WRAPPER_TEMPLATE =
" grad_node->SetTensorWrapper%s(%s, %s);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, tensor_wrapper_name,
full_reserved);
" grad_node->SetTensorWrapper%s(%s, %s);\n";
// Replace output directly with input in inplace op.
if (!inplace_map.empty() && inplace_map.count(tensor_wrapper_name)) {
auto inplace_input_name = inplace_map[tensor_wrapper_name];
grad_node_creation_str += paddle::string::Sprintf(
SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name,
inplace_input_name, full_reserved);
} else {
grad_node_creation_str += paddle::string::Sprintf(
SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name,
tensor_wrapper_name, full_reserved);
}
}
}
grad_node_creation_str += "\n";
......@@ -1115,12 +1161,12 @@ static std::string GenerateGradNodeCreationContent(
size_t input_position = fwd_inputs_name_pos_map.at(input_name);
const char* SET_GRAD_OUT_META_TEMPLATE =
" grad_node->SetGradOutMeta(%s, %d);\n";
" grad_node->SetGradOutMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
const char* ADD_EDGES_TEMPLATE =
" if(%s) grad_node->AddEdges(%s, %d);\n";
" if(%s) grad_node->AddEdges(%s, %d);\n";
grad_node_creation_str +=
paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name,
input_autograd_name, input_position);
......@@ -1129,11 +1175,11 @@ static std::string GenerateGradNodeCreationContent(
size_t input_position = fwd_inputs_name_pos_map.at(input_name);
const char* SET_GRAD_OUT_META_TEMPLATE =
" grad_node->SetGradOutMeta(%s, %d);\n";
" grad_node->SetGradOutMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_OUT_META_TEMPLATE, input_name, input_position);
const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
ADD_EDGES_TEMPLATE, input_autograd_name, input_position);
}
......@@ -1145,73 +1191,125 @@ static std::string GenerateGradNodeCreationContent(
std::string pass_stop_gradient_args = "false";
for (const proto::OpProto::Var& output : out_vars) {
const std::string& output_name = output.name();
const std::string& output_autograd_name = "p_autograd_" + output_name;
size_t output_position = fwd_outputs_name_pos_map.at(output_name);
// Intermediate Tensor does not require SetHistory, nor RetainGrad
if (output.duplicable()) {
pass_stop_gradient_args += ", &" + output_autograd_name;
// Replace output directly with input in inplace op.
if (!inplace_map.empty() && inplace_map.count(output_name)) {
auto inplace_input_name = inplace_map[output_name];
const std::string& inplace_input_autograd_name =
"p_autograd_" + inplace_input_name;
size_t output_position = fwd_outputs_name_pos_map.at(output_name);
// Intermediate Tensor does not require SetHistory, nor RetainGrad
pass_stop_gradient_args += ", " + inplace_input_autograd_name;
const char* SET_OUT_RANK_TEMPLATE =
" egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n";
" egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
SET_OUT_RANK_TEMPLATE, inplace_input_autograd_name, output_position);
// Intermediate Tensor does not require SetHistory
if (!output.intermediate()) {
const char* SET_HISTORY_TEMPLATE =
" egr::EagerUtils::SetHistory(&%s, grad_node);\n";
grad_node_creation_str +=
paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
" egr::EagerUtils::SetHistory(%s, grad_node);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_HISTORY_TEMPLATE, inplace_input_autograd_name);
}
const char* SET_GRAD_IN_META_TEMPLATE =
" grad_node->SetGradInMeta(%s, %d);\n";
" grad_node->SetGradInMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
SET_GRAD_IN_META_TEMPLATE, inplace_input_name, output_position);
// Intermediate Tensor does not require CheckAndRetainGrad
if (!output.intermediate()) {
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, inplace_input_name);
}
} else {
pass_stop_gradient_args += ", " + output_autograd_name;
const char* SET_OUT_RANK_TEMPLATE =
" egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
const std::string& output_autograd_name = "p_autograd_" + output_name;
size_t output_position = fwd_outputs_name_pos_map.at(output_name);
// Intermediate Tensor does not require SetHistory
// Intermediate Tensor does not require SetHistory, nor RetainGrad
if (output.duplicable()) {
pass_stop_gradient_args += ", &" + output_autograd_name;
const char* SET_OUT_RANK_TEMPLATE =
" egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
// Intermediate Tensor does not require SetHistory
if (!output.intermediate()) {
const char* SET_HISTORY_TEMPLATE =
" egr::EagerUtils::SetHistory(&%s, grad_node);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_HISTORY_TEMPLATE, output_autograd_name);
}
const char* SET_GRAD_IN_META_TEMPLATE =
" grad_node->SetGradInMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
} else {
pass_stop_gradient_args += ", " + output_autograd_name;
const char* SET_OUT_RANK_TEMPLATE =
" egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position);
// Intermediate Tensor does not require SetHistory
if (!output.intermediate()) {
const char* SET_HISTORY_TEMPLATE =
" egr::EagerUtils::SetHistory(%s, grad_node);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_HISTORY_TEMPLATE, output_autograd_name);
}
const char* SET_GRAD_IN_META_TEMPLATE =
" grad_node->SetGradInMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
}
// Intermediate Tensor does not require CheckAndRetainGrad
if (!output.intermediate()) {
const char* SET_HISTORY_TEMPLATE =
" egr::EagerUtils::SetHistory(%s, grad_node);\n";
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name);
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name);
}
const char* SET_GRAD_IN_META_TEMPLATE =
" grad_node->SetGradInMeta(%s, %d);\n";
grad_node_creation_str += paddle::string::Sprintf(
SET_GRAD_IN_META_TEMPLATE, output_name, output_position);
}
// Intermediate Tensor does not require CheckAndRetainGrad
if (!output.intermediate()) {
VLOG(6) << "Generated Call RetainGradForTensor";
const char* RETAIN_GRAD_TEMPLATE =
" egr::EagerUtils::CheckAndRetainGrad(%s);\n";
grad_node_creation_str +=
paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name);
}
}
VLOG(6) << "Generated SetGradIn/OutMeta";
// [Generation] GradNode Creation
// After getting require_any_grad, first call CheckInplace for inplace ops.
// Then execute TraceOp and generate output autograd_meta.
// Finally, construct the GradNode. (For inplace ops, the output is replaced
// directly with the input.)
// Add event record
std::string event_name = op_type + " node_creation";
const char* GRAD_NODE_CREATION_TEMPLATE =
" %s"
"%s"
" bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(%s);\n"
" if(require_any_grad) {\n"
" VLOG(6) << \" Construct Grad for %s \"; \n"
" egr::EagerUtils::PassStopGradient(%s);\n"
"%s\n }";
"%s\n"
"%s"
" {\n"
" paddle::platform::RecordEvent node_creation_record_event(\"%s\", "
"paddle::platform::TracerEventType::Operator, 1);\n"
"%s"
" if(require_any_grad) {\n"
" VLOG(6) << \" Construct Grad for %s \"; \n"
" egr::EagerUtils::PassStopGradient(%s);\n"
" %s\n"
" }\n"
" }";
std::string grad_node_creation_body_str = paddle::string::Sprintf(
GRAD_NODE_CREATION_TEMPLATE, prepare_autograd_meta_str,
compute_require_grad_args, op_type, pass_stop_gradient_args,
grad_node_creation_str);
compute_require_grad_args, check_inplace_str, trace_op_body_str,
event_name, get_output_autograd_meta_str, op_type,
pass_stop_gradient_args, grad_node_creation_str);
return grad_node_creation_body_str;
}
......@@ -1221,7 +1319,8 @@ static std::string GenerateGradNodeCreationContent(
/* -------------------------------- */
static std::pair<std::string, std::string> GenerateForwardFunctionContents(
const ForwardGenerationInfo& fwd_info,
const GradNodeGenerationInfo& bwd_info) {
const GradNodeGenerationInfo& bwd_info,
std::map<std::string, std::string> inplace_map = {}) {
/* --- Process Forward Info ---*/
const std::string& op_type = fwd_info.GetOpType();
const std::unordered_map<std::string, size_t>& fwd_inputs_name_pos_map =
......@@ -1301,8 +1400,21 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
core_ops_args_type_info[op_type][input_position] = "list";
} else {
const char* FWD_INS_ARG_TEMPLATE =
"const paddle::experimental::Tensor& %s";
// inplace tensor can't be const
const char* FWD_INS_ARG_TEMPLATE;
bool flag_find_input_name = false;
if (!inplace_map.empty()) {
for (auto& inplace_pair : inplace_map) {
if (inplace_pair.second == input_name) {
flag_find_input_name = true;
FWD_INS_ARG_TEMPLATE = "paddle::experimental::Tensor& %s";
break;
}
}
}
if (!flag_find_input_name) {
FWD_INS_ARG_TEMPLATE = "const paddle::experimental::Tensor& %s";
}
input_args_str_list[input_position] =
paddle::string::Sprintf(FWD_INS_ARG_TEMPLATE, input_name);
......@@ -1362,6 +1474,7 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
// [Generation] Get Outs Map
std::string outs_contents_str = "";
std::string inplace_mapping_str = "";
for (const proto::OpProto::Var& output : out_vars) {
const std::string& output_name = output.name();
std::string outnum = "1";
......@@ -1404,6 +1517,22 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
}
core_ops_args_info[op_type].push_back(output_var_name);
} else if (!inplace_map.empty() && inplace_map.count(output_name)) {
// In inplace op, replace the output with the input directly.
PADDLE_ENFORCE_NE(
inplace_map[output_name], "",
paddle::platform::errors::InvalidArgument(
"Inplace op %s has no input corresponding to output %s.", op_type,
output_name));
const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", ins[\"%s\"] },";
auto inplace_input_name = inplace_map[output_name];
outs_contents_str += paddle::string::Sprintf(
FWD_OUTS_CONTENT_TEMPLATE, output_name, inplace_input_name);
// inplace_map used in TraceOp.
const char* INPLACE_MAPPING_TEMPLATE = R"({"%s", "%s"},)";
inplace_mapping_str += paddle::string::Sprintf(
INPLACE_MAPPING_TEMPLATE, inplace_input_name, output_name);
} else {
if (output.duplicable()) {
outnum = output_name + "Num";
......@@ -1430,6 +1559,8 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
}
if (outs_contents_str.size() > 0)
outs_contents_str.pop_back(); // Remove trailing ","
if (inplace_mapping_str.size() > 0)
inplace_mapping_str.pop_back(); // Remove trailing ","
const char* FWD_OUTS_MAP_TEMPLATE =
" std::map<std::string, "
......@@ -1463,6 +1594,12 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
dygraph_function_args_str +=
", const paddle::framework::AttributeMap& attr_map";
/* --------- Generate TraceOp ----- */
// TraceOp should be run after computing require_any_grad (for checking
// inplace).
// `trace_op_body_str` will be passed as a parameter to
// `GenerateGradNodeCreationContent`.
std::string trace_op_body_str = "";
// [Generation] Get TraceOp
const char* FWD_TRACE_OP_TEMPLATE =
" paddle::framework::AttributeMap attrs = attr_map;\n"
......@@ -1470,11 +1607,12 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
" egr::Controller::Instance().GetCurrentTracer()->TraceOp(\"%s\", ins, "
"outs, attrs, \n"
" egr::Controller::Instance().GetExpectedPlace(),\n"
" &default_attrs, true, {});\n";
std::string trace_op_str =
paddle::string::Sprintf(FWD_TRACE_OP_TEMPLATE, op_type);
generated_function_body += trace_op_str;
generated_function_body += "\n";
" &default_attrs, true, {%s});\n";
std::string trace_op_str = paddle::string::Sprintf(
FWD_TRACE_OP_TEMPLATE, op_type, inplace_mapping_str);
trace_op_body_str += trace_op_str;
trace_op_body_str += "\n";
VLOG(6) << "Generated AttrMap & TraceOp";
......@@ -1539,48 +1677,64 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
output_varname, output_var_args_name);
}
} else {
const char* FWD_OUT_TENSOR_TEMPLATE =
" paddle::experimental::Tensor %s;\n"
" egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n";
out_tensor_str =
paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, output_varname,
output_name, output_varname);
if (!inplace_map.empty() && inplace_map.count(output_name)) {
// Modify meta info of inplace tensor.
// Bump inplace version of inplace tensor.
auto inplace_input_name = inplace_map[output_name];
const char* FWD_OUT_TENSOR_TEMPLATE =
" egr::EagerUtils::ModifyInplaceInput(outs[\"%s\"][0], &%s);\n"
" %s.bump_inplace_version();\n"
" VLOG(3) << \"Tensor(\" << %s.name() << \") uses Inplace "
"Strategy.\";\n";
out_tensor_str = paddle::string::Sprintf(
FWD_OUT_TENSOR_TEMPLATE, output_name, inplace_input_name,
inplace_input_name, inplace_input_name);
} else {
const char* FWD_OUT_TENSOR_TEMPLATE =
" paddle::experimental::Tensor %s;\n"
" egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n";
out_tensor_str =
paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, output_varname,
output_name, output_varname);
}
}
return_types[return_position] = "paddle::experimental::Tensor";
}
return_contents[return_position] = output_varname;
generated_function_body += out_tensor_str;
if (!inplace_map.empty() && inplace_map.count(output_name)) {
// Replace output directly with input in inplace op.
return_contents[return_position] = inplace_map[output_name];
} else {
return_contents[return_position] = output_varname;
}
trace_op_body_str += out_tensor_str;
}
generated_function_body += "\n";
trace_op_body_str += "\n";
VLOG(6) << "Converted Output VarBase to EagerVariable(s)";
/* ------ END Generate TraceOp ----- */
// [Generation] Handle core_ops_returns_info
core_ops_returns_info[op_type] = return_contents;
// avoid inplace op changing core_ops_returns_info
if (core_ops_returns_info.empty() || !core_ops_returns_info.count(op_type)) {
core_ops_returns_info[op_type] = return_contents;
}
// [Generation] ComputeRequireGrad -> GradNodeCreation
if (!bwd_info.GenerateForwardOnly()) {
std::string grad_node_creation_body_str =
GenerateGradNodeCreationContent(fwd_info, bwd_info);
// Add event record
std::string event_name = op_type + " node_creation";
const char* NODE_CREATION_TEMPLATE =
"{\n"
" paddle::platform::RecordEvent node_creation_record_event(\"%s\", "
"paddle::platform::TracerEventType::Operator, 1);\n"
" %s\n"
"}";
grad_node_creation_body_str = paddle::string::Sprintf(
NODE_CREATION_TEMPLATE, event_name, grad_node_creation_body_str);
// If GradNode needs to be generated, pass `trace_op_body_str`
// into `GenerateGradNodeCreationContent`.
std::string grad_node_creation_body_str = GenerateGradNodeCreationContent(
fwd_info, bwd_info, trace_op_body_str, inplace_map);
generated_function_body += grad_node_creation_body_str;
generated_function_body += "\n";
// [Generation] Call RetainGradForTensor
VLOG(6) << "Generated GradNode Creation codes";
} else {
// If GradNode doesn't need to be generated, generate TraceOP directly.
generated_function_body += trace_op_body_str;
}
// [Generation] Handle return: Tuple/Vector/Tensor
......@@ -1627,7 +1781,13 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
VLOG(6) << "Generated return codes";
// [Generation] Get Full Function
std::string function_name = op_type + "_dygraph_function";
std::string function_name;
if (inplace_map.empty()) {
function_name = op_type + "_dygraph_function";
} else {
// change function_name for inplace op.
function_name = op_type + "__dygraph_function";
}
if (dygraph_function_args_str.size() > 0) {
auto iter = dygraph_function_args_str.begin();
......@@ -1635,15 +1795,15 @@ static std::pair<std::string, std::string> GenerateForwardFunctionContents(
}
const char* DYGRAPH_FUNCTION_EVENT_RECORD_FUNCTION_TEMPLATE =
"paddle::platform::RecordEvent dygraph_entrance_record_event(\"%s\", "
" paddle::platform::RecordEvent dygraph_entrance_record_event(\"%s\", "
"paddle::platform::TracerEventType::Operator, 1);";
std::string event_name = op_type + " dygraph";
std::string fwd_record_event_str = paddle::string::Sprintf(
DYGRAPH_FUNCTION_EVENT_RECORD_FUNCTION_TEMPLATE, event_name);
const char* FWD_FUNCTION_TEMPLATE =
"%s %s(%s) {\n\n"
" %s\n"
" %s\n"
"%s\n"
"%s\n"
"}\n\n";
std::string fwd_function_str = paddle::string::Sprintf(
FWD_FUNCTION_TEMPLATE, function_proto_return_type_str, function_name,
......@@ -2426,7 +2586,7 @@ static void DygraphCodeGeneration(const std::string& output_dir) {
/* --------------------------- */
VLOG(6) << "-------- GenerateForwardFunctionContents -------";
std::pair<std::string, std::string> body_and_declaration =
GenerateForwardFunctionContents(fwd_info, bwd_info);
GenerateForwardFunctionContents(fwd_info, bwd_info, {});
fwd_function_str += body_and_declaration.first + "\n";
......@@ -2434,6 +2594,30 @@ static void DygraphCodeGeneration(const std::string& output_dir) {
std::string fwd_function_declare_str = body_and_declaration.second;
dygraph_forward_api_str += fwd_function_declare_str;
auto& infer_inplace =
paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_;
std::map<std::string, std::string> inplace_map;
// Inplace Function Generator.
// The `sum` op has duplicable inputs. Don't add an inplace strategy
// for `sum` for now.
if (op_type != "sum" && infer_inplace) {
auto in_to_outs = infer_inplace(true);
for (auto& inplace_pair : in_to_outs) {
inplace_map[inplace_pair.second] = inplace_pair.first;
}
VLOG(6) << "-------- GenerateInplaceForwardFunctionContents -------";
std::pair<std::string, std::string> inplace_body_and_declaration =
GenerateForwardFunctionContents(fwd_info, bwd_info, inplace_map);
fwd_function_str += inplace_body_and_declaration.first + "\n";
VLOG(6) << "-------- GenerateInplaceDygraphForwardAPIContents -------";
std::string inplace_fwd_function_declare_str =
inplace_body_and_declaration.second;
dygraph_forward_api_str += inplace_fwd_function_declare_str;
}
if (bwd_info.GenerateForwardOnly()) continue;
VLOG(6) << "-------- GenerateGradNodeHeaderContents -------";
......
......@@ -36,6 +36,15 @@ class TensorWrapper {
explicit TensorWrapper(const paddle::experimental::Tensor& tensor,
bool full_reserved = false,
bool no_need_buffer = false) {
// set inplace_version_snapshot_ according to tensor's current inplace
// version.
if (tensor.impl() && phi::DenseTensor::classof(tensor.impl().get())) {
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(tensor.impl().get());
auto& inplace_version_counter = dense_tensor->InplaceVersionCounter();
inplace_version_snapshot_ = inplace_version_counter.CurrentVersion();
}
/**
* Normally, we should fully reserved all non-output or non-leaf fwd tensor
* here. And for fwd output tensor, we should not reserve its autogradmeta,
......@@ -49,6 +58,7 @@ class TensorWrapper {
}
// shallow copy tensor_impl here
no_need_buffer_ = no_need_buffer;
if (no_need_buffer) {
if (phi::DenseTensor::classof(tensor.impl().get())) {
// Only Copy Meta
......@@ -86,6 +96,7 @@ class TensorWrapper {
// if it's full_reserved just return the full copy of tensor
if (full_reserved_) {
check_inplace_version();
return intermidiate_tensor_;
} else {
std::shared_ptr<GradNodeBase> new_grad_node = grad_node;
......@@ -94,15 +105,52 @@ class TensorWrapper {
intermidiate_tensor_.set_autograd_meta(
std::static_pointer_cast<paddle::experimental::AbstractAutogradMeta>(
p_ab_autograd_meta));
check_inplace_version();
return intermidiate_tensor_;
}
}
void check_inplace_version() {
if (no_need_buffer_) {
VLOG(6) << "There's no need to check inplace_version because "
"no_need_buffer_ is true.";
return;
}
if (intermidiate_tensor_.impl() &&
phi::DenseTensor::classof(intermidiate_tensor_.impl().get())) {
phi::DenseTensor* dense_tensor =
static_cast<phi::DenseTensor*>(intermidiate_tensor_.impl().get());
auto& inplace_version_counter = dense_tensor->InplaceVersionCounter();
uint32_t current_inplace_version =
inplace_version_counter.CurrentVersion();
PADDLE_ENFORCE_EQ(
current_inplace_version, inplace_version_snapshot_,
paddle::platform::errors::PermissionDenied(
"Tensor '%s' used in gradient computation has been "
"modified by an inplace operation. "
"Its version is %d but the expected version is %d. "
"Please fix your code to void calling an inplace operator "
"after using the Tensor which will used in gradient "
"computation.",
intermidiate_tensor_.name(), current_inplace_version,
inplace_version_snapshot_));
VLOG(6) << " The inplace_version_snapshot_ of Tensor '"
<< intermidiate_tensor_.name() << "' is [ "
<< inplace_version_snapshot_ << " ]";
VLOG(6) << " The current_inplace_version of Tensor '"
<< intermidiate_tensor_.name() << "' is [ "
<< current_inplace_version << " ]";
}
}
void clear() { intermidiate_tensor_.reset(); }
private:
bool full_reserved_ = false;
bool no_need_buffer_ = false;
std::pair<size_t, size_t> out_rank_info_;
paddle::experimental::Tensor intermidiate_tensor_;
uint32_t inplace_version_snapshot_ = 0;
};
} // namespace egr
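The snapshot taken in the constructor and the comparison in check_inplace_version() implement a simple protocol: remember the tensor's inplace version when it is captured for backward, and refuse to recover it if that version has changed in the meantime. Below is a minimal standalone sketch of that idea; the class and function names are illustrative only and are not Paddle's API.

#include <cstdint>
#include <iostream>
#include <stdexcept>
#include <string>

// Toy stand-ins for DenseTensor's InplaceVersionCounter and TensorWrapper.
class VersionCounter {
 public:
  void Bump() { ++version_; }
  uint32_t CurrentVersion() const { return version_; }

 private:
  uint32_t version_ = 0;
};

struct ToyTensor {
  std::string name;
  VersionCounter counter;  // bumped by every inplace op on this tensor
};

class ToyTensorWrapper {
 public:
  explicit ToyTensorWrapper(ToyTensor* t)
      : tensor_(t), snapshot_(t->counter.CurrentVersion()) {}

  ToyTensor* Recover() const {
    uint32_t current = tensor_->counter.CurrentVersion();
    if (current != snapshot_) {
      throw std::runtime_error("Tensor '" + tensor_->name +
                               "' was modified by an inplace op after being "
                               "saved for gradient computation.");
    }
    return tensor_;
  }

 private:
  ToyTensor* tensor_;
  uint32_t snapshot_;  // version recorded when the tensor was wrapped
};

int main() {
  ToyTensor x{"x", {}};
  ToyTensorWrapper saved(&x);  // forward pass saves x for backward
  x.counter.Bump();            // a later inplace op bumps x's version
  try {
    saved.Recover();           // backward detects the version mismatch
  } catch (const std::exception& e) {
    std::cerr << e.what() << std::endl;
  }
  return 0;
}

In the real code the snapshot lives in TensorWrapper::inplace_version_snapshot_, the counter belongs to phi::DenseTensor, and the mismatch is reported through PADDLE_ENFORCE_EQ rather than an exception.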
......@@ -212,6 +212,27 @@ std::vector<std::shared_ptr<EagerVariable>> EagerUtils::CreateVars(
return res;
}
void EagerUtils::ModifyInplaceInput(
const std::shared_ptr<EagerVariable>& inplace_variable,
paddle::experimental::Tensor* inplace_tensor) {
// Only modify the meta information of the inplace tensor, because
// EagerVariable cannot modify Tensor's meta information after inplace
// op (such as ``reshape``) is executed.
PADDLE_ENFORCE_NOT_NULL(inplace_tensor,
paddle::platform::errors::Fatal(
"Inplace Tensor is null and cannot be modified. "
"We are tring to Modify Inplace Input from its "
"shared_ptr, this error may indicate the inplace "
" input is nullptr"));
if (phi::DenseTensor::classof(inplace_variable->GetTensorBase().get())) {
phi::DenseTensor* variable_dense_tensor =
static_cast<phi::DenseTensor*>(inplace_variable->GetTensorBase().get());
phi::DenseTensor* tensor_dense_tensor =
static_cast<phi::DenseTensor*>(inplace_tensor->impl().get());
tensor_dense_tensor->set_meta(variable_dense_tensor->meta());
}
}
std::vector<paddle::experimental::Tensor> EagerUtils::GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs) {
std::vector<paddle::experimental::Tensor> res;
......
......@@ -14,6 +14,7 @@
#pragma once
#include "paddle/fluid/eager/api/utils/tensor_utils.h"
#include "paddle/fluid/eager/autograd_meta.h"
#include "paddle/fluid/eager/eager_tensor.h"
#include "paddle/fluid/eager/grad_node_info.h"
......@@ -144,6 +145,19 @@ class EagerUtils {
iter.apply(std::forward<Args>(args)...);
}
static void CheckInplace(const paddle::experimental::Tensor& target,
const AutogradMeta* autograd_meta,
bool require_any_grad) {
if (require_any_grad && autograd_meta) {
PADDLE_ENFORCE_EQ(!autograd_meta->StopGradient() &&
egr::egr_utils_api::IsLeafTensor(target),
false, paddle::platform::errors::InvalidArgument(
"Leaf Var (%s) that doesn't stop gradient "
"can't use inplace strategy.",
target.name()));
}
}
// TensorWrapper Utils
static paddle::experimental::Tensor RecoverTensorWrapper(
TensorWrapper* tw, const std::shared_ptr<GradNodeBase>& grad_node);
......@@ -171,6 +185,9 @@ class EagerUtils {
static std::vector<std::shared_ptr<EagerVariable>> CreateVars(
const size_t num);
// Construct Tensor From var
static void ModifyInplaceInput(
const std::shared_ptr<EagerVariable>& inplace_variable,
paddle::experimental::Tensor* inplace_tensor);
static std::vector<paddle::experimental::Tensor> GetOutputs(
const std::vector<std::shared_ptr<EagerVariable>>& outs);
static paddle::experimental::Tensor GetOutput(
......
......@@ -718,6 +718,15 @@ static PyObject* set_grad_type(TensorObject* self, PyObject* args,
EAGER_CATCH_AND_THROW_RETURN_NULL
}
static PyObject* tensor__inplace_version(TensorObject* self, PyObject* args,
PyObject* kwargs) {
EAGER_TRY
uint32_t inplace_version = self->tensor.current_inplace_version();
return ToPyObject(inplace_version);
EAGER_CATCH_AND_THROW_RETURN_NULL
}
PyMethodDef variable_methods[] = {
{"numpy", (PyCFunction)(void (*)(void))tensor_method_numpy,
METH_VARARGS | METH_KEYWORDS, NULL},
......@@ -766,6 +775,8 @@ PyMethodDef variable_methods[] = {
METH_VARARGS | METH_KEYWORDS, NULL},
{"_set_grad_type", (PyCFunction)(void (*)(void))set_grad_type,
METH_VARARGS | METH_KEYWORDS, NULL},
{"_inplace_version", (PyCFunction)(void (*)(void))tensor__inplace_version,
METH_VARARGS | METH_KEYWORDS, NULL},
{NULL, NULL, 0, NULL}};
} // namespace pybind
......
......@@ -162,17 +162,22 @@ static inline std::string TempName(const std::string& name) {
std::string GenerateOpFunctionsBody(
const paddle::framework::proto::OpProto* op_proto, std::string func_name,
bool use_inplace_strategy = false,
std::map<std::string, std::string> inplace_map = {}) {
auto& op_type = op_proto->type();
std::string input_args = "";
std::string call_api_str = "auto out = " + op_type + "_dygraph_function(";
std::string call_api_str = "";
std::string ins_initializer_with_null = "";
std::string py_arg = "";
int arg_idx = 0;
int input_args_num = 0;
std::string ins_cast_str = "";
std::string view_strategy_str = "";
if (!inplace_map.empty()) {
// change call_api_str for inplace op
call_api_str = "auto out = " + op_type + "__dygraph_function(";
} else {
call_api_str = "auto out = " + op_type + "_dygraph_function(";
}
for (auto& input : op_proto->inputs()) {
auto& in_name = input.name();
// skip those dispensable inputs, like ResidualData in conv2d
......@@ -288,8 +293,31 @@ std::string GenerateOpFunctionsBody(
HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, viwe_input_name, viwe_output_name,
viwe_input_name, viwe_output_name);
}
return_str = "return ToPyObject(out);";
if (!inplace_map.empty()) {
// For inplace op, use the input PyObject directly.
for (auto& inplace_pair : inplace_map) {
// Find index of inplace tensor, and directly use input PyObject.
std::string inplace_arg_name = inplace_pair.second;
std::string inplace_return_name = inplace_pair.first;
const char* RETURN_INPLACE_TENSOR_TEMPLATE =
"ssize_t arg_id = GetIdxFromCoreOpsInfoMap(core_ops_args_info, "
"\"%s\", \"%s\");\n"
" ssize_t return_id = "
"GetIdxFromCoreOpsInfoMap(core_ops_returns_info, \"%s\", \"%s\");\n"
" return ToPyObject(out, return_id, args, arg_id);";
return_str = paddle::string::Sprintf(RETURN_INPLACE_TENSOR_TEMPLATE,
op_type, inplace_arg_name, op_type,
inplace_return_name);
// Only one inplace_var is supported for now.
PADDLE_ENFORCE_EQ(
inplace_map.size(), 1,
paddle::platform::errors::InvalidArgument(
"size of inplace_map must be 1, but got %d", inplace_map.size()));
break;
}
} else {
return_str = "return ToPyObject(out);";
}
std::string function_args = "";
if (input_args == "") {
......@@ -383,7 +411,8 @@ GenerateOpFunctions() {
continue;
}
std::string func_name = "eager_api_" + op_type;
std::string op_function_str = GenerateOpFunctionsBody(op_proto, func_name);
std::string op_function_str =
GenerateOpFunctionsBody(op_proto, func_name, {});
// generate pybind item
auto bind_function_str = paddle::string::Sprintf(
......@@ -391,6 +420,40 @@ GenerateOpFunctions() {
op_function_list.emplace_back(std::move(op_function_str));
bind_function_list.emplace_back(std::move(bind_function_str));
// NOTE(pangyoki): Inplace Strategy.
// In this case, output will reuse input varbase.
// Dygraph mode needs to be aligned with the in-place strategy in static
// mode, and the mapping relationships between output and input that have
// been defined in static mode should be used in dygraph mode.
// Find which ops need to use Inplace strategy in static mode, and get the
// mapping relationship between Inplace output and input.
auto& infer_inplace =
paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_;
std::map<std::string, std::string> inplace_map;
// The `sum` op has duplicable inputs. Don't add an inplace strategy
// for `sum` for now.
if (op_type != "sum" && infer_inplace) {
// The inplace OP is named op_type + "_".
// It needs a separately generated implementation.
auto in_to_outs = infer_inplace(true);
for (auto& inplace_pair : in_to_outs) {
inplace_map[inplace_pair.second] = inplace_pair.first;
}
std::string inplace_op_type = op_type + "_";
std::string inplace_func_name = "eager_api_" + inplace_op_type;
std::string inplace_op_function_str =
GenerateOpFunctionsBody(op_proto, inplace_func_name, inplace_map);
// generate pybind item
auto inplace_bind_function_str =
paddle::string::Sprintf(PYBIND_ITEM_TEMPLATE, inplace_op_type,
inplace_func_name, inplace_op_type);
op_function_list.emplace_back(std::move(inplace_op_function_str));
bind_function_list.emplace_back(std::move(inplace_bind_function_str));
}
}
if (append_custom_head_file) {
op_function_list.emplace_back(CUSTOM_HANDWRITE_OP_FUNC_FILE);
......
......@@ -417,6 +417,8 @@ PyObject* ToPyObject(bool value) {
PyObject* ToPyObject(int value) { return PyLong_FromLong(value); }
PyObject* ToPyObject(uint32_t value) { return PyLong_FromUnsignedLong(value); }
PyObject* ToPyObject(int64_t value) { return PyLong_FromLongLong(value); }
PyObject* ToPyObject(float value) { return PyFloat_FromDouble(value); }
......@@ -442,6 +444,20 @@ PyObject* ToPyObject(const paddle::experimental::Tensor& value) {
return obj;
}
PyObject* ToPyObject(const paddle::experimental::Tensor& value,
ssize_t value_idx, PyObject* args, ssize_t arg_idx) {
// For inplace op, directly return the input PyObject of the inplace tensor.
// [Parameter]
// value: unused parameter.
// value_idx: unused parameter.
// args: Input PyObject.
// arg_idx: Index of inplace PyObject in input args. Used to find the input
// inplace PyObject.
PyObject* obj = PyTuple_GET_ITEM(args, arg_idx);
Py_INCREF(obj);
return obj;
}
PyObject* ToPyObject(const std::vector<bool>& value) {
PyObject* result = PyList_New((Py_ssize_t)value.size());
......
......@@ -56,6 +56,7 @@ framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj,
ssize_t arg_pos);
PyObject* ToPyObject(int value);
PyObject* ToPyObject(uint32_t value);
PyObject* ToPyObject(bool value);
PyObject* ToPyObject(int64_t value);
PyObject* ToPyObject(float value);
......@@ -63,6 +64,8 @@ PyObject* ToPyObject(double value);
PyObject* ToPyObject(const char* value);
PyObject* ToPyObject(const std::string& value);
PyObject* ToPyObject(const paddle::experimental::Tensor& value);
PyObject* ToPyObject(const paddle::experimental::Tensor& value,
ssize_t value_idx, PyObject* args, ssize_t arg_idx);
PyObject* ToPyObject(const std::vector<bool>& value);
PyObject* ToPyObject(const std::vector<int>& value);
PyObject* ToPyObject(const std::vector<int64_t>& value);
......@@ -84,6 +87,17 @@ struct TupleTensorResult {
TupleTensorResult<Tuple, N - 1>::Run(out, result);
PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get<N - 1>(out)));
}
static void Run(const Tuple& out, PyObject* result, ssize_t value_idx,
PyObject* args, ssize_t arg_idx) {
TupleTensorResult<Tuple, N - 1>::Run(out, result, value_idx, args, arg_idx);
if (N - 1 == value_idx) {
PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get<N - 1>(out),
value_idx, args, arg_idx));
} else {
PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get<N - 1>(out)));
}
}
};
template <typename Tuple>
......@@ -91,6 +105,16 @@ struct TupleTensorResult<Tuple, 1> {
static void Run(const Tuple& out, PyObject* result) {
PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out)));
}
static void Run(const Tuple& out, PyObject* result, ssize_t value_idx,
PyObject* args, ssize_t arg_idx) {
if (value_idx == 0) {
PyTuple_SET_ITEM(result, 0,
ToPyObject(std::get<0>(out), value_idx, args, arg_idx));
} else {
PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out)));
}
}
};
template <typename... Args>
......@@ -103,6 +127,26 @@ PyObject* ToPyObject(const std::tuple<Args...>& out) {
return result;
}
template <typename... Args>
PyObject* ToPyObject(const std::tuple<Args...>& out, ssize_t value_idx,
PyObject* args, ssize_t arg_idx) {
// For inplace op, directly return the input PyObject of the inplace tensor.
// [Parameter]
// out: Outputs tuple after executing op.
// value_idx: Index of inplace tensor in outputs tuple. Used to find the
// output inplace tensor.
// args: Input PyObject.
// arg_idx: Index of inplace PyObject in input args. Used to find the input
// inplace PyObject.
auto len = sizeof...(Args);
PyObject* result = PyTuple_New(len);
TupleTensorResult<decltype(out), sizeof...(Args)>::Run(out, result, value_idx,
args, arg_idx);
return result;
}
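The new TupleTensorResult::Run overload walks the output tuple at compile time and swaps in the inplace-aware ToPyObject call only at value_idx. The same index-recursion pattern, reduced to a self-contained sketch with hypothetical names (no Python or Paddle dependency), looks like this:

#include <cstddef>
#include <iostream>
#include <string>
#include <tuple>

// Recursively visit tuple elements 0..N-1; the element at `special_idx`
// gets special treatment (here: an "[inplace]" marker when printing).
template <typename Tuple, std::size_t N>
struct TuplePrinter {
  static void Run(const Tuple& t, std::size_t special_idx) {
    TuplePrinter<Tuple, N - 1>::Run(t, special_idx);
    if (N - 1 == special_idx) {
      std::cout << "[inplace] " << std::get<N - 1>(t) << "\n";
    } else {
      std::cout << std::get<N - 1>(t) << "\n";
    }
  }
};

// Base case: the first element (mirrors TupleTensorResult<Tuple, 1>).
template <typename Tuple>
struct TuplePrinter<Tuple, 1> {
  static void Run(const Tuple& t, std::size_t special_idx) {
    if (special_idx == 0) {
      std::cout << "[inplace] " << std::get<0>(t) << "\n";
    } else {
      std::cout << std::get<0>(t) << "\n";
    }
  }
};

int main() {
  auto out = std::make_tuple(1, 2.5, std::string("three"));
  constexpr std::size_t size = std::tuple_size<decltype(out)>::value;
  TuplePrinter<decltype(out), size>::Run(out, 1);  // element 1 is "inplace"
  return 0;
}

In the real helper, the special case calls the four-argument ToPyObject, which returns the original input PyObject instead of converting the freshly created tensor; that is what lets the inplace API hand back the very object the user passed in.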
paddle::experimental::Scalar CastPyArg2Scalar(PyObject* obj,
const std::string& op_type,
ssize_t arg_pos);
......
......@@ -854,5 +854,30 @@ void InitOpsAttrTypeMap() {
}
}
ssize_t GetIdxFromCoreOpsInfoMap(
const std::unordered_map<std::string, std::vector<std::string>>&
core_ops_info_map,
const std::string& op_type, const std::string& name) {
// `core_ops_info_map` can be `core_ops_args_info` or `core_ops_returns_info`.
// `core_ops_args_info`: get index from core_ops_args_info[op_type] according
// to input name.
// `core_ops_returns_info`: get index from core_ops_returns_info[op_type]
// according to return name.
if (!core_ops_info_map.count(op_type)) {
PADDLE_THROW(platform::errors::Fatal(
"Op %s is not found in core_ops_*_info map.", op_type));
} else {
auto args_list = core_ops_info_map.at(op_type);
auto it = std::find(args_list.begin(), args_list.end(), name);
if (it == args_list.end()) {
PADDLE_THROW(platform::errors::Fatal("%s is not found in op %s's args.",
name, op_type));
} else {
return std::distance(args_list.begin(), it);
}
}
return -1;
}
} // namespace pybind
} // namespace paddle
......@@ -146,5 +146,10 @@ unsigned long GetUnsignedLongFromArgs( // NOLINT
void InitOpsAttrTypeMap();
ssize_t GetIdxFromCoreOpsInfoMap(
const std::unordered_map<std::string, std::vector<std::string>>&
core_ops_info_map,
const std::string& op_type, const std::string& name);
} // namespace pybind
} // namespace paddle
......@@ -481,7 +481,21 @@ class PADDLE_API Tensor final {
*/
void set_autograd_meta(std::shared_ptr<AbstractAutogradMeta> autograd_meta);
/* Part 9: Auto generated Tensor methods */
/* Part 9: Inplace methods */
/**
* @brief Increase inplace version
*/
void bump_inplace_version();
/**
* @brief Get current inplace version
*
* @return uint32_t
*/
uint32_t current_inplace_version();
/* Part 10: Auto generated Tensor methods */
private:
/**
......
......@@ -347,5 +347,36 @@ void Tensor::set_autograd_meta(
autograd_meta_ = std::move(autograd_meta);
}
void Tensor::bump_inplace_version() {
if (is_dense_tensor()) {
auto &inplace_version_counter =
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)
->InplaceVersionCounter();
VLOG(3) << "yoki: before bump inplace version: "
<< inplace_version_counter.CurrentVersion();
inplace_version_counter.Bump();
VLOG(3) << "yoki: after bump inplace version: "
<< inplace_version_counter.CurrentVersion();
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"bump_inplace_version is only supported on DenseTensor now."));
}
}
uint32_t Tensor::current_inplace_version() {
if (is_dense_tensor()) {
auto &inplace_version_counter =
std::dynamic_pointer_cast<phi::DenseTensor>(impl_)
->InplaceVersionCounter();
VLOG(3) << "yoki: print version: "
<< inplace_version_counter.CurrentVersion();
return inplace_version_counter.CurrentVersion();
} else {
PADDLE_THROW(phi::errors::Unimplemented(
"current_inplace_version is only supported on DenseTensor now."));
}
return 0;
}
} // namespace experimental
} // namespace paddle
......@@ -960,6 +960,7 @@ set_tests_properties(test_bicubic_interp_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_deformable_conv_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_nearest_interp_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_profiler PROPERTIES TIMEOUT 120)
set_tests_properties(test_inplace_eager_fluid PROPERTIES TIMEOUT 120)
set_tests_properties(test_inplace_softmax_with_cross_entropy PROPERTIES TIMEOUT 120)
set_tests_properties(test_cross_entropy2_op PROPERTIES TIMEOUT 120)
set_tests_properties(test_fetch_unmerged PROPERTIES TIMEOUT 120)
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import unittest
import numpy as np
import paddle
import paddle.fluid.core as core
from paddle.fluid.framework import _test_eager_guard
class TestDygraphInplace(unittest.TestCase):
def setUp(self):
self.init_data()
self.set_np_compare_func()
def init_data(self):
self.input_var_numpy = np.random.uniform(-5, 5, [10, 20, 1])
self.dtype = "float32"
def set_np_compare_func(self):
self.np_compare = np.array_equal
def non_inplace_api_processing(self, var):
return paddle.squeeze(var)
def inplace_api_processing(self, var):
return paddle.squeeze_(var)
def test_inplace_api(self):
with _test_eager_guard():
var = paddle.to_tensor(self.input_var_numpy).astype(self.dtype)
inplace_var = self.inplace_api_processing(var)
self.assertTrue(id(var) == id(inplace_var))
inplace_var.exp_()
self.assertTrue(np.array_equal(var.numpy(), inplace_var.numpy()))
def test_forward_version(self):
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var = paddle.to_tensor(self.input_var_numpy).astype(self.dtype)
self.assertEqual(var.inplace_version, 0)
inplace_var = self.inplace_api_processing(var)
self.assertEqual(var.inplace_version, 1)
inplace_var.exp_()
self.assertEqual(var.inplace_version, 2)
inplace_var = self.inplace_api_processing(inplace_var)
self.assertEqual(var.inplace_version, 3)
def test_leaf_inplace_var_error(self):
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var = paddle.to_tensor(self.input_var_numpy).astype(self.dtype)
var.stop_gradient = False
def leaf_inplace_error():
self.inplace_api_processing(var)
self.assertRaises(ValueError, leaf_inplace_error)
def test_backward_error(self):
# It raises an error because the inplace operator will result
# in incorrect gradient computation.
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(
self.dtype)
var_a.stop_gradient = False
var_b = var_a**2
# Here, the gradient computation will use the value of var_b
var_c = var_b**2
self.inplace_api_processing(var_b)
loss = paddle.nn.functional.relu(var_c)
with self.assertRaisesRegexp(
RuntimeError,
"received current_inplace_version:{} != inplace_version_snapshot_:{}".
format(1, 0)):
loss.backward()
def test_backward_success_1(self):
# var_b is modified inplace before it is used, so the inplace operator doesn't
# result in incorrect gradient computation.
grad_var_a, grad_var_a_inplace = 0, 1
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(
self.dtype)
var_a.stop_gradient = False
var_b = var_a**2
var_c = self.inplace_api_processing(
var_b) # var_b is modified inplace before using it
# Here, the gradient computation will use the value of var_b
var_d = var_c**2
loss = var_d.sum()
loss.backward()
grad_var_a_inplace = var_a.grad.numpy()
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(
self.dtype)
var_a.stop_gradient = False
var_b = var_a**2
var_c = self.non_inplace_api_processing(var_b)
var_d = var_c**2
loss = var_d.sum()
loss.backward()
grad_var_a = var_a.grad.numpy()
self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a))
def test_backward_success_2(self):
# Although var_b is modified inplace after using it, it is not used in gradient computation.
# So the inplace operator doesn't result in incorrect gradient computation.
grad_var_a, grad_var_a_inplace = 0, 1
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(
self.dtype)
var_a.stop_gradient = False
var_b = var_a**2
var_c = self.inplace_api_processing(
var_b) # var_b is modified inplace before using it
var_d = var_c + var_c # Here, the grad op of sum doesn't use the value of var_b
loss = var_d.sum()
loss.backward()
grad_var_a_inplace = var_a.grad.numpy()
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.to_tensor(self.input_var_numpy).astype(
self.dtype)
var_a.stop_gradient = False
var_b = var_a**2
var_c = self.non_inplace_api_processing(
var_b) # var_b is modified inplace before using it
var_d = var_c + var_c # Here, the grad op of sum doesn't use the value of var_b
loss = var_d.sum()
loss.backward()
grad_var_a = var_a.grad.numpy()
self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a))
class TestDygraphInplaceUnsqueeze(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return paddle.unsqueeze(var, -1)
def inplace_api_processing(self, var):
return paddle.unsqueeze_(var, -1)
class TestDygraphInplaceReshape(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return paddle.reshape(var, [-1])
def inplace_api_processing(self, var):
return paddle.reshape_(var, [-1])
class TestDygraphInplaceFlatten(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.flatten()
def inplace_api_processing(self, var):
return var.flatten_()
class TestDygraphInplaceScatter(TestDygraphInplace):
def init_data(self):
self.input_var_numpy = np.array([[1, 1], [2, 2], [3, 3]])
self.dtype = "float32"
def non_inplace_api_processing(self, var):
index = paddle.to_tensor([2, 1, 0, 1], dtype='int64')
updates = paddle.to_tensor(
[[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32')
return paddle.scatter(var, index, updates, overwrite=False)
def inplace_api_processing(self, var):
index = paddle.to_tensor([2, 1, 0, 1], dtype='int64')
updates = paddle.to_tensor(
[[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32')
return paddle.scatter_(var, index, updates, overwrite=False)
class TestDygraphInplaceElu(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return paddle.nn.functional.elu(var)
def inplace_api_processing(self, var):
return paddle.nn.functional.elu_(var)
class TestDygraphInplaceRelu(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return paddle.nn.functional.relu(var)
def inplace_api_processing(self, var):
return paddle.nn.functional.relu_(var)
class TestDygraphInplaceSoftmax(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return paddle.nn.functional.softmax(var)
def inplace_api_processing(self, var):
return paddle.nn.functional.softmax_(var)
class TestDygraphInplaceTanh(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return paddle.tanh(var)
def inplace_api_processing(self, var):
return paddle.tanh_(var)
class TestDygraphInplaceCeil(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.ceil()
def inplace_api_processing(self, var):
return var.ceil_()
class TestDygraphInplaceFloor(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.floor()
def inplace_api_processing(self, var):
return var.floor_()
class TestDygraphInplaceExp(TestDygraphInplace):
def set_np_compare_func(self):
self.np_compare = np.allclose
def non_inplace_api_processing(self, var):
return var.exp()
def inplace_api_processing(self, var):
return var.exp_()
class TestDygraphInplaceReciprocal(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.reciprocal()
def inplace_api_processing(self, var):
return var.reciprocal_()
class TestDygraphInplaceRound(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.round()
def inplace_api_processing(self, var):
return var.round_()
class TestDygraphInplaceSqrt(TestDygraphInplace):
def init_data(self):
self.input_var_numpy = np.random.uniform(0, 5, [10, 20, 1])
self.dtype = "float32"
def non_inplace_api_processing(self, var):
return var.sqrt()
def inplace_api_processing(self, var):
return var.sqrt_()
class TestDygraphInplaceRsqrt(TestDygraphInplaceSqrt):
def non_inplace_api_processing(self, var):
return var.rsqrt()
def inplace_api_processing(self, var):
return var.rsqrt_()
class TestDygraphInplaceClip(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.clip(0.6, 1.5)
def inplace_api_processing(self, var):
return var.clip_(0.6, 1.5)
class TestDygraphInplaceScale(TestDygraphInplace):
def non_inplace_api_processing(self, var):
return var.scale(scale=2.0, bias=3.0)
def inplace_api_processing(self, var):
return var.scale_(scale=2.0, bias=3.0)
class TestDygraphInplaceAdd(TestDygraphInplace):
def init_data(self):
self.input_var_numpy = np.random.rand(2, 3, 4)
self.dtype = "float32"
self.input_var_numpy_2 = np.random.rand(2, 3, 4).astype(self.dtype)
def non_inplace_api_processing(self, var):
input_var_2 = paddle.to_tensor(self.input_var_numpy_2)
return var.add(input_var_2)
def inplace_api_processing(self, var):
input_var_2 = paddle.to_tensor(self.input_var_numpy_2)
return var.add_(input_var_2)
class TestDygraphInplaceSubtract(TestDygraphInplaceAdd):
def non_inplace_api_processing(self, var):
input_var_2 = paddle.to_tensor(self.input_var_numpy_2)
return var.subtract(input_var_2)
def inplace_api_processing(self, var):
input_var_2 = paddle.to_tensor(self.input_var_numpy_2)
return var.subtract_(input_var_2)
class TestLossIsInplaceVar(unittest.TestCase):
def test_loss_is_inplace_var(self):
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.ones((2, 2))
var_a.stop_gradient = False
var_b = var_a * 2
loss = var_b.tanh_()
loss.backward()
inplace_grad_var_a = var_a.grad.numpy()
with paddle.fluid.dygraph.guard():
with _test_eager_guard():
var_a = paddle.ones((2, 2))
var_a.stop_gradient = False
var_b = var_a * 2
loss = var_b.tanh()
loss.backward()
grad_var_a = var_a.grad.numpy()
self.assertTrue(np.array_equal(inplace_grad_var_a, grad_var_a))
class TestContinuouslyInplace(unittest.TestCase):
def test_continuously_inplace(self):
with _test_eager_guard():
a = paddle.rand([2, 3])
a.stop_gradient = False
b = a * 2
b.reshape_([-1])
b.reshape_([2, 3])
b.reshape_([-1])
b.backward()
if __name__ == '__main__':
unittest.main()