From 8e612903d342f4f717ff195bac3ebc77a2672a10 Mon Sep 17 00:00:00 2001 From: pangyoki Date: Sat, 19 Mar 2022 00:21:38 +0800 Subject: [PATCH] support inplace in dygraph eager_fluid state (#40400) * [Eager] Support eager grad interface, draft version * Support eager grad interface with allow_unused and multi startup_op * Fix code format * Fix allow_unused case, return PyNone if tensor not initialize * Support output's stop_gradient related to create_graph * Support grad exception case in eager mode, fix coverage CI * Update ToPyObject, return PyNone if not initialize * AccumulationNode add FLAGS_retain_grad_for_all_tensor * Fix ci issue * Fix CI issue * fix, use core.eager.Tensor * Add func SetBufferSlotRankZeros for GradTensorHolder * Support retain_graph by using ClearTensorWrappers * Support retain_graph by using ClearTensorWrappers * Update retain_graph and no_grad_vars related test case * Update code gen logic for ClearTensorWrappers * Fix by override statement * fix override func args * Support retain_graph, update unit tests * Updated ClearTensorWrappers logic * fix grad python interface * Use deep copy and update unit tests * Polish code * Polish code * Fix CI issue, Deep copy only use when user set grad_tensors * Fix CI, use Backward instead RunBackward * Fix CI, Declare kernel explicitly in test file * Polish, remove vector of TensorWrapper * Refactor the logic of grad/backward, polish codes * Update code after merge upstream develop * Polish after merge upstream develop * Update to adapt new GradNodeBase superclass * Fix error introduced during conflict resolution * support inplace strategy in eager_fluid state * solve conflict * nothing * Update purify potential_startup_nodes logic * Fix errors * Polish code * Remove useless args for ToPyObject * Remove useless TensorWrappersSet * fix record conflict * Fix code-format, re-install pre-commit * fix tensor_wrapper bug * Fix pre-process logic for potential_startup_ops * Update unit tests, use eager mode * Fix conflicts * fix unittest timeout * little change Co-authored-by: Weilong Wu --- paddle/fluid/eager/api/utils/tensor_utils.cc | 3 +- .../auto_code_generator/eager_generator.cc | 396 ++++++++++++----- paddle/fluid/eager/tensor_wrapper.h | 48 +++ paddle/fluid/eager/utils.cc | 21 + paddle/fluid/eager/utils.h | 17 + paddle/fluid/pybind/eager_method.cc | 11 + .../pybind/eager_op_function_generator.cc | 73 +++- paddle/fluid/pybind/eager_utils.cc | 16 + paddle/fluid/pybind/eager_utils.h | 44 ++ paddle/fluid/pybind/op_function_common.cc | 25 ++ paddle/fluid/pybind/op_function_common.h | 5 + paddle/phi/api/include/tensor.h | 16 +- paddle/phi/api/lib/tensor.cc | 31 ++ .../fluid/tests/unittests/CMakeLists.txt | 1 + .../unittests/test_inplace_eager_fluid.py | 397 ++++++++++++++++++ 15 files changed, 991 insertions(+), 113 deletions(-) create mode 100644 python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py diff --git a/paddle/fluid/eager/api/utils/tensor_utils.cc b/paddle/fluid/eager/api/utils/tensor_utils.cc index 77c39d1b0a3..b485beca57a 100644 --- a/paddle/fluid/eager/api/utils/tensor_utils.cc +++ b/paddle/fluid/eager/api/utils/tensor_utils.cc @@ -30,7 +30,8 @@ namespace egr_utils_api { bool IsLeafTensor(const paddle::experimental::Tensor& target) { std::shared_ptr grad_node = EagerUtils::grad_node(target); - if (std::dynamic_pointer_cast(grad_node)) { + if (!grad_node || + std::dynamic_pointer_cast(grad_node)) { return true; } diff --git a/paddle/fluid/eager/auto_code_generator/eager_generator.cc 
b/paddle/fluid/eager/auto_code_generator/eager_generator.cc index d15c413339a..b8d59e8dd8b 100644 --- a/paddle/fluid/eager/auto_code_generator/eager_generator.cc +++ b/paddle/fluid/eager/auto_code_generator/eager_generator.cc @@ -979,7 +979,9 @@ static bool CollectGradInformationFromOpInfo( /* --------------------------------------------------- */ static std::string GenerateGradNodeCreationContent( const ForwardGenerationInfo& fwd_info, - const GradNodeGenerationInfo& bwd_info) { + const GradNodeGenerationInfo& bwd_info, + const std::string& trace_op_body_str, + std::map inplace_map = {}) { VLOG(6) << "Generating GradNode Creation codes"; const std::string& op_type = fwd_info.GetOpType(); @@ -998,7 +1000,8 @@ static std::string GenerateGradNodeCreationContent( // If single output slotname and not duplicable, // then generate: "egr::AutogradMeta* p_autograd_out = // egr::EagerUtils::autograd_meta("op_proto->outputs()[0].name()")" - std::string get_autograd_meta_str = " // Prepare Autograd Meta \n"; + std::string get_input_autograd_meta_str = " // Prepare Autograd Meta \n"; + std::string get_output_autograd_meta_str = ""; // If single output slotname and not duplicable, // then generate: "egr::AutogradMeta* p_autograd_out = // egr::EagerUtils::autograd_meta("op_proto.outputs()[0].name()")" @@ -1006,22 +1009,39 @@ static std::string GenerateGradNodeCreationContent( const std::string& output_name = output.name(); const std::string& output_autograd_name = "p_autograd_" + output_name; + // output autograd_meta should be got after running TraceOP. if (output.duplicable()) { const char* GET_MULTI_AUTOGRAD_META_TEMPLATE = - " std::vector %s = " + " std::vector %s = " "egr::EagerUtils::autograd_meta(&%s);\n"; - get_autograd_meta_str += paddle::string::Sprintf( + get_output_autograd_meta_str += paddle::string::Sprintf( GET_MULTI_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name); } else { - const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE = - " egr::AutogradMeta* %s = " - "egr::EagerUtils::autograd_meta(&%s);\n"; - get_autograd_meta_str += paddle::string::Sprintf( - GET_SINGLE_AUTOGRAD_META_TEMPLATE, output_autograd_name, output_name); + // In inplace op, the case where output is duplicable is not considered. + // Replace output directly with input in inplace op. + if (!inplace_map.empty() && inplace_map.count(output_name)) { + auto inplace_input_name = inplace_map[output_name]; + const std::string& inplace_input_autograd_name = + "p_autograd_" + inplace_input_name; + const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE = + " %s = egr::EagerUtils::autograd_meta(&%s);\n"; + get_output_autograd_meta_str += paddle::string::Sprintf( + GET_SINGLE_AUTOGRAD_META_TEMPLATE, inplace_input_autograd_name, + inplace_input_name); + } else { + const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE = + " egr::AutogradMeta* %s = " + "egr::EagerUtils::autograd_meta(&%s);\n"; + get_output_autograd_meta_str += + paddle::string::Sprintf(GET_SINGLE_AUTOGRAD_META_TEMPLATE, + output_autograd_name, output_name); + } } } VLOG(6) << "Generated outputs autograd_meta"; + // input autograd_meta should be got before running TraceOP (for checking + // inplace). 
for (const proto::OpProto::Var& input : in_vars) { const std::string& input_name = input.name(); const std::string& input_autograd_name = "p_autograd_" + input_name; @@ -1030,28 +1050,46 @@ static std::string GenerateGradNodeCreationContent( const char* GET_MULTI_AUTOGRAD_META_TEMPLATE = " std::vector %s = " "egr::EagerUtils::nullable_autograd_meta(%s);\n"; - get_autograd_meta_str += paddle::string::Sprintf( + get_input_autograd_meta_str += paddle::string::Sprintf( GET_MULTI_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name); } else if (input.dispensable()) { const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE = " egr::AutogradMeta* %s = " "egr::EagerUtils::nullable_autograd_meta(%s);\n"; - get_autograd_meta_str += paddle::string::Sprintf( + get_input_autograd_meta_str += paddle::string::Sprintf( GET_SINGLE_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name); } else { const char* GET_SINGLE_AUTOGRAD_META_TEMPLATE = " egr::AutogradMeta* %s = " "egr::EagerUtils::nullable_autograd_meta(%s);\n"; - get_autograd_meta_str += paddle::string::Sprintf( + get_input_autograd_meta_str += paddle::string::Sprintf( GET_SINGLE_AUTOGRAD_META_TEMPLATE, input_autograd_name, input_name); } } VLOG(6) << "Generated inputs autograd_meta"; + // check inplace input to avoid inplace operations on leaf nodes with + // stop_gradient=False. + std::string check_inplace_str = ""; + if (!inplace_map.empty()) { + const char* CHECKING_INPLACE_TEMPLATE = + " // Check Inplace\n" + " egr::EagerUtils::CheckInplace(%s, p_autograd_%s, " + "require_any_grad);\n"; + for (auto& inplace_pair : inplace_map) { + std::string inplace_name = inplace_pair.second; + check_inplace_str += paddle::string::Sprintf(CHECKING_INPLACE_TEMPLATE, + inplace_name, inplace_name); + } + VLOG(6) << "Check Inplace Input"; + } + std::string prepare_autograd_meta_str = ""; - prepare_autograd_meta_str += get_autograd_meta_str; + // only generate input autograd_meta in temporary. + // output autograd_meta will be generated after running TraceOP. 
+ prepare_autograd_meta_str += get_input_autograd_meta_str; prepare_autograd_meta_str += "\n"; // [GradOpNode] GetTraceBackward @@ -1066,7 +1104,7 @@ static std::string GenerateGradNodeCreationContent( size_t bwd_in_slot_num = out_vars.size(); size_t bwd_out_slot_num = in_vars.size(); const char* GRAD_OP_NODE_TEMPLATE = - " auto grad_node = std::make_shared(%d, %d);\n"; + " auto grad_node = std::make_shared(%d, %d);\n"; grad_node_creation_str += " // Create GradOpNode\n"; grad_node_creation_str += paddle::string::Sprintf( GRAD_OP_NODE_TEMPLATE, op_type, bwd_in_slot_num, bwd_out_slot_num); @@ -1075,14 +1113,14 @@ static std::string GenerateGradNodeCreationContent( VLOG(6) << "Generated GradOpNode construction"; // [GradOpNode] Set Attrs - grad_node_creation_str += " // Set Attributes\n"; - grad_node_creation_str += " grad_node->SetAttrMap(std::move(attrs));\n"; + grad_node_creation_str += " // Set Attributes\n"; + grad_node_creation_str += " grad_node->SetAttrMap(std::move(attrs));\n"; grad_node_creation_str += - " grad_node->SetDefaultAttrMap(std::move(default_attrs));\n"; + " grad_node->SetDefaultAttrMap(std::move(default_attrs));\n"; grad_node_creation_str += "\n"; // [GradOpNode] Set TensorWrappers - grad_node_creation_str += " // Set Tensor Wrappers\n"; + grad_node_creation_str += " // Set Tensor Wrappers\n"; for (const auto& iter : op_base_infos) { const std::map& grad_ins_fwd_slotname_map = iter.GetGradInsFwdSlotnameMap(); @@ -1094,10 +1132,18 @@ static std::string GenerateGradNodeCreationContent( full_reserved = "true"; } const char* SET_TENSOR_WRAPPER_TEMPLATE = - " grad_node->SetTensorWrapper%s(%s, %s);\n"; - grad_node_creation_str += paddle::string::Sprintf( - SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, tensor_wrapper_name, - full_reserved); + " grad_node->SetTensorWrapper%s(%s, %s);\n"; + // Replace output directly with input in inplace op. 
+ if (!inplace_map.empty() && inplace_map.count(tensor_wrapper_name)) { + auto inplace_input_name = inplace_map[tensor_wrapper_name]; + grad_node_creation_str += paddle::string::Sprintf( + SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, + inplace_input_name, full_reserved); + } else { + grad_node_creation_str += paddle::string::Sprintf( + SET_TENSOR_WRAPPER_TEMPLATE, tensor_wrapper_name, + tensor_wrapper_name, full_reserved); + } } } grad_node_creation_str += "\n"; @@ -1115,12 +1161,12 @@ static std::string GenerateGradNodeCreationContent( size_t input_position = fwd_inputs_name_pos_map.at(input_name); const char* SET_GRAD_OUT_META_TEMPLATE = - " grad_node->SetGradOutMeta(%s, %d);\n"; + " grad_node->SetGradOutMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( SET_GRAD_OUT_META_TEMPLATE, input_name, input_position); const char* ADD_EDGES_TEMPLATE = - " if(%s) grad_node->AddEdges(%s, %d);\n"; + " if(%s) grad_node->AddEdges(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf(ADD_EDGES_TEMPLATE, input_autograd_name, input_autograd_name, input_position); @@ -1129,11 +1175,11 @@ static std::string GenerateGradNodeCreationContent( size_t input_position = fwd_inputs_name_pos_map.at(input_name); const char* SET_GRAD_OUT_META_TEMPLATE = - " grad_node->SetGradOutMeta(%s, %d);\n"; + " grad_node->SetGradOutMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( SET_GRAD_OUT_META_TEMPLATE, input_name, input_position); - const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n"; + const char* ADD_EDGES_TEMPLATE = " grad_node->AddEdges(&%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( ADD_EDGES_TEMPLATE, input_autograd_name, input_position); } @@ -1145,73 +1191,125 @@ static std::string GenerateGradNodeCreationContent( std::string pass_stop_gradient_args = "false"; for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); - const std::string& output_autograd_name = "p_autograd_" + output_name; - size_t output_position = fwd_outputs_name_pos_map.at(output_name); - - // Intermediate Tensor does not require SetHistory, nor RetainGrad - - if (output.duplicable()) { - pass_stop_gradient_args += ", &" + output_autograd_name; + // Replace output directly with input in inplace op. 
+ if (!inplace_map.empty() && inplace_map.count(output_name)) { + auto inplace_input_name = inplace_map[output_name]; + const std::string& inplace_input_autograd_name = + "p_autograd_" + inplace_input_name; + size_t output_position = fwd_outputs_name_pos_map.at(output_name); + + // Intermediate Tensor does not require SetHistory, nor RetainGrad + pass_stop_gradient_args += ", " + inplace_input_autograd_name; const char* SET_OUT_RANK_TEMPLATE = - " egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n"; + " egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( - SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position); + SET_OUT_RANK_TEMPLATE, inplace_input_autograd_name, output_position); // Intermediate Tensor does not require SetHistory if (!output.intermediate()) { const char* SET_HISTORY_TEMPLATE = - " egr::EagerUtils::SetHistory(&%s, grad_node);\n"; - grad_node_creation_str += - paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name); + " egr::EagerUtils::SetHistory(%s, grad_node);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_HISTORY_TEMPLATE, inplace_input_autograd_name); } const char* SET_GRAD_IN_META_TEMPLATE = - " grad_node->SetGradInMeta(%s, %d);\n"; + " grad_node->SetGradInMeta(%s, %d);\n"; grad_node_creation_str += paddle::string::Sprintf( - SET_GRAD_IN_META_TEMPLATE, output_name, output_position); + SET_GRAD_IN_META_TEMPLATE, inplace_input_name, output_position); + // Intermediate Tensor does not require CheckAndRetainGrad + if (!output.intermediate()) { + VLOG(6) << "Generated Call RetainGradForTensor"; + const char* RETAIN_GRAD_TEMPLATE = + " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; + grad_node_creation_str += + paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, inplace_input_name); + } } else { - pass_stop_gradient_args += ", " + output_autograd_name; - const char* SET_OUT_RANK_TEMPLATE = - " egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n"; - grad_node_creation_str += paddle::string::Sprintf( - SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position); + const std::string& output_autograd_name = "p_autograd_" + output_name; + size_t output_position = fwd_outputs_name_pos_map.at(output_name); - // Intermediate Tensor does not require SetHistory + // Intermediate Tensor does not require SetHistory, nor RetainGrad + + if (output.duplicable()) { + pass_stop_gradient_args += ", &" + output_autograd_name; + const char* SET_OUT_RANK_TEMPLATE = + " egr::EagerUtils::SetOutRankWithSlot(&%s, %d);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position); + + // Intermediate Tensor does not require SetHistory + if (!output.intermediate()) { + const char* SET_HISTORY_TEMPLATE = + " egr::EagerUtils::SetHistory(&%s, grad_node);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_HISTORY_TEMPLATE, output_autograd_name); + } + const char* SET_GRAD_IN_META_TEMPLATE = + " grad_node->SetGradInMeta(%s, %d);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_GRAD_IN_META_TEMPLATE, output_name, output_position); + + } else { + pass_stop_gradient_args += ", " + output_autograd_name; + const char* SET_OUT_RANK_TEMPLATE = + " egr::EagerUtils::SetOutRankWithSlot(%s, %d);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_OUT_RANK_TEMPLATE, output_autograd_name, output_position); + + // Intermediate Tensor does not require SetHistory + if (!output.intermediate()) { + const char* SET_HISTORY_TEMPLATE = + " 
egr::EagerUtils::SetHistory(%s, grad_node);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_HISTORY_TEMPLATE, output_autograd_name); + } + const char* SET_GRAD_IN_META_TEMPLATE = + " grad_node->SetGradInMeta(%s, %d);\n"; + grad_node_creation_str += paddle::string::Sprintf( + SET_GRAD_IN_META_TEMPLATE, output_name, output_position); + } + + // Intermediate Tensor does not require CheckAndRetainGrad if (!output.intermediate()) { - const char* SET_HISTORY_TEMPLATE = - " egr::EagerUtils::SetHistory(%s, grad_node);\n"; + VLOG(6) << "Generated Call RetainGradForTensor"; + const char* RETAIN_GRAD_TEMPLATE = + " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; grad_node_creation_str += - paddle::string::Sprintf(SET_HISTORY_TEMPLATE, output_autograd_name); + paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name); } - const char* SET_GRAD_IN_META_TEMPLATE = - " grad_node->SetGradInMeta(%s, %d);\n"; - grad_node_creation_str += paddle::string::Sprintf( - SET_GRAD_IN_META_TEMPLATE, output_name, output_position); - } - - // Intermediate Tensor does not require CheckAndRetainGrad - if (!output.intermediate()) { - VLOG(6) << "Generated Call RetainGradForTensor"; - const char* RETAIN_GRAD_TEMPLATE = - " egr::EagerUtils::CheckAndRetainGrad(%s);\n"; - grad_node_creation_str += - paddle::string::Sprintf(RETAIN_GRAD_TEMPLATE, output_name); } } VLOG(6) << "Generated SetGradIn/OutMeta"; // [Generation] GradNode Creation + // After getting require_any_grad, firstly use CheckInplace method for inplace + // op. + // Then execute TraceOp and generate output autograd_meta. + // Finally, Construct GradNode. (Replace output directly with input in inplace + // op.) + // Add event record + std::string event_name = op_type + " node_creation"; const char* GRAD_NODE_CREATION_TEMPLATE = - " %s" + "%s" " bool require_any_grad = egr::EagerUtils::ComputeRequireGrad(%s);\n" - " if(require_any_grad) {\n" - " VLOG(6) << \" Construct Grad for %s \"; \n" - " egr::EagerUtils::PassStopGradient(%s);\n" - "%s\n }"; + "%s\n" + "%s" + " {\n" + " paddle::platform::RecordEvent node_creation_record_event(\"%s\", " + "paddle::platform::TracerEventType::Operator, 1);\n" + "%s" + " if(require_any_grad) {\n" + " VLOG(6) << \" Construct Grad for %s \"; \n" + " egr::EagerUtils::PassStopGradient(%s);\n" + " %s\n" + " }\n" + " }"; std::string grad_node_creation_body_str = paddle::string::Sprintf( GRAD_NODE_CREATION_TEMPLATE, prepare_autograd_meta_str, - compute_require_grad_args, op_type, pass_stop_gradient_args, - grad_node_creation_str); + compute_require_grad_args, check_inplace_str, trace_op_body_str, + event_name, get_output_autograd_meta_str, op_type, + pass_stop_gradient_args, grad_node_creation_str); return grad_node_creation_body_str; } @@ -1221,7 +1319,8 @@ static std::string GenerateGradNodeCreationContent( /* -------------------------------- */ static std::pair GenerateForwardFunctionContents( const ForwardGenerationInfo& fwd_info, - const GradNodeGenerationInfo& bwd_info) { + const GradNodeGenerationInfo& bwd_info, + std::map inplace_map = {}) { /* --- Process Forward Info ---*/ const std::string& op_type = fwd_info.GetOpType(); const std::unordered_map& fwd_inputs_name_pos_map = @@ -1301,8 +1400,21 @@ static std::pair GenerateForwardFunctionContents( core_ops_args_type_info[op_type][input_position] = "list"; } else { - const char* FWD_INS_ARG_TEMPLATE = - "const paddle::experimental::Tensor& %s"; + // inplace tensor can't be const + const char* FWD_INS_ARG_TEMPLATE; + bool flag_find_input_name = false; + if 
(!inplace_map.empty()) { + for (auto& inplace_pair : inplace_map) { + if (inplace_pair.second == input_name) { + flag_find_input_name = true; + FWD_INS_ARG_TEMPLATE = "paddle::experimental::Tensor& %s"; + break; + } + } + } + if (!flag_find_input_name) { + FWD_INS_ARG_TEMPLATE = "const paddle::experimental::Tensor& %s"; + } input_args_str_list[input_position] = paddle::string::Sprintf(FWD_INS_ARG_TEMPLATE, input_name); @@ -1362,6 +1474,7 @@ static std::pair GenerateForwardFunctionContents( // [Generation] Get Outs Map std::string outs_contents_str = ""; + std::string inplace_mapping_str = ""; for (const proto::OpProto::Var& output : out_vars) { const std::string& output_name = output.name(); std::string outnum = "1"; @@ -1404,6 +1517,22 @@ static std::pair GenerateForwardFunctionContents( } core_ops_args_info[op_type].push_back(output_var_name); + } else if (!inplace_map.empty() && inplace_map.count(output_name)) { + // In inplace op, replace the output with the input directly. + PADDLE_ENFORCE_NE( + inplace_map[output_name], "", + paddle::platform::errors::InvalidArgument( + "Inplace op %s has no input corresponding to output %s.", op_type, + output_name)); + const char* FWD_OUTS_CONTENT_TEMPLATE = "{ \"%s\", ins[\"%s\"] },"; + auto inplace_input_name = inplace_map[output_name]; + outs_contents_str += paddle::string::Sprintf( + FWD_OUTS_CONTENT_TEMPLATE, output_name, inplace_input_name); + + // inplace_map used in TraceOp. + const char* INPLACE_MAPPING_TEMPLATE = R"({"%s", "%s"},)"; + inplace_mapping_str += paddle::string::Sprintf( + INPLACE_MAPPING_TEMPLATE, inplace_input_name, output_name); } else { if (output.duplicable()) { outnum = output_name + "Num"; @@ -1430,6 +1559,8 @@ static std::pair GenerateForwardFunctionContents( } if (outs_contents_str.size() > 0) outs_contents_str.pop_back(); // Remove trailing "," + if (inplace_mapping_str.size() > 0) + inplace_mapping_str.pop_back(); // Remove trailing "," const char* FWD_OUTS_MAP_TEMPLATE = " std::map GenerateForwardFunctionContents( dygraph_function_args_str += ", const paddle::framework::AttributeMap& attr_map"; + /* --------- Generate TraceOp ----- */ + // TraceOp should be run after compute require_any_grad. (for checking + // inplace) + // `trace_op_body_str` will be passed as a parameter to + // `GenerateGradNodeCreationContent`. 
+ std::string trace_op_body_str = ""; // [Generation] Get TraceOp const char* FWD_TRACE_OP_TEMPLATE = " paddle::framework::AttributeMap attrs = attr_map;\n" @@ -1470,11 +1607,12 @@ static std::pair GenerateForwardFunctionContents( " egr::Controller::Instance().GetCurrentTracer()->TraceOp(\"%s\", ins, " "outs, attrs, \n" " egr::Controller::Instance().GetExpectedPlace(),\n" - " &default_attrs, true, {});\n"; - std::string trace_op_str = - paddle::string::Sprintf(FWD_TRACE_OP_TEMPLATE, op_type); - generated_function_body += trace_op_str; - generated_function_body += "\n"; + " &default_attrs, true, {%s});\n"; + std::string trace_op_str = paddle::string::Sprintf( + FWD_TRACE_OP_TEMPLATE, op_type, inplace_mapping_str); + + trace_op_body_str += trace_op_str; + trace_op_body_str += "\n"; VLOG(6) << "Generated AttrMap & TraceOp"; @@ -1539,48 +1677,64 @@ static std::pair GenerateForwardFunctionContents( output_varname, output_var_args_name); } } else { - const char* FWD_OUT_TENSOR_TEMPLATE = - " paddle::experimental::Tensor %s;\n" - " egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n"; - out_tensor_str = - paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, output_varname, - output_name, output_varname); + if (!inplace_map.empty() && inplace_map.count(output_name)) { + // Modify meta info of inplace tensor. + // Bump inplace version of inplace tensor. + auto inplace_input_name = inplace_map[output_name]; + const char* FWD_OUT_TENSOR_TEMPLATE = + " egr::EagerUtils::ModifyInplaceInput(outs[\"%s\"][0], &%s);\n" + " %s.bump_inplace_version();\n" + " VLOG(3) << \"Tensor(\" << %s.name() << \") uses Inplace " + "Strategy.\";\n"; + out_tensor_str = paddle::string::Sprintf( + FWD_OUT_TENSOR_TEMPLATE, output_name, inplace_input_name, + inplace_input_name, inplace_input_name); + } else { + const char* FWD_OUT_TENSOR_TEMPLATE = + " paddle::experimental::Tensor %s;\n" + " egr::EagerUtils::GetOutput(outs[\"%s\"][0], &%s);\n"; + out_tensor_str = + paddle::string::Sprintf(FWD_OUT_TENSOR_TEMPLATE, output_varname, + output_name, output_varname); + } } return_types[return_position] = "paddle::experimental::Tensor"; } - return_contents[return_position] = output_varname; - generated_function_body += out_tensor_str; + if (!inplace_map.empty() && inplace_map.count(output_name)) { + // Replace output directly with input in inplace op. 
+ return_contents[return_position] = inplace_map[output_name]; + } else { + return_contents[return_position] = output_varname; + } + trace_op_body_str += out_tensor_str; } - generated_function_body += "\n"; + trace_op_body_str += "\n"; VLOG(6) << "Converted Output VarBase to EagerVariable(s)"; + /* ------ END Generate TraceOp ----- */ // [Generation] Handle core_ops_returns_info - core_ops_returns_info[op_type] = return_contents; + // avoid inplace op changing core_ops_returns_info + if (core_ops_returns_info.empty() || !core_ops_returns_info.count(op_type)) { + core_ops_returns_info[op_type] = return_contents; + } // [Generation] ComputeRequireGrad -> GradNodeCreation if (!bwd_info.GenerateForwardOnly()) { - std::string grad_node_creation_body_str = - GenerateGradNodeCreationContent(fwd_info, bwd_info); - - // Add event record - std::string event_name = op_type + " node_creation"; - const char* NODE_CREATION_TEMPLATE = - "{\n" - " paddle::platform::RecordEvent node_creation_record_event(\"%s\", " - "paddle::platform::TracerEventType::Operator, 1);\n" - " %s\n" - "}"; - - grad_node_creation_body_str = paddle::string::Sprintf( - NODE_CREATION_TEMPLATE, event_name, grad_node_creation_body_str); + // If GradNode needs to be generated, pass `trace_op_body_str` + // into `GenerateGradNodeCreationContent`. + std::string grad_node_creation_body_str = GenerateGradNodeCreationContent( + fwd_info, bwd_info, trace_op_body_str, inplace_map); generated_function_body += grad_node_creation_body_str; generated_function_body += "\n"; // [Generation] Call RetainGradForTensor VLOG(6) << "Generated GradNode Creation codes"; + } else { + // If GradNode doesn't need to be generated, generate TraceOP directly. + generated_function_body += trace_op_body_str; } // [Generation] Handle return: Tuple/Vector/Tensor @@ -1627,7 +1781,13 @@ static std::pair GenerateForwardFunctionContents( VLOG(6) << "Generated return codes"; // [Generation] Get Full Function - std::string function_name = op_type + "_dygraph_function"; + std::string function_name; + if (inplace_map.empty()) { + function_name = op_type + "_dygraph_function"; + } else { + // change function_name for inplace op. 
+ function_name = op_type + "__dygraph_function"; + } if (dygraph_function_args_str.size() > 0) { auto iter = dygraph_function_args_str.begin(); @@ -1635,15 +1795,15 @@ static std::pair GenerateForwardFunctionContents( } const char* DYGRAPH_FUNCTION_EVENT_RECORD_FUNCTION_TEMPLATE = - "paddle::platform::RecordEvent dygraph_entrance_record_event(\"%s\", " + " paddle::platform::RecordEvent dygraph_entrance_record_event(\"%s\", " "paddle::platform::TracerEventType::Operator, 1);"; std::string event_name = op_type + " dygraph"; std::string fwd_record_event_str = paddle::string::Sprintf( DYGRAPH_FUNCTION_EVENT_RECORD_FUNCTION_TEMPLATE, event_name); const char* FWD_FUNCTION_TEMPLATE = "%s %s(%s) {\n\n" - " %s\n" - " %s\n" + "%s\n" + "%s\n" "}\n\n"; std::string fwd_function_str = paddle::string::Sprintf( FWD_FUNCTION_TEMPLATE, function_proto_return_type_str, function_name, @@ -2426,7 +2586,7 @@ static void DygraphCodeGeneration(const std::string& output_dir) { /* --------------------------- */ VLOG(6) << "-------- GenerateForwardFunctionContents -------"; std::pair body_and_declaration = - GenerateForwardFunctionContents(fwd_info, bwd_info); + GenerateForwardFunctionContents(fwd_info, bwd_info, {}); fwd_function_str += body_and_declaration.first + "\n"; @@ -2434,6 +2594,30 @@ static void DygraphCodeGeneration(const std::string& output_dir) { std::string fwd_function_declare_str = body_and_declaration.second; dygraph_forward_api_str += fwd_function_declare_str; + auto& infer_inplace = + paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_; + std::map inplace_map; + // Inplace Function Generator. + // `sum` op has duplicate input. Don't consider adding inplace strategy + // for `sum` in temporary. + if (op_type != "sum" && infer_inplace) { + auto in_to_outs = infer_inplace(true); + for (auto& inplace_pair : in_to_outs) { + inplace_map[inplace_pair.second] = inplace_pair.first; + } + + VLOG(6) << "-------- GenerateInplaceForwardFunctionContents -------"; + std::pair inplace_body_and_declaration = + GenerateForwardFunctionContents(fwd_info, bwd_info, inplace_map); + + fwd_function_str += inplace_body_and_declaration.first + "\n"; + + VLOG(6) << "-------- GenerateInplaceDygraphForwardAPIContents -------"; + std::string inplace_fwd_function_declare_str = + inplace_body_and_declaration.second; + dygraph_forward_api_str += inplace_fwd_function_declare_str; + } + if (bwd_info.GenerateForwardOnly()) continue; VLOG(6) << "-------- GenerateGradNodeHeaderContents -------"; diff --git a/paddle/fluid/eager/tensor_wrapper.h b/paddle/fluid/eager/tensor_wrapper.h index 0e11444b815..8da27f3bb8a 100644 --- a/paddle/fluid/eager/tensor_wrapper.h +++ b/paddle/fluid/eager/tensor_wrapper.h @@ -36,6 +36,15 @@ class TensorWrapper { explicit TensorWrapper(const paddle::experimental::Tensor& tensor, bool full_reserved = false, bool no_need_buffer = false) { + // set inplace_version_snapshot_ according to tensor's current inplace + // version. + if (tensor.impl() && phi::DenseTensor::classof(tensor.impl().get())) { + phi::DenseTensor* dense_tensor = + static_cast(tensor.impl().get()); + auto& inplace_version_counter = dense_tensor->InplaceVersionCounter(); + inplace_version_snapshot_ = inplace_version_counter.CurrentVersion(); + } + /** * Normally, we should fully reserved all non-output or non-leaf fwd tensor * here. 
And for fwd output tensor, we should not reserve its autogradmeta, @@ -49,6 +58,7 @@ class TensorWrapper { } // shallow copy tensor_impl here + no_need_buffer_ = no_need_buffer; if (no_need_buffer) { if (phi::DenseTensor::classof(tensor.impl().get())) { // Only Copy Meta @@ -86,6 +96,7 @@ class TensorWrapper { // if it's full_reserved just return the full copy of tensor if (full_reserved_) { + check_inplace_version(); return intermidiate_tensor_; } else { std::shared_ptr new_grad_node = grad_node; @@ -94,15 +105,52 @@ class TensorWrapper { intermidiate_tensor_.set_autograd_meta( std::static_pointer_cast( p_ab_autograd_meta)); + check_inplace_version(); return intermidiate_tensor_; } } + void check_inplace_version() { + if (no_need_buffer_) { + VLOG(6) << "There's no need to check inplace_version because " + "no_need_buffer_ is true."; + return; + } + if (intermidiate_tensor_.impl() && + phi::DenseTensor::classof(intermidiate_tensor_.impl().get())) { + phi::DenseTensor* dense_tensor = + static_cast(intermidiate_tensor_.impl().get()); + auto& inplace_version_counter = dense_tensor->InplaceVersionCounter(); + + uint32_t current_inplace_version = + inplace_version_counter.CurrentVersion(); + PADDLE_ENFORCE_EQ( + current_inplace_version, inplace_version_snapshot_, + paddle::platform::errors::PermissionDenied( + "Tensor '%s' used in gradient computation has been " + "modified by an inplace operation. " + "Its version is %d but the expected version is %d. " + "Please fix your code to void calling an inplace operator " + "after using the Tensor which will used in gradient " + "computation.", + intermidiate_tensor_.name(), current_inplace_version, + inplace_version_snapshot_)); + VLOG(6) << " The inplace_version_snapshot_ of Tensor '" + << intermidiate_tensor_.name() << "' is [ " + << inplace_version_snapshot_ << " ]"; + VLOG(6) << " The current_inplace_version of Tensor '" + << intermidiate_tensor_.name() << "' is [ " + << current_inplace_version << " ]"; + } + } + void clear() { intermidiate_tensor_.reset(); } private: bool full_reserved_ = false; + bool no_need_buffer_ = false; std::pair out_rank_info_; paddle::experimental::Tensor intermidiate_tensor_; + uint32_t inplace_version_snapshot_ = 0; }; } // namespace egr diff --git a/paddle/fluid/eager/utils.cc b/paddle/fluid/eager/utils.cc index 8a57d269453..048087903a4 100644 --- a/paddle/fluid/eager/utils.cc +++ b/paddle/fluid/eager/utils.cc @@ -212,6 +212,27 @@ std::vector> EagerUtils::CreateVars( return res; } +void EagerUtils::ModifyInplaceInput( + const std::shared_ptr& inplace_variable, + paddle::experimental::Tensor* inplace_tensor) { + // Only modify the meta information of the inplace tensor, because + // EagerVariable cannot modify Tensor's meta information after inplace + // op (such as ``reshape``) is executed. + PADDLE_ENFORCE_NOT_NULL(inplace_tensor, + paddle::platform::errors::Fatal( + "Inplace Tensor is null and cannot be modified. 
" + "We are tring to Modify Inplace Input from its " + "shared_ptr, this error may indicate the inplace " + " input is nullptr")); + if (phi::DenseTensor::classof(inplace_variable->GetTensorBase().get())) { + phi::DenseTensor* variable_dense_tensor = + static_cast(inplace_variable->GetTensorBase().get()); + phi::DenseTensor* tensor_dense_tensor = + static_cast(inplace_tensor->impl().get()); + tensor_dense_tensor->set_meta(variable_dense_tensor->meta()); + } +} + std::vector EagerUtils::GetOutputs( const std::vector>& outs) { std::vector res; diff --git a/paddle/fluid/eager/utils.h b/paddle/fluid/eager/utils.h index fa5735e6f32..fbd080ef70e 100644 --- a/paddle/fluid/eager/utils.h +++ b/paddle/fluid/eager/utils.h @@ -14,6 +14,7 @@ #pragma once +#include "paddle/fluid/eager/api/utils/tensor_utils.h" #include "paddle/fluid/eager/autograd_meta.h" #include "paddle/fluid/eager/eager_tensor.h" #include "paddle/fluid/eager/grad_node_info.h" @@ -144,6 +145,19 @@ class EagerUtils { iter.apply(std::forward(args)...); } + static void CheckInplace(const paddle::experimental::Tensor& target, + const AutogradMeta* autograd_meta, + bool require_any_grad) { + if (require_any_grad && autograd_meta) { + PADDLE_ENFORCE_EQ(!autograd_meta->StopGradient() && + egr::egr_utils_api::IsLeafTensor(target), + false, paddle::platform::errors::InvalidArgument( + "Leaf Var (%s) that doesn't stop gradient " + "can't use inplace strategy.", + target.name())); + } + } + // TensorWrapper Utils static paddle::experimental::Tensor RecoverTensorWrapper( TensorWrapper* tw, const std::shared_ptr& grad_node); @@ -171,6 +185,9 @@ class EagerUtils { static std::vector> CreateVars( const size_t num); // Construct Tensor From var + static void ModifyInplaceInput( + const std::shared_ptr& inplace_variable, + paddle::experimental::Tensor* inplace_tensor); static std::vector GetOutputs( const std::vector>& outs); static paddle::experimental::Tensor GetOutput( diff --git a/paddle/fluid/pybind/eager_method.cc b/paddle/fluid/pybind/eager_method.cc index d4bbfa0e66e..e0a3931c3e3 100644 --- a/paddle/fluid/pybind/eager_method.cc +++ b/paddle/fluid/pybind/eager_method.cc @@ -718,6 +718,15 @@ static PyObject* set_grad_type(TensorObject* self, PyObject* args, EAGER_CATCH_AND_THROW_RETURN_NULL } +static PyObject* tensor__inplace_version(TensorObject* self, PyObject* args, + PyObject* kwargs) { + EAGER_TRY + uint32_t inplace_version = self->tensor.current_inplace_version(); + + return ToPyObject(inplace_version); + EAGER_CATCH_AND_THROW_RETURN_NULL +} + PyMethodDef variable_methods[] = { {"numpy", (PyCFunction)(void (*)(void))tensor_method_numpy, METH_VARARGS | METH_KEYWORDS, NULL}, @@ -766,6 +775,8 @@ PyMethodDef variable_methods[] = { METH_VARARGS | METH_KEYWORDS, NULL}, {"_set_grad_type", (PyCFunction)(void (*)(void))set_grad_type, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_inplace_version", (PyCFunction)(void (*)(void))tensor__inplace_version, + METH_VARARGS | METH_KEYWORDS, NULL}, {NULL, NULL, 0, NULL}}; } // namespace pybind diff --git a/paddle/fluid/pybind/eager_op_function_generator.cc b/paddle/fluid/pybind/eager_op_function_generator.cc index 102cdbb91ab..685e20aef25 100644 --- a/paddle/fluid/pybind/eager_op_function_generator.cc +++ b/paddle/fluid/pybind/eager_op_function_generator.cc @@ -162,17 +162,22 @@ static inline std::string TempName(const std::string& name) { std::string GenerateOpFunctionsBody( const paddle::framework::proto::OpProto* op_proto, std::string func_name, - bool use_inplace_strategy = false, std::map inplace_map = {}) { 
auto& op_type = op_proto->type(); std::string input_args = ""; - std::string call_api_str = "auto out = " + op_type + "_dygraph_function("; + std::string call_api_str = ""; std::string ins_initializer_with_null = ""; std::string py_arg = ""; int arg_idx = 0; int input_args_num = 0; std::string ins_cast_str = ""; std::string view_strategy_str = ""; + if (!inplace_map.empty()) { + // change call_api_str for inplace op + call_api_str = "auto out = " + op_type + "__dygraph_function("; + } else { + call_api_str = "auto out = " + op_type + "_dygraph_function("; + } for (auto& input : op_proto->inputs()) { auto& in_name = input.name(); // skip those dispensable inputs, like ResidualData in conv2d @@ -288,8 +293,31 @@ std::string GenerateOpFunctionsBody( HANDLE_VIEW_BETWEEN_INPUT_AND_OUTPUT, viwe_input_name, viwe_output_name, viwe_input_name, viwe_output_name); } - - return_str = "return ToPyObject(out);"; + if (!inplace_map.empty()) { + // For inplace op, Use the input PyObject directly. + for (auto& inplace_pair : inplace_map) { + // Find index of inplace tensor, and directly use input PyObject. + std::string inplace_arg_name = inplace_pair.second; + std::string inplace_return_name = inplace_pair.first; + const char* RETURN_INPLACE_TENSOR_TEMPLATE = + "ssize_t arg_id = GetIdxFromCoreOpsInfoMap(core_ops_args_info, " + "\"%s\", \"%s\");\n" + " ssize_t return_id = " + "GetIdxFromCoreOpsInfoMap(core_ops_returns_info, \"%s\", \"%s\");\n" + " return ToPyObject(out, return_id, args, arg_id);"; + return_str = paddle::string::Sprintf(RETURN_INPLACE_TENSOR_TEMPLATE, + op_type, inplace_arg_name, op_type, + inplace_return_name); + // only support one inplace_var in temporary. + PADDLE_ENFORCE_EQ( + inplace_map.size(), 1, + paddle::platform::errors::InvalidArgument( + "size of inplace_map must be 1, but got %d", inplace_map.size())); + break; + } + } else { + return_str = "return ToPyObject(out);"; + } std::string function_args = ""; if (input_args == "") { @@ -383,7 +411,8 @@ GenerateOpFunctions() { continue; } std::string func_name = "eager_api_" + op_type; - std::string op_function_str = GenerateOpFunctionsBody(op_proto, func_name); + std::string op_function_str = + GenerateOpFunctionsBody(op_proto, func_name, {}); // generate pybind item auto bind_function_str = paddle::string::Sprintf( @@ -391,6 +420,40 @@ GenerateOpFunctions() { op_function_list.emplace_back(std::move(op_function_str)); bind_function_list.emplace_back(std::move(bind_function_str)); + + // NOTE(pangyoki): Inplace Strategy. + // In this case, output will reuse input varbase. + // Dygraph mode needs to be aligned with the in-place strategy in static + // mode, and the mapping relationships between output and input that have + // been defined in static mode should be used in dygraph mode. + // Find which ops need to use Inplace strategy in static mode, and get the + // mapping relationship between Inplace output and input. + auto& infer_inplace = + paddle::framework::OpInfoMap::Instance().Get(op_type).infer_inplace_; + std::map inplace_map; + // `sum` op has duplicate input. Don't consider adding inplace strategy + // for `sum` in temporary. + if (op_type != "sum" && infer_inplace) { + // Inplace OP: op_type_. + // The inplace OP needs a new implementation method. 
+ auto in_to_outs = infer_inplace(true); + for (auto& inplace_pair : in_to_outs) { + inplace_map[inplace_pair.second] = inplace_pair.first; + } + + std::string inplace_op_type = op_type + "_"; + std::string inplace_func_name = "eager_api_" + inplace_op_type; + std::string inplace_op_function_str = + GenerateOpFunctionsBody(op_proto, inplace_func_name, inplace_map); + + // generate pybind item + auto inplace_bind_function_str = + paddle::string::Sprintf(PYBIND_ITEM_TEMPLATE, inplace_op_type, + inplace_func_name, inplace_op_type); + + op_function_list.emplace_back(std::move(inplace_op_function_str)); + bind_function_list.emplace_back(std::move(inplace_bind_function_str)); + } } if (append_custom_head_file) { op_function_list.emplace_back(CUSTOM_HANDWRITE_OP_FUNC_FILE); diff --git a/paddle/fluid/pybind/eager_utils.cc b/paddle/fluid/pybind/eager_utils.cc index 97bb32630d7..a23bb1230e1 100644 --- a/paddle/fluid/pybind/eager_utils.cc +++ b/paddle/fluid/pybind/eager_utils.cc @@ -417,6 +417,8 @@ PyObject* ToPyObject(bool value) { PyObject* ToPyObject(int value) { return PyLong_FromLong(value); } +PyObject* ToPyObject(uint32_t value) { return PyLong_FromUnsignedLong(value); } + PyObject* ToPyObject(int64_t value) { return PyLong_FromLongLong(value); } PyObject* ToPyObject(float value) { return PyLong_FromDouble(value); } @@ -442,6 +444,20 @@ PyObject* ToPyObject(const paddle::experimental::Tensor& value) { return obj; } +PyObject* ToPyObject(const paddle::experimental::Tensor& value, + ssize_t value_idx, PyObject* args, ssize_t arg_idx) { + // For inplace op, directly return the input PyObject of the inplace tensor. + // [Parameter] + // value: Useless parameter. + // value_idx: Useless parameter. + // args: Input PyObject. + // arg_idx: Index of inplace PyObject in input args. Used to find the input + // inplace PyObject. 
+ PyObject* obj = PyTuple_GET_ITEM(args, arg_idx); + Py_INCREF(obj); + return obj; +} + PyObject* ToPyObject(const std::vector& value) { PyObject* result = PyList_New((Py_ssize_t)value.size()); diff --git a/paddle/fluid/pybind/eager_utils.h b/paddle/fluid/pybind/eager_utils.h index 1c4e2ab69a5..fba1485bcf4 100644 --- a/paddle/fluid/pybind/eager_utils.h +++ b/paddle/fluid/pybind/eager_utils.h @@ -56,6 +56,7 @@ framework::proto::VarType::Type CastPyArg2ProtoType(PyObject* obj, ssize_t arg_pos); PyObject* ToPyObject(int value); +PyObject* ToPyObject(uint32_t value); PyObject* ToPyObject(bool value); PyObject* ToPyObject(int64_t value); PyObject* ToPyObject(float value); @@ -63,6 +64,8 @@ PyObject* ToPyObject(double value); PyObject* ToPyObject(const char* value); PyObject* ToPyObject(const std::string& value); PyObject* ToPyObject(const paddle::experimental::Tensor& value); +PyObject* ToPyObject(const paddle::experimental::Tensor& value, + ssize_t value_idx, PyObject* args, ssize_t arg_idx); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); PyObject* ToPyObject(const std::vector& value); @@ -84,6 +87,17 @@ struct TupleTensorResult { TupleTensorResult::Run(out, result); PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get(out))); } + + static void Run(const Tuple& out, PyObject* result, ssize_t value_idx, + PyObject* args, ssize_t arg_idx) { + TupleTensorResult::Run(out, result, value_idx, args, arg_idx); + if (N - 1 == value_idx) { + PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get(out), + value_idx, args, arg_idx)); + } else { + PyTuple_SET_ITEM(result, N - 1, ToPyObject(std::get(out))); + } + } }; template @@ -91,6 +105,16 @@ struct TupleTensorResult { static void Run(const Tuple& out, PyObject* result) { PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out))); } + + static void Run(const Tuple& out, PyObject* result, ssize_t value_idx, + PyObject* args, ssize_t arg_idx) { + if (value_idx == 0) { + PyTuple_SET_ITEM(result, 0, + ToPyObject(std::get<0>(out), value_idx, args, arg_idx)); + } else { + PyTuple_SET_ITEM(result, 0, ToPyObject(std::get<0>(out))); + } + } }; template @@ -103,6 +127,26 @@ PyObject* ToPyObject(const std::tuple& out) { return result; } +template +PyObject* ToPyObject(const std::tuple& out, ssize_t value_idx, + PyObject* args, ssize_t arg_idx) { + // For inplace op, directly return the input PyObject of the inplace tensor. + // [Parameter] + // out: Outputs tuple after executing op. + // value_idx: Index of inplace tensor in outputs tuple. Used to find the + // output inplace tensor. + // args: Input PyObject. + // arg_idx: Index of inplace PyObject in input args. Used to find the input + // inplace PyObject. + auto len = sizeof...(Args); + PyObject* result = PyTuple_New(len); + + TupleTensorResult::Run(out, result, value_idx, + args, arg_idx); + + return result; +} + paddle::experimental::Scalar CastPyArg2Scalar(PyObject* obj, const std::string& op_type, ssize_t arg_pos); diff --git a/paddle/fluid/pybind/op_function_common.cc b/paddle/fluid/pybind/op_function_common.cc index 09c3cea398b..1d483abd774 100644 --- a/paddle/fluid/pybind/op_function_common.cc +++ b/paddle/fluid/pybind/op_function_common.cc @@ -854,5 +854,30 @@ void InitOpsAttrTypeMap() { } } +ssize_t GetIdxFromCoreOpsInfoMap( + const std::unordered_map>& + core_ops_info_map, + const std::string& op_type, const std::string& name) { + // `core_ops_info_map` can be `core_ops_args_info` or `core_ops_returns_info`. 
+ // `core_ops_args_info`: get index from core_ops_args_info[op_type] according + // to input name. + // `core_ops_returns_info`: get index from core_ops_returns_info[op_type] + // according to return name. + if (!core_ops_info_map.count(op_type)) { + PADDLE_THROW(platform::errors::Fatal( + "Op %s is not found in core_ops_*_info map.", op_type)); + } else { + auto args_list = core_ops_info_map.at(op_type); + auto it = std::find(args_list.begin(), args_list.end(), name); + if (it == args_list.end()) { + PADDLE_THROW(platform::errors::Fatal("%s is not found in op %s's args.", + name, op_type)); + } else { + return std::distance(args_list.begin(), it); + } + } + return -1; +} + } // namespace pybind } // namespace paddle diff --git a/paddle/fluid/pybind/op_function_common.h b/paddle/fluid/pybind/op_function_common.h index 7ead9852667..33d0e242a02 100644 --- a/paddle/fluid/pybind/op_function_common.h +++ b/paddle/fluid/pybind/op_function_common.h @@ -146,5 +146,10 @@ unsigned long GetUnsignedLongFromArgs( // NOLINT void InitOpsAttrTypeMap(); +ssize_t GetIdxFromCoreOpsInfoMap( + const std::unordered_map>& + core_ops_info_map, + const std::string& op_type, const std::string& name); + } // namespace pybind } // namespace paddle diff --git a/paddle/phi/api/include/tensor.h b/paddle/phi/api/include/tensor.h index ce40627bb0d..eae8d12fb37 100644 --- a/paddle/phi/api/include/tensor.h +++ b/paddle/phi/api/include/tensor.h @@ -481,7 +481,21 @@ class PADDLE_API Tensor final { */ void set_autograd_meta(std::shared_ptr autograd_meta); - /* Part 9: Auto generated Tensor methods */ + /* Part 9: Inplace methods */ + + /** + * @brief Increase inplace version + */ + void bump_inplace_version(); + + /** + * @brief Get current inplace version + * + * @return uint32_t + */ + uint32_t current_inplace_version(); + + /* Part 10: Auto generated Tensor methods */ private: /** diff --git a/paddle/phi/api/lib/tensor.cc b/paddle/phi/api/lib/tensor.cc index 6be85d72000..6090e6a400a 100644 --- a/paddle/phi/api/lib/tensor.cc +++ b/paddle/phi/api/lib/tensor.cc @@ -347,5 +347,36 @@ void Tensor::set_autograd_meta( autograd_meta_ = std::move(autograd_meta); } +void Tensor::bump_inplace_version() { + if (is_dense_tensor()) { + auto &inplace_version_counter = + std::dynamic_pointer_cast(impl_) + ->InplaceVersionCounter(); + VLOG(3) << "yoki: before bump inplace version: " + << inplace_version_counter.CurrentVersion(); + inplace_version_counter.Bump(); + VLOG(3) << "yoki: after bump inplace version: " + << inplace_version_counter.CurrentVersion(); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "bump_inplace_version is only supported on DenseTensor now.")); + } +} + +uint32_t Tensor::current_inplace_version() { + if (is_dense_tensor()) { + auto &inplace_version_counter = + std::dynamic_pointer_cast(impl_) + ->InplaceVersionCounter(); + VLOG(3) << "yoki: print version: " + << inplace_version_counter.CurrentVersion(); + return inplace_version_counter.CurrentVersion(); + } else { + PADDLE_THROW(phi::errors::Unimplemented( + "current_inplace_version is only supported on DenseTensor now.")); + } + return 0; +} + } // namespace experimental } // namespace paddle diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index c82172780b7..44e6f8e8f2a 100755 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -960,6 +960,7 @@ set_tests_properties(test_bicubic_interp_op PROPERTIES TIMEOUT 120) 
set_tests_properties(test_deformable_conv_op PROPERTIES TIMEOUT 120) set_tests_properties(test_nearest_interp_op PROPERTIES TIMEOUT 120) set_tests_properties(test_profiler PROPERTIES TIMEOUT 120) +set_tests_properties(test_inplace_eager_fluid PROPERTIES TIMEOUT 120) set_tests_properties(test_inplace_softmax_with_cross_entropy PROPERTIES TIMEOUT 120) set_tests_properties(test_cross_entropy2_op PROPERTIES TIMEOUT 120) set_tests_properties(test_fetch_unmerged PROPERTIES TIMEOUT 120) diff --git a/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py b/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py new file mode 100644 index 00000000000..a434c562000 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_inplace_eager_fluid.py @@ -0,0 +1,397 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import print_function + +import unittest +import numpy as np + +import paddle +import paddle.fluid.core as core +from paddle.fluid.framework import _test_eager_guard + + +class TestDygraphInplace(unittest.TestCase): + def setUp(self): + self.init_data() + self.set_np_compare_func() + + def init_data(self): + self.input_var_numpy = np.random.uniform(-5, 5, [10, 20, 1]) + self.dtype = "float32" + + def set_np_compare_func(self): + self.np_compare = np.array_equal + + def non_inplace_api_processing(self, var): + return paddle.squeeze(var) + + def inplace_api_processing(self, var): + return paddle.squeeze_(var) + + def test_inplace_api(self): + with _test_eager_guard(): + var = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) + inplace_var = self.inplace_api_processing(var) + self.assertTrue(id(var) == id(inplace_var)) + + inplace_var.exp_() + self.assertTrue(np.array_equal(var.numpy(), inplace_var.numpy())) + + def test_forward_version(self): + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) + self.assertEqual(var.inplace_version, 0) + + inplace_var = self.inplace_api_processing(var) + self.assertEqual(var.inplace_version, 1) + + inplace_var.exp_() + self.assertEqual(var.inplace_version, 2) + + inplace_var = self.inplace_api_processing(inplace_var) + self.assertEqual(var.inplace_version, 3) + + def test_leaf_inplace_var_error(self): + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var = paddle.to_tensor(self.input_var_numpy).astype(self.dtype) + var.stop_gradient = False + + def leaf_inplace_error(): + self.inplace_api_processing(var) + + self.assertRaises(ValueError, leaf_inplace_error) + + def test_backward_error(self): + # It raises an error because the inplace operator will result + # in incorrect gradient computation. 
+ with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.to_tensor(self.input_var_numpy).astype( + self.dtype) + var_a.stop_gradient = False + + var_b = var_a**2 + + # Here, the gradient computation will use the value of var_b + var_c = var_b**2 + self.inplace_api_processing(var_b) + + loss = paddle.nn.functional.relu(var_c) + with self.assertRaisesRegexp( + RuntimeError, + "received current_inplace_version:{} != inplace_version_snapshot_:{}". + format(1, 0)): + loss.backward() + + def test_backward_success_1(self): + # var_b is modified inplace before using it, the inplace operator doesn't result + # in incorrect gradient computation. + grad_var_a, grad_var_a_inplace = 0, 1 + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.to_tensor(self.input_var_numpy).astype( + self.dtype) + var_a.stop_gradient = False + + var_b = var_a**2 + var_c = self.inplace_api_processing( + var_b) # var_b is modified inplace before using it + + # Here, the gradient computation will use the value of var_b + var_d = var_c**2 + loss = var_d.sum() + loss.backward() + grad_var_a_inplace = var_a.grad.numpy() + + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.to_tensor(self.input_var_numpy).astype( + self.dtype) + var_a.stop_gradient = False + + var_b = var_a**2 + var_c = self.non_inplace_api_processing(var_b) + var_d = var_c**2 + loss = var_d.sum() + loss.backward() + grad_var_a = var_a.grad.numpy() + + self.assertTrue(self.np_compare(grad_var_a_inplace, grad_var_a)) + + def test_backward_success_2(self): + # Although var_b is modified inplace after using it, it does not used in gradient computation. + # The inplace operator doesn't result in incorrect gradient computation. + grad_var_a, grad_var_a_inplace = 0, 1 + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.to_tensor(self.input_var_numpy).astype( + self.dtype) + var_a.stop_gradient = False + + var_b = var_a**2 + + var_c = self.inplace_api_processing( + var_b) # var_b is modified inplace before using it + + var_d = var_c + var_c # Here, the grad op of sum doesn't use the value of var_b + loss = var_d.sum() + + loss.backward() + grad_var_a_inplace = var_a.grad.numpy() + + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.to_tensor(self.input_var_numpy).astype( + self.dtype) + var_a.stop_gradient = False + + var_b = var_a**2 + + var_c = self.non_inplace_api_processing( + var_b) # var_b is modified inplace before using it + + var_d = var_c + var_c # Here, the grad op of sum doesn't use the value of var_b + loss = var_d.sum() + + loss.backward() + grad_var_a = var_a.grad.numpy() + self.assertTrue(np.array_equal(grad_var_a_inplace, grad_var_a)) + + +class TestDygraphInplaceUnsqueeze(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return paddle.unsqueeze(var, -1) + + def inplace_api_processing(self, var): + return paddle.unsqueeze_(var, -1) + + +class TestDygraphInplaceReshape(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return paddle.reshape(var, [-1]) + + def inplace_api_processing(self, var): + return paddle.reshape_(var, [-1]) + + +class TestDygraphInplaceFlatten(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.flatten() + + def inplace_api_processing(self, var): + return var.flatten_() + + +class TestDygraphInplaceScatter(TestDygraphInplace): + def init_data(self): + self.input_var_numpy = np.array([[1, 1], [2, 2], [3, 3]]) + self.dtype = 
"float32" + + def non_inplace_api_processing(self, var): + index = paddle.to_tensor([2, 1, 0, 1], dtype='int64') + updates = paddle.to_tensor( + [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32') + + return paddle.scatter(var, index, updates, overwrite=False) + + def inplace_api_processing(self, var): + index = paddle.to_tensor([2, 1, 0, 1], dtype='int64') + updates = paddle.to_tensor( + [[1, 1], [2, 2], [3, 3], [4, 4]], dtype='float32') + + return paddle.scatter_(var, index, updates, overwrite=False) + + +class TestDygraphInplaceElu(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return paddle.nn.functional.elu(var) + + def inplace_api_processing(self, var): + return paddle.nn.functional.elu_(var) + + +class TestDygraphInplaceRelu(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return paddle.nn.functional.relu(var) + + def inplace_api_processing(self, var): + return paddle.nn.functional.relu_(var) + + +class TestDygraphInplaceSoftmax(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return paddle.nn.functional.softmax(var) + + def inplace_api_processing(self, var): + return paddle.nn.functional.softmax_(var) + + +class TestDygraphInplaceTanh(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return paddle.tanh(var) + + def inplace_api_processing(self, var): + return paddle.tanh_(var) + + +class TestDygraphInplaceCeil(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.ceil() + + def inplace_api_processing(self, var): + return var.ceil_() + + +class TestDygraphInplaceFloor(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.floor() + + def inplace_api_processing(self, var): + return var.floor_() + + +class TestDygraphInplaceExp(TestDygraphInplace): + def set_np_compare_func(self): + self.np_compare = np.allclose + + def non_inplace_api_processing(self, var): + return var.exp() + + def inplace_api_processing(self, var): + return var.exp_() + + +class TestDygraphInplaceReciprocal(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.reciprocal() + + def inplace_api_processing(self, var): + return var.reciprocal_() + + +class TestDygraphInplaceRound(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.round() + + def inplace_api_processing(self, var): + return var.round_() + + +class TestDygraphInplaceSqrt(TestDygraphInplace): + def init_data(self): + self.input_var_numpy = np.random.uniform(0, 5, [10, 20, 1]) + self.dtype = "float32" + + def non_inplace_api_processing(self, var): + return var.sqrt() + + def inplace_api_processing(self, var): + return var.sqrt_() + + +class TestDygraphInplaceRsqrt(TestDygraphInplaceSqrt): + def non_inplace_api_processing(self, var): + return var.rsqrt() + + def inplace_api_processing(self, var): + return var.rsqrt_() + + +class TestDygraphInplaceClip(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.clip(0.6, 1.5) + + def inplace_api_processing(self, var): + return var.clip_(0.6, 1.5) + + +class TestDygraphInplaceScale(TestDygraphInplace): + def non_inplace_api_processing(self, var): + return var.scale(scale=2.0, bias=3.0) + + def inplace_api_processing(self, var): + return var.scale_(scale=2.0, bias=3.0) + + +class TestDygraphInplaceAdd(TestDygraphInplace): + def init_data(self): + self.input_var_numpy = np.random.rand(2, 3, 4) + self.dtype = "float32" + self.input_var_numpy_2 = np.random.rand(2, 3, 4).astype(self.dtype) + + def 
non_inplace_api_processing(self, var): + input_var_2 = paddle.to_tensor(self.input_var_numpy_2) + return var.add(input_var_2) + + def inplace_api_processing(self, var): + input_var_2 = paddle.to_tensor(self.input_var_numpy_2) + return var.add_(input_var_2) + + +class TestDygraphInplaceSubtract(TestDygraphInplaceAdd): + def non_inplace_api_processing(self, var): + input_var_2 = paddle.to_tensor(self.input_var_numpy_2) + return var.subtract(input_var_2) + + def inplace_api_processing(self, var): + input_var_2 = paddle.to_tensor(self.input_var_numpy_2) + return var.subtract_(input_var_2) + + +class TestLossIsInplaceVar(unittest.TestCase): + def test_loss_is_inplace_var(self): + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.ones((2, 2)) + var_a.stop_gradient = False + + var_b = var_a * 2 + loss = var_b.tanh_() + + loss.backward() + inplace_grad_var_a = var_a.grad.numpy() + + with paddle.fluid.dygraph.guard(): + with _test_eager_guard(): + var_a = paddle.ones((2, 2)) + var_a.stop_gradient = False + + var_b = var_a * 2 + loss = var_b.tanh() + + loss.backward() + grad_var_a = var_a.grad.numpy() + + self.assertTrue(np.array_equal(inplace_grad_var_a, grad_var_a)) + + +class TestContinuouslyInplace(unittest.TestCase): + def test_continuously_inplace(self): + with _test_eager_guard(): + a = paddle.rand([2, 3]) + a.stop_gradient = False + b = a * 2 + + b.reshape_([-1]) + b.reshape_([2, 3]) + b.reshape_([-1]) + + b.backward() + + +if __name__ == '__main__': + unittest.main() -- GitLab
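
The snippet below is a minimal usage sketch distilled from the new unit tests in test_inplace_eager_fluid.py; it is not part of the patch itself. It assumes a Paddle build that already includes this change and uses only APIs exercised by those tests (_test_eager_guard, paddle.reshape_, Tensor.exp_, Tensor.inplace_version).

    # Sketch of the inplace behavior added by this patch, under the eager guard
    # used by the new tests. Assumes this patch is built into Paddle.
    import numpy as np
    import paddle
    from paddle.fluid.framework import _test_eager_guard

    with _test_eager_guard():
        # Inplace ops return the same Tensor object and bump its inplace version.
        var = paddle.to_tensor(np.random.rand(2, 3).astype("float32"))
        out = paddle.reshape_(var, [-1])
        assert id(out) == id(var)
        assert var.inplace_version == 1

        # Using a Tensor in gradient computation and then modifying it inplace
        # trips the version check recorded by TensorWrapper during backward.
        var_a = paddle.ones([2, 2])
        var_a.stop_gradient = False
        var_b = var_a ** 2
        var_c = var_b ** 2   # the grad of this op needs the original value of var_b
        var_b.exp_()         # inplace modification after var_b was captured
        loss = var_c.sum()
        try:
            loss.backward()
        except RuntimeError as err:
            # current_inplace_version no longer matches inplace_version_snapshot_
            print(err)

In the generated C++ this corresponds to the new `<op>__dygraph_function` path: the output slot reuses the input EagerVariable, `bump_inplace_version()` is called on the reused input, and `TensorWrapper::check_inplace_version()` compares the recorded snapshot against the current counter when the wrapped tensor is recovered for gradient computation.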