diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt index 8e89ea3f19762b339671a8418ed5d192bca846eb..94f7f717fb24a12e59286aa129d1058b0c30e74e 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/CMakeLists.txt @@ -16,9 +16,9 @@ add_custom_target(eager_final_state_codegen COMMAND "${PYTHON_EXECUTABLE}" "${PADDLE_SOURCE_DIR}/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py" "--api_yaml_path=${api_yaml_path}" "--backward_yaml_path=${backward_yaml_path}" - "--forwards_cc_path=${tmp_forwards_cc_path}" + "--forwards_cc_path=${tmp_forwards_cc_path}" "--forwards_h_path=${tmp_forwards_h_path}" - "--nodes_cc_path=${tmp_nodes_cc_path}" + "--nodes_cc_path=${tmp_nodes_cc_path}" "--nodes_h_path=${tmp_nodes_h_path}" COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_cc_path} ${forwards_cc_path} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${tmp_forwards_h_path} ${forwards_h_path} diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index 9d95b9488d298cce6ec38b3c304004d180470d1e..092c4b6e605db4aedf71af051c3eee7d37d566e1 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -146,10 +146,7 @@ paddle::small_vector, egr::kSlotSmallV {} // Call grad_api function - VLOG(3) << \"Final State Running: \" << \"{}\"; -{} - - // Get Output + VLOG(3) << \"Final State Running: {}\"; {} // Get GradIn autograd_meta {} @@ -641,7 +638,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): pass_stop_gradient_args_list.append(output_autograd_meta_name) pass_stop_gradient_args_str = ",".join(pass_stop_gradient_args_list) - # Node Construction + # Node Construction num_backward_inputs = len(forward_outputs_position_map.keys()) num_backward_outputs = len(forward_inputs_position_map.keys()) grad_node_name = GetGradNodeName(forward_api_name) @@ -701,6 +698,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): set_output_tensor_wrappers_list) # SetGradOutMeta & SetEdges + grad_node_out_list = [] set_grad_out_meta_list = [] set_edges_list = [] for name, (_, pos) in forward_inputs_position_map.items(): @@ -713,7 +711,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): if not has_corresponding_grad_output: continue - input_autograd_meta_name = GetAutoGradMetaName(name) + grad_node_out_list.append(name) is_optional = (name in self.optional_inputs) if is_optional: set_grad_out_meta = f"{indent}if({name}.get_ptr() != nullptr) grad_node->SetGradOutMeta(*({name}.get_ptr()), {pos});" @@ -755,6 +753,7 @@ class DygraphFunctionGeneratorBase(FunctionGeneratorBase): set_input_tensor_wrappers_str, set_grad_out_meta_str, set_out_rank_str, set_history_str, set_grad_in_meta_str, set_retain_grad_str, set_output_tensor_wrappers_str) + self.grad_node_out_list = grad_node_out_list def run(self): # Basic Validation Check @@ -1140,6 +1139,7 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): next_grad_api_contents = self.next_grad_api_contents grad_node_creation_str = "" + grad_node_out_list = [] if next_grad_api_contents: forward_api_contents = grad_api_contents forward_api_contents['api'] = forward_api_contents['backward_api'] @@ -1150,10 
+1150,11 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): next_node_generator.run() next_node_generator.GenerateNodeCreationCodes() grad_node_creation_str = next_node_generator.node_creation_str + grad_node_out_list = next_node_generator.grad_node_out_list self.RecordGrad2NextGradNameMapping(next_node_generator) - return grad_node_creation_str + return grad_node_creation_str, grad_node_out_list def GenerateNodeDeclaration(self): forward_op_name = self.forward_api_name @@ -1214,7 +1215,8 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): set_attribute_methods_str, tensor_wrapper_members_str, attribute_members_str) - def GenerateNodeDefinition(self, grad_node_creation_str): + def GenerateNodeDefinition(self, grad_node_creation_str, + grad_node_out_list): namespace = self.namespace forward_api_name = self.forward_api_name backward_api_name = self.backward_api_name @@ -1290,28 +1292,41 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): get_grad_in_args_list.append(get_attr_str) get_grad_in_args_str = "\n".join(get_grad_in_args_list) - grad_api_args_str = ", ".join(grad_api_args) - - # Grad Function Call String - grad_api_namespace = f"paddle::experimental::{namespace}" - grad_function_call_str = f"{indent}auto grad_api_result = {grad_api_namespace}{backward_api_name}({grad_api_args_str});" - # Get Grad Outputs - get_outputs_str = "" - num_outputs = len(backward_grad_outputs_map.keys()) + # Grad Outputs for name, (ttype, fwd_position, grad_api_position) in backward_grad_outputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) - - if num_outputs == 1: - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = grad_api_result;" + if IsPlainTensorType(ttype): + grad_api_args.append(f"api_output[{fwd_position}][0]") else: - if IsPlainTensorType(ttype): - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = grad_api_result[{grad_api_position}][0];" - else: - assert IsVectorTensorType(ttype) - get_tensor_str = f"{indent}auto& {transformed_tensor_name} = grad_api_result[{grad_api_position}];" - get_outputs_str += get_tensor_str + "\n" + assert IsVectorTensorType(ttype) + grad_api_args.append(f"api_output[{fwd_position}]") + + grad_api_args_str = ", ".join(grad_api_args) + + # Grad Function Call String + slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) + grad_api_namespace = f"paddle::experimental::{namespace}" + grad_function_call_str = f""" + const auto& out_metas = OutputMeta(); + paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs}); + paddle::small_vector, egr::kSlotSmallVectorSize> api_output({slot_num_bwd_outputs}); + for (int i = 0; i < {slot_num_bwd_outputs}; ++i) {{ + returns[i].resize(out_metas[i].size()); + if(returns[i].size() == 0) {{ + api_output[i].reserve(1); + api_output[i].push_back(nullptr); + continue; + }} + api_output[i].reserve(returns[i].size()); + for (size_t j = 0; j < returns[i].size(); ++j) {{ + api_output[i].push_back(&returns[i][j]); + }} + }} +""" + + grad_function_call_str = grad_function_call_str + f"{indent}{grad_api_namespace}{backward_api_name}({grad_api_args_str});" # Prepare for Node Creation if Necessary inputs_autograd_meta_str = "" @@ -1324,38 +1339,41 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): for name, (ttype, pos, grad_api_position) in backward_grad_inputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) - - input_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - if 
IsPlainTensorType(ttype): - input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" - else: - assert IsVectorTensorType(ttype) - input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + if transformed_tensor_name in grad_node_out_list: + input_autograd_meta_name = GetAutoGradMetaName( transformed_tensor_name) - input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" - input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" + if IsPlainTensorType(ttype): + input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" + else: + assert IsVectorTensorType(ttype) + input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + transformed_tensor_name) + input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" + input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" - inputs_autograd_meta_list.append(input_autograd_meta) - compute_require_grad_args_list.append(input_autograd_meta_name) + inputs_autograd_meta_list.append(input_autograd_meta) + compute_require_grad_args_list.append( + input_autograd_meta_name) # 2. Get TensorWrapper AutoGradMeta for name, (ttype, _, pos), in backward_forward_inputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) - - input_autograd_meta_name = GetAutoGradMetaName( - transformed_tensor_name) - if IsPlainTensorType(ttype): - input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" - else: - assert IsVectorTensorType(ttype) - input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + if transformed_tensor_name in grad_node_out_list: + input_autograd_meta_name = GetAutoGradMetaName( transformed_tensor_name) - input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" - input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" + if IsPlainTensorType(ttype): + input_autograd_meta = f"{indent}egr::AutogradMeta* {input_autograd_meta_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});" + else: + assert IsVectorTensorType(ttype) + input_autograd_meta_vec_name = GetAutoGradMetaVectorName( + transformed_tensor_name) + input_autograd_meta = f"{indent}std::vector {input_autograd_meta_vec_name} = egr::EagerUtils::nullable_autograd_meta({transformed_tensor_name});\n" + input_autograd_meta += f"{indent}std::vector* {input_autograd_meta_name} = &{input_autograd_meta_vec_name};" + + inputs_autograd_meta_list.append(input_autograd_meta) + compute_require_grad_args_list.append( + input_autograd_meta_name) - inputs_autograd_meta_list.append(input_autograd_meta) - compute_require_grad_args_list.append(input_autograd_meta_name) inputs_autograd_meta_str = "\n".join(inputs_autograd_meta_list) compute_require_grad_args_str = ",".join( compute_require_grad_args_list) @@ -1363,28 +1381,26 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): # 3. 
Get Output AutoGradMeta outputs_autograd_meta_list = [] num_fwd_outputs = len(backward_grad_outputs_map.keys()) - for name, (rtype, pos, _) in backward_grad_outputs_map.items(): + for name, (rtype, pos, + grad_api_position) in backward_grad_outputs_map.items(): transformed_tensor_name = self.TransformToNextGradName(name) output_autograd_meta_name = GetAutoGradMetaName( transformed_tensor_name) output_autograd_meta_vec_name = GetAutoGradMetaVectorName( transformed_tensor_name) - if num_fwd_outputs == 1: - if IsPlainTensorType(rtype): - output_autograd_meta = f"{indent}egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});" - else: - assert IsVectorTensorType(rtype) - output_autograd_meta = f"{indent}std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});\n" - output_autograd_meta += f"{indent}std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" + if IsPlainTensorType(rtype): + output_autograd_meta = f""" + auto& {transformed_tensor_name} = returns[{pos}][0]; + egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});""" + else: - # Tuple api_result - if IsPlainTensorType(rtype): - output_autograd_meta = f"{indent}egr::AutogradMeta* {output_autograd_meta_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});" - else: - assert IsVectorTensorType(rtype) - output_autograd_meta = f"{indent}std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name});\n" - output_autograd_meta += f"{indent}std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name};" + assert IsVectorTensorType(rtype) + output_autograd_meta = f""" + auto& {transformed_tensor_name} = returns[{pos}]; + std::vector {output_autograd_meta_vec_name} = egr::EagerUtils::autograd_meta(&{transformed_tensor_name}); + std::vector* {output_autograd_meta_name} = &{output_autograd_meta_vec_name}; +""" outputs_autograd_meta_list.append(output_autograd_meta) outputs_autograd_meta_str = "\n".join(outputs_autograd_meta_list) @@ -1392,28 +1408,14 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): compute_require_grad_str = f"{indent}bool trace_backward = egr::Controller::Instance().HasGrad() && create_graph;\n" compute_require_grad_str += f"{indent}bool require_any_grad = egr::EagerUtils::ComputeRequireGrad({compute_require_grad_args_str});" - # Construct grad_api returns - slot_num_bwd_outputs = len(self.forward_inputs_position_map.keys()) - returns_str = f"{indent}paddle::small_vector, egr::kSlotSmallVectorSize> returns({slot_num_bwd_outputs});\n" - for name, (ttype, fwd_position, - grad_api_position) in backward_grad_outputs_map.items(): - transformed_tensor_name = self.TransformToNextGradName(name) - - # Rearrange output order accordingly - if IsPlainTensorType(ttype): - returns_str += f"{indent}returns[{fwd_position}] = {{ {transformed_tensor_name} }};\n" - else: - assert IsVectorTensorType(ttype) - returns_str += f"{indent}returns[{fwd_position}] = {transformed_tensor_name};\n" - - returns_str += f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" + returns_str = f"{indent}if(NeedComplexToRealConversion()) HandleComplexGradToRealGrad(&returns);\n" returns_str += f"{indent}return returns;\n" grad_node_name = GetGradNodeName(forward_api_name) self.node_definition_str = GRAD_FUNCTION_TEMPLATE.format( grad_node_name, fill_zero_str, get_grad_in_args_str, 
grad_node_name, - grad_function_call_str, get_outputs_str, inputs_autograd_meta_str, + grad_function_call_str, inputs_autograd_meta_str, outputs_autograd_meta_str, compute_require_grad_str, grad_node_creation_str, returns_str) @@ -1426,16 +1428,17 @@ class DygraphNodeGenerator(DygraphFunctionGeneratorBase): ## Code Generation ## ##################### # Higher-order GradNode generation - grad_node_creation_str = self.GenerateHigherOrderNodeCreationCode() + grad_node_creation_str, grad_node_out_list = self.GenerateHigherOrderNodeCreationCode( + ) self.GenerateNodeDeclaration() - self.GenerateNodeDefinition(grad_node_creation_str) + self.GenerateNodeDefinition(grad_node_creation_str, grad_node_out_list) class DygraphYamlGenerator(YamlGeneratorBase): def __init__(self, api_yaml_path, backward_yaml_path): - # Parent members: + # Parent members: # self.namespace # self.api_yaml_path # self.forward_api_list diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index b195ed1aefadc1c8bceee9ff450be37c2af9e9ec..ddeb073046bf1230e4de1df2ac15da3f615141eb 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -74,6 +74,7 @@ add_custom_command( COMMAND ${PYTHON_EXECUTABLE} ${api_gen_file} --api_yaml_path ${api_yaml_file} --api_header_path ${api_header_file_tmp} + --api_header_path ${api_header_file_tmp} --api_source_path ${api_source_file_tmp} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_header_file_tmp} ${api_header_file} COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file} diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 38a60ab978900792077f5d7d65a7d31719eb6ae7..d80444e7f710c33de8e79fa8c9588503da620a94 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -592,19 +592,20 @@ Tensor conv2d_impl(const Tensor& input, return api_output; } -std::vector> conv2d_grad_impl( - const Tensor& input, - const Tensor& filter, - const Tensor& out_grad, - const std::vector& strides, - const std::vector& paddings, - const std::string& paddding_algorithm, - int groups, - const std::vector& dilations, - const std::string& data_format, - bool use_addto, - int workspace_size_MB, - bool exhaustive_search) { +void conv2d_grad_impl(const Tensor& input, + const Tensor& filter, + const Tensor& out_grad, + const std::vector& strides, + const std::vector& paddings, + const std::string& paddding_algorithm, + int groups, + const std::vector& dilations, + const std::string& data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search, + Tensor* input_grad, + Tensor* filter_grad) { Backend kernel_backend = Backend::UNDEFINED; DataLayout kernel_layout = DataLayout::UNDEFINED; DataType kernel_data_type = DataType::UNDEFINED; @@ -646,18 +647,15 @@ std::vector> conv2d_grad_impl( auto input_filter = PrepareData(filter, args1, {}); auto input_out_grad = PrepareData(out_grad, args2, {}); - std::vector> api_output(2); - api_output[0].emplace_back(); - auto kernel_out_0 = SetKernelOutput(kernel_backend, &api_output[0][0]); - api_output[1].emplace_back(); - auto kernel_out_1 = SetKernelOutput(kernel_backend, &api_output[1][0]); + auto kernel_out_0 = SetKernelOutput(kernel_backend, input_grad); + auto kernel_out_1 = SetKernelOutput(kernel_backend, filter_grad); phi::MetaTensor meta_out_0(kernel_out_0); phi::MetaTensor meta_out_1(kernel_out_1); phi::GeneralBinaryGradInferMeta(MakeMetaTensor(*input_input), 
MakeMetaTensor(*input_filter), - &meta_out_0, - &meta_out_1); + kernel_out_0 ? &meta_out_0 : nullptr, + kernel_out_1 ? &meta_out_1 : nullptr); using kernel_signature = void (*)(const platform::DeviceContext&, const phi::DenseTensor&, @@ -693,8 +691,6 @@ std::vector> conv2d_grad_impl( kernel_out_0, kernel_out_1); } - - return api_output; } Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) { @@ -1080,8 +1076,9 @@ std::tuple sgd_impl( // but if we use this impl, it will not support. We need to be able to reuse // the autograd API here, which is not yet implemented // TODO(chenweihang): we should support call generated api in custom api impl -std::vector add_n_grad_impl(const std::vector& x, - const Tensor& out_grad) { +void add_n_grad_impl(const std::vector& x, + const Tensor& out_grad, + std::vector x_grad) { auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad); auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); @@ -1099,9 +1096,7 @@ std::vector add_n_grad_impl(const std::vector& x, auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {}); - size_t out_number = x.size(); - std::vector x_grad; - auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad); + auto dense_x_grad = SetKernelOutput(&x_grad); using kernel_signature = void (*)(const platform::DeviceContext&, const phi::DenseTensor&, @@ -1117,8 +1112,6 @@ std::vector add_n_grad_impl(const std::vector& x, (*kernel_fn)( *dev_ctx, *dense_out_grad, phi::Scalar(1.0), 0.0, true, dense_x_grad_t); } - - return x_grad; } std::tuple batch_norm_impl( @@ -1250,7 +1243,7 @@ std::tuple batch_norm_impl( return api_output; } -Tensor imag_grad_impl(const Tensor& out_grad) { +void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad) { phi::KernelKey kernel_key{ParseBackend(out_grad), out_grad.layout(), phi::dtype::ToComplex(out_grad.dtype())}; @@ -1264,8 +1257,7 @@ Tensor imag_grad_impl(const Tensor& out_grad) { auto dense_out_grad = TensorToDenseTensor(out_grad); - Tensor out; - auto kernel_out = SetKernelOutput(kernel_key.backend(), &out); + auto kernel_out = SetKernelOutput(kernel_key.backend(), x_grad); phi::MetaTensor meta_out(kernel_out); phi::RealAndImagGradInferMeta(*dense_out_grad, &meta_out); @@ -1274,11 +1266,9 @@ Tensor imag_grad_impl(const Tensor& out_grad) { auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)(*dev_ctx, *dense_out_grad, kernel_out); - - return out; } -Tensor real_grad_impl(const Tensor& out_grad) { +void real_grad_impl(const Tensor& out_grad, Tensor* x_grad) { phi::KernelKey kernel_key{ParseBackend(out_grad), out_grad.layout(), phi::dtype::ToComplex(out_grad.dtype())}; @@ -1292,8 +1282,7 @@ Tensor real_grad_impl(const Tensor& out_grad) { auto dense_out_grad = TensorToDenseTensor(out_grad); - Tensor out; - auto kernel_out = SetKernelOutput(kernel_key.backend(), &out); + auto kernel_out = SetKernelOutput(kernel_key.backend(), x_grad); phi::MetaTensor meta_out(kernel_out); phi::RealAndImagGradInferMeta(*dense_out_grad, &meta_out); @@ -1302,8 +1291,6 @@ Tensor real_grad_impl(const Tensor& out_grad) { auto* kernel_fn = kernel.GetVariadicKernelFn(); (*kernel_fn)(*dev_ctx, *dense_out_grad, kernel_out); - - return out; } } // namespace experimental diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h index 46abcd90de32a610794892aec9d828f156239dd0..d88a134654caf76e590d402210233c52d2b775e6 100644 --- a/paddle/phi/api/lib/api_custom_impl.h +++ b/paddle/phi/api/lib/api_custom_impl.h @@ -96,20 +96,6 @@ Tensor conv2d_impl(const Tensor& 
input, int workspace_size_MB, bool exhaustive_search); -std::vector> conv2d_grad_impl( - const Tensor& input, - const Tensor& filter, - const Tensor& out_grad, - const std::vector& strides, - const std::vector& paddings, - const std::string& paddding_algorithm, - int groups, - const std::vector& dilations, - const std::string& data_format, - bool use_addto, - int workspace_size_MB, - bool exhaustive_search); - Tensor copy_to_impl(const Tensor& x, Place place, bool blocking); std::vector split_impl(const Tensor& x, @@ -138,12 +124,28 @@ std::tuple sgd_impl( ////////////////// Backward(grad) api impls ////////////////////// -std::vector add_n_grad_impl(const std::vector& x, - const Tensor& out_grad); - -Tensor imag_grad_impl(const Tensor& x); - -Tensor real_grad_impl(const Tensor& x); +void add_n_grad_impl(const std::vector& x, + const Tensor& out_grad, + std::vector x_grad); + +void conv2d_grad_impl(const Tensor& input, + const Tensor& filter, + const Tensor& out_grad, + const std::vector& strides, + const std::vector& paddings, + const std::string& paddding_algorithm, + int groups, + const std::vector& dilations, + const std::string& data_format, + bool use_addto, + int workspace_size_MB, + bool exhaustive_search, + Tensor* input_grad, + Tensor* filter_grad); + +void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad); + +void real_grad_impl(const Tensor& out_grad, Tensor* x_grad); } // namespace experimental } // namespace paddle diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index fb205212ff3719af04424df04e5e0f80383ffcba..2111829b8d60bd1960bff9c0bb684e9e5a384f04 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -113,10 +113,13 @@ phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) { /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) { - if (out->impl() == nullptr) { - out->set_impl(std::make_shared()); + if (out) { + if (out->impl() == nullptr) { + out->set_impl(std::make_shared()); + } + return static_cast(out->impl().get()); } - return static_cast(out->impl().get()); + return nullptr; } std::vector SetKernelOutput(size_t out_size, @@ -133,6 +136,18 @@ std::vector SetKernelOutput(size_t out_size, return results; } +std::vector SetKernelOutput(std::vector* out) { + std::vector results(out->size(), nullptr); + for (size_t i = 0; i < out->size(); ++i) { + if (out->at(i)) { + auto tensor_ptr = std::make_shared(); + results[i] = tensor_ptr.get(); + (*out)[i]->set_impl(tensor_ptr); + } + } + return results; +} + phi::SelectedRows* SetSelectedRowsKernelOutput(Backend backend, Tensor* out) { if (!out->initialized()) { auto select_rows = std::make_shared(); diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index 47b80bb3fc290dbba2abade53a1866a557c174a6..7303e6b46114dafdc0c76f4052049791407e6877 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -74,6 +74,9 @@ std::vector SetKernelOutput(size_t out_size, Backend backend, std::vector* out); +// For backward api +std::vector SetKernelOutput(std::vector* out); + phi::SelectedRows* SetSelectedRowsKernelOutput(Backend backend, Tensor* out); phi::TensorBase* SetSparseKernelOutput(Tensor* out, TensorType type); diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc index 519d21b323fc216dbff0d2745a0ce36b9c01eb41..e793eb8e6687205f095ede9e90dd3aae4ccb4ced 100644 --- 
a/paddle/phi/infermeta/multiary.cc +++ b/paddle/phi/infermeta/multiary.cc @@ -1998,7 +1998,9 @@ void StackInferMeta(const std::vector& x, void UnchangedMultiInferMeta(const std::vector& x, std::vector out) { for (size_t i = 0; i < x.size(); ++i) { - out[i]->share_meta(*x[i]); + if (out[i]) { + out[i]->share_meta(*x[i]); + } } } diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h index 5d365786001a3216755d89bef84fef5eed4c1f62..3c06b238d145c1b1ff18648f3c1dba4cac8b1c34 100644 --- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h @@ -513,6 +513,20 @@ void MultiplyDoubleGradKernel(const Context& dev_ctx, funcs::InverseMultiplyFunctor>( dev_ctx, dout, ddy_safe, dx, axis); } + } else { + if (dx && dy) { + phi::funcs::ElemwiseGradCompute, MulGradDY>( + dev_ctx, + ddx_safe, + ddy_safe, + dout, + dout, + axis, + dx, + dy, + MulGradDX(), + MulGradDY()); + } } } diff --git a/paddle/phi/tests/api/test_matmul_api.cc b/paddle/phi/tests/api/test_matmul_api.cc index e2c324a6775c8f697c5871dcfe10a830244a103c..0d4ec7bd4f5924e2d7935b703bb2fbf2a78c5b3c 100644 --- a/paddle/phi/tests/api/test_matmul_api.cc +++ b/paddle/phi/tests/api/test_matmul_api.cc @@ -179,8 +179,18 @@ TEST(API, matmul_double_grad) { auto dx_grad = paddle::experimental::full({3, 3}, 2.0); // 2. test API - const auto out = paddle::experimental::matmul_double_grad( - x, y, out_grad, dx_grad, {}, false, false); + std::vector> out( + 3, std::vector(1)); + paddle::experimental::matmul_double_grad(x, + y, + out_grad, + dx_grad, + {}, + false, + false, + &out[0][0], + &out[1][0], + &out[2][0]); // 3. check result ASSERT_EQ(out.size(), 3UL); diff --git a/paddle/phi/tests/api/test_sparse_conv_api.cc b/paddle/phi/tests/api/test_sparse_conv_api.cc index 7c4aa164259071667e3d90994759c05454f407ff..c00113389adb74ce81ac66e929417829bf30c6f2 100644 --- a/paddle/phi/tests/api/test_sparse_conv_api.cc +++ b/paddle/phi/tests/api/test_sparse_conv_api.cc @@ -77,11 +77,11 @@ void TestConv3dBase(const std::vector& indices, kernel.size() * sizeof(T)); if (!std::is_same::value) { - auto outs = paddle::experimental::sparse::conv3d( + auto tensor_out = paddle::experimental::sparse::conv3d( x, weight, paddings, dilations, strides, 1, false); - auto out = std::dynamic_pointer_cast( - std::get<0>(outs).impl()); + auto out = + std::dynamic_pointer_cast(tensor_out.impl()); ASSERT_EQ(correct_out_dims.size(), out->dims().size()); for (int i = 0; i < correct_out_dims.size(); i++) { ASSERT_EQ(correct_out_dims[i], out->dims()[i]); diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py index 717870ee01d0a04fa4b9c31a0d6582a851ed3bd6..af870fcc8e54d17f0338f174ddb476d1d9b97a3c 100644 --- a/python/paddle/utils/code_gen/api_base.py +++ b/python/paddle/utils/code_gen/api_base.py @@ -60,6 +60,12 @@ class BaseAPI(object): def get_api_func_name(self): return self.api + def get_declare_args(self): + return self.args_str['args_declare'] + + def get_define_args(self): + return self.args_str["args_define"] + def parse_args(self, api_name, api_item_yaml): optional_vars = [] if 'optional' in api_item_yaml: @@ -309,12 +315,12 @@ class BaseAPI(object): def gene_api_declaration(self): api_declaration = f""" -PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name()}({self.args_str['args_declare']}); +PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name()}({self.get_declare_args()}); """ if 
self.is_base_api and self.inplace_map is not None: api_declaration = api_declaration + f""" -PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self.args_str['args_declare']}); +PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self.get_declare_args()}); """ return api_declaration @@ -513,7 +519,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self {code_indent} auto {out_name}_{PREFIX_META_TENSOR_NAME}vec = MakeMetaTensor({out_name}); {code_indent} std::vector {out_name}_metas({out_name}_{PREFIX_META_TENSOR_NAME}vec.size()); {code_indent} for (size_t i = 0; i < {out_name}_{PREFIX_META_TENSOR_NAME}vec.size(); ++i) {{ -{code_indent} {out_name}_metas[i] = &{out_name}_{PREFIX_META_TENSOR_NAME}vec[i]; +{code_indent} {out_name}_metas[i] = {out_name}[i] ? &{out_name}_{PREFIX_META_TENSOR_NAME}vec[i] : nullptr; {code_indent} }}""" param_code = param_code + out_name + '_metas, ' @@ -521,8 +527,10 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self meta_tensor_code = meta_tensor_code + code_indent + " phi::MetaTensor " + out_name.replace( 'kernel_', PREFIX_META_TENSOR_NAME) + "(" + out_name + ");\n" - param_code = param_code + "&" + out_name.replace( - 'kernel_', PREFIX_META_TENSOR_NAME) + ", " + if len(kernel_output_names) == 1: + param_code = param_code + f"&{out_name.replace('kernel_', PREFIX_META_TENSOR_NAME)}, " + else: + param_code = param_code + f"{out_name} ? &{out_name.replace('kernel_', PREFIX_META_TENSOR_NAME)} : nullptr, " param_code = param_code[:-2] return f"""{meta_tensor_code} @@ -712,7 +720,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self # Override by child class def gene_return_code(self): - return "api_output" + return "return api_output;" # Override by child class def gene_output(self, @@ -748,7 +756,7 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); {code_indent} }} -{code_indent} return {self.gene_return_code()};""" +{code_indent} {self.gene_return_code()}""" def gen_selected_rows_kernel_code(self, code_indent, inplace_flag=False): input_tensors, kernel_args, kernel_signature = self.get_selected_rows_kernel_args( @@ -775,12 +783,12 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); {code_indent} }} -{code_indent} return {self.gene_return_code()};""" +{code_indent} {self.gene_return_code()}""" def gene_base_api_code(self, inplace_flag=False): api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '') api_code = f""" -PADDLE_API {self.gene_return_type_code()} {api_func_name}({self.args_str["args_define"]}) {{ +PADDLE_API {self.gene_return_type_code()} {api_func_name}({self.get_define_args()}) {{ {self.gene_kernel_select()} """ @@ -802,6 +810,12 @@ PADDLE_API {self.gene_return_type_code()} {api_func_name}({self.args_str["args_d } """ + def gene_invoke_code(self, invoke_code, params_code): + return f""" +PADDLE_API {self.outputs['return_type']} {self.api}({params_code}) {{ + return {invoke_code}; +}}""" + def gene_api_code(self): if self.is_base_api: api_code = self.gene_base_api_code() @@ -821,12 +835,8 @@ PADDLE_API {self.gene_return_type_code()} {api_func_name}({self.args_str["args_d invoke_code = re.sub(pattern, adjust_name, self.invoke) params_code = re.sub(pattern, adjust_name, - self.args_str["args_define"]) + 
self.get_define_args()) else: invoke_code = self.invoke - params_code = self.args_str["args_define"] - return f""" -{self.outputs['return_type']} {self.api}({params_code}) {{ - return {invoke_code}; -}} -""" + params_code = self.get_define_args() + return self.gene_invoke_code(invoke_code, params_code) diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 538958c2361bc74b466af6c96b4bddcdcf6e9001..8fd95f9a191c34dc36b536dd5304332a1acba0dd 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -53,7 +53,7 @@ class ForwardAPI(BaseAPI): else: return_out_list = [] for i, name in enumerate(self.outputs['names']): - if name not in self.intermediate_outs: + if name.split('@')[0] not in self.intermediate_outs: return_out_list.append(self.outputs['types'][i]) return return_out_list[0] if len( return_out_list) == 1 else "std::tuple<" + ",".join( @@ -61,19 +61,19 @@ class ForwardAPI(BaseAPI): def gene_return_code(self): if self.is_dygraph_api or len(self.intermediate_outs) == 0: - return "api_output" + return "return api_output;" else: return_out_list = [] for i, name in enumerate(self.outputs['names']): - if name not in self.intermediate_outs: + if name.split('@')[0] not in self.intermediate_outs: return_out_list.append(i) if len(return_out_list) == 1: - return f"std::get<{return_out_list[0]}>(api_output)" + return f"return std::get<{return_out_list[0]}>(api_output);" else: selected_code = [ f"std::get<{i}>(api_output)" for i in return_out_list ] - return '{' + ", ".join(selected_code) + '}' + return 'return {' + ", ".join(selected_code) + '};' def gene_output(self, output_type_list, diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml index ff49fd426146b261bc5dc5e692cbcf14b9f99756..7c68829c0959f603a284b309bd771bf516eb8fa7 100644 --- a/python/paddle/utils/code_gen/backward.yaml +++ b/python/paddle/utils/code_gen/backward.yaml @@ -71,7 +71,7 @@ forward : add_n (Tensor[] x) -> Tensor(out) args : (Tensor[] x, Tensor out_grad) output : Tensor[](x_grad){x.size()} - invoke : add_n_grad_impl(x, out_grad) + invoke : add_n_grad_impl(x, out_grad, x_grad) no_need_buffer : x - backward_api : add_triple_grad @@ -286,7 +286,7 @@ forward : conv2d (Tensor input, Tensor filter, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) -> Tensor(out) args : (Tensor input, Tensor filter, Tensor out_grad, int[] strides, int[] paddings, str paddding_algorithm, int groups, int[] dilations, str data_format, bool use_addto, int workspace_size_MB, bool exhaustive_search) output : Tensor(input_grad), Tensor(filter_grad) - invoke : conv2d_grad_impl(input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search) + invoke : conv2d_grad_impl(input, filter, out_grad, strides, paddings, paddding_algorithm, groups, dilations, data_format, use_addto, workspace_size_MB, exhaustive_search, input_grad, filter_grad) backward : conv2d_grad_grad - backward_api : conv2d_grad_grad @@ -766,7 +766,7 @@ forward : imag (Tensor x) -> Tensor(out) args : (Tensor out_grad) output : Tensor(x_grad) - invoke : imag_grad_impl(out_grad) + invoke : imag_grad_impl(out_grad, x_grad) - backward_api : index_sample_grad forward : index_sample (Tensor x, Tensor index) -> Tensor(out) @@ -1395,7 +1395,7 @@ forward : real (Tensor x) -> 
Tensor(out) args : (Tensor out_grad) output : Tensor(x_grad) - invoke : real_grad_impl(out_grad) + invoke : real_grad_impl(out_grad, x_grad) - backward_api : reciprocal_grad forward : reciprocal (Tensor x) -> Tensor(out) @@ -1796,7 +1796,7 @@ forward : sum_double_grad (Tensor grad_grad_x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(grad_grad_out) args : (Tensor grad_grad_x, Tensor grad_grad_out_grad, int64_t[] dims={}, bool keep_dim=false, bool reduce_all=false) output : Tensor(grad_grad_x_grad) - invoke : sum_grad(grad_grad_x, grad_grad_out_grad, dims, keep_dim, reduce_all) + invoke : sum_grad(grad_grad_x, grad_grad_out_grad, dims, keep_dim, reduce_all, grad_grad_x_grad) no_need_buffer : x - backward_api : swish_grad diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index a88339c607c555d35e953b051f153e0796c2f913..a155a2c3d6c9f761990b17b22d14eda2789ab07d 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -77,6 +77,25 @@ class BackwardAPI(BaseAPI): f"{self.api} : Output error: The number of outputs should be less then the number of inputs of forward api. \ Please check the output of {self.api} in yaml." + def get_declare_args(self): + return self.get_define_args() + + def get_define_args(self): + out_type_map = { + 'Tensor': 'Tensor*', + 'std::vector<Tensor>': 'std::vector<Tensor*>' + } + inputs_and_attrs = self.args_str['args_define'] + outs = [] + for i, name in enumerate(self.outputs['names']): + outs.append(out_type_map[self.outputs['types'][i]] + ' ' + + name.split('@')[0]) + result = inputs_and_attrs + ', ' + ", ".join(outs) + return result + + def gene_return_code(self): + return "" + def gene_kernel_backend_select(self): all_no_need_buffer = True for in_name in self.inputs['names']: @@ -91,8 +110,7 @@ class BackwardAPI(BaseAPI): return super().gene_kernel_backend_select() def get_return_type(self, out_type_list): - return out_type_list[0] if len( - out_type_list) == 1 else "std::vector<std::vector<Tensor>>" + return 'void' def gene_output(self, output_type_list, @@ -109,23 +127,19 @@ class BackwardAPI(BaseAPI): inplace_assign = " = " + self.inplace_map[self.outputs['names'][ 0]] if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][0] in self.inplace_map else "" - output_create = f""" -{code_indent} {self.outputs['return_type']} api_output{inplace_assign};""" - + output_create = "" if output_type_list[0] == 'std::vector<Tensor>': assert self.outputs['out_size_expr'] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
output_create = output_create + f""" -{code_indent} auto kernel_out = {set_out_func}({self.outputs['out_size_expr']}, kernel_backend, &api_output);""" +{code_indent} auto kernel_out = {set_out_func}(&{self.outputs['names'][0]});""" else: output_create = output_create + f""" -{code_indent} auto kernel_out = {set_out_func}(kernel_backend, &api_output);""" +{code_indent} auto kernel_out = {set_out_func}(kernel_backend, {self.outputs['names'][0]});""" elif len(output_type_list) > 1: - output_create = f""" -{code_indent} {self.outputs['return_type']} api_output({len(output_type_list)});""" - + output_create = "" for i, out_type_item in enumerate(output_type_list): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') @@ -133,26 +147,21 @@ class BackwardAPI(BaseAPI): if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][i] in self.inplace_map: output_create = output_create + f""" -{code_indent} api_output[{i}].emplace_back({self.inplace_map[self.outputs['names'][i]]});""" - - else: - output_create = output_create + f""" -{code_indent} api_output[{i}].emplace_back();""" +{code_indent} *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" output_create = output_create + f""" -{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, &api_output[{i}][0]);""" +{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, {self.outputs['names'][i]});""" else: - get_out_code = f'&api_output[{i}]' if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][i] in self.inplace_map: output_create = output_create + f""" -{code_indent} api_output[{i}] = {self.inplace_map[self.outputs['names'][i]]};""" +{code_indent} *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" assert self.outputs['out_size_expr'][i] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." 
output_create = output_create + f""" -{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &api_output[{i}]);""" +{code_indent} auto kernel_out_{i} = {set_out_func}(&{self.outputs['names'][i]});""" kernel_output = kernel_output[:-2] else: @@ -162,6 +171,21 @@ class BackwardAPI(BaseAPI): return kernel_output, output_names, output_create + def gene_invoke_code(self, invoke_code, params_code): + invoke_func_name = invoke_code.split('(')[0].strip() + if invoke_func_name.endswith('_grad') or invoke_func_name.endswith( + '_grad_impl'): + return f""" +PADDLE_API {self.outputs['return_type']} {self.api}({params_code}) {{ + {invoke_code}; +}}""" + + else: + return f""" +PADDLE_API {self.outputs['return_type']} {self.api}({params_code}) {{ + *{self.outputs['names'][0].split('@')[0]} = {invoke_code}; +}}""" + def header_include(): return """ diff --git a/python/paddle/utils/code_gen/sparse_api_gen.py b/python/paddle/utils/code_gen/sparse_api_gen.py index c0316fc1642946d641326d5bde4eb77ac50f8129..eb9bca2eca7b7b78d665826eeb7d2cb6546fc59f 100644 --- a/python/paddle/utils/code_gen/sparse_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_api_gen.py @@ -25,10 +25,9 @@ class SparseAPI(ForwardAPI): super(SparseAPI, self).__init__(api_item_yaml) def gene_api_declaration(self): - return f""" -// {", ".join(self.outputs['names'])} -PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.args_str['args_declare']}); -""" + api_declaration = "// " + ', '.join(self.outputs['names']) + return api_declaration + super(SparseAPI, + self).gene_api_declaration() + '\n' def get_kernel_tensor_out_type(self, output_name): sparse_type = 'TensorType::DENSE_TENSOR' @@ -136,7 +135,8 @@ PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.args_s kernel_context_code = self.gen_sparse_kernel_context( kernel_output_names) - + return_code = "" if len(self.gene_return_code( + )) == 0 else " " + self.gene_return_code() return f""" auto phi_kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}}); @@ -148,13 +148,11 @@ PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.args_s {output_create} {kernel_context_code} phi_kernel(&kernel_context); - - return api_output;""" +{return_code}""" def gene_base_api_code(self, inplace_flag=False): - api_func_name = self.get_api_func_name() return f""" -PADDLE_API {self.outputs['return_type']} {api_func_name}({self.args_str["args_define"]}) {{ +PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name()}({self.get_define_args()}) {{ {self.gene_kernel_select()} {self.gen_sparse_kernel_code(inplace_flag)} }} diff --git a/python/paddle/utils/code_gen/sparse_bw_api_gen.py b/python/paddle/utils/code_gen/sparse_bw_api_gen.py index 4f209a7592161b37cb48d04130966c0ccac1c26b..6dc4a2668ebb9a5d96eb134af8c2a96cf52ab5ca 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_bw_api_gen.py @@ -34,9 +34,21 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI): def get_return_type(self, out_type_list): return BackwardAPI.get_return_type(self, out_type_list) + def gene_return_type_code(self): + return self.outputs['return_type'] + + def gene_return_code(self): + return "" + def gene_api_declaration(self): return SparseAPI.gene_api_declaration(self) + def get_declare_args(self): + return BackwardAPI.get_declare_args(self) + + def get_define_args(self): + return 
BackwardAPI.get_define_args(self) + def gene_output(self, output_type_list, set_out_func, @@ -53,36 +65,21 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI): 0]] if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][0] in self.inplace_map else "" output_create = f""" - {self.outputs['return_type']} api_output{inplace_assign}; - auto kernel_out = {set_out_func}(&api_output, {self.get_kernel_tensor_out_type(self.outputs['names'][0])});""" + auto kernel_out = {set_out_func}({self.outputs['names'][0].split('@')[0]}, {self.get_kernel_tensor_out_type(self.outputs['names'][0])});""" elif len(output_type_list) > 1: - output_create = f""" - {self.outputs['return_type']} api_output({len(output_type_list)});""" + output_create = "" for i, out_type_item in enumerate(output_type_list): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') - if out_type_item == 'Tensor': - get_out_code = f'&api_output[{i}][0]' - if inplace_flag and self.inplace_map is not None and self.outputs[ - 'names'][i] in self.inplace_map: - output_create = output_create + f""" - api_output[{i}].emplace_back({self.inplace_map[self.outputs['names'][i]]});""" - - else: - output_create = output_create + f""" - api_output[{i}].emplace_back();""" - - else: - get_out_code = f'&api_output[{i}]' - if inplace_flag and self.inplace_map is not None and self.outputs[ - 'names'][i] in self.inplace_map: - output_create = output_create + f""" - api_output[{i}] = {self.inplace_map[self.outputs['names'][i]]};""" + if inplace_flag and self.inplace_map is not None and self.outputs[ + 'names'][i] in self.inplace_map: + output_create = output_create + f""" + *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" output_create = output_create + f""" - auto kernel_out_{i} = {set_out_func}({get_out_code}, {self.get_kernel_tensor_out_type(self.outputs['names'][i])});""" + auto kernel_out_{i} = {set_out_func}({self.outputs['names'][i].split('@')[0]}, {self.get_kernel_tensor_out_type(self.outputs['names'][i])});""" kernel_output = kernel_output[:-2] else: diff --git a/python/paddle/utils/code_gen/strings_api_gen.py b/python/paddle/utils/code_gen/strings_api_gen.py index 061ea6c3ceef9c1fce34e1ed480a049604a6c8b5..815b9176cd22cb2ccd1bb8fe7d5b66d9ee151ee7 100644 --- a/python/paddle/utils/code_gen/strings_api_gen.py +++ b/python/paddle/utils/code_gen/strings_api_gen.py @@ -194,7 +194,7 @@ PADDLE_API {self.outputs['return_type']} {self.get_api_func_name()}({self.args_s {code_indent} auto* kernel_fn = kernel.GetVariadicKernelFn(); {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); -{code_indent} return {self.gene_return_code()};""" +{code_indent} {self.gene_return_code()}""" def gene_kernel_select(self) -> str: api = self.api
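
Usage note (illustrative sketch, not part of the diff): after this change the generated backward C++ APIs return void and fill caller-provided output tensors, as the updated test_matmul_api.cc above already demonstrates. A minimal single-output example, assuming the wrapper generated by backward_api_gen.py is exposed as paddle::experimental::imag_grad and that the generated backward declarations live in the header named below (both assumptions, not confirmed by this diff):

    #include "paddle/phi/api/backward/backward_api.h"  // assumed location of the generated declarations

    void run_imag_grad_example(const paddle::experimental::Tensor& out_grad) {
      // The caller owns the output slot; the generated wrapper forwards the
      // pointer to imag_grad_impl(out_grad, x_grad), which allocates the
      // underlying DenseTensor via SetKernelOutput and writes the result into it.
      paddle::experimental::Tensor x_grad;
      paddle::experimental::imag_grad(out_grad, &x_grad);
    }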