diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py index fdc7819e3159c13c058b842b2b2a8627cbbda2c4..3e4efba3f404b142b62062bedf0972ef1e15fc9d 100644 --- a/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py +++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/eager_gen.py @@ -1007,7 +1007,11 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): else: assert IsVectorTensorType(ttype) if is_optional: - arg_str = f"const paddle::optional>& {name}" + if self.is_forward_only and is_inplaced and forward_inplace_map and name in forward_inplace_map.keys( + ): + arg_str = f"paddle::optional>& {name}" + else: + arg_str = f"const paddle::optional>& {name}" amp_tensors_vector_optional_list.append( f"if ({name}) amp_tensors_vector.push_back( *{name} );\n" ) @@ -1015,7 +1019,11 @@ class DygraphForwardFunctionGenerator(DygraphFunctionGeneratorBase): f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n" ) else: - arg_str = f"const std::vector& {name}" + if is_inplaced and forward_inplace_map and name in forward_inplace_map.keys( + ): + arg_str = f"std::vector& {name}" + else: + arg_str = f"const std::vector& {name}" amp_tensors_vector_list.append(f"{name}") amp_autocast_list.append( f"auto NEW_{name} = egr::EagerAmpAutoCasts(\"{name}\", {name}, amp_dst_dtype, op_name);\n" diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 992f462a44ebe6bd66e3f405a6e8935e63f56ecb..d68c87a09a0e2174bb4becda6bb11e59824f1528 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -44,6 +44,34 @@ std::unique_ptr> TensorToDenseTensor( return pt_tensors; } +std::vector TensorToConstDenseTensorPtr( + const std::vector& tensors) { + std::vector pt_tensors(tensors.size()); + + for (size_t i = 0; i < tensors.size(); ++i) { + pt_tensors[i] = static_cast(tensors[i].impl().get()); + } + + return pt_tensors; +} + +paddle::optional> +TensorToConstDenseTensorPtr( + const paddle::optional>& tensors) { + paddle::optional> pt_tensors; + + if (tensors) { + pt_tensors = + paddle::optional>(tensors->size()); + for (size_t i = 0; i < tensors->size(); ++i) { + pt_tensors->at(i) = + static_cast(tensors->at(i).impl().get()); + } + } + + return pt_tensors; +} + std::shared_ptr TensorToSelectedRows(const Tensor& tensor) { return std::static_pointer_cast(tensor.impl()); } @@ -140,6 +168,29 @@ std::vector SetKernelOutput(size_t out_size, return results; } +std::vector SetInplaceVectorKernelOutput( + size_t out_size, Backend backend, std::vector* out) { + std::vector results(out->size(), nullptr); + for (size_t i = 0; i < out->size(); ++i) { + results[i] = static_cast(out->at(i).impl().get()); + } + return results; +} + +std::vector SetInplaceOptionalVectorKernelOutput( + size_t out_size, + Backend backend, + const paddle::optional>& out) { + std::vector results; + if (out) { + results = std::vector(out->size(), nullptr); + for (size_t i = 0; i < out->size(); ++i) { + results[i] = static_cast(out->at(i).impl().get()); + } + } + return results; +} + std::vector SetKernelOutput(std::vector* out) { std::vector results(out->size(), nullptr); for (size_t i = 0; i < out->size(); ++i) { diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index e990eb0279b0fc047fc0e4410533b5f2b0df8805..50602a1eca2cfe21459871b47a2bbbb9e7ca103c 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -38,6 +38,13 @@ paddle::optional TensorToDenseTensor( std::unique_ptr> TensorToDenseTensor( const std::vector& tensors); +std::vector TensorToConstDenseTensorPtr( + const std::vector& tensors); + +paddle::optional> +TensorToConstDenseTensorPtr( + const paddle::optional>& tensors); + std::shared_ptr TensorToSelectedRows(const Tensor& tensor); paddle::optional TensorToSelectedRows( @@ -72,6 +79,14 @@ std::vector SetKernelOutput(size_t out_size, Backend backend, std::vector* out); +std::vector SetInplaceVectorKernelOutput( + size_t out_size, Backend backend, std::vector* out); + +std::vector SetInplaceOptionalVectorKernelOutput( + size_t out_size, + Backend backend, + const paddle::optional>& out); + // For backward api std::vector SetKernelOutput(std::vector* out); diff --git a/paddle/phi/api/yaml/generator/api_base.py b/paddle/phi/api/yaml/generator/api_base.py index a1d38ca22cb61c71021fc3c62e64211ecd0c9284..2933742c7a55d0b0990c60febdce11a53c588216 100644 --- a/paddle/phi/api/yaml/generator/api_base.py +++ b/paddle/phi/api/yaml/generator/api_base.py @@ -58,9 +58,14 @@ class BaseAPI(object): def get_input_tensor_args(self, inplace_flag=False): input_args = [] inplace_type_map = { - "const Tensor&": "Tensor&", - "const paddle::optional&": "paddle::optional&", - "const std::vector&": "std::vector&" + "const Tensor&": + "Tensor&", + "const paddle::optional&": + "paddle::optional&", + "const std::vector&": + "std::vector&", + "const paddle::optional>&": + "paddle::optional>&" } for name in self.inputs['names']: name = name.split('@')[0] @@ -595,9 +600,16 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d if input_name in self.optional_vars: if self.inputs['input_info'][ input_name] == "const paddle::optional>&": - input_name_tensor_map[input_name].append( - (f"{PREFIX_TENSOR_NAME}{input_name}_vec", True)) - input_tensor_code = input_tensor_code + f""" + if input_name in self.inplace_map.values(): + input_name_tensor_map[input_name].append( + (f"{PREFIX_TENSOR_NAME}{input_name}", True)) + input_tensor_code = input_tensor_code + f""" +{code_indent} paddle::optional> {PREFIX_TENSOR_NAME}{input_name} = TensorToConstDenseTensorPtr({input_name});""" + else: + input_name_tensor_map[input_name].append( + (f"{PREFIX_TENSOR_NAME}{input_name}_vec", + True)) + input_tensor_code = input_tensor_code + f""" {code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); {code_indent} paddle::optional> {PREFIX_TENSOR_NAME}{input_name}; {code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_vec){{ @@ -622,9 +634,16 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d elif self.inputs['input_info'][ input_name] == "const std::vector&": - input_name_tensor_map[input_name].append( - (f"{PREFIX_TENSOR_NAME}{input_name}_vec", True)) - input_tensor_code = input_tensor_code + f""" + if input_name in self.inplace_map.values(): + input_name_tensor_map[input_name].append( + (f"{PREFIX_TENSOR_NAME}{input_name}", True)) + input_tensor_code = input_tensor_code + f""" +{code_indent} std::vector {PREFIX_TENSOR_NAME}{input_name} = TensorToConstDenseTensorPtr({input_name});""" + else: + input_name_tensor_map[input_name].append( + (f"{PREFIX_TENSOR_NAME}{input_name}_vec", + True)) + input_tensor_code = input_tensor_code + f""" {code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({kernel_param.index(input_name)}), {trans_flag}); {code_indent} std::vector {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size()); {code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{ @@ -699,19 +718,23 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d {code_indent} ddims_vec.clear();""" for input_tensor, is_vector in input_name_tensor_map[input_name]: if is_vector: + input_tensor_truncate = input_tensor[:-4] + if input_name in self.inplace_map.values(): + input_tensor_truncate = input_tensor + if input_name in self.optional_vars: input_tensor_code = input_tensor_code + f""" -{code_indent} if ({input_tensor[:-4]}){{ -{code_indent} ddims_vec.reserve({input_tensor[:-4]}->size()); -{code_indent} for (size_t i = 0; i < {input_tensor[:-4]}->size(); ++i) {{ -{code_indent} ddims_vec.emplace_back((*{input_tensor[:-4]}->at(i)).dims()); +{code_indent} if ({input_tensor_truncate}){{ +{code_indent} ddims_vec.reserve({input_tensor_truncate}->size()); +{code_indent} for (size_t i = 0; i < {input_tensor_truncate}->size(); ++i) {{ +{code_indent} ddims_vec.emplace_back((*{input_tensor_truncate}->at(i)).dims()); {code_indent} }} {code_indent} }}""" else: input_tensor_code = input_tensor_code + f""" -{code_indent} ddims_vec.reserve({input_tensor[:-4]}.size()); -{code_indent} for (size_t i = 0; i < {input_tensor[:-4]}.size(); ++i) {{ -{code_indent} ddims_vec.emplace_back((*{input_tensor[:-4]}[i]).dims()); +{code_indent} ddims_vec.reserve({input_tensor_truncate}.size()); +{code_indent} for (size_t i = 0; i < {input_tensor_truncate}.size(); ++i) {{ +{code_indent} ddims_vec.emplace_back((*{input_tensor_truncate}[i]).dims()); {code_indent} }}""" else: input_tensor_code = input_tensor_code + f""" diff --git a/paddle/phi/api/yaml/generator/api_gen.py b/paddle/phi/api/yaml/generator/api_gen.py index 1eb030f7f9b6f3d502eace6ee046dc1c0a58acdc..d83bb3f14dcc998196fef954f2b21f16785c315f 100644 --- a/paddle/phi/api/yaml/generator/api_gen.py +++ b/paddle/phi/api/yaml/generator/api_gen.py @@ -179,8 +179,7 @@ class ForwardAPI(BaseAPI): for out_name in self.outputs['names']: if out_name in self.inplace_map: - output_create = output_create + self.inplace_map[ - out_name] + ', ' + output_create += self.inplace_map[out_name] + ', ' else: output_create += 'Tensor(), ' output_create = output_create[:-2] + '};' @@ -200,6 +199,13 @@ class ForwardAPI(BaseAPI): if out_dtype_list[i] == 'std::vector': assert self.outputs['out_size_expr'][i] is not None, \ f"{self.api}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." + # Special case for inplace vector and inplace optional + if self.outputs['names'][i] in self.inplace_map: + set_out_func = "SetInplaceVectorKernelOutput" + if self.inplace_map[self.outputs['names'] + [i]] in self.optional_vars: + set_out_func = "SetInplaceOptionalVectorKernelOutput" + get_out_code = f"std::get<{i}>(api_output)" output_create = output_create + f""" {code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, {get_out_code});""" diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml index e68e22965d9f1300f8b3b86db426ee237c0f6b6d..d4b7c6dcff4c1c7a5e50197ed921b74721ff57c6 100755 --- a/paddle/phi/api/yaml/legacy_api.yaml +++ b/paddle/phi/api/yaml/legacy_api.yaml @@ -1780,6 +1780,17 @@ func : mean_all backward : mean_all_grad +- api : merged_momentum_ + args : (Tensor[] param, Tensor[] grad, Tensor[] velocity, Tensor[] learning_rate, Tensor[] master_param, float mu, bool use_nesterov = false, str[] regularization_method = {}, float[] regularization_coeff = {}, bool multi_precision = false, float rescale_grad = 1.0f) + output : Tensor[](param_out){param.size()}, Tensor[](velocity_out){param.size()}, Tensor[](master_param_out){param.size()} + infer_meta : + func : MergedMomentumInferMeta + optional: master_param + kernel : + func : merged_momentum + data_type : param + inplace : (param -> param_out), (velocity -> velocity_out), (master_param -> master_param_out) + - api : meshgrid args : (Tensor[] inputs) output : Tensor[]{inputs.size()} diff --git a/python/paddle/optimizer/momentum.py b/python/paddle/optimizer/momentum.py index 2839b80b4e5a588a7dcf3c6212cee1d8ed81bfc2..3e53619c4cbcceca312bfc570313218606f630cd 100644 --- a/python/paddle/optimizer/momentum.py +++ b/python/paddle/optimizer/momentum.py @@ -473,17 +473,28 @@ class Momentum(Optimizer): find_master = self._multi_precision and key == 'FP16_LODTensor' if framework._non_static_mode(): - _, _, _ = _C_ops.merged_momentum( - self._param_dict[key], grad_dict[key], - self._velocity_dict[key], lr_dict[key], - self._master_weight_dict[key], self._param_dict[key], - self._velocity_dict[key], self._master_weight_dict[key], - 'mu', self._momentum, 'use_nesterov', - self._use_nesterov, 'regularization_method', - self._regularization_method_dict[key], - 'regularization_coeff', - self._regularization_coeff_dict[key], 'multi_precision', - find_master) + if in_dygraph_mode(): + _, _, _ = _C_ops.final_state_merged_momentum_( + self._param_dict[key], grad_dict[key], + self._velocity_dict[key], lr_dict[key], + self._master_weight_dict[key], self._momentum, + self._use_nesterov, + self._regularization_method_dict[key], + self._regularization_coeff_dict[key], find_master, + self._rescale_grad) + else: + _, _, _ = _C_ops.merged_momentum( + self._param_dict[key], grad_dict[key], + self._velocity_dict[key], lr_dict[key], + self._master_weight_dict[key], + self._param_dict[key], self._velocity_dict[key], + self._master_weight_dict[key], 'mu', self._momentum, + 'use_nesterov', self._use_nesterov, + 'regularization_method', + self._regularization_method_dict[key], + 'regularization_coeff', + self._regularization_coeff_dict[key], + 'multi_precision', find_master) else: inputs = { "Param": self._param_dict[key],