From aa45f931ebd7233a0c91c9d8085113d62e46cb22 Mon Sep 17 00:00:00 2001 From: zyfncg Date: Wed, 29 Jun 2022 19:12:39 +0800 Subject: [PATCH] Support code auto-gene for optimizer api in yaml (#43915) * support complexd selected_rows kernel in yaml * support configuring optimizer api in yaml * fix data transform bug --- paddle/phi/api/lib/CMakeLists.txt | 6 +- paddle/phi/api/lib/api_custom_impl.cc | 384 ------------------ paddle/phi/api/lib/api_custom_impl.h | 25 -- paddle/phi/api/lib/api_gen_utils.cc | 10 +- paddle/phi/api/lib/api_gen_utils.h | 6 +- python/paddle/fluid/optimizer.py | 4 +- python/paddle/optimizer/adam.py | 2 +- python/paddle/optimizer/sgd.py | 4 +- python/paddle/utils/code_gen/api_base.py | 285 ++++++------- python/paddle/utils/code_gen/api_gen.py | 46 ++- .../paddle/utils/code_gen/backward_api_gen.py | 18 +- python/paddle/utils/code_gen/legacy_api.yaml | 38 +- .../paddle/utils/code_gen/sparse_api_gen.py | 28 +- .../utils/code_gen/sparse_bw_api_gen.py | 16 +- .../paddle/utils/code_gen/strings_api_gen.py | 23 +- 15 files changed, 236 insertions(+), 659 deletions(-) diff --git a/paddle/phi/api/lib/CMakeLists.txt b/paddle/phi/api/lib/CMakeLists.txt index 9ff21d48420..750614561c5 100644 --- a/paddle/phi/api/lib/CMakeLists.txt +++ b/paddle/phi/api/lib/CMakeLists.txt @@ -224,7 +224,8 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E copy_if_different ${api_source_file_tmp} ${api_source_file} COMMENT "copy_if_different ${api_header_file} ${api_source_file}" - DEPENDS ${api_yaml_file} ${api_gen_file} ${api_gen_base} + DEPENDS ${api_yaml_file} ${legacy_api_yaml_file} ${api_gen_file} + ${api_gen_base} VERBATIM) # generate backward api @@ -240,7 +241,8 @@ add_custom_command( COMMAND ${CMAKE_COMMAND} -E copy_if_different ${bw_api_source_file_tmp} ${bw_api_source_file} COMMENT "copy_if_different ${bw_api_header_file} ${bw_api_source_file}" - DEPENDS ${bw_api_yaml_file} ${bw_api_gen_file} ${api_gen_base} + DEPENDS ${bw_api_yaml_file} ${legacy_bw_api_yaml_file} ${bw_api_gen_file} + ${api_gen_base} VERBATIM) # generate sparse api diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc index 5ca7f2b51ed..b68418885ca 100644 --- a/paddle/phi/api/lib/api_custom_impl.cc +++ b/paddle/phi/api/lib/api_custom_impl.cc @@ -32,237 +32,6 @@ limitations under the License. */ namespace paddle { namespace experimental { -std::tuple adam_impl( - const Tensor& param, - const Tensor& grad, - const Tensor& learning_rate, - const Tensor& moment1, - const Tensor& moment2, - const Tensor& beta1_pow, - const Tensor& beta2_pow, - const paddle::optional& master_param, - const paddle::optional& skip_update, - const Scalar& beta1, - const Scalar& beta2, - const Scalar& epsilon, - bool lazy_mode, - int64_t min_row_size_to_use_multithread, - bool multi_precision, - bool use_global_beta_pow) { - Backend kernel_backend = Backend::UNDEFINED; - DataLayout kernel_layout = DataLayout::UNDEFINED; - DataType kernel_data_type = DataType::UNDEFINED; - if (kernel_backend == Backend::UNDEFINED || - kernel_layout == DataLayout::UNDEFINED || - kernel_data_type == DataType::UNDEFINED) { - auto kernel_key_set = ParseKernelKeyByInputArgs(param); - auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); - if (kernel_backend == Backend::UNDEFINED) { - kernel_backend = kernel_key.backend(); - } - if (kernel_layout == DataLayout::UNDEFINED) { - kernel_layout = kernel_key.layout(); - } - if (kernel_data_type == DataType::UNDEFINED) { - kernel_data_type = kernel_key.dtype(); - } - } - - std::string kernel_name = "adam"; - if (!phi::DenseTensor::classof(grad.impl().get())) { - kernel_name = "adam_dense_param_sparse_grad"; - } - - const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( - kernel_name, {kernel_backend, kernel_layout, kernel_data_type}); - VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", " - << kernel_layout << ", " << kernel_data_type << "]"; - VLOG(6) << kernel_name << " API kernel: " << kernel; - - auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); - auto input_param = PrepareData(param, kernel.InputAt(0), {}); - auto input_lr = PrepareData(learning_rate, kernel.InputAt(2), {}); - auto input_moment1 = PrepareData(moment1, kernel.InputAt(3), {}); - auto input_moment2 = PrepareData(moment2, kernel.InputAt(4), {}); - auto input_beta1_pow = PrepareData(beta1_pow, kernel.InputAt(5), {}); - auto input_beta2_pow = PrepareData(beta2_pow, kernel.InputAt(6), {}); - auto input_master_param = PrepareData(master_param, kernel.InputAt(7), {}); - auto input_skip_update = PrepareData(skip_update, kernel.InputAt(8), {}); - - std::tuple api_output; - auto kernel_out_0 = input_param.get(); - auto kernel_out_1 = input_moment1.get(); - auto kernel_out_2 = input_moment2.get(); - auto kernel_out_3 = input_beta1_pow.get(); - auto kernel_out_4 = input_beta2_pow.get(); - phi::DenseTensor* kernel_out_5 = nullptr; - if (input_master_param) { - kernel_out_5 = input_master_param.get_ptr(); - } - - auto input_meta_ref_master_param = MakeMetaTensor(input_master_param); - - auto input_meta_ref_skip_update = MakeMetaTensor(input_skip_update); - - phi::MetaTensor meta_out_0(kernel_out_0); - phi::MetaTensor meta_out_1(kernel_out_1); - phi::MetaTensor meta_out_2(kernel_out_2); - phi::MetaTensor meta_out_3(kernel_out_3); - phi::MetaTensor meta_out_4(kernel_out_4); - phi::MetaTensor meta_out_5(kernel_out_5); - - if (phi::DenseTensor::classof(grad.impl().get())) { - auto input_grad = PrepareData(grad, kernel.InputAt(1), {}); - - phi::AdamInferMeta(MakeMetaTensor(*input_param), - MakeMetaTensor(*input_grad), - MakeMetaTensor(*input_lr), - MakeMetaTensor(*input_moment1), - MakeMetaTensor(*input_moment2), - MakeMetaTensor(*input_beta1_pow), - MakeMetaTensor(*input_beta2_pow), - input_meta_ref_master_param, - input_meta_ref_skip_update, - beta1, - beta2, - epsilon, - lazy_mode, - min_row_size_to_use_multithread, - multi_precision, - use_global_beta_pow, - &meta_out_0, - &meta_out_1, - &meta_out_2, - &meta_out_3, - &meta_out_4, - &meta_out_5); - - using kernel_signature = void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const paddle::optional&, - const paddle::optional&, - const Scalar&, - const Scalar&, - const Scalar&, - bool, - int64_t, - bool, - bool, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*); - auto* kernel_fn = kernel.GetVariadicKernelFn(); - - (*kernel_fn)(*dev_ctx, - *input_param, - *input_grad, - *input_lr, - *input_moment1, - *input_moment2, - *input_beta1_pow, - *input_beta2_pow, - input_master_param, - input_skip_update, - beta1, - beta2, - epsilon, - lazy_mode, - min_row_size_to_use_multithread, - multi_precision, - use_global_beta_pow, - kernel_out_0, - kernel_out_1, - kernel_out_2, - kernel_out_3, - kernel_out_4, - kernel_out_5); - } else { - auto input_grad = TensorToSelectedRows(grad); - - phi::AdamInferMeta(MakeMetaTensor(*input_param), - MakeMetaTensor(*input_grad), - MakeMetaTensor(*input_lr), - MakeMetaTensor(*input_moment1), - MakeMetaTensor(*input_moment2), - MakeMetaTensor(*input_beta1_pow), - MakeMetaTensor(*input_beta2_pow), - input_meta_ref_master_param, - input_meta_ref_skip_update, - beta1, - beta2, - epsilon, - lazy_mode, - min_row_size_to_use_multithread, - multi_precision, - use_global_beta_pow, - &meta_out_0, - &meta_out_1, - &meta_out_2, - &meta_out_3, - &meta_out_4, - &meta_out_5); - - using kernel_signature = void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - const phi::SelectedRows&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const paddle::optional&, - const paddle::optional&, - const Scalar&, - const Scalar&, - const Scalar&, - bool, - int64_t, - bool, - bool, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*, - phi::DenseTensor*); - auto* kernel_fn = kernel.GetVariadicKernelFn(); - - (*kernel_fn)(*dev_ctx, - *input_param, - *input_grad, - *input_lr, - *input_moment1, - *input_moment2, - *input_beta1_pow, - *input_beta2_pow, - input_master_param, - input_skip_update, - beta1, - beta2, - epsilon, - lazy_mode, - min_row_size_to_use_multithread, - multi_precision, - use_global_beta_pow, - kernel_out_0, - kernel_out_1, - kernel_out_2, - kernel_out_3, - kernel_out_4, - kernel_out_5); - } - return api_output; -} - ////////////////// Forward api impls ////////////////////// std::tuple adamw_impl( @@ -1100,159 +869,6 @@ std::tuple momentum_impl( return api_output; } -std::tuple sgd_impl( - const Tensor& param, - const Tensor& learning_rate, - const Tensor& grad, - const paddle::optional& master_param, - bool multi_precision) { - DataType kernel_data_type = ParseDataType(param); - auto kernel_key_set = ParseKernelKeyByInputArgs(param, learning_rate, grad); - auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); - VLOG(6) << "sgd API kernel key: [" << kernel_key.backend() << ", " - << kernel_key.layout() << ", " << kernel_data_type << "]"; - - const auto& param_tensor = param.impl(); - std::string kernel_name = "sgd"; - if (phi::DenseTensor::classof(param_tensor.get())) { - if (!phi::DenseTensor::classof(grad.impl().get())) { - kernel_name = "sgd_dense_param_sparse_grad"; - } - } else { - kernel_name = "sgd_sparse_param_sparse_grad"; - } - const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( - kernel_name, - {kernel_key.backend(), kernel_key.layout(), kernel_data_type}); - VLOG(6) << kernel_name << " API kernel: " << kernel; - - auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend()); - - auto in_learning_rate = - PrepareData(learning_rate, kernel.InputAt(1), {false, true, true, true}); - - std::tuple out; - std::get<0>(out) = param; - if (master_param) { - std::get<1>(out) = *master_param; - } - phi::MetaTensor meta_out_0(std::get<0>(out).impl().get()); - phi::MetaTensor meta_out_1(master_param ? std::get<1>(out).impl().get() - : nullptr); - - if (phi::DenseTensor::classof(param_tensor.get())) { - auto in_param = PrepareData(param, kernel.InputAt(0), {}); - auto in_master_param_opt = PrepareData(master_param, kernel.InputAt(3), {}); - auto master_param_meta_opt = MakeMetaTensor(in_master_param_opt); - - phi::DenseTensor* kernel_out_0 = - SetKernelOutput(kernel_key.backend(), &std::get<0>(out)); - phi::DenseTensor* kernel_out_1 = - master_param - ? static_cast(std::get<1>(out).impl().get()) - : nullptr; - - if (phi::DenseTensor::classof(grad.impl().get())) { - auto in_grad = PrepareData(grad, kernel.InputAt(2), {}); - SgdInferMeta(MakeMetaTensor(*in_param), - MakeMetaTensor(*in_learning_rate), - MakeMetaTensor(*in_grad), - master_param_meta_opt, - multi_precision, - &meta_out_0, - &meta_out_1); - - using kernel_signature = - void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const paddle::optional&, - bool, - phi::DenseTensor*, - phi::DenseTensor*); - - auto* kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - *in_param, - *in_learning_rate, - *in_grad, - in_master_param_opt, - multi_precision, - kernel_out_0, - kernel_out_1); - } else { - auto in_grad = TensorToSelectedRows(grad); - SgdInferMeta(MakeMetaTensor(*in_param), - MakeMetaTensor(*in_learning_rate), - MakeMetaTensor(*in_grad), - master_param_meta_opt, - multi_precision, - &meta_out_0, - &meta_out_1); - - using kernel_signature = - void (*)(const platform::DeviceContext&, - const phi::DenseTensor&, - const phi::DenseTensor&, - const phi::SelectedRows&, - const paddle::optional&, - bool, - phi::DenseTensor*, - phi::DenseTensor*); - auto* kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - *in_param, - *in_learning_rate, - *in_grad, - in_master_param_opt, - multi_precision, - kernel_out_0, - kernel_out_1); - } - } else { - auto in_param = TensorToSelectedRows(param); - auto in_grad = TensorToSelectedRows(grad); - auto in_master_param_opt = TensorToSelectedRows(master_param); - auto master_param_meta = MakeMetaTensor(in_master_param_opt); - - phi::SelectedRows* kernel_out_0 = - SetSelectedRowsKernelOutput(kernel_key.backend(), &std::get<0>(out)); - phi::SelectedRows* kernel_out_1 = - master_param - ? static_cast(std::get<1>(out).impl().get()) - : nullptr; - - SgdInferMeta(MakeMetaTensor(*in_param), - MakeMetaTensor(*in_learning_rate), - MakeMetaTensor(*in_grad), - master_param_meta, - multi_precision, - &meta_out_0, - &meta_out_1); - - using kernel_signature = - void (*)(const platform::DeviceContext&, - const phi::SelectedRows&, - const phi::DenseTensor&, - const phi::SelectedRows&, - const paddle::optional&, - bool, - phi::SelectedRows*, - phi::SelectedRows*); - auto* kernel_fn = kernel.GetVariadicKernelFn(); - (*kernel_fn)(*dev_ctx, - *in_param, - *in_learning_rate, - *in_grad, - in_master_param_opt, - multi_precision, - kernel_out_0, - kernel_out_1); - } - return out; -} - ////////////////// Backward(grad) api impls ////////////////////// // TODO(chenweihang): the original sum grad op can support higher-level diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h index 22c5d193a2b..627ff2aabf1 100644 --- a/paddle/phi/api/lib/api_custom_impl.h +++ b/paddle/phi/api/lib/api_custom_impl.h @@ -31,24 +31,6 @@ namespace experimental { ////////////////// Forward api impls ////////////////////// -std::tuple adam_impl( - const Tensor& param, - const Tensor& grad, - const Tensor& learning_rate, - const Tensor& moment1, - const Tensor& moment2, - const Tensor& beta1_pow, - const Tensor& beta2_pow, - const paddle::optional& master_param, - const paddle::optional& skip_update, - const Scalar& beta1, - const Scalar& beta2, - const Scalar& epsilon, - bool lazy_mode, - int64_t min_row_size_to_use_multithread, - bool multi_precision, - bool use_global_beta_pow); - std::tuple adamw_impl( const Tensor& param, const Tensor& grad, @@ -132,13 +114,6 @@ std::tuple momentum_impl( bool multi_precision, float rescale_grad); -std::tuple sgd_impl( - const Tensor& param, - const Tensor& learning_rate, - const Tensor& grad, - const paddle::optional& master_param, - bool multi_precision); - ////////////////// Backward(grad) api impls ////////////////////// void add_n_grad_impl(const std::vector& x, diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc index 633bb1a32a1..9d72a23cb54 100644 --- a/paddle/phi/api/lib/api_gen_utils.cc +++ b/paddle/phi/api/lib/api_gen_utils.cc @@ -62,7 +62,7 @@ std::shared_ptr TensorToStringTensor(const Tensor& tensor) { /* ----------------- for infer_meta --------------------- */ -phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor) { +phi::MetaTensor MakeMetaTensor(const phi::TensorBase& tensor) { return phi::MetaTensor(tensor); } @@ -94,10 +94,6 @@ std::vector MakeMetaTensor( return meta_tensors; } -phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor) { - return phi::MetaTensor(tensor); -} - phi::MetaTensor MakeMetaTensor( const paddle::optional& tensor) { if (tensor) { @@ -106,10 +102,6 @@ phi::MetaTensor MakeMetaTensor( return phi::MetaTensor(); } -phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor) { - return phi::MetaTensor(tensor); -} - /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out) { diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h index 83656a7b528..fe934481bcd 100644 --- a/paddle/phi/api/lib/api_gen_utils.h +++ b/paddle/phi/api/lib/api_gen_utils.h @@ -47,7 +47,7 @@ std::shared_ptr TensorToStringTensor(const Tensor& tensor); /* ----------------- for infer_meta --------------------- */ -phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor); +phi::MetaTensor MakeMetaTensor(const phi::TensorBase& tensor); phi::MetaTensor MakeMetaTensor( const paddle::optional& tensor); @@ -58,13 +58,9 @@ std::vector MakeMetaTensor( std::vector MakeMetaTensor( const std::vector& tensors); -phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor); - phi::MetaTensor MakeMetaTensor( const paddle::optional& tensor); -phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor); - /* ------------------ for output ----------------------- */ phi::DenseTensor* SetKernelOutput(Backend backend, Tensor* out); diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index 20e39e89f30..315382262a0 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -1370,8 +1370,8 @@ class SGDOptimizer(Optimizer): lr = self._create_param_lr(param_and_grad) if in_dygraph_mode(): - _C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1], - master_weight, find_master) + _C_ops.final_state_sgd_(param_and_grad[0], lr, param_and_grad[1], + master_weight, find_master) return None if _in_legacy_dygraph(): _C_ops.sgd(param_and_grad[0], lr, param_and_grad[1], master_weight, diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py index ac2685972d1..14db8a620ac 100644 --- a/python/paddle/optimizer/adam.py +++ b/python/paddle/optimizer/adam.py @@ -342,7 +342,7 @@ class Adam(Optimizer): _beta2 = self._beta2 if not isinstance( self._beta2, Variable) else self._beta2.numpy().item(0) - _, _, _, _, _, _ = _C_ops.final_state_adam( + _, _, _, _, _, _ = _C_ops.final_state_adam_( param_and_grad[0], param_and_grad[1], lr, moment1, moment2, beta1_pow_acc, beta2_pow_acc, master_weight, found_inf, _beta1, _beta2, self._epsilon, self._lazy_mode, 1000, find_master, diff --git a/python/paddle/optimizer/sgd.py b/python/paddle/optimizer/sgd.py index 60b5e385761..a6b23bbefaf 100644 --- a/python/paddle/optimizer/sgd.py +++ b/python/paddle/optimizer/sgd.py @@ -143,8 +143,8 @@ class SGD(Optimizer): lr = self._create_param_lr(param_and_grad) if in_dygraph_mode(): - _C_ops.final_state_sgd(param_and_grad[0], lr, param_and_grad[1], - master_weight, find_master) + _C_ops.final_state_sgd_(param_and_grad[0], lr, param_and_grad[1], + master_weight, find_master) return None if _in_legacy_dygraph(): _C_ops.sgd(param_and_grad[0], lr, param_and_grad[1], master_weight, diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py index 41e0d2de5c0..aacb4ce55be 100644 --- a/python/paddle/utils/code_gen/api_base.py +++ b/python/paddle/utils/code_gen/api_base.py @@ -45,9 +45,6 @@ class BaseAPI(object): self.infer_meta = self.parse_infer_meta( api_item_yaml['infer_meta']) self.kernel = self.parse_kernel(api_item_yaml['kernel']) - self.support_selected_rows_kernel = False if len( - self.kernel['func'] - ) == 1 or not self.kernel['func'][1].endswith('_sr') else True self.data_transform = self.parse_data_transform(api_item_yaml) self.inplace_map, self.view_map = {}, {} @@ -61,6 +58,7 @@ class BaseAPI(object): input_args = [] inplace_type_map = { "const Tensor&": "Tensor&", + "const paddle::optional&": "paddle::optional&", "const std::vector&": "std::vector&" } for name in self.inputs['names']: @@ -285,6 +283,17 @@ class BaseAPI(object): tmp_in_out_list = in_out_str[1:-1].split('->') inputs = [item.strip() for item in tmp_in_out_list[0].split(',')] outputs = [item.strip() for item in tmp_in_out_list[1].split(',')] + + # check the tensor type + for item in inputs: + assert item in [ + 'dense', 'selected_rows', 'sparse_coo', 'sparse_csr' + ], f"{self.api} : Invalid input tensor type ('{item}'), here we only support 'dense', 'selected_rows', 'sparse_coo' and 'sparse_csr'." + for item in outputs: + assert item in [ + 'dense', 'selected_rows', 'sparse_coo', 'sparse_csr' + ], f"{self.api} : Invalid output tensor type ('{item}'), here we only support 'dense', 'selected_rows', 'sparse_coo' and 'sparse_csr'." + return (inputs, outputs) for func_item in kernel_funcs: @@ -440,11 +449,6 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d kernel_select_code = kernel_key_item_init + kernel_select_code if len(input_names) > 0: - if self.support_selected_rows_kernel: - kernel_select_code = kernel_select_code + f""" - KernelType kernel_type = ParseKernelTypeByInputArgs({", ".join(input_names)}); -""" - kernel_select_code = kernel_select_code + f""" if (kernel_backend == Backend::UNDEFINED || kernel_layout == DataLayout::UNDEFINED @@ -528,8 +532,8 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d {code_indent} phi::{infer_meta['func']}({param_code}); """ - def get_kernel_args(self, code_indent): - input_trans_map = { + def get_kernel_args(self, kernel_tensor_type=None, code_indent=''): + dense_input_trans_map = { 'const Tensor&': 'const phi::DenseTensor&', 'const std::vector&': @@ -541,10 +545,17 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d 'const paddle::optional>&': 'paddle::optional&>' } - out_trans_map = { + dense_out_trans_map = { 'Tensor': 'phi::DenseTensor*', 'std::vector': 'std::vector&' } + sr_input_trans_map = { + 'const Tensor&': + 'const phi::SelectedRows&', + 'const paddle::optional&': + 'const paddle::optional&' + } + sr_out_trans_map = {'Tensor': 'phi::SelectedRows*'} input_names = self.inputs['names'] input_infos = self.inputs['input_info'] kernel_args_type_list = ['const platform::DeviceContext&'] @@ -558,127 +569,72 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d for i, input_name in enumerate(input_names): # set input code if input_name in kernel_param: - trans_flag = "{}" - if input_name in self.data_transform['skip_transform']: - trans_flag = "{true}" - elif input_name in self.data_transform['support_trans_dtype']: - trans_flag = "{false, true}" - if input_name in self.optional_vars: - input_tensor_code = input_tensor_code + f""" + # input is dense tensor + if kernel_tensor_type is None or kernel_tensor_type[0][ + kernel_param.index(input_name)] == 'dense': + trans_flag = "{}" + if input_name in self.data_transform['skip_transform']: + trans_flag = "{true}" + elif input_name in self.data_transform[ + 'support_trans_dtype']: + trans_flag = "{false, true}" + if input_name in self.optional_vars: + input_tensor_code = input_tensor_code + f""" {code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag});""" - else: - if self.inputs['input_info'][input_name] == "const Tensor&": - input_tensor_code = input_tensor_code + f""" + else: + if self.inputs['input_info'][ + input_name] == "const Tensor&": + input_tensor_code = input_tensor_code + f""" {code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag});""" - elif self.inputs['input_info'][ - input_name] == "const std::vector&": - input_tensor_code = input_tensor_code + f""" + elif self.inputs['input_info'][ + input_name] == "const std::vector&": + input_tensor_code = input_tensor_code + f""" {code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_vec = PrepareData({input_name}, kernel.InputAt({i}), {trans_flag}); {code_indent} std::vector {PREFIX_TENSOR_NAME}{input_name}({PREFIX_TENSOR_NAME}{input_name}_vec->size()); {code_indent} for (size_t i = 0; i < {PREFIX_TENSOR_NAME}{input_name}.size(); ++i) {{ {code_indent} {PREFIX_TENSOR_NAME}{input_name}[i] = &{PREFIX_TENSOR_NAME}{input_name}_vec->at(i); {code_indent} }}""" - else: - # do nothing - pass - else: - if input_name in self.optional_vars: + else: + # do nothing + pass + else: # input is selected_rows input_tensor_code = input_tensor_code + f""" -{code_indent} {input_trans_map[input_infos[input_name]]} {PREFIX_TENSOR_NAME}{input_name}(paddle::none); -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_ptr = TensorToDenseTensor({input_name}); -{code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_ptr) {{ -{code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::make_optional(*{PREFIX_TENSOR_NAME}{input_name}_ptr); -{code_indent} }}""" +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToSelectedRows({input_name});""" + else: + if input_name in self.infer_meta['param']: + if input_name in self.optional_vars: + input_tensor_code = input_tensor_code + f""" +{code_indent} paddle::optional {PREFIX_TENSOR_NAME}{input_name} = {input_name} ? paddle::optional(*{input_name}->impl()) : paddle::none;""" - else: - input_tensor_code = input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToDenseTensor({input_name});""" + else: + input_tensor_code = input_tensor_code + f""" +{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = {input_name}.impl();""" - kernel_args = "*dev_ctx, " + kernel_args = ["*dev_ctx"] for param in kernel_param: if param in input_names: if param in self.optional_vars: - kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", " + kernel_args.append(PREFIX_TENSOR_NAME + param) else: if self.inputs['input_info'][param] == "const Tensor&": - kernel_args = kernel_args + "*" + PREFIX_TENSOR_NAME + param + ", " + kernel_args.append("*" + PREFIX_TENSOR_NAME + param) elif self.inputs['input_info'][ param] == "const std::vector&": - kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", " + kernel_args.append(PREFIX_TENSOR_NAME + param) else: # do nothing pass - kernel_in_type = input_trans_map[input_infos[param]] - kernel_args_type_list.append(kernel_in_type) - elif param in attr_names: - # set attr for kernel_context - if 'IntArray' in self.attrs['attr_info'][param][0]: - kernel_args_type_list.append('const phi::IntArray&') - param = 'phi::IntArray(' + param + ')' - elif 'Scalar' in self.attrs['attr_info'][param][0]: - kernel_args_type_list.append('const phi::Scalar&') - param = 'phi::Scalar(' + param + ')' - else: + # input is dense tensor + if kernel_tensor_type is None or kernel_tensor_type[0][ + kernel_param.index(param)] == 'dense': kernel_args_type_list.append( - self.attrs['attr_info'][param][0]) - kernel_args = kernel_args + param + ", " - elif isinstance(param, bool): - kernel_args = kernel_args + str(param).lower() + ", " - else: - kernel_args = kernel_args + str(param) + ", " - - for out_type in self.outputs['types']: - kernel_args_type_list.append(out_trans_map[out_type]) - - kernel_signature = "void(*)(" + ", ".join(kernel_args_type_list) + ")" - - return input_tensor_code, kernel_args[:-2], kernel_signature - - def get_selected_rows_kernel_args(self, code_indent): - input_trans_map = { - 'const Tensor&': - 'const phi::SelectedRows&', - 'const paddle::optional&': - 'const paddle::optional&' - } - out_trans_map = {'Tensor': 'phi::SelectedRows*'} - input_names = self.inputs['names'] - input_infos = self.inputs['input_info'] - kernel_args_type_list = ['const platform::DeviceContext&'] - - attr_names = self.attrs['names'] - kernel_param = self.kernel['param'] - if kernel_param is None: - kernel_param = input_names + attr_names - - input_tensor_code = "" - for i, input_name in enumerate(input_names): - # set input code - if input_name in self.optional_vars: - input_tensor_code = input_tensor_code + f""" - -{code_indent} {input_trans_map[input_infos[input_name]]} {PREFIX_TENSOR_NAME}{input_name}(paddle::none); -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name}_ptr = TensorToSelectedRows({input_name}); -{code_indent} if ({PREFIX_TENSOR_NAME}{input_name}_ptr) {{ -{code_indent} {PREFIX_TENSOR_NAME}{input_name} = paddle::make_optional(*{PREFIX_TENSOR_NAME}{input_name}_ptr); -{code_indent} }}""" - - else: - input_tensor_code = input_tensor_code + f""" -{code_indent} auto {PREFIX_TENSOR_NAME}{input_name} = TensorToSelectedRows({input_name});""" - - kernel_args = "*dev_ctx, " - for param in kernel_param: - if param in input_names: - if param in self.optional_vars: - kernel_args = kernel_args + PREFIX_TENSOR_NAME + param + ", " - else: - kernel_args = kernel_args + "*" + PREFIX_TENSOR_NAME + param + ", " - kernel_in_type = input_trans_map[input_infos[param]] - kernel_args_type_list.append(kernel_in_type) + dense_input_trans_map[input_infos[param]]) + else: # input is selected_rows + kernel_args_type_list.append( + sr_input_trans_map[input_infos[param]]) elif param in attr_names: # set attr for kernel_context if 'IntArray' in self.attrs['attr_info'][param][0]: @@ -690,18 +646,22 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d else: kernel_args_type_list.append( self.attrs['attr_info'][param][0]) - kernel_args = kernel_args + param + ", " + kernel_args.append(param) elif isinstance(param, bool): - kernel_args = kernel_args + str(param).lower() + ", " + kernel_args.append(str(param).lower()) else: - kernel_args = kernel_args + str(param) + ", " + kernel_args.append(str(param)) - for out_type in self.outputs['types']: - kernel_args_type_list.append(out_trans_map[out_type]) + for i, out_type in enumerate(self.outputs['types']): + # output is dense tensor + if kernel_tensor_type is None or kernel_tensor_type[1][i] == 'dense': + kernel_args_type_list.append(dense_out_trans_map[out_type]) + else: # output is selected_rows + kernel_args_type_list.append(sr_out_trans_map[out_type]) kernel_signature = "void(*)(" + ", ".join(kernel_args_type_list) + ")" - return input_tensor_code, kernel_args[:-2], kernel_signature + return input_tensor_code, ", ".join(kernel_args), kernel_signature # Override by child class def gene_return_code(self): @@ -709,25 +669,27 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d # Override by child class def gene_output(self, - output_type_list, - set_out_func, - code_indent, + out_dtype_list, + out_tensor_type_list=None, + code_indent='', inplace_flag=False): return None, None, None - def gen_dense_tensor_kernel_code(self, code_indent, inplace_flag=False): + def gen_kernel_code(self, kernel_name, code_indent, inplace_flag=False): + kernel_dispatch = self.kernel['dispatch'][kernel_name] input_tensors, kernel_args, kernel_signature = self.get_kernel_args( - code_indent) + kernel_dispatch, code_indent) + out_tensor_type_list = kernel_dispatch[1] if kernel_dispatch else None outputs_args, kernel_output_names, output_create = self.gene_output( - self.outputs['types'], 'SetKernelOutput', code_indent, inplace_flag) - api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '') + self.outputs['types'], out_tensor_type_list, code_indent, + inplace_flag) cudnn_args = '' if self.kernel[ 'use_gpudnn'] == 'false' else ', ' + self.kernel['use_gpudnn'] return f""" {code_indent} VLOG(6) << "{self.api} API kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]"; {code_indent} const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( -{code_indent} "{self.kernel['func'][0]}", {{kernel_backend, kernel_layout, kernel_data_type}}{cudnn_args}); -{code_indent} VLOG(6) << "{self.api} API kernel: " << kernel; +{code_indent} "{kernel_name}", {{kernel_backend, kernel_layout, kernel_data_type}}{cudnn_args}); +{code_indent} VLOG(6) << "{kernel_name} kernel: " << kernel; {code_indent} auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); {input_tensors} @@ -737,38 +699,42 @@ PADDLE_API {self.get_return_type(inplace_flag=True)} {api_func_name}({self.get_d {code_indent} using kernel_signature = {kernel_signature}; {code_indent} auto* kernel_fn = kernel.GetVariadicKernelFn(); {code_indent} {{ -{code_indent} paddle::platform::RecordEvent kernel_record_event(\"{api_func_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1); +{code_indent} paddle::platform::RecordEvent kernel_record_event(\"{kernel_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1); {code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); {code_indent} }} {code_indent} {self.gene_return_code()}""" - def gen_selected_rows_kernel_code(self, code_indent, inplace_flag=False): - input_tensors, kernel_args, kernel_signature = self.get_selected_rows_kernel_args( - code_indent) - outputs_args, kernel_output_names, output_create = self.gene_output( - self.outputs['types'], 'SetSelectedRowsKernelOutput', code_indent, - inplace_flag) - api_func_name = self.get_api_func_name() + ('_' if inplace_flag else '') - return f""" -{code_indent} auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError( -{code_indent} "{self.kernel['func'][1]}", {{kernel_backend, kernel_layout, kernel_data_type}}); -{code_indent} VLOG(6) << "{self.api} API SelectedRows kernel key: [" << kernel_backend << ", " << kernel_layout << ", "<< kernel_data_type << "]"; -{code_indent} VLOG(6) << "{self.api} API SelectedRows kernel: " << kernel; - -{code_indent} auto* dev_ctx = GetDeviceContextByBackend(kernel_backend); -{input_tensors} -{output_create} -{self.gene_infer_meta(kernel_output_names, code_indent)} - -{code_indent} using kernel_signature = {kernel_signature}; -{code_indent} auto* kernel_fn = kernel.GetVariadicKernelFn(); -{code_indent} {{ -{code_indent} paddle::platform::RecordEvent kernel_record_event(\"{api_func_name} compute\", paddle::platform::TracerEventType::OperatorInner, 1); -{code_indent} (*kernel_fn)({kernel_args}, {outputs_args}); -{code_indent} }} + def get_condition_code(self, kernel_name): + assert self.kernel['dispatch'][kernel_name], \ + f"{self.api} api: the tensor type of inputs and outputs for kernel isn't set, see also 'kernel:func' of 'scale' in api.yaml." + input_types = self.kernel['dispatch'][kernel_name][0] + condition_list = [] + for i, in_type in enumerate(input_types): + if in_type == "dense": + if self.inputs['names'][i] in self.optional_vars: + condition_list.append( + f"(!{self.inputs['names'][i]} || {self.inputs['names'][i]}->is_dense_tensor())" + ) + else: + condition_list.append( + f"{self.inputs['names'][i]}.is_dense_tensor()") + else: + if self.inputs['names'][i] in self.optional_vars: + condition_list.append( + f"(!{self.inputs['names'][i]} || {self.inputs['names'][i]}->is_selected_rows())" + ) + else: + condition_list.append( + f"{self.inputs['names'][i]}.is_selected_rows()") + return " && ".join(condition_list) -{code_indent} {self.gene_return_code()}""" + def gene_dispatch_code(self, kernel_name, inplace_flag=False): + return f""" + if ({self.get_condition_code(kernel_name)}) {{ +{self.gen_kernel_code(kernel_name, ' ', inplace_flag)} + }} +""" def gene_base_api_code(self, inplace_flag=False): api_func_name = self.get_api_func_name() @@ -779,21 +745,20 @@ PADDLE_API {self.get_return_type(inplace_flag)} {api_func_name}({self.get_define {self.gene_kernel_select()} """ - if self.support_selected_rows_kernel: - code_indent = ' ' + if len(self.kernel['func']) > 1: + kernel_dispatch_code = '' + for kernel_name in self.kernel['func']: + kernel_dispatch_code += self.gene_dispatch_code( + kernel_name, inplace_flag) return api_code + f""" - if(kernel_type == KernelType::DENSE_TENSOR_KENREL){{ -{self.gen_dense_tensor_kernel_code(code_indent, inplace_flag)} - }} else {{ -{self.gen_selected_rows_kernel_code(code_indent, inplace_flag)} - }} +{kernel_dispatch_code} + PADDLE_THROW(phi::errors::Unimplemented( + "The kernel of ({self.api}) for input tensors is unimplemented, please check the type of input tensors.")); }} """ - else: - code_indent = '' - return api_code + self.gen_dense_tensor_kernel_code( - code_indent, inplace_flag) + """ + return api_code + self.gen_kernel_code(self.kernel['func'][0], '', + inplace_flag) + """ } """ diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py index 7548c047ff5..a0775dd4c0a 100644 --- a/python/paddle/utils/code_gen/api_gen.py +++ b/python/paddle/utils/code_gen/api_gen.py @@ -24,6 +24,11 @@ inplace_out_type_map = { "std::vector": "std::vector&" } +inplace_optional_out_type_map = { + "Tensor": "paddle::optional&", + "std::vector": "paddle::optional>&" +} + class ForwardAPI(BaseAPI): @@ -80,7 +85,11 @@ class ForwardAPI(BaseAPI): for i, out_type in enumerate(self.outputs['types']): out_name = self.outputs['names'][i].split('@')[0] if inplace_flag and out_name in self.inplace_map: - out_type_list.append(inplace_out_type_map[out_type]) + if self.inplace_map[out_name] in self.optional_vars: + out_type_list.append( + inplace_optional_out_type_map[out_type]) + else: + out_type_list.append(inplace_out_type_map[out_type]) else: out_type_list.append(out_type) @@ -94,7 +103,11 @@ class ForwardAPI(BaseAPI): for i, out_type in enumerate(self.outputs['types']): out_name = self.outputs['names'][i].split('@')[0] if inplace_flag and out_name in self.inplace_map: - out_type_list.append(inplace_out_type_map[out_type]) + if self.inplace_map[out_name] in self.optional_vars: + out_type_list.append( + inplace_optional_out_type_map[out_type]) + else: + out_type_list.append(inplace_out_type_map[out_type]) elif self.is_dygraph_api or out_name not in self.intermediate_outs: out_type_list.append(out_type) @@ -120,16 +133,16 @@ class ForwardAPI(BaseAPI): return 'return {' + ", ".join(selected_code) + '};' def gene_output(self, - output_type_list, - set_out_func, - code_indent, + out_dtype_list, + out_tensor_type_list=None, + code_indent='', inplace_flag=False): kernel_output = "" output_names = [] output_create = "" return_type = self.get_return_type_with_intermediate(inplace_flag) - if len(output_type_list) == 1: + if len(out_dtype_list) == 1: kernel_output = 'kernel_out' output_names.append('kernel_out') inplace_assign = " = " + self.inplace_map[ @@ -137,7 +150,8 @@ class ForwardAPI(BaseAPI): 'names'][0] in self.inplace_map else "" output_create = f""" {code_indent} {return_type} api_output{inplace_assign};""" - + set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[ + 0] == 'dense' else 'SetSelectedRowsKernelOutput' if return_type == 'std::vector': assert self.outputs['out_size_expr'][0] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." @@ -155,7 +169,7 @@ class ForwardAPI(BaseAPI): {code_indent} kernel_out->ShareInplaceVersionCounterWith(*{PREFIX_TENSOR_NAME}{self.view_map[self.outputs['names'][0]]}); {code_indent} VLOG(3) << "Perform View between Output and Input Tensor, share allocation and inplace version.";""" - elif len(output_type_list) > 1: + elif len(out_dtype_list) > 1: output_create = f""" {code_indent} {return_type} api_output;""" @@ -171,19 +185,27 @@ class ForwardAPI(BaseAPI): output_create += 'Tensor(), ' output_create = output_create[:-2] + '};' - for i in range(len(output_type_list)): + for i in range(len(out_dtype_list)): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') + set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[ + i] == 'dense' else 'SetSelectedRowsKernelOutput' + + get_out_code = f"&std::get<{i}>(api_output)" + if self.outputs['names'][ + i] in self.inplace_map and self.inplace_map[ + self.outputs['names'][i]] in self.optional_vars: + get_out_code = f"std::get<{i}>(api_output).get_ptr()" - if output_type_list[i] == 'std::vector': + if out_dtype_list[i] == 'std::vector': assert self.outputs['out_size_expr'][i] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." output_create = output_create + f""" -{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &std::get<{i}>(api_output));""" +{code_indent} auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, {get_out_code});""" else: output_create = output_create + f""" -{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, &std::get<{i}>(api_output));""" +{code_indent} auto kernel_out_{i} = {set_out_func}(kernel_backend, {get_out_code});""" if not inplace_flag and self.view_map is not None and self.outputs[ 'names'][i] in self.view_map: diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py index 48bff2d1d3f..2439eff9f63 100644 --- a/python/paddle/utils/code_gen/backward_api_gen.py +++ b/python/paddle/utils/code_gen/backward_api_gen.py @@ -114,22 +114,24 @@ class BackwardAPI(BaseAPI): return 'void' def gene_output(self, - output_type_list, - set_out_func, - code_indent, + out_dtype_list, + out_tensor_type_list=None, + code_indent='', inplace_flag=False): kernel_output = "" output_names = [] output_create = "" - if len(output_type_list) == 1: + if len(out_dtype_list) == 1: kernel_output = 'kernel_out' output_names.append('kernel_out') inplace_assign = " = " + self.inplace_map[self.outputs['names'][ 0]] if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][0] in self.inplace_map else "" output_create = "" - if output_type_list[0] == 'std::vector': + set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[ + 0] == 'dense' else 'SetSelectedRowsKernelOutput' + if out_dtype_list[0] == 'std::vector': assert self.outputs['out_size_expr'] is not None, \ f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api." output_create = output_create + f""" @@ -139,11 +141,13 @@ class BackwardAPI(BaseAPI): output_create = output_create + f""" {code_indent} auto kernel_out = {set_out_func}(kernel_backend, {self.outputs['names'][0]});""" - elif len(output_type_list) > 1: + elif len(out_dtype_list) > 1: output_create = "" - for i, out_type_item in enumerate(output_type_list): + for i, out_type_item in enumerate(out_dtype_list): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') + set_out_func = 'SetKernelOutput' if out_tensor_type_list is None or out_tensor_type_list[ + i] == 'dense' else 'SetSelectedRowsKernelOutput' if out_type_item == 'Tensor': if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][i] in self.inplace_map: diff --git a/python/paddle/utils/code_gen/legacy_api.yaml b/python/paddle/utils/code_gen/legacy_api.yaml index 97faebca6a0..f5c261dc7e3 100644 --- a/python/paddle/utils/code_gen/legacy_api.yaml +++ b/python/paddle/utils/code_gen/legacy_api.yaml @@ -48,11 +48,17 @@ kernel : func : adadelta -- api : adam +- api : adam_ args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow) output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs) + infer_meta : + func : AdamInferMeta + kernel : + func : adam {dense, dense, dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense}, + adam_dense_param_sparse_grad {dense, selected_rows, dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense} + data_type : param optional : master_param, skip_update - invoke : adam_impl(param, grad, learning_rate, moment1, moment2, beta1_pow, beta2_pow, master_param, skip_update, beta1, beta2, epsilon, lazy_mode, min_row_size_to_use_multithread, multi_precision, use_global_beta_pow) + inplace : (param -> param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1_pow -> beta1_pow_out), (beta2_pow -> beta2_pow_out), (master_param -> master_param_outs) - api : adamax args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, float beta1, float beta2, float epsilon) @@ -1015,7 +1021,8 @@ infer_meta : func : IsfiniteInferMeta kernel : - func : isfinite, infinite_sr + func : isfinite {dense -> dense}, + infinite_sr {selected_rows -> selected_rows} # isinf - api : isinf @@ -1024,7 +1031,8 @@ infer_meta : func : IsfiniteInferMeta kernel : - func : isinf, isinf_sr + func : isinf {dense -> dense}, + isinf_sr {selected_rows -> selected_rows} # isnan - api : isnan @@ -1033,7 +1041,8 @@ infer_meta : func : IsfiniteInferMeta kernel : - func : isnan, isnan_sr + func : isnan {dense -> dense}, + isnan_sr {selected_rows -> selected_rows} - api : kldiv_loss args : (Tensor x, Tensor label, str reduction) @@ -1774,7 +1783,8 @@ func : UnchangedInferMeta param : [x] kernel : - func : scale, scale_sr + func : scale {dense -> dense}, + scale_sr {selected_rows -> selected_rows} inplace : (x -> out) backward : scale_grad @@ -1829,11 +1839,20 @@ func : selu backward : selu_grad -- api : sgd +- api : sgd_ args : (Tensor param, Tensor learning_rate, Tensor grad, Tensor master_param, bool multi_precision) output : Tensor(param_out), Tensor(master_param_out) - invoke : sgd_impl(param, learning_rate, grad, master_param, multi_precision) + infer_meta : + func : SgdInferMeta + kernel : + func : sgd {dense, dense, dense, dense -> dense, dense}, + sgd_dense_param_sparse_grad {dense, dense, selected_rows, dense -> dense, dense}, + sgd_sparse_param_sparse_grad {selected_rows, dense, selected_rows, selected_rows -> selected_rows, selected_rows} + data_type : param + data_transform : + support_trans_dtype : learning_rate optional : master_param + inplace : (param -> param_out), (master_param -> master_param_out) - api : shape args : (Tensor input) @@ -1841,7 +1860,8 @@ infer_meta : func : ShapeInferMeta kernel : - func : shape, shape_sr + func : shape {dense -> dense}, + shape_sr {selected_rows -> selected_rows} data_transform: skip_transform : input diff --git a/python/paddle/utils/code_gen/sparse_api_gen.py b/python/paddle/utils/code_gen/sparse_api_gen.py index 4c2f453e533..aa087c9136b 100644 --- a/python/paddle/utils/code_gen/sparse_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_api_gen.py @@ -31,18 +31,10 @@ class SparseAPI(ForwardAPI): {super(SparseAPI, self).gene_api_declaration()} """ - def get_kernel_tensor_out_type(self, output_name): - sparse_type = 'TensorType::DENSE_TENSOR' - if output_name.endswith('@SparseCooTensor'): - sparse_type = 'TensorType::SPARSE_COO' - elif output_name.endswith('@SparseCsrTensor'): - sparse_type = 'TensorType::SPARSE_CSR' - return sparse_type - def gene_output(self, - output_type_list, - set_out_func, - code_indent, + out_dtype_list, + out_tensor_type_list=None, + code_indent='', inplace_flag=False): kernel_output = "" output_names = [] @@ -54,7 +46,7 @@ class SparseAPI(ForwardAPI): 'sparse_csr': 'TensorType::SPARSE_CSR' } - if len(output_type_list) == 1: + if len(out_dtype_list) == 1: kernel_output = 'kernel_out' output_names.append('kernel_out') inplace_assign = " = " + self.inplace_map[self.outputs['names'][ @@ -62,9 +54,9 @@ class SparseAPI(ForwardAPI): 'names'][0] in self.inplace_map else "" output_create = f""" {return_type} api_output{inplace_assign}; - auto* kernel_out = {set_out_func}(&api_output, {output_type_map[output_type_list[0]]});""" + auto* kernel_out = SetSparseKernelOutput(&api_output, {output_type_map[out_dtype_list[0]]});""" - elif len(output_type_list) > 1: + elif len(out_dtype_list) > 1: output_create = f""" {return_type} api_output;""" @@ -80,11 +72,11 @@ class SparseAPI(ForwardAPI): output_create += 'Tensor(), ' output_create = output_create[:-2] + '};' - for i in range(len(output_type_list)): + for i in range(len(out_dtype_list)): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') output_create = output_create + f""" - auto* kernel_out_{i} = {set_out_func}(&std::get<{i}>(api_output), {output_type_map[output_type_list[i]]});""" + auto* kernel_out_{i} = SetSparseKernelOutput(&std::get<{i}>(api_output), {output_type_map[out_dtype_list[i]]});""" kernel_output = kernel_output[:-2] else: @@ -148,8 +140,7 @@ class SparseAPI(ForwardAPI): def gen_sparse_kernel_code(self, kernel_name, inplace_flag=False): _, kernel_output_names, output_create = self.gene_output( - self.kernel['dispatch'][kernel_name][1], 'SetSparseKernelOutput', - '', inplace_flag) + self.kernel['dispatch'][kernel_name][1], None, '', inplace_flag) kernel_context_code = self.gen_sparse_kernel_context( kernel_output_names) @@ -189,7 +180,6 @@ class SparseAPI(ForwardAPI): return " && ".join(condition_list) def gene_dispatch_code(self, kernel_name, inplace_flag=False): - dispatch_code = "" return f""" if ({self.get_condition_code(kernel_name)}) {{ {self.gen_sparse_kernel_code(kernel_name, inplace_flag)} diff --git a/python/paddle/utils/code_gen/sparse_bw_api_gen.py b/python/paddle/utils/code_gen/sparse_bw_api_gen.py index 3e0abead036..834e3d45d0b 100644 --- a/python/paddle/utils/code_gen/sparse_bw_api_gen.py +++ b/python/paddle/utils/code_gen/sparse_bw_api_gen.py @@ -48,9 +48,9 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI): return BackwardAPI.get_define_args(self) def gene_output(self, - output_type_list, - set_out_func, - code_indent, + out_dtype_list, + out_tensor_type_list=None, + code_indent='', inplace_flag=False): kernel_output = "" output_names = [] @@ -61,19 +61,19 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI): 'sparse_csr': 'TensorType::SPARSE_CSR' } - if len(output_type_list) == 1: + if len(out_dtype_list) == 1: kernel_output = 'kernel_out' output_names.append('kernel_out') inplace_assign = " = " + self.inplace_map[self.outputs['names'][ 0]] if inplace_flag and self.inplace_map is not None and self.outputs[ 'names'][0] in self.inplace_map else "" output_create = f""" - auto kernel_out = {set_out_func}({self.outputs['names'][0]}, {output_type_map[output_type_list[0]]});""" + auto kernel_out = SetSparseKernelOutput({self.outputs['names'][0]}, {output_type_map[out_dtype_list[0]]});""" - elif len(output_type_list) > 1: + elif len(out_dtype_list) > 1: output_create = "" - for i, out_type_item in enumerate(output_type_list): + for i, out_type_item in enumerate(out_dtype_list): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') if inplace_flag and self.inplace_map is not None and self.outputs[ @@ -82,7 +82,7 @@ class SparseBackwardAPI(SparseAPI, BackwardAPI): *{self.outputs['names'][i]} = {self.inplace_map[self.outputs['names'][i]]};""" output_create = output_create + f""" - auto kernel_out_{i} = {set_out_func}({self.outputs['names'][i]}, {output_type_map[output_type_list[i]]});""" + auto kernel_out_{i} = SetSparseKernelOutput({self.outputs['names'][i]}, {output_type_map[out_dtype_list[i]]});""" kernel_output = kernel_output[:-2] else: diff --git a/python/paddle/utils/code_gen/strings_api_gen.py b/python/paddle/utils/code_gen/strings_api_gen.py index 5b29c6076b4..1f3ec587d74 100644 --- a/python/paddle/utils/code_gen/strings_api_gen.py +++ b/python/paddle/utils/code_gen/strings_api_gen.py @@ -51,16 +51,16 @@ class StringsAPI(ForwardAPI): return tensor_type_dict[kernel_tensor_out_type] def gene_output(self, - output_type_list, - set_out_func, - code_indent, + out_dtype_list, + out_tensor_type_list=None, + code_indent='', inplace_flag=False): kernel_output = "" output_names = [] output_create = "" return_type = self.get_return_type(inplace_flag) - if len(output_type_list) == 1: + if len(out_dtype_list) == 1: kernel_output = 'kernel_out' output_names.append('kernel_out') kernel_tensor_out_type = self.get_kernel_tensor_out_type( @@ -71,13 +71,13 @@ class StringsAPI(ForwardAPI): 'names'][0] in self.inplace_map else "" output_create = f""" {return_type} api_output{inplace_assign}; - {tensor_type}* kernel_out = dynamic_cast<{tensor_type}*>({set_out_func}(kernel_backend, &api_output, {kernel_tensor_out_type}));""" + {tensor_type}* kernel_out = dynamic_cast<{tensor_type}*>(SetStringsKernelOutput(kernel_backend, &api_output, {kernel_tensor_out_type}));""" - elif len(output_type_list) > 1: + elif len(out_dtype_list) > 1: output_create = f""" {return_type} api_output;""" - for i in range(len(output_type_list)): + for i in range(len(out_dtype_list)): kernel_output = kernel_output + f'kernel_out_{i}, ' output_names.append(f'kernel_out_{i}') kernel_tensor_out_type = self.get_kernel_tensor_out_type( @@ -89,7 +89,7 @@ class StringsAPI(ForwardAPI): std::get<{i}>(api_output) = {self.inplace_map[self.outputs['names'][i]]};""" output_create = output_create + f""" - {tensor_type}* kernel_out_{i} = dynamic_cast<{tensor_type}*>({set_out_func}(&std::get<{i}>(api_output), {kernel_tensor_out_type}));""" + {tensor_type}* kernel_out_{i} = dynamic_cast<{tensor_type}*>(SetStringsKernelOutput(&std::get<{i}>(api_output), {kernel_tensor_out_type}));""" kernel_output = kernel_output[:-2] else: @@ -174,7 +174,7 @@ class StringsAPI(ForwardAPI): input_tensors, kernel_args, kernel_signature = self.get_kernel_args( code_indent) outputs_args, kernel_output_names, output_create = self.gene_output( - self.outputs['types'], 'SetStringsKernelOutput', '', inplace_flag) + self.outputs['types'], None, '', inplace_flag) return f""" // 1. Get kernel signature and kernel @@ -252,11 +252,6 @@ class StringsAPI(ForwardAPI): kernel_select_code = kernel_key_item_init + kernel_select_code if len(input_names) > 0: - if self.support_selected_rows_kernel: - kernel_select_code = kernel_select_code + f""" - KernelType kernel_type = ParseKernelTypeByInputArgs({", ".join(input_names)}); -""" - kernel_select_code = kernel_select_code + f""" auto kernel_key_set = ParseKernelKeyByInputArgs({kernel_select_args}); auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey(); -- GitLab