From dead24dd4cb86e13b34c941bd8c87b968f134eee Mon Sep 17 00:00:00 2001
From: zyfncg
Date: Tue, 12 Apr 2022 10:03:43 +0800
Subject: [PATCH] [Phi] Support setting size of vector for out in yaml (#41576)

* support setting vector out size in yaml

* support setting size of vector for out in yaml
---
 .../final_state_generator/codegen_utils.py    |   2 +-
 paddle/phi/api/lib/api_custom_impl.cc         | 440 +-----
 paddle/phi/api/lib/api_custom_impl.h          |  51 +-
 paddle/phi/api/lib/api_gen_utils.cc           |  10 +
 paddle/phi/api/lib/api_gen_utils.h            |   3 +
 paddle/phi/common/int_array.h                 |   2 +
 python/paddle/utils/code_gen/api.yaml         |  14 +-
 python/paddle/utils/code_gen/api_base.py      |  72 +--
 python/paddle/utils/code_gen/api_gen.py       |  20 +-
 python/paddle/utils/code_gen/backward.yaml    |  42 +-
 .../paddle/utils/code_gen/backward_api_gen.py |  23 +-
 11 files changed, 150 insertions(+), 529 deletions(-)

diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
index 21b6b882a6f..b2cdd947aaf 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/codegen_utils.py
@@ -226,7 +226,7 @@ def ParseYamlReturns(string):
     returns = [x.strip() for x in string.strip().split(",")]
 
     for i in range(len(returns)):
-        ret = returns[i]
+        ret = returns[i].split("{")[0].strip()
         ret_name = ""
         if "(" in ret and ")" in ret:
diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc
index 637c3b9107a..0f1cbc3f191 100644
--- a/paddle/phi/api/lib/api_custom_impl.cc
+++ b/paddle/phi/api/lib/api_custom_impl.cc
@@ -297,10 +297,10 @@ std::vector<Tensor> split_impl(const Tensor& x,
 
   // Calculate the number of out tensors
   size_t out_number;
-  if (num_or_sections.GetData().size() == 1) {
+  if (num_or_sections.size() == 1) {
     out_number = num_or_sections.GetData()[0];
   } else {
-    out_number = num_or_sections.GetData().size();
+    out_number = num_or_sections.size();
   }
 
   std::vector<Tensor> out;
@@ -475,54 +475,6 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
   return api_output;
 }
 
-std::vector<Tensor> unbind_impl(const Tensor& input, int axis) {
-  auto kernel_key_set = ParseKernelKeyByInputArgs(input);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-
-  Backend kernel_backend = kernel_key.backend();
-  DataLayout kernel_layout = kernel_key.layout();
-  DataType kernel_data_type = kernel_key.dtype();
-
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "unbind", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "unbind API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "unbind API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto dense_input = PrepareData(input, kernel.InputAt(0), {});
-
-  // Calculate the number of out tensors
-  auto input_shape = input.dims();
-  if (axis < 0) {
-    axis = input_shape.size() + axis;
-  }
-  auto out_num = input_shape[axis];
-
-  std::vector<Tensor> out;
-  auto dense_outs = SetKernelOutput(out_num, kernel_backend, &out);
-  std::vector<phi::MetaTensor> meta_outs;
-  meta_outs.reserve(out_num);
-  std::vector<phi::MetaTensor*> meta_out_ptrs;
-  meta_out_ptrs.reserve(out_num);
-  for (int64_t i = 0; i < out_num; ++i) {
-    meta_outs.push_back(dense_outs[i]);
-    meta_out_ptrs.push_back(&meta_outs.back());
-  }
-
-  phi::UnbindInferMeta(MakeMetaTensor(*dense_input), axis, meta_out_ptrs);
-
-  using kernel_signature = void (*)(const phi::DeviceContext&,
-                                    const phi::DenseTensor&,
-                                    int,
-                                    std::vector<phi::DenseTensor*>&);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx, *dense_input, axis, dense_outs);
-
-  return out;
-}
-
 ////////////////// Backward(grad) api impls //////////////////////
 
 // TODO(chenweihang): the original sum grad op can support higher-level
@@ -700,71 +652,6 @@ std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
   return api_output;
 }
 
-std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x,
-                                     const Tensor& out_grad,
-                                     const Scalar& axis) {
-  auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-
-  Backend kernel_backend = kernel_key.backend();
-  DataLayout kernel_layout = kernel_key.layout();
-  DataType kernel_data_type = kernel_key.dtype();
-
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "concat_grad", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "concat_grad API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "concat_grad API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  // std::unique_ptr<std::vector<phi::DenseTensor>>
-  auto dense_x = PrepareData(x, kernel.InputAt(0), {});
-  auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
-
-  // Calculate the number of out tensors
-  size_t out_number = x.size();
-  std::vector<Tensor> x_grad;
-  auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad);
-
-  std::vector<phi::MetaTensor> meta_x;
-  meta_x.reserve(x.size());
-  std::vector<phi::MetaTensor*> meta_x_ptrs;
-  meta_x_ptrs.reserve(x.size());
-  for (const auto& t : *dense_x) {
-    meta_x.push_back(t);
-    meta_x_ptrs.push_back(&meta_x.back());
-  }
-
-  std::vector<phi::MetaTensor> meta_x_grad;
-  meta_x_grad.reserve(x.size());
-  std::vector<phi::MetaTensor*> meta_x_grad_ptrs;
-  meta_x_grad_ptrs.reserve(x.size());
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_x_grad.push_back(*dense_x_grad[i]);
-    meta_x_grad_ptrs.push_back(&meta_x_grad.back());
-  }
-
-  phi::UnchangedMultiInferMeta(meta_x_ptrs, meta_x_grad_ptrs);
-
-  std::vector<const phi::DenseTensor*> dense_x_ptr;
-  dense_x_ptr.reserve(x.size());
-  for (const auto& t : *dense_x) {
-    dense_x_ptr.push_back(&t);
-  }
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const std::vector<const phi::DenseTensor*>&,
-                                    const phi::DenseTensor&,
-                                    const phi::Scalar&,
-                                    std::vector<phi::DenseTensor*>);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(
-      *dev_ctx, dense_x_ptr, *dense_out_grad, phi::Scalar(axis), dense_x_grad);
-
-  return x_grad;
-}
-
 Tensor imag_grad_impl(const Tensor& out_grad) {
   phi::KernelKey kernel_key{ParseBackend(out_grad),
                             out_grad.layout(),
@@ -821,328 +708,5 @@ Tensor real_grad_impl(const Tensor& out_grad) {
   return out;
 }
 
-std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x,
-                                    const Tensor& out_grad,
-                                    int axis) {
-  auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-
-  Backend kernel_backend = kernel_key.backend();
-  DataLayout kernel_layout = kernel_key.layout();
-  DataType kernel_data_type = kernel_key.dtype();
-
-  auto kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "stack_grad", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "stack_grad API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "stack_grad API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto dense_out_grad = PrepareData(out_grad, kernel.InputAt(0), {});
-
-  size_t out_number = x.size();
-  std::vector<Tensor> x_grad;
-  auto dense_x_grad = SetKernelOutput(out_number, kernel_backend, &x_grad);
-  std::vector<phi::MetaTensor> meta_x_grad;
-  meta_x_grad.reserve(out_number);
-  std::vector<phi::MetaTensor*> meta_x_grad_ptrs;
-  meta_x_grad_ptrs.reserve(out_number);
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_x_grad.push_back(dense_x_grad[i]);
-    meta_x_grad_ptrs.push_back(&meta_x_grad.back());
-  }
-
-  phi::StackGradInferMeta(
-      MakeMetaTensor(*dense_out_grad), axis, meta_x_grad_ptrs);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const phi::DenseTensor&,
-                                    int axis,
-                                    std::vector<phi::DenseTensor*>);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx, *dense_out_grad, axis, dense_x_grad);
-
-  return x_grad;
-}
-
-std::vector<Tensor> meshgrid_impl(const std::vector<Tensor>& inputs) {
-  Backend kernel_backend = Backend::UNDEFINED;
-  DataLayout kernel_layout = DataLayout::UNDEFINED;
-  DataType kernel_data_type = DataType::UNDEFINED;
-
-  if (kernel_backend == Backend::UNDEFINED ||
-      kernel_layout == DataLayout::UNDEFINED ||
-      kernel_data_type == DataType::UNDEFINED) {
-    auto kernel_key_set = ParseKernelKeyByInputArgs(inputs);
-    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-    if (kernel_backend == Backend::UNDEFINED) {
-      kernel_backend = kernel_key.backend();
-    }
-    if (kernel_layout == DataLayout::UNDEFINED) {
-      kernel_layout = kernel_key.layout();
-    }
-    if (kernel_data_type == DataType::UNDEFINED) {
-      kernel_data_type = kernel_key.dtype();
-    }
-  }
-
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "meshgrid", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "meshgrid API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "meshgrid API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto input_inputs_vec = PrepareData(inputs, kernel.InputAt(0), {});
-  std::vector<const phi::DenseTensor*> input_inputs(input_inputs_vec->size());
-  for (size_t i = 0; i < input_inputs.size(); ++i) {
-    input_inputs[i] = &input_inputs_vec->at(i);
-  }
-
-  auto x_meta_vec = MakeMetaTensor(input_inputs);
-  std::vector<phi::MetaTensor*> inputs_metas(x_meta_vec.size());
-  for (size_t i = 0; i < x_meta_vec.size(); ++i) {
-    inputs_metas[i] = &x_meta_vec[i];
-  }
-
-  // Calculate the number of out tensors
-  size_t out_number = inputs.size();
-
-  std::vector<Tensor> out;
-  auto dense_outs = SetKernelOutput(out_number, kernel_backend, &out);
-
-  std::vector<phi::MetaTensor> meta_outs;
-  meta_outs.reserve(out_number);
-  std::vector<phi::MetaTensor*> meta_out_ptrs;
-  meta_out_ptrs.reserve(out_number);
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_outs.push_back(dense_outs[i]);
-    meta_out_ptrs.push_back(&meta_outs.back());
-  }
-  phi::MeshgridInferMeta(inputs_metas, meta_out_ptrs);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const std::vector<const phi::DenseTensor*>&,
-                                    std::vector<phi::DenseTensor*>&);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx, input_inputs, dense_outs);
-
-  return out;
-}
-
-std::vector<Tensor> meshgrid_grad_impl(
-    const std::vector<Tensor>& inputs,
-    const std::vector<Tensor>& outputs_grad) {
-  Backend kernel_backend = Backend::UNDEFINED;
-  DataLayout kernel_layout = DataLayout::UNDEFINED;
-  DataType kernel_data_type = DataType::UNDEFINED;
-
-  if (kernel_backend == Backend::UNDEFINED ||
-      kernel_layout == DataLayout::UNDEFINED ||
-      kernel_data_type == DataType::UNDEFINED) {
-    auto kernel_key_set = ParseKernelKeyByInputArgs(inputs, outputs_grad);
-    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-    if (kernel_backend == Backend::UNDEFINED) {
-      kernel_backend = kernel_key.backend();
-    }
-    if (kernel_layout == DataLayout::UNDEFINED) {
-      kernel_layout = kernel_key.layout();
-    }
-    if (kernel_data_type == DataType::UNDEFINED) {
-      kernel_data_type = kernel_key.dtype();
-    }
-  }
-
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "meshgrid_grad", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "meshgrid_grad API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "meshgrid_grad API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto input_inputs_vec = PrepareData(inputs, kernel.InputAt(0), {});
-  std::vector<const phi::DenseTensor*> input_inputs(input_inputs_vec->size());
-  for (size_t i = 0; i < input_inputs.size(); ++i) {
-    input_inputs[i] = &input_inputs_vec->at(i);
-  }
-  auto input_outputs_grad_vec =
-      PrepareData(outputs_grad, kernel.InputAt(1), {});
-  std::vector<const phi::DenseTensor*> input_outputs_grad(
-      input_outputs_grad_vec->size());
-  for (size_t i = 0; i < input_outputs_grad.size(); ++i) {
-    input_outputs_grad[i] = &input_outputs_grad_vec->at(i);
-  }
-
-  size_t out_number = inputs.size();
-  std::vector<Tensor> api_output;
-  auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
-
-  auto inputs_meta_vec = MakeMetaTensor(input_inputs);
-  std::vector<phi::MetaTensor*> inputs_metas(inputs_meta_vec.size());
-  for (size_t i = 0; i < inputs_meta_vec.size(); ++i) {
-    inputs_metas[i] = &inputs_meta_vec[i];
-  }
-
-  auto outputs_grad_meta_vec = MakeMetaTensor(input_outputs_grad);
-  std::vector<phi::MetaTensor*> outputs_grad_metas(
-      outputs_grad_meta_vec.size());
-  for (size_t i = 0; i < outputs_grad_meta_vec.size(); ++i) {
-    outputs_grad_metas[i] = &outputs_grad_meta_vec[i];
-  }
-
-  std::vector<phi::MetaTensor> meta_outs;
-  meta_outs.reserve(out_number);
-  std::vector<phi::MetaTensor*> meta_out_ptrs;
-  meta_out_ptrs.reserve(out_number);
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_outs.push_back(kernel_out[i]);
-    meta_out_ptrs.push_back(&meta_outs.back());
-  }
-
-  phi::MeshgridGradInferMeta(inputs_metas, outputs_grad_metas, meta_out_ptrs);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const std::vector<const phi::DenseTensor*>&,
-                                    const std::vector<const phi::DenseTensor*>&,
-                                    std::vector<phi::DenseTensor*>&);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx, input_inputs, input_outputs_grad, kernel_out);
-
-  return api_output;
-}
-
-std::vector<Tensor> multi_dot_grad_impl(const std::vector<Tensor>& x,
-                                        const Tensor& out_grad) {
-  Backend kernel_backend = Backend::UNDEFINED;
-  DataLayout kernel_layout = DataLayout::UNDEFINED;
-  DataType kernel_data_type = DataType::UNDEFINED;
-
-  if (kernel_backend == Backend::UNDEFINED ||
-      kernel_layout == DataLayout::UNDEFINED ||
-      kernel_data_type == DataType::UNDEFINED) {
-    auto kernel_key_set = ParseKernelKeyByInputArgs(x, out_grad);
-    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-    if (kernel_backend == Backend::UNDEFINED) {
-      kernel_backend = kernel_key.backend();
-    }
-    if (kernel_layout == DataLayout::UNDEFINED) {
-      kernel_layout = kernel_key.layout();
-    }
-    if (kernel_data_type == DataType::UNDEFINED) {
-      kernel_data_type = kernel_key.dtype();
-    }
-  }
-
-  VLOG(6) << "multi_dot_grad API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "multi_dot_grad", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "multi_dot_grad API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto input_x_vec = PrepareData(x, kernel.InputAt(0), {});
-  std::vector<const phi::DenseTensor*> input_x(input_x_vec->size());
-  for (size_t i = 0; i < input_x.size(); ++i) {
-    input_x[i] = &input_x_vec->at(i);
-  }
-  auto input_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
-
-  size_t out_number = input_x.size();
-  std::vector<Tensor> api_output;
-  auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
-
-  auto x_meta_vec = MakeMetaTensor(input_x);
-  std::vector<phi::MetaTensor*> x_metas(x_meta_vec.size());
-  for (size_t i = 0; i < x_meta_vec.size(); ++i) {
-    x_metas[i] = &x_meta_vec[i];
-  }
-
-  std::vector<phi::MetaTensor> meta_outs;
-  meta_outs.reserve(out_number);
-  std::vector<phi::MetaTensor*> meta_out_ptrs;
-  meta_out_ptrs.reserve(out_number);
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_outs.push_back(kernel_out[i]);
-    meta_out_ptrs.push_back(&meta_outs.back());
-  }
-
-  phi::MultiDotGradInferMeta(
-      x_metas, MakeMetaTensor(*input_out_grad), meta_out_ptrs);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const std::vector<const phi::DenseTensor*>&,
-                                    const phi::DenseTensor&,
-                                    std::vector<phi::DenseTensor*>&);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx, input_x, *input_out_grad, kernel_out);
-
-  return api_output;
-}
-
-std::vector<Tensor> multiplex_grad_impl(const std::vector<Tensor>& inputs,
-                                        const Tensor& ids,
-                                        const Tensor& out_grad) {
-  Backend kernel_backend = Backend::UNDEFINED;
-  DataLayout kernel_layout = DataLayout::UNDEFINED;
-  DataType kernel_data_type = DataType::UNDEFINED;
-
-  if (kernel_backend == Backend::UNDEFINED ||
-      kernel_layout == DataLayout::UNDEFINED ||
-      kernel_data_type == DataType::UNDEFINED) {
-    auto kernel_key_set = ParseKernelKeyByInputArgs(out_grad);
-    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-    if (kernel_backend == Backend::UNDEFINED) {
-      kernel_backend = kernel_key.backend();
-    }
-    if (kernel_layout == DataLayout::UNDEFINED) {
-      kernel_layout = kernel_key.layout();
-    }
-    if (kernel_data_type == DataType::UNDEFINED) {
-      kernel_data_type = kernel_key.dtype();
-    }
-  }
-
-  VLOG(6) << "multiplex_grad API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "multiplex_grad", {kernel_backend, kernel_layout, kernel_data_type});
-  VLOG(6) << "multiplex_grad API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto input_ids = PrepareData(ids, kernel.InputAt(0), {});
-  auto input_out_grad = PrepareData(out_grad, kernel.InputAt(1), {});
-
-  auto out_number = inputs.size();
-  std::vector<Tensor> api_output;
-  auto kernel_out = SetKernelOutput(out_number, kernel_backend, &api_output);
-
-  std::vector<phi::MetaTensor> meta_outs;
-  meta_outs.reserve(out_number);
-  std::vector<phi::MetaTensor*> meta_out_ptrs;
-  meta_out_ptrs.reserve(out_number);
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_outs.push_back(kernel_out[i]);
-    meta_out_ptrs.push_back(&meta_outs.back());
-  }
-
-  phi::MultiplexGradInferMeta(MakeMetaTensor(*input_ids),
-                              MakeMetaTensor(*input_out_grad),
-                              meta_out_ptrs);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    std::vector<phi::DenseTensor*>&);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx, *input_ids, *input_out_grad, kernel_out);
-
-  return api_output;
-}
-
 }  // namespace experimental
 }  // namespace paddle
diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h
index 0e360ce4a99..0d1ba3e98e5 100644
--- a/paddle/phi/api/lib/api_custom_impl.h
+++ b/paddle/phi/api/lib/api_custom_impl.h
@@ -30,6 +30,20 @@ namespace experimental {
 
 ////////////////// Forward api impls //////////////////////
 
+std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
+    const Tensor& x,
+    const Tensor& scale,
+    const Tensor& bias,
+    const Tensor& mean,
+    const Tensor& variance,
+    float momentum,
+    float epsilon,
+    const std::string& data_layout,
+    bool is_test,
+    bool use_global_stats,
+    bool trainable_statistics,
+    bool fuse_with_relu);
+
 Tensor conv2d_impl(const Tensor& input,
                    const Tensor& filter,
                    const std::vector<int>& strides,
@@ -62,8 +76,6 @@ std::vector<Tensor> split_impl(const Tensor& x,
                                const IntArray& num_or_sections,
                                const Scalar& axis);
 
-std::vector<Tensor> meshgrid_impl(const std::vector<Tensor>& inputs);
-
 std::tuple<Tensor, Tensor, Tensor> momentum_impl(
     const Tensor& param,
     const Tensor& grad,
@@ -77,49 +89,14 @@ std::tuple<Tensor, Tensor, Tensor> momentum_impl(
     bool multi_precision,
     float rescale_grad);
 
-std::vector<Tensor> unbind_impl(const Tensor& input, int axis);
-
 ////////////////// Backward(grad) api impls //////////////////////
 
 std::vector<Tensor> add_n_grad_impl(const std::vector<Tensor>& x,
                                     const Tensor& out_grad);
 
-std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
-    const Tensor& x,
-    const Tensor& scale,
-    const Tensor& bias,
-    const Tensor& mean,
-    const Tensor& variance,
-    float momentum,
-    float epsilon,
-    const std::string& data_layout,
-    bool is_test,
-    bool use_global_stats,
-    bool trainable_statistics,
-    bool fuse_with_relu);
-
-/************************ backward api impl ***************************/
-
-std::vector<Tensor> concat_grad_impl(const std::vector<Tensor>& x,
-                                     const Tensor& out_grad,
-                                     const Scalar& axis);
-
 Tensor imag_grad_impl(const Tensor& x);
 
 Tensor real_grad_impl(const Tensor& x);
 
-std::vector<Tensor> stack_grad_impl(const std::vector<Tensor>& x,
-                                    const Tensor& out_grad,
-                                    int axis);
-std::vector<Tensor> meshgrid_grad_impl(const std::vector<Tensor>& inputs,
                                        const std::vector<Tensor>& outputs_grad);
-
-std::vector<Tensor> multi_dot_grad_impl(const std::vector<Tensor>& x,
-                                        const Tensor& out_grad);
-
-std::vector<Tensor> multiplex_grad_impl(const std::vector<Tensor>& inputs,
-                                        const Tensor& ids,
-                                        const Tensor& out_grad);
-
 }  // namespace experimental
 }  // namespace paddle
diff --git a/paddle/phi/api/lib/api_gen_utils.cc b/paddle/phi/api/lib/api_gen_utils.cc
index 732ecacde94..f9db1529569 100644
--- a/paddle/phi/api/lib/api_gen_utils.cc
+++ b/paddle/phi/api/lib/api_gen_utils.cc
@@ -76,6 +76,16 @@ std::vector<phi::MetaTensor> MakeMetaTensor(
   return meta_tensors;
 }
 
+std::vector<phi::MetaTensor> MakeMetaTensor(
+    const std::vector<phi::DenseTensor*>& tensors) {
+  std::vector<phi::MetaTensor> meta_tensors;
+  meta_tensors.reserve(tensors.size());
+  for (auto* t : tensors) {
+    meta_tensors.emplace_back(*t);
+  }
+  return meta_tensors;
+}
+
 phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor) {
   return phi::MetaTensor(tensor);
 }
diff --git a/paddle/phi/api/lib/api_gen_utils.h b/paddle/phi/api/lib/api_gen_utils.h
index d7ecef61c5b..035dfc52047 100644
--- a/paddle/phi/api/lib/api_gen_utils.h
+++ b/paddle/phi/api/lib/api_gen_utils.h
@@ -53,6 +53,9 @@ phi::MetaTensor MakeMetaTensor(const phi::DenseTensor& tensor);
 std::vector<phi::MetaTensor> MakeMetaTensor(
     const std::vector<const phi::DenseTensor*>& tensors);
 
+std::vector<phi::MetaTensor> MakeMetaTensor(
+    const std::vector<phi::DenseTensor*>& tensors);
+
 phi::MetaTensor MakeMetaTensor(const phi::SelectedRows& tensor);
 
 phi::MetaTensor MakeMetaTensor(const phi::StringTensor& tensor);
diff --git a/paddle/phi/common/int_array.h b/paddle/phi/common/int_array.h
index 490d7dabd40..f9d07249e0f 100644
--- a/paddle/phi/common/int_array.h
+++ b/paddle/phi/common/int_array.h
@@ -96,6 +96,8 @@ class IntArrayBase {
   template <typename OtherT>
   IntArrayBase(const IntArrayBase<OtherT>& other) : array_(other.GetData()) {}
 
+  size_t size() const { return array_.size(); }
+
   const std::vector<int64_t>& GetData() const { return array_; }
 
  private:
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 6ca61de063b..96bb3aafa50 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -1290,8 +1290,11 @@
 
 - api : meshgrid
   args : (Tensor[] inputs)
-  output : Tensor[]
-  invoke : meshgrid_impl(inputs)
+  output : Tensor[]{inputs.size()}
+  infer_meta :
+    func : MeshgridInferMeta
+  kernel :
+    func : meshgrid
   backward : meshgrid_grad
 
 - api : min
@@ -2059,8 +2062,11 @@
 
 - api : unbind
   args : (Tensor input, int axis)
-  output : Tensor[]
-  invoke : unbind_impl(input, axis)
+  output : Tensor[] {axis<0 ? input.dims()[input.dims().size()+axis]:input.dims()[axis]}
+  infer_meta :
+    func : UnbindInferMeta
+  kernel :
+    func : unbind
   backward : unbind_grad
 
 # unfold
diff --git a/python/paddle/utils/code_gen/api_base.py b/python/paddle/utils/code_gen/api_base.py
index 275adac8b49..4325807746e 100644
--- a/python/paddle/utils/code_gen/api_base.py
+++ b/python/paddle/utils/code_gen/api_base.py
@@ -31,6 +31,7 @@ class BaseAPI(object):
         #     outputs:
         #         names : [], list of output names
         #         types : [], list of output types
+        #         out_size_expr : [], expression for getting size of vector<Tensor>
         #         return_type : Tensor, vector<Tensor>, ..., the return type of api
         #     args_str:
         #         args_declare : "str" // str of function params with default value. Example: (..., bool flag=false)
@@ -67,11 +68,12 @@ class BaseAPI(object):
         ]
         inputs, attrs, args_str = self.parse_input_and_attr(
             api_name, api_item_yaml['args'], optional_vars)
-        output_type_list, output_names, return_type = self.parse_output(
+        output_type_list, output_names, out_size_expr, return_type = self.parse_output(
            api_name, api_item_yaml['output'])
         return inputs, attrs, {
             'names': output_names,
             'types': output_type_list,
+            'out_size_expr': out_size_expr,
             'return_type': return_type
         }, args_str, optional_vars
 
@@ -184,39 +186,36 @@ class BaseAPI(object):
                 'Tensor': 'Tensor',
                 'Tensor[]': 'std::vector<Tensor>'
             }
-            if re.search(r'\([a-zA-Z0-9_@]*\)', output_item):
-                result = re.search(
-                    r"(?P<out_type>[a-zA-Z0-9_[\]]+)\s*\((?P<name>[a-zA-Z0-9_@]+)\)",
-                    output_item)
-                out_type = result.group('out_type')
-                assert out_type in output_type_map, \
-                    f"{api_name} : Output type error: the output type only support Tensor and Tensor[], \
-                      but now is {out_type}."
-
-                return output_type_map[out_type], result.group('name')
-
-            else:
-                if output_item.strip() in output_type_map:
-                    return output_type_map[output_item.strip()], 'out'
-                else:
-                    raise ValueError(
-                        "{} : Output type error: the output type only support Tensor and Tensor[], \
-                      but now is {}.".format(api_name, output_item.strip()))
+            result = re.search(
+                r"(?P<out_type>[a-zA-Z0-9_[\]]+)\s*(?P<name>\([a-zA-Z0-9_@]+\))?\s*(?P<expr>\{[^\}]+\})?",
+                output_item)
+            assert result is not None, f"{api_name} : the output config parse error."
+            out_type = result.group('out_type')
+            assert out_type in output_type_map, \
+                f"{api_name} : Output type error: the output type only support Tensor and Tensor[], \
+                  but now is {out_type}."
+
+            out_name = 'out' if result.group('name') is None else result.group(
+                'name')[1:-1]
+            out_size_expr = None if result.group(
+                'expr') is None else result.group('expr')[1:-1]
+            return output_type_map[out_type], out_name, out_size_expr
 
         temp_list = output_config.split(',')
 
         if len(temp_list) == 1:
-            out_type, out_name = parse_output_item(temp_list[0])
-            return [out_type], [out_name], self.get_return_type([out_type])
+            out_type, out_name, size_expr = parse_output_item(temp_list[0])
+            return [out_type], [out_name], size_expr, self.get_return_type(
+                [out_type])
         else:
             out_type_list = []
             out_name_list = []
             for output_item in temp_list:
-                out_type, out_name = parse_output_item(output_item)
+                out_type, out_name, size_expr = parse_output_item(output_item)
                 out_type_list.append(out_type)
                 out_name_list.append(out_name)
 
-            return out_type_list, out_name_list, self.get_return_type(
+            return out_type_list, out_name_list, size_expr, self.get_return_type(
                 out_type_list)
 
     def parse_infer_meta(self, infer_meta_config):
@@ -462,9 +461,8 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
         attr_names = self.attrs['names']
         infer_meta = self.infer_meta
 
-        infer_meta_params = infer_meta[
-            'param'] + kernel_output_names if infer_meta[
-                'param'] is not None else input_names + attr_names + kernel_output_names
+        infer_meta_params = infer_meta['param'] if infer_meta[
+            'param'] is not None else input_names + attr_names
         # generate meta tensors
         meta_tensor_code = ""
         param_code = ""
@@ -500,11 +498,6 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
                     raise ValueError(
                         f"{self.api} : Param of infer_meta error : {self.inputs['input_info'][param]} type is not supported."
                     )
-            elif param in kernel_output_names:
-                meta_tensor_code = meta_tensor_code + code_indent + "  phi::MetaTensor " + param.replace(
-                    'kernel_', PREFIX_META_TENSOR_NAME) + "(" + param + ");\n"
-                param_code = param_code + "&" + param.replace(
-                    'kernel_', PREFIX_META_TENSOR_NAME) + ", "
             elif param in attr_names:
                 param_code = param_code + param + ", "
             elif isinstance(param, str):
@@ -514,6 +507,23 @@ PADDLE_API {self.gene_return_type_code()} {self.get_api_func_name() + '_'}({self
             else:
                 param_code = param_code + str(param) + ", "
 
+        for i, out_name in enumerate(kernel_output_names):
+            if self.outputs['types'][i] == 'std::vector<Tensor>':
+                meta_tensor_code = meta_tensor_code + f"""
+{code_indent}  auto {out_name}_{PREFIX_META_TENSOR_NAME}vec = MakeMetaTensor({out_name});
+{code_indent}  std::vector<phi::MetaTensor*> {out_name}_metas({out_name}_{PREFIX_META_TENSOR_NAME}vec.size());
+{code_indent}  for (size_t i = 0; i < {out_name}_{PREFIX_META_TENSOR_NAME}vec.size(); ++i) {{
+{code_indent}    {out_name}_metas[i] = &{out_name}_{PREFIX_META_TENSOR_NAME}vec[i];
+{code_indent}  }}"""
+
+                param_code = param_code + out_name + '_metas, '
+            else:
+                meta_tensor_code = meta_tensor_code + code_indent + "  phi::MetaTensor " + out_name.replace(
+                    'kernel_',
+                    PREFIX_META_TENSOR_NAME) + "(" + out_name + ");\n"
+                param_code = param_code + "&" + out_name.replace(
+                    'kernel_', PREFIX_META_TENSOR_NAME) + ", "
+
         param_code = param_code[:-2]
         return f"""{meta_tensor_code}
{code_indent}  phi::{infer_meta['func']}({param_code});
diff --git a/python/paddle/utils/code_gen/api_gen.py b/python/paddle/utils/code_gen/api_gen.py
index 4087b55b513..538958c2361 100644
--- a/python/paddle/utils/code_gen/api_gen.py
+++ b/python/paddle/utils/code_gen/api_gen.py
@@ -91,7 +91,16 @@ class ForwardAPI(BaseAPI):
                 0]] if inplace_flag and self.inplace_map is not None and self.outputs[
                     'names'][0] in self.inplace_map else ""
             output_create = f"""
-{code_indent}  {self.outputs['return_type']} api_output{inplace_assign};
+{code_indent}  {self.outputs['return_type']} api_output{inplace_assign};"""
+
+            if self.outputs['return_type'] == 'std::vector<Tensor>':
+                assert self.outputs['out_size_expr'] is not None, \
+                     f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                output_create = output_create + f"""
+{code_indent}  auto kernel_out = {set_out_func}({self.outputs['out_size_expr']}, kernel_backend, &api_output);"""
+
+            else:
+                output_create = output_create + f"""
 {code_indent}  auto kernel_out = {set_out_func}(kernel_backend, &api_output);"""
 
             if not inplace_flag and self.view_map is not None and self.outputs[
@@ -113,7 +122,14 @@ class ForwardAPI(BaseAPI):
                     output_create = output_create + f"""
 {code_indent}  std::get<{i}>(api_output) = {self.inplace_map[self.outputs['names'][i]]};"""
 
-                output_create = output_create + f"""
+                if output_type_list[i] == 'std::vector<Tensor>':
+                    assert self.outputs['out_size_expr'][i] is not None, \
+                         f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                    output_create = output_create + f"""
+{code_indent}  auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &std::get<{i}>(api_output));"""
+
+                else:
+                    output_create = output_create + f"""
 {code_indent}  auto kernel_out_{i} = {set_out_func}(kernel_backend, &std::get<{i}>(api_output));"""
 
             if not inplace_flag and self.view_map is not None and self.outputs[
diff --git a/python/paddle/utils/code_gen/backward.yaml b/python/paddle/utils/code_gen/backward.yaml
index 555ec600bf7..90815cfe9af 100644
--- a/python/paddle/utils/code_gen/backward.yaml
+++ b/python/paddle/utils/code_gen/backward.yaml
@@ -44,7 +44,7 @@
 - backward_api : add_n_grad
   forward : add_n (Tensor[] x) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad)
-  output : Tensor[](x_grad)
+  output : Tensor[](x_grad){x.size()}
   invoke : add_n_grad_impl(x, out_grad)
   no_need_buffer : x
 
@@ -215,8 +215,12 @@
 - backward_api : concat_grad
   forward : concat (Tensor[] x, Scalar axis) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad, Scalar axis = 0)
-  output : Tensor[](x_grad)
-  invoke : concat_grad_impl(x, out_grad, axis)
+  output : Tensor[](x_grad){x.size()}
+  infer_meta :
+    func : UnchangedMultiInferMeta
+    param : [x]
+  kernel :
+    func : concat_grad
   no_need_buffer : x
 
 - backward_api : conj_grad
@@ -944,8 +948,11 @@
 - backward_api : meshgrid_grad
   forward : meshgrid (Tensor[] inputs) -> Tensor[](outputs)
   args : (Tensor[] inputs, Tensor[] outputs_grad)
-  output : Tensor[](inputs_grad)
-  invoke : meshgrid_grad_impl(inputs, outputs_grad)
+  output : Tensor[](inputs_grad){inputs.size()}
+  infer_meta :
+    func : MeshgridGradInferMeta
+  kernel :
+    func : meshgrid_grad
 
 - backward_api : min_grad
   forward: min (Tensor x, int64_t[] dims={}, bool keep_dim=false) -> Tensor(out)
@@ -1001,14 +1008,22 @@
 - backward_api : multi_dot_grad
   forward : multi_dot (Tensor[] x) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad)
-  output : Tensor[](x_grad)
-  invoke : multi_dot_grad_impl(x, out_grad)
+  output : Tensor[](x_grad) {x.size()}
+  infer_meta :
+    func : MultiDotGradInferMeta
+  kernel :
+    func : multi_dot_grad
 
 - backward_api : multiplex_grad
   forward : multiplex (Tensor[] ins, Tensor ids) -> Tensor(out)
   args : (Tensor[] ins, Tensor ids, Tensor out_grad)
-  output : Tensor[](ins_grad)
-  invoke : multiplex_grad_impl(ins, ids, out_grad)
+  output : Tensor[](ins_grad){ins.size()}
+  infer_meta :
+    func : MultiplexGradInferMeta
+    param : [ids, out_grad]
+  kernel :
+    func : multiplex_grad
+    param : [ids, out_grad]
 
 - backward_api : multiply_grad
   forward : multiply (Tensor x, Tensor y) -> Tensor(out)
@@ -1448,8 +1463,13 @@
 - backward_api : stack_grad
   forward : stack (Tensor[] x, int axis) -> Tensor(out)
   args : (Tensor[] x, Tensor out_grad, int axis)
-  output : Tensor[](x_grad)
-  invoke : stack_grad_impl(x, out_grad, axis)
+  output : Tensor[](x_grad){x.size()}
+  infer_meta :
+    func : StackGradInferMeta
+    param: [out_grad, axis]
+  kernel :
+    func : stack_grad
+    param : [out_grad, axis]
   no_need_buffer : x
 
 - backward_api : strided_slice_grad
diff --git a/python/paddle/utils/code_gen/backward_api_gen.py b/python/paddle/utils/code_gen/backward_api_gen.py
index 46aa3e7e23d..a88339c607c 100644
--- a/python/paddle/utils/code_gen/backward_api_gen.py
+++ b/python/paddle/utils/code_gen/backward_api_gen.py
@@ -35,7 +35,7 @@ class BackwardAPI(BaseAPI):
             r"(?P<api>[a-z][a-z0-9_]+)\s*(?P<args>\([^\)]+\))\s*->\s*(?P<outputs>.+)",
             forward_config)
         api = result.group('api')
-        _, outputs, _ = self.parse_output(self.api, result.group('outputs'))
+        _, outputs, _, _ = self.parse_output(self.api, result.group('outputs'))
         outputs = [item.split('@')[0] for item in outputs]
         fw_inputs, fw_attrs, _, = self.parse_input_and_attr(
             api, result.group('args'))
@@ -110,7 +110,16 @@ class BackwardAPI(BaseAPI):
                 0]] if inplace_flag and self.inplace_map is not None and self.outputs[
                     'names'][0] in self.inplace_map else ""
             output_create = f"""
-{code_indent}  {self.outputs['return_type']} api_output{inplace_assign};
+{code_indent}  {self.outputs['return_type']} api_output{inplace_assign};"""
+
+            if output_type_list[0] == 'std::vector<Tensor>':
+                assert self.outputs['out_size_expr'] is not None, \
+                     f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                output_create = output_create + f"""
+{code_indent}  auto kernel_out = {set_out_func}({self.outputs['out_size_expr']}, kernel_backend, &api_output);"""
+
+            else:
+                output_create = output_create + f"""
 {code_indent}  auto kernel_out = {set_out_func}(kernel_backend, &api_output);"""
 
         elif len(output_type_list) > 1:
@@ -121,7 +130,6 @@ class BackwardAPI(BaseAPI):
                 kernel_output = kernel_output + f'kernel_out_{i}, '
                 output_names.append(f'kernel_out_{i}')
                 if out_type_item == 'Tensor':
-                    get_out_code = f'&api_output[{i}][0]'
                     if inplace_flag and self.inplace_map is not None and self.outputs[
                             'names'][i] in self.inplace_map:
                         output_create = output_create + f"""
@@ -131,6 +139,9 @@ class BackwardAPI(BaseAPI):
                         output_create = output_create + f"""
 {code_indent}  api_output[{i}].emplace_back();"""
 
+                    output_create = output_create + f"""
+{code_indent}  auto kernel_out_{i} = {set_out_func}(kernel_backend, &api_output[{i}][0]);"""
+
                 else:
                     get_out_code = f'&api_output[{i}]'
                     if inplace_flag and self.inplace_map is not None and self.outputs[
                             'names'][i] in self.inplace_map:
                         output_create = output_create + f"""
 {code_indent}  api_output[{i}] = {self.inplace_map[self.outputs['names'][i]]};"""
 
-                    output_create = output_create + f"""
-{code_indent}  auto kernel_out_{i} = {set_out_func}(kernel_backend, {get_out_code});"""
+                    assert self.outputs['out_size_expr'][i] is not None, \
+                         f"{api_name}: The out size expr : '{{expr}}' should be set when output has Tensor[]. You can refer 'split' api."
+                    output_create = output_create + f"""
+{code_indent}  auto kernel_out_{i} = {set_out_func}({self.outputs['out_size_expr'][i]}, kernel_backend, &api_output[{i}]);"""
 
             kernel_output = kernel_output[:-2]
         else:
-- 
GitLab
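
Usage note: with this patch, a Tensor[] output declares its element count inline in the yaml, so most ops no longer need a hand-written *_impl in api_custom_impl.cc. A minimal sketch of an entry in the new form; the op name my_vector_op and its InferMeta/kernel functions are hypothetical placeholders, not part of this patch:

    - api : my_vector_op
      args : (Tensor[] x, int axis)
      output : Tensor[](out){x.size()}   # element count in braces
      infer_meta :
        func : MyVectorOpInferMeta       # hypothetical InferMeta function
      kernel :
        func : my_vector_op              # hypothetical phi kernel

The expression in braces is emitted verbatim into the generated call SetKernelOutput(<expr>, kernel_backend, &api_output), so it can reference any argument declared in args, e.g. x.size() for one gradient per input, or a conditional such as the axis-normalizing expression in the unbind entry above.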