diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc
index 37fd84e8b1b139caa3a72b97bacc3757ff49eea3..2fa40320e558308dd2ea60dd5aa7c9e40ad6e210 100644
--- a/paddle/phi/api/lib/api_custom_impl.cc
+++ b/paddle/phi/api/lib/api_custom_impl.cc
@@ -129,276 +129,8 @@ Tensor copy_to_impl(const Tensor& x, Place place, bool blocking) {
   return out;
 }
 
-Tensor embedding_impl(const Tensor& x,
-                      const Tensor& weight,
-                      int64_t padding_idx,
-                      bool sparse) {
-  DataType kernel_data_type = ParseDataType(weight);
-  auto kernel_key_set = ParseKernelKeyByInputArgs(weight);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-  VLOG(6) << "embedding API kernel key: [" << kernel_key.backend() << ", "
-          << kernel_key.layout() << ", " << kernel_data_type << "]";
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_key.backend());
-
-  Tensor api_output;
-
-  if (phi::DenseTensor::classof(weight.impl().get())) {
-    auto kernel_result =
-        phi::KernelFactory::Instance().SelectKernelOrThrowError(
-            "embedding",
-            {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
-    const auto& kernel = kernel_result.kernel;
-    VLOG(6) << "embedding API kernel: " << kernel;
-
-    auto input_x = PrepareData(x, kernel.InputAt(0), {});
-    auto input_weight = PrepareData(weight, kernel.InputAt(1), {});
-
-    auto* kernel_out = SetKernelOutput(&api_output);
-    phi::MetaTensor meta_out(kernel_out);
-
-    phi::EmbeddingInferMeta(MakeMetaTensor(*input_x),
-                            MakeMetaTensor(*input_weight),
-                            padding_idx,
-                            sparse,
-                            &meta_out);
-
-    using kernel_signature = void (*)(const platform::DeviceContext&,
-                                      const phi::DenseTensor&,
-                                      const phi::DenseTensor&,
-                                      int64_t,
-                                      phi::DenseTensor*);
-    auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-    {
-      (*kernel_fn)(*dev_ctx, *input_x, *input_weight, padding_idx, kernel_out);
-    }
-  } else {
-    auto kernel_result =
-        phi::KernelFactory::Instance().SelectKernelOrThrowError(
-            "sparse_weight_embedding",
-            {kernel_key.backend(), kernel_key.layout(), kernel_data_type});
-    const auto& kernel = kernel_result.kernel;
-    VLOG(6) << "sparse_weight_embedding API kernel: " << kernel;
-
-    auto input_x = PrepareData(x, kernel.InputAt(0), {});
-    auto input_weight = TensorToSelectedRows(weight);
-
-    auto* kernel_out = SetKernelOutput(&api_output);
-    phi::MetaTensor meta_out(kernel_out);
-
-    phi::EmbeddingInferMeta(MakeMetaTensor(*input_x),
-                            MakeMetaTensor(*input_weight),
-                            padding_idx,
-                            sparse,
-                            &meta_out);
-
-    using kernel_signature = void (*)(const platform::DeviceContext&,
-                                      const phi::DenseTensor&,
-                                      const phi::SelectedRows&,
-                                      int64_t,
-                                      phi::DenseTensor*);
-    auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-    {
-      (*kernel_fn)(*dev_ctx, *input_x, *input_weight, padding_idx, kernel_out);
-    }
-  }
-  return api_output;
-}
-
-std::vector<Tensor> split_impl(const Tensor& x,
-                               const IntArray& num_or_sections,
-                               const Scalar& axis) {
-  auto kernel_key_set = ParseKernelKeyByInputArgs(x);
-  auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-
-  Backend kernel_backend = kernel_key.backend();
-  DataLayout kernel_layout = kernel_key.layout();
-  DataType kernel_data_type = kernel_key.dtype();
-
-  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "split", {kernel_backend, kernel_layout, kernel_data_type});
-  const auto& kernel = kernel_result.kernel;
-  VLOG(6) << "split API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "split API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto dense_x = PrepareData(x, kernel.InputAt(0), {});
-
-  // Calculate the number of out tensors
-  size_t out_number;
-  if (num_or_sections.size() == 1) {
-    if (num_or_sections.GetData()[0] < 0) {
-      out_number = 1;
-    } else {
-      out_number = num_or_sections.GetData()[0];
-    }
-  } else {
-    out_number = num_or_sections.size();
-  }
-
-  std::vector<Tensor> out;
-  auto dense_outs = SetKernelOutput(out_number, &out);
-  std::vector<phi::MetaTensor> meta_outs;
-  meta_outs.reserve(out_number);
-  std::vector<phi::MetaTensor*> meta_out_ptrs;
-  meta_out_ptrs.reserve(out_number);
-  for (size_t i = 0; i < out_number; ++i) {
-    meta_outs.push_back(dense_outs[i]);
-    meta_out_ptrs.push_back(&meta_outs.back());
-  }
-
-  phi::SplitInferMeta(
-      MakeMetaTensor(*dense_x), num_or_sections, axis, meta_out_ptrs);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const phi::DenseTensor&,
-                                    const phi::IntArray&,
-                                    const phi::Scalar&,
-                                    std::vector<phi::DenseTensor*>&);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  (*kernel_fn)(*dev_ctx,
-               *dense_x,
-               phi::IntArray(num_or_sections),
-               phi::Scalar(axis),
-               dense_outs);
-
-  return out;
-}
-
 ////////////////// Backward(grad) api impls //////////////////////
 
-std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
-    const Tensor& x,
-    const Tensor& scale,
-    const Tensor& bias,
-    const Tensor& mean,
-    const Tensor& variance,
-    float momentum,
-    float epsilon,
-    const std::string& data_layout,
-    bool is_test,
-    bool use_global_stats,
-    bool trainable_statistics,
-    bool fuse_with_relu) {
-  Backend kernel_backend = Backend::UNDEFINED;
-  DataLayout kernel_layout = DataLayout::UNDEFINED;
-  DataType kernel_data_type = DataType::UNDEFINED;
-
-  kernel_data_type = ParseDataType(x);
-
-  if (kernel_backend == Backend::UNDEFINED ||
-      kernel_layout == DataLayout::UNDEFINED ||
-      kernel_data_type == DataType::UNDEFINED) {
-    auto kernel_key_set = ParseKernelKeyByInputArgs(x);
-    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
-    if (kernel_backend == Backend::UNDEFINED) {
-      kernel_backend = kernel_key.backend();
-    }
-    if (kernel_layout == DataLayout::UNDEFINED) {
-      kernel_layout = kernel_key.layout();
-    }
-    if (kernel_data_type == DataType::UNDEFINED) {
-      kernel_data_type = kernel_key.dtype();
-    }
-  }
-
-  auto kernel_result = phi::KernelFactory::Instance().SelectKernelOrThrowError(
-      "batch_norm", {kernel_backend, kernel_layout, kernel_data_type});
-  const auto& kernel = kernel_result.kernel;
-  VLOG(6) << "batch_norm API kernel key: [" << kernel_backend << ", "
-          << kernel_layout << ", " << kernel_data_type << "]";
-  VLOG(6) << "batch_norm API kernel: " << kernel;
-
-  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
-
-  auto input_x = PrepareData(x, kernel.InputAt(0), {});
-  auto input_scale = PrepareData(scale, kernel.InputAt(1), {});
-  auto input_bias = PrepareData(bias, kernel.InputAt(2), {});
-  auto input_mean = PrepareData(mean, kernel.InputAt(3), {});
-  auto input_variance = PrepareData(variance, kernel.InputAt(4), {});
-
-  std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> api_output;
-  auto kernel_out_0 = SetKernelOutput(&std::get<0>(api_output));
-  std::get<1>(api_output).set_impl(mean.impl());
-  std::get<2>(api_output).set_impl(variance.impl());
-  auto kernel_out_1 = SetKernelOutput(&std::get<1>(api_output));
-  auto kernel_out_2 = SetKernelOutput(&std::get<2>(api_output));
-  auto kernel_out_3 = SetKernelOutput(&std::get<3>(api_output));
-  auto kernel_out_4 = SetKernelOutput(&std::get<4>(api_output));
-  auto kernel_out_5 = SetKernelOutput(&std::get<5>(api_output));
-  phi::MetaTensor meta_out_0(kernel_out_0);
-  phi::MetaTensor meta_out_1(kernel_out_1);
-  phi::MetaTensor meta_out_2(kernel_out_2);
-  phi::MetaTensor meta_out_3(kernel_out_3);
-  phi::MetaTensor meta_out_4(kernel_out_4);
-  phi::MetaTensor meta_out_5(kernel_out_5);
-
-  phi::BatchNormInferMeta(MakeMetaTensor(*input_x),
-                          MakeMetaTensor(*input_scale),
-                          MakeMetaTensor(*input_bias),
-                          MakeMetaTensor(*input_mean),
-                          MakeMetaTensor(*input_variance),
-                          momentum,
-                          epsilon,
-                          data_layout,
-                          is_test,
-                          use_global_stats,
-                          trainable_statistics,
-                          fuse_with_relu,
-                          &meta_out_0,
-                          &meta_out_1,
-                          &meta_out_2,
-                          &meta_out_3,
-                          &meta_out_4,
-                          &meta_out_5);
-
-  using kernel_signature = void (*)(const platform::DeviceContext&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    const phi::DenseTensor&,
-                                    float,
-                                    float,
-                                    const std::string&,
-                                    bool,
-                                    bool,
-                                    bool,
-                                    bool,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*,
-                                    phi::DenseTensor*);
-  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
-  {
-    (*kernel_fn)(*dev_ctx,
-                 *input_x,
-                 *input_scale,
-                 *input_bias,
-                 *input_mean,
-                 *input_variance,
-                 momentum,
-                 epsilon,
-                 data_layout,
-                 is_test,
-                 use_global_stats,
-                 trainable_statistics,
-                 fuse_with_relu,
-                 kernel_out_0,
-                 kernel_out_1,
-                 kernel_out_2,
-                 kernel_out_3,
-                 kernel_out_4,
-                 kernel_out_5);
-  }
-
-  return api_output;
-}
-
 void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad) {
   phi::KernelKey kernel_key{ParseBackend(out_grad),
                             out_grad.layout(),
diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h
index ab1d17051499c2597519dc2efcc52ef31e1c8025..474f4f981185f25c98b83e5a8bb59b9f87c88b98 100644
--- a/paddle/phi/api/lib/api_custom_impl.h
+++ b/paddle/phi/api/lib/api_custom_impl.h
@@ -33,31 +33,8 @@ namespace experimental {
 
 Tensor add_n_impl(const std::vector<Tensor>& x);
 
-std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
-    const Tensor& x,
-    const Tensor& scale,
-    const Tensor& bias,
-    const Tensor& mean,
-    const Tensor& variance,
-    float momentum,
-    float epsilon,
-    const std::string& data_layout,
-    bool is_test,
-    bool use_global_stats,
-    bool trainable_statistics,
-    bool fuse_with_relu);
-
 Tensor copy_to_impl(const Tensor& x, Place place, bool blocking);
 
-Tensor embedding_impl(const Tensor& x,
-                      const Tensor& weight,
-                      int64_t padding_idx,
-                      bool sparse);
-
-std::vector<Tensor> split_impl(const Tensor& x,
-                               const IntArray& num_or_sections,
-                               const Scalar& axis);
-
 ////////////////// Backward(grad) api impls //////////////////////
 
 void imag_grad_impl(const Tensor& out_grad, Tensor* x_grad);
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index 7905d2094f0b3cb90d29c2645b473465e4557a32..d6e9218f306425d76c126c92d8875236d2f8fbd3 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -328,7 +328,12 @@
 - op : batch_norm
   args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
   output : Tensor(out), Tensor(mean_out), Tensor(variance_out), Tensor(saved_mean), Tensor(saved_variance), Tensor(reserve_space)
-  invoke : batch_norm_impl(x, scale, bias, mean, variance, momentum, epsilon, data_layout, is_test, use_global_stats, trainable_statistics, fuse_with_relu)
+  infer_meta:
+    func : BatchNormInferMeta
+  kernel :
+    func : batch_norm
+    data_type : x
+    view : (mean -> mean_out), (variance -> variance_out)
   backward : batch_norm_grad
 
 - op : bce_loss
@@ -798,7 +803,14 @@
 - op : embedding
   args : (Tensor x, Tensor weight, int64_t padding_idx=-1, bool sparse=false)
   output : Tensor
-  invoke : embedding_impl(x, weight, padding_idx, sparse)
+  infer_meta :
+    func : EmbeddingInferMeta
+    param : [x, weight, padding_idx]
+  kernel :
+    func : embedding {dense, dense -> dense}
+           sparse_weight_embedding {dense, selected_rows -> dense}
+    param : [x, weight, padding_idx]
+    data_type : weight
   backward : embedding_grad
 
 - op : empty
diff --git a/paddle/phi/infermeta/binary.cc b/paddle/phi/infermeta/binary.cc
index a2cd4fad27c825e2f6dd042af354c01ed94376f4..f4be109f7c79dbde5e9c28e2489f3bad344daa6a 100644
--- a/paddle/phi/infermeta/binary.cc
+++ b/paddle/phi/infermeta/binary.cc
@@ -90,32 +90,6 @@ void AllValueCompareInferMeta(const MetaTensor& x,
   out->set_dtype(DataType::BOOL);
 }
 
-void EmbeddingInferMeta(const MetaTensor& input,
-                        const MetaTensor& weight,
-                        int64_t padding_idx,
-                        MetaTensor* out) {
-  auto table_dims = weight.dims();
-  auto ids_dims = input.dims();
-  int ids_rank = ids_dims.size();
-  VLOG(5) << "ids rank is " << ids_rank << std::endl;
-  PADDLE_ENFORCE_EQ(
-      table_dims.size(),
-      2,
-      phi::errors::InvalidArgument(
-          "ShapeError: The dimensions of the 'lookup table' must be 2. "
-          "But received lookup table's dimensions = %d, "
-          "lookup table's shape = [%s].",
-          table_dims.size(),
-          table_dims));
-
-  auto output_dims = phi::vectorize(ids_dims);
-  output_dims.push_back(table_dims[1]);
-
-  out->set_dims(phi::make_ddim(output_dims));
-  out->set_dtype(weight.dtype());
-  out->share_lod(input);
-}
-
 void KLDivInferMeta(const MetaTensor& x,
                     const MetaTensor& label,
                     const std::string& reduction,
@@ -1196,7 +1170,6 @@ void ElementwiseRawInferMeta(const MetaTensor& x,
 void EmbeddingInferMeta(const MetaTensor& x,
                         const MetaTensor& weight,
                         int64_t padding_idx,
-                        bool sparse,
                         MetaTensor* out) {
   const auto& table_dims = weight.dims();
   const auto& ids_dims = x.dims();
diff --git a/paddle/phi/infermeta/binary.h b/paddle/phi/infermeta/binary.h
index 59fedfe2550690de3e35b39cd1d0a9db1b4d5f81..d310f6282b09d88f85abe4c6d808379864c06162 100644
--- a/paddle/phi/infermeta/binary.h
+++ b/paddle/phi/infermeta/binary.h
@@ -38,11 +38,6 @@ void AllValueCompareInferMeta(const MetaTensor& x,
                               MetaTensor* out,
                               MetaConfig config = MetaConfig());
 
-void EmbeddingInferMeta(const MetaTensor& input,
-                        const MetaTensor& weight,
-                        int64_t padding_idx,
-                        MetaTensor* out);
-
 void KLDivInferMeta(const MetaTensor& x,
                     const MetaTensor& label,
                     const std::string& reduction,
@@ -201,7 +196,6 @@ void ElementwiseRawInferMeta(const MetaTensor& x_meta,
 void EmbeddingInferMeta(const MetaTensor& x,
                         const MetaTensor& weight,
                         int64_t padding_idx,
-                        bool sparse,
                         MetaTensor* out);
 
 void ExpandAsInferMeta(const MetaTensor& x,
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index e2a7291f74bd657484da7d2b346bef97efa61cb9..6b7f60a8618b733605aa1d861a3fcb3cf64d807e 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -2901,5 +2901,4 @@ void GraphSendUVInferMeta(const MetaTensor& x,
 
 }  // namespace phi
 
-PD_REGISTER_INFER_META_FN(batch_norm, phi::BatchNormInferMeta);
 PD_REGISTER_INFER_META_FN(batch_norm_infer, phi::BatchNormInferInferMeta);
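
For reference: the dual `kernel : func` entry for `embedding` in legacy_ops.yaml above asks the code generator to emit the same weight-type dispatch that the removed `embedding_impl` performed by hand, routing a dense weight to the `embedding` kernel and a `SelectedRows` weight to `sparse_weight_embedding`. Below is a minimal C++ sketch of that selection step only; `SelectEmbeddingKernelName` is a hypothetical helper for illustration, not the actual generated API code.

// Illustrative sketch, not the generated code: it mirrors the branch the
// removed embedding_impl used, and that the new yaml entry
//   embedding {dense, dense -> dense}
//   sparse_weight_embedding {dense, selected_rows -> dense}
// asks the code generator to reproduce.
#include <string>

#include "paddle/phi/api/include/tensor.h"
#include "paddle/phi/core/dense_tensor.h"

namespace paddle {
namespace experimental {

// Hypothetical helper: pick the kernel name from the runtime type of `weight`.
inline std::string SelectEmbeddingKernelName(const Tensor& weight) {
  // A dense lookup table dispatches to "embedding"; a SelectedRows table
  // (sparse weight) dispatches to "sparse_weight_embedding".
  return phi::DenseTensor::classof(weight.impl().get())
             ? "embedding"
             : "sparse_weight_embedding";
}

}  // namespace experimental
}  // namespace paddle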