From ce256f7532ca541d3d8195dfd7897c43faeb5fa3 Mon Sep 17 00:00:00 2001
From: PommesPeter <54879512+PommesPeter@users.noreply.github.com>
Date: Fri, 12 May 2023 22:32:09 +0800
Subject: [PATCH] =?UTF-8?q?=E3=80=90Hackathon=204=20No.20=E3=80=91Add=20i0?=
 =?UTF-8?q?=20/=20i0e=20to=20paddle=20(#52058)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* added base code for i0 and i0e
* added grad base code for i0 and i0e
* added i0 and i0e python code
* added ops and backward yaml config
* added i0 and i0e cpu kernel, but not tested
* added i0 and i0e code and unittest files
* added test files
* added i0/i0e gpu implementation code
* updated code style
* updated code style
* fixed unittest code
* updated i0 with eigen3
* fixed bug and added more test cases
* refactor: fixed static graph bug
* refactor: removed i0 and i0e from op_compat
* refactor: updated code style
* refactor: updated op_compat.yaml
* refactor: updated op_compat.yaml
* refactor: fixed op name mapping and optimized unittest case
* refactor: manually implement i0 / i0e
* refactor: added grad kernel for i0 / i0e, didn't finish
* Update math.py
* refactor: added equation to doc in English and added comments for computing i0 / i0e gradient
* refactor: removed eigen implementation
* refactor: finished i0 / i0e cpu and gpu op
* refactor: updated code style
* fix: found a bug but not fixed yet
* fix: incorrect unittest cases
* update: updated code style and removed my file
* update: updated unittest case
* fix: fixed sign error
* fix: fixed mistakes when merging
* refactor: updated code style
* refactor: removed unused code
* refactor: updated code style
---
 paddle/phi/api/yaml/backward.yaml             |  20 ++
 paddle/phi/api/yaml/ops.yaml                  |  18 ++
 paddle/phi/kernels/cpu/i0_grad_kernel.cc      |  42 +++
 paddle/phi/kernels/cpu/i0_kernel.cc           |  37 +++
 paddle/phi/kernels/cpu/i0e_grad_kernel.cc     |  44 +++
 paddle/phi/kernels/cpu/i0e_kernel.cc          |  37 +++
 paddle/phi/kernels/gpu/i0_grad_kernel.cu      |  39 +++
 paddle/phi/kernels/gpu/i0_kernel.cu           |  35 +++
 paddle/phi/kernels/gpu/i0e_grad_kernel.cu     |  40 +++
 paddle/phi/kernels/gpu/i0e_kernel.cu          |  35 +++
 paddle/phi/kernels/i0_grad_kernel.h           |  28 ++
 paddle/phi/kernels/i0_kernel.h                |  31 +++
 paddle/phi/kernels/i0e_grad_kernel.h          |  29 ++
 paddle/phi/kernels/i0e_kernel.h               |  32 +++
 .../impl/bessel_grad_kernel_cuda_impl.h       |  90 +++++++
 .../kernels/impl/bessel_grad_kernel_impl.h    | 112 ++++++++
 .../kernels/impl/bessel_kernel_cuda_impl.h    | 231 ++++++++++++++++
 paddle/phi/kernels/impl/bessel_kernel_impl.h  | 254 ++++++++++++++++++
 python/paddle/__init__.py                     |   4 +
 .../fluid/tests/unittests/test_i0_op.py       | 144 ++++++++++
 .../fluid/tests/unittests/test_i0e_op.py      | 148 ++++++++++
 python/paddle/tensor/__init__.py              |   4 +
 python/paddle/tensor/math.py                  |  73 +++++
 23 files changed, 1527 insertions(+)
 create mode 100644 paddle/phi/kernels/cpu/i0_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/i0_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/i0e_grad_kernel.cc
 create mode 100644 paddle/phi/kernels/cpu/i0e_kernel.cc
 create mode 100644 paddle/phi/kernels/gpu/i0_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/i0_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/i0e_grad_kernel.cu
 create mode 100644 paddle/phi/kernels/gpu/i0e_kernel.cu
 create mode 100644 paddle/phi/kernels/i0_grad_kernel.h
 create mode 100644 paddle/phi/kernels/i0_kernel.h
 create mode 100644 paddle/phi/kernels/i0e_grad_kernel.h
 create mode 100644 paddle/phi/kernels/i0e_kernel.h
 create mode 100644 paddle/phi/kernels/impl/bessel_grad_kernel_cuda_impl.h
 create mode 100644 paddle/phi/kernels/impl/bessel_grad_kernel_impl.h
 create mode 100644 paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h
 create mode 100644 paddle/phi/kernels/impl/bessel_kernel_impl.h
 create mode 100644 python/paddle/fluid/tests/unittests/test_i0_op.py
 create mode 100644 python/paddle/fluid/tests/unittests/test_i0e_op.py

diff --git a/paddle/phi/api/yaml/backward.yaml b/paddle/phi/api/yaml/backward.yaml
index 27bdf904eee..e3ef0460884 100644
--- a/paddle/phi/api/yaml/backward.yaml
+++ b/paddle/phi/api/yaml/backward.yaml
@@ -775,6 +775,26 @@
   kernel :
     func : huber_loss_grad
 
+- backward_op : i0_grad
+  forward : i0 (Tensor x) -> Tensor(out)
+  args : (Tensor x, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : i0_grad
+
+- backward_op : i0e_grad
+  forward : i0e (Tensor x) -> Tensor(out)
+  args : (Tensor x, Tensor out, Tensor out_grad)
+  output : Tensor(x_grad)
+  infer_meta :
+    func : UnchangedInferMeta
+    param : [x]
+  kernel :
+    func : i0e_grad
+
 - backward_op : imag_grad
   forward : imag (Tensor x) -> Tensor(out)
   args : (Tensor out_grad)
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index de9d6c4f29c..b8f99bbe106 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -920,6 +920,24 @@
   intermediate : residual
   backward : huber_loss_grad
 
+- op : i0
+  args : (Tensor x)
+  output : Tensor(out)
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel :
+    func : i0
+  backward : i0_grad
+
+- op : i0e
+  args : (Tensor x)
+  output : Tensor(out)
+  infer_meta :
+    func : UnchangedInferMeta
+  kernel :
+    func : i0e
+  backward : i0e_grad
+
 - op : imag
   args : (Tensor x)
   output : Tensor (out)
diff --git a/paddle/phi/kernels/cpu/i0_grad_kernel.cc b/paddle/phi/kernels/cpu/i0_grad_kernel.cc
new file mode 100644
index 00000000000..12e4ba437e1
--- /dev/null
+++ b/paddle/phi/kernels/cpu/i0_grad_kernel.cc
@@ -0,0 +1,42 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
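+//
+// The backward pass relies on the standard Bessel identity d/dx I0(x) = I1(x):
+// I0GradFunctor computes x_grad = i1(x) * out_grad, evaluating i1 through the
+// Chebyshev expansions of exp(-x) * I1(x) declared in
+// bessel_grad_kernel_impl.h.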
+ +#include "paddle/phi/kernels/i0_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/for_range.h" +#include "paddle/phi/kernels/impl/bessel_grad_kernel_impl.h" + +namespace phi { + +template +void I0GradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& out_grad, + DenseTensor* x_grad) { + auto size = x.numel(); + auto* x_data = x.data(); + auto* out_grad_data = out_grad.data(); + auto* x_grad_data = ctx.template Alloc(x_grad); + + phi::funcs::ForRange for_range(ctx, size); + I0GradFunctor functor(x_data, out_grad_data, x_grad_data, size); + for_range(functor); +} + +} // namespace phi + +PD_REGISTER_KERNEL(i0_grad, CPU, ALL_LAYOUT, phi::I0GradKernel, float, double) { +} diff --git a/paddle/phi/kernels/cpu/i0_kernel.cc b/paddle/phi/kernels/cpu/i0_kernel.cc new file mode 100644 index 00000000000..3c6ba8f78ae --- /dev/null +++ b/paddle/phi/kernels/cpu/i0_kernel.cc @@ -0,0 +1,37 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/i0_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/for_range.h" +#include "paddle/phi/kernels/impl/bessel_kernel_impl.h" + +namespace phi { + +template +void I0Kernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { + const int64_t size = x.numel(); + const T* x_data = x.data(); + T* out_data = ctx.template Alloc(out); + + phi::funcs::ForRange for_range(ctx, size); + I0Functor functor(x_data, out_data, size); + for_range(functor); +} + +} // namespace phi + +PD_REGISTER_KERNEL(i0, CPU, ALL_LAYOUT, phi::I0Kernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/i0e_grad_kernel.cc b/paddle/phi/kernels/cpu/i0e_grad_kernel.cc new file mode 100644 index 00000000000..4521e084585 --- /dev/null +++ b/paddle/phi/kernels/cpu/i0e_grad_kernel.cc @@ -0,0 +1,44 @@ +// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include "paddle/phi/kernels/i0e_grad_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/for_range.h" +#include "paddle/phi/kernels/impl/bessel_grad_kernel_impl.h" + +namespace phi { + +template +void I0eGradKernel(const Context& ctx, + const DenseTensor& x, + const DenseTensor& out, + const DenseTensor& out_grad, + DenseTensor* x_grad) { + auto size = x.numel(); + auto* x_data = x.data(); + auto* out_data = out.data(); + auto* out_grad_data = out_grad.data(); + auto* x_gard_data = ctx.template Alloc(x_grad); + + phi::funcs::ForRange for_range(ctx, size); + I0eGradFunctor functor(x_data, out_data, out_grad_data, x_gard_data, size); + for_range(functor); +} + +} // namespace phi + +PD_REGISTER_KERNEL( + i0e_grad, CPU, ALL_LAYOUT, phi::I0eGradKernel, float, double) {} diff --git a/paddle/phi/kernels/cpu/i0e_kernel.cc b/paddle/phi/kernels/cpu/i0e_kernel.cc new file mode 100644 index 00000000000..c3a9ff7780d --- /dev/null +++ b/paddle/phi/kernels/cpu/i0e_kernel.cc @@ -0,0 +1,37 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/phi/kernels/i0e_kernel.h" + +#include "paddle/phi/backends/cpu/cpu_context.h" +#include "paddle/phi/core/kernel_registry.h" +#include "paddle/phi/kernels/funcs/for_range.h" +#include "paddle/phi/kernels/impl/bessel_kernel_impl.h" + +namespace phi { + +template +void I0eKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) { + int64_t size = x.numel(); + const T* x_data = x.data(); + T* out_data = ctx.template Alloc(out); + + phi::funcs::ForRange for_range(ctx, size); + I0eFunctor functor(x_data, out_data, size); + for_range(functor); +} + +} // namespace phi + +PD_REGISTER_KERNEL(i0e, CPU, ALL_LAYOUT, phi::I0eKernel, float, double) {} diff --git a/paddle/phi/kernels/gpu/i0_grad_kernel.cu b/paddle/phi/kernels/gpu/i0_grad_kernel.cu new file mode 100644 index 00000000000..9883a7db054 --- /dev/null +++ b/paddle/phi/kernels/gpu/i0_grad_kernel.cu @@ -0,0 +1,39 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
diff --git a/paddle/phi/kernels/gpu/i0_grad_kernel.cu b/paddle/phi/kernels/gpu/i0_grad_kernel.cu
new file mode 100644
index 00000000000..9883a7db054
--- /dev/null
+++ b/paddle/phi/kernels/gpu/i0_grad_kernel.cu
@@ -0,0 +1,39 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/i0_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/impl/bessel_grad_kernel_cuda_impl.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void I0GradKernel(const Context& ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& out_grad,
+                  DenseTensor* x_grad) {
+  ctx.template Alloc<T>(x_grad);
+  std::vector<const DenseTensor*> ins = {&x, &out_grad};
+  std::vector<DenseTensor*> outs = {x_grad};
+  auto functor = CudaI0GradFunctor<T>();
+  phi::funcs::ElementwiseKernel<T>(ctx, ins, &outs, functor);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(i0_grad, GPU, ALL_LAYOUT, phi::I0GradKernel, float, double) {
+}
diff --git a/paddle/phi/kernels/gpu/i0_kernel.cu b/paddle/phi/kernels/gpu/i0_kernel.cu
new file mode 100644
index 00000000000..e8641933c37
--- /dev/null
+++ b/paddle/phi/kernels/gpu/i0_kernel.cu
@@ -0,0 +1,35 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/i0_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void I0Kernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
+  ctx.template Alloc<T>(out);
+  std::vector<const DenseTensor*> ins = {&x};
+  std::vector<DenseTensor*> outs = {out};
+  auto functor = CudaI0Functor<T>();
+  phi::funcs::ElementwiseKernel<T>(ctx, ins, &outs, functor);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(i0, GPU, ALL_LAYOUT, phi::I0Kernel, float, double) {}
diff --git a/paddle/phi/kernels/gpu/i0e_grad_kernel.cu b/paddle/phi/kernels/gpu/i0e_grad_kernel.cu
new file mode 100644
index 00000000000..1f50de93ce2
--- /dev/null
+++ b/paddle/phi/kernels/gpu/i0e_grad_kernel.cu
@@ -0,0 +1,40 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/i0e_grad_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/impl/bessel_grad_kernel_cuda_impl.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void I0eGradKernel(const Context& ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& out,
+                   const DenseTensor& out_grad,
+                   DenseTensor* x_grad) {
+  ctx.template Alloc<T>(x_grad);
+  std::vector<const DenseTensor*> ins = {&x, &out, &out_grad};
+  std::vector<DenseTensor*> outs = {x_grad};
+  auto functor = CudaI0eGradFunctor<T>();
+  phi::funcs::ElementwiseKernel<T>(ctx, ins, &outs, functor);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(
+    i0e_grad, GPU, ALL_LAYOUT, phi::I0eGradKernel, float, double) {}
diff --git a/paddle/phi/kernels/gpu/i0e_kernel.cu b/paddle/phi/kernels/gpu/i0e_kernel.cu
new file mode 100644
index 00000000000..06f8758ef39
--- /dev/null
+++ b/paddle/phi/kernels/gpu/i0e_kernel.cu
@@ -0,0 +1,35 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/i0e_kernel.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/elementwise_base.h"
+#include "paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void I0eKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out) {
+  ctx.template Alloc<T>(out);
+  std::vector<const DenseTensor*> ins = {&x};
+  std::vector<DenseTensor*> outs = {out};
+  auto functor = CudaI0eFunctor<T>();
+  phi::funcs::ElementwiseKernel<T>(ctx, ins, &outs, functor);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(i0e, GPU, ALL_LAYOUT, phi::I0eKernel, float, double) {}
diff --git a/paddle/phi/kernels/i0_grad_kernel.h b/paddle/phi/kernels/i0_grad_kernel.h
new file mode 100644
index 00000000000..618e9336a41
--- /dev/null
+++ b/paddle/phi/kernels/i0_grad_kernel.h
@@ -0,0 +1,28 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
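+//
+// CPU and GPU share these declarations: the CPU kernels iterate with
+// phi::funcs::ForRange over the index functors in bessel_grad_kernel_impl.h,
+// while the GPU kernels dispatch phi::funcs::ElementwiseKernel over the
+// __device__ functors in bessel_grad_kernel_cuda_impl.h, which upcast to
+// MPTypeTrait<T>::Type for the intermediate computation.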
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void I0GradKernel(const Context& ctx,
+                  const DenseTensor& x,
+                  const DenseTensor& out_grad,
+                  DenseTensor* x_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/i0_kernel.h b/paddle/phi/kernels/i0_kernel.h
new file mode 100644
index 00000000000..6378965d004
--- /dev/null
+++ b/paddle/phi/kernels/i0_kernel.h
@@ -0,0 +1,31 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+/**
+ * @brief This kernel calculates the modified Bessel function of order 0.
+ * @param ctx device context
+ * @param x   The input tensor of i0
+ * @param out The output tensor of the i0 kernel, with the same shape and
+ *            dtype as the input. Each element is computed from the
+ *            corresponding input element.
+ */
+template <typename T, typename Context>
+void I0Kernel(const Context& ctx, const DenseTensor& x, DenseTensor* out);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/i0e_grad_kernel.h b/paddle/phi/kernels/i0e_grad_kernel.h
new file mode 100644
index 00000000000..78a80cc5059
--- /dev/null
+++ b/paddle/phi/kernels/i0e_grad_kernel.h
@@ -0,0 +1,29 @@
+// Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+#include "paddle/phi/core/device_context.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void I0eGradKernel(const Context& ctx,
+                   const DenseTensor& x,
+                   const DenseTensor& out,
+                   const DenseTensor& out_grad,
+                   DenseTensor* x_grad);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/i0e_kernel.h b/paddle/phi/kernels/i0e_kernel.h
new file mode 100644
index 00000000000..db298dea83a
--- /dev/null
+++ b/paddle/phi/kernels/i0e_kernel.h
@@ -0,0 +1,32 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+/**
+ * @brief This kernel calculates the exponentially scaled modified Bessel
+ *        function of order 0.
+ * @param ctx device context
+ * @param x   The input tensor of i0e
+ * @param out The output tensor of the i0e kernel, with the same shape and
+ *            dtype as the input. Each element is computed from the
+ *            corresponding input element.
+ */
+template <typename T, typename Context>
+void I0eKernel(const Context& ctx, const DenseTensor& x, DenseTensor* out);
+
+}  // namespace phi
diff --git a/paddle/phi/kernels/impl/bessel_grad_kernel_cuda_impl.h b/paddle/phi/kernels/impl/bessel_grad_kernel_cuda_impl.h
new file mode 100644
index 00000000000..7789e3d8e93
--- /dev/null
+++ b/paddle/phi/kernels/impl/bessel_grad_kernel_cuda_impl.h
@@ -0,0 +1,90 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h"
+
+namespace phi {
+
+template <typename T>
+struct CudaI0GradFunctor {
+  __device__ __forceinline__ T operator()(const T _x, const T _out_grad) const {
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+    const MT mp_x = static_cast<MT>(_x);
+    const MT mp_out_grad = static_cast<MT>(_out_grad);
+    // get output of i1
+    MT x = std::abs(mp_x);
+    if (x <= MT{8.0}) {
+      auto coeff_pair_A = ChebyshevCoefficientsI1e_A<MT>();
+      auto A = std::get<0>(coeff_pair_A);
+      auto len = std::get<1>(coeff_pair_A);
+      MT y = (x / MT{2.0}) - MT{2.0};
+
+      const MT i1_out = std::exp(x) * x * Chbevl<MT>(y, A, len);
+      const MT i1_data = (mp_x < MT{0.0}) ? -i1_out : i1_out;
+      // calculate i0 gradient
+      return static_cast<T>(i1_data * mp_out_grad);
+    }
+    auto coeff_pair_B = ChebyshevCoefficientsI1e_B<MT>();
+    auto B = std::get<0>(coeff_pair_B);
+    auto len = std::get<1>(coeff_pair_B);
+    MT y = (MT{32.0} / x) - MT{2.0};
+
+    const MT i1_out = (std::exp(x) * Chbevl<MT>(y, B, len)) / std::sqrt(x);
+    const MT i1_data = (mp_x < MT{0.0}) ? -i1_out : i1_out;
+
+    return static_cast<T>(i1_data * mp_out_grad);
+  }
+};
+
+template <typename T>
+struct CudaI0eGradFunctor {
+  __device__ __forceinline__ T operator()(const T _x,
+                                          const T _out,
+                                          const T _out_grad) const {
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+    const MT mp_x = static_cast<MT>(_x);
+    const MT mp_out = static_cast<MT>(_out);
+    const MT mp_out_grad = static_cast<MT>(_out_grad);
+    // get output of i1e
+    MT x = std::abs(mp_x);
+    if (x <= MT{8.0}) {
+      auto coeff_pair_A = ChebyshevCoefficientsI1e_A<MT>();
+      auto A = std::get<0>(coeff_pair_A);
+      auto len = std::get<1>(coeff_pair_A);
+      MT y = (x / MT{2.0}) - MT{2.0};
+
+      const MT i1e_out = Chbevl<MT>(y, A, len) * x;
+      const MT i1e_data = (mp_x < MT{0.0}) ? -i1e_out : i1e_out;
+      // calculate i0e gradient
+      return static_cast<T>(
+          (i1e_data - std::copysign(MT{1.0}, mp_x) * mp_out) * mp_out_grad);
+    }
+    auto coeff_pair_B = ChebyshevCoefficientsI1e_B<MT>();
+    auto B = std::get<0>(coeff_pair_B);
+    auto len = std::get<1>(coeff_pair_B);
+    MT y = (MT{32.0} / x) - MT{2.0};
+
+    const MT i1e_out = Chbevl<MT>(y, B, len) / std::sqrt(x);
+    const MT i1e_data = (mp_x < MT{0.0}) ? -i1e_out : i1e_out;
+
+    return static_cast<T>(
+        (i1e_data - std::copysign(MT{1.0}, mp_x) * mp_out) * mp_out_grad);
+  }
+};
+
+}  // namespace phi
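Both gradient implementations reduce to evaluating i1 / i1e through the shared Chebyshev tables. The two identities they encode can be spot-checked against SciPy with central differences; this is an illustrative sketch, not part of the patch:

    import numpy as np
    from scipy import special

    x = np.linspace(-12.0, 12.0, 7)  # includes 0 and the +/-8 interval boundary
    h = 1e-6

    # d/dx i0(x) = i1(x)  (what I0GradFunctor / CudaI0GradFunctor compute)
    num_i0 = (special.i0(x + h) - special.i0(x - h)) / (2 * h)
    assert np.allclose(num_i0, special.i1(x), rtol=1e-4, atol=1e-6)

    # d/dx i0e(x) = i1e(x) - sign(x) * i0e(x)  (the I0eGradFunctor formula)
    num_i0e = (special.i0e(x + h) - special.i0e(x - h)) / (2 * h)
    assert np.allclose(num_i0e, special.i1e(x) - np.sign(x) * special.i0e(x),
                       rtol=1e-4, atol=1e-6)
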
diff --git a/paddle/phi/kernels/impl/bessel_grad_kernel_impl.h b/paddle/phi/kernels/impl/bessel_grad_kernel_impl.h
new file mode 100644
index 00000000000..1de9a9fcbfc
--- /dev/null
+++ b/paddle/phi/kernels/impl/bessel_grad_kernel_impl.h
@@ -0,0 +1,112 @@
+/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/phi/backends/all_context.h"
+#include "paddle/phi/common/amp_type_traits.h"
+#include "paddle/phi/core/kernel_registry.h"
+#include "paddle/phi/kernels/funcs/for_range.h"
+#include "paddle/phi/kernels/impl/bessel_kernel_impl.h"
+
+namespace phi {
+
+template <typename T>
+struct I0GradFunctor {
+  I0GradFunctor(const T* x, const T* out_grad, T* x_grad, int64_t numel)
+      : inp_x_(x),
+        inp_out_grad_(out_grad),
+        output_x_grad_(x_grad),
+        numel_(numel) {}
+
+  HOSTDEVICE void operator()(int64_t idx) const {
+    using MT = typename phi::dtype::MPTypeTrait<T>::Type;
+    const MT mp_x = static_cast<MT>(inp_x_[idx]);
+    const MT mp_out_grad = static_cast<MT>(inp_out_grad_[idx]);
+
+    MT x = std::abs(mp_x);
+    if (x <= MT{8.0}) {
+      auto coeff_pair_A = ChebyshevCoefficientsI1e_A<MT>();
+      auto A = std::get<0>(coeff_pair_A);
+      auto len = std::get<1>(coeff_pair_A);
+      MT y = (x / MT{2.0}) - MT{2.0};
+
+      const MT i1_out = std::exp(x) * x * Chbevl<MT>(y, A, len);
+      const MT i1_data = (mp_x < MT{0.0}) ? -i1_out : i1_out;
+      output_x_grad_[idx] = static_cast<T>(i1_data * mp_out_grad);
+    } else {
+      auto coeff_pair_B = ChebyshevCoefficientsI1e_B<MT>();
+      auto B = std::get<0>(coeff_pair_B);
+      auto len = std::get<1>(coeff_pair_B);
+      MT y = (MT{32.0} / x) - MT{2.0};
+
+      const MT i1_out = (std::exp(x) * Chbevl<MT>(y, B, len)) / std::sqrt(x);
+      const MT i1_data = (mp_x < MT{0.0}) ? -i1_out : i1_out;
+      output_x_grad_[idx] = static_cast<T>(i1_data * mp_out_grad);
+    }
+  }
+
+ private:
+  const T* inp_x_;
+  const T* inp_out_grad_;
+  T* output_x_grad_;
+  int64_t numel_;
+};
+
+template <typename T>
+struct I0eGradFunctor {
+  I0eGradFunctor(
+      const T* x, const T* out, const T* out_grad, T* x_grad, int64_t numel)
+      : inp_x_(x),
+        inp_out_(out),
+        inp_out_grad_(out_grad),
+        output_x_grad_(x_grad),
+        numel_(numel) {}
+
+  HOSTDEVICE void operator()(int64_t idx) const {
+    T x = std::abs(inp_x_[idx]);
+    if (x <= T{8.0}) {
+      auto coeff_pair_A = ChebyshevCoefficientsI1e_A<T>();
+      auto A = std::get<0>(coeff_pair_A);
+      auto len = std::get<1>(coeff_pair_A);
+      T y = (x / T{2.0}) - T{2.0};
+
+      const T out = Chbevl<T>(y, A, len) * x;
+      const T i1e_out = (inp_x_[idx] < T{0.0}) ?
-out : out; + output_x_grad_[idx] = + (i1e_out - std::copysign(T{1.0}, inp_x_[idx]) * inp_out_[idx]) * + inp_out_grad_[idx]; + } else { + auto coeff_pair_B = ChebyshevCoefficientsI1e_B(); + auto B = std::get<0>(coeff_pair_B); + auto len = std::get<1>(coeff_pair_B); + T y = (T{32.0} / x) - T{2.0}; + + const T out = Chbevl(y, B, len) / std::sqrt(x); + const T i1e_out = (inp_x_[idx] < T{0.0}) ? -out : out; + output_x_grad_[idx] = + (i1e_out - std::copysign(T{1.0}, inp_x_[idx]) * inp_out_[idx]) * + inp_out_grad_[idx]; + } + } + + private: + const T* inp_x_; + const T* inp_out_; + const T* inp_out_grad_; + T* output_x_grad_; + int64_t numel_; +}; + +} // namespace phi diff --git a/paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h b/paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h new file mode 100644 index 00000000000..32649c90190 --- /dev/null +++ b/paddle/phi/kernels/impl/bessel_kernel_cuda_impl.h @@ -0,0 +1,231 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/backends/gpu/gpu_context.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +__host__ __device__ std::tuple ChebyshevCoefficientsI0e_A() { + /* Chebyshev coefficients for I0e(x) in the interval [0,8]. */ + static const T coeff[] = { + -4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + -3.04682672343198398683E-1, 6.76795274409476084995E-1}; + return std::make_tuple(coeff, 30); +} + +template +__host__ __device__ std::tuple ChebyshevCoefficientsI0e_B() { + /* Chebyshev coefficients for I0e(x) in the inverted interval [8,infinity]. 
*/ + static const T coeff[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + + return std::make_tuple(coeff, 25); +} + +template +__host__ __device__ T Chbevl(T x, const T array[], size_t len) { + T b0, b1, b2; + + b0 = array[0]; + b1 = static_cast(0.0); + + for (size_t i = 1; i < len; ++i) { + b2 = b1; + b1 = b0; + b0 = x * b1 - b2 + array[i]; + } + + return (static_cast(0.5) * (b0 - b2)); +} + +template +struct CudaI0Functor { + __device__ __forceinline__ T operator()(const T _x) const { + using MT = typename phi::dtype::MPTypeTrait::Type; + const MT mp_x = static_cast(_x); + MT x = std::abs(mp_x); + if (x <= MT{8.0}) { + auto coeff_pair_A = ChebyshevCoefficientsI0e_A(); + auto A = std::get<0>(coeff_pair_A); + auto len = std::get<1>(coeff_pair_A); + MT y = (x / MT{2.0}) - MT{2.0}; + + return static_cast(std::exp(x) * Chbevl(y, A, len)); + } + auto coeff_pair_B = ChebyshevCoefficientsI0e_B(); + auto B = std::get<0>(coeff_pair_B); + auto len = std::get<1>(coeff_pair_B); + MT y = (MT{32.0} / x) - MT{2.0}; + + return static_cast(std::exp(x) * Chbevl(y, B, len) / std::sqrt(x)); + } +}; + +template +struct CudaI0eFunctor { + __device__ __forceinline__ T operator()(const T _x) const { + using MT = typename phi::dtype::MPTypeTrait::Type; + const MT mp_x = static_cast(_x); + MT x = std::abs(mp_x); + if (x <= MT{8.0}) { + auto coeff_pair_A = ChebyshevCoefficientsI0e_A(); + auto A = std::get<0>(coeff_pair_A); + auto len = std::get<1>(coeff_pair_A); + MT y = (x / MT{2.0}) - MT{2.0}; + + return static_cast(Chbevl(y, A, len)); + } + auto coeff_pair_B = ChebyshevCoefficientsI0e_B(); + auto B = std::get<0>(coeff_pair_B); + auto len = std::get<1>(coeff_pair_B); + MT y = (MT{32.0} / x) - MT{2.0}; + + return static_cast(Chbevl(y, B, len) / std::sqrt(x)); + } +}; + +template +__host__ __device__ typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_A() { + /* Chebyshev coefficients for exp(-x) I1(x) + * in the interval [0,8]. + * + * lim(x->0){ exp(-x) I1(x) / x } = 1/2. 
+ */ + static const T coeff[] = { + 2.77791411276104639959E-18, -2.11142121435816608115E-17, + 1.55363195773620046921E-16, -1.10559694773538630805E-15, + 7.60068429473540693410E-15, -5.04218550472791168711E-14, + 3.22379336594557470981E-13, -1.98397439776494371520E-12, + 1.17361862988909016308E-11, -6.66348972350202774223E-11, + 3.62559028155211703701E-10, -1.88724975172282928790E-9, + 9.38153738649577178388E-9, -4.44505912879632808065E-8, + 2.00329475355213526229E-7, -8.56872026469545474066E-7, + 3.47025130813767847674E-6, -1.32731636560394358279E-5, + 4.78156510755005422638E-5, -1.61760815825896745588E-4, + 5.12285956168575772895E-4, -1.51357245063125314899E-3, + 4.15642294431288815669E-3, -1.05640848946261981558E-2, + 2.47264490306265168283E-2, -5.29459812080949914269E-2, + 1.02643658689847095384E-1, -1.76416518357834055153E-1, + 2.52587186443633654823E-1}; + return std::make_tuple(coeff, 29); +} + +template +__host__ __device__ typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_A() { + /* Chebyshev coefficients for exp(-x) I1(x) + * in the interval [0,8]. + * + * lim(x->0){ exp(-x) I1(x) / x } = 1/2. + */ + static const T coeff[] = {9.38153738649577178388E-9f, + -4.44505912879632808065E-8f, + 2.00329475355213526229E-7f, + -8.56872026469545474066E-7f, + 3.47025130813767847674E-6f, + -1.32731636560394358279E-5f, + 4.78156510755005422638E-5f, + -1.61760815825896745588E-4f, + 5.12285956168575772895E-4f, + -1.51357245063125314899E-3f, + 4.15642294431288815669E-3f, + -1.05640848946261981558E-2f, + 2.47264490306265168283E-2f, + -5.29459812080949914269E-2f, + 1.02643658689847095384E-1f, + -1.76416518357834055153E-1f, + 2.52587186443633654823E-1f}; + return std::make_tuple(coeff, 17); +} + +template +__host__ __device__ typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_B() { + /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x) + * in the inverted interval [8,infinity]. + * + * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi). + */ + static const T coeff[] = { + 7.51729631084210481353E-18, 4.41434832307170791151E-18, + -4.65030536848935832153E-17, -3.20952592199342395980E-17, + 2.96262899764595013876E-16, 3.30820231092092828324E-16, + -1.88035477551078244854E-15, -3.81440307243700780478E-15, + 1.04202769841288027642E-14, 4.27244001671195135429E-14, + -2.10154184277266431302E-14, -4.08355111109219731823E-13, + -7.19855177624590851209E-13, 2.03562854414708950722E-12, + 1.41258074366137813316E-11, 3.25260358301548823856E-11, + -1.89749581235054123450E-11, -5.58974346219658380687E-10, + -3.83538038596423702205E-9, -2.63146884688951950684E-8, + -2.51223623787020892529E-7, -3.88256480887769039346E-6, + -1.10588938762623716291E-4, -9.76109749136146840777E-3, + 7.78576235018280120474E-1}; + + return std::make_tuple(coeff, 25); +} + +template +__host__ __device__ typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_B() { + /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x) + * in the inverted interval [8,infinity]. + * + * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi). 
+ */ + static const T coeff[] = {-3.83538038596423702205E-9f, + -2.63146884688951950684E-8f, + -2.51223623787020892529E-7f, + -3.88256480887769039346E-6f, + -1.10588938762623716291E-4f, + -9.76109749136146840777E-3f, + 7.78576235018280120474E-1f}; + + return std::make_tuple(coeff, 7); +} + +} // namespace phi diff --git a/paddle/phi/kernels/impl/bessel_kernel_impl.h b/paddle/phi/kernels/impl/bessel_kernel_impl.h new file mode 100644 index 00000000000..1df09075393 --- /dev/null +++ b/paddle/phi/kernels/impl/bessel_kernel_impl.h @@ -0,0 +1,254 @@ +/* Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include "paddle/phi/backends/all_context.h" +#include "paddle/phi/core/kernel_registry.h" + +namespace phi { + +template +static inline std::tuple ChebyshevCoefficientsI0e_A() { + /* Chebyshev coefficients for exp(-x) I0(x) + * in the interval [0,8]. + * + * lim(x->0) { exp(-x) I0(x) } = 1. + */ + static const T coeff[] = { + -4.41534164647933937950E-18, 3.33079451882223809783E-17, + -2.43127984654795469359E-16, 1.71539128555513303061E-15, + -1.16853328779934516808E-14, 7.67618549860493561688E-14, + -4.85644678311192946090E-13, 2.95505266312963983461E-12, + -1.72682629144155570723E-11, 9.67580903537323691224E-11, + -5.18979560163526290666E-10, 2.65982372468238665035E-9, + -1.30002500998624804212E-8, 6.04699502254191894932E-8, + -2.67079385394061173391E-7, 1.11738753912010371815E-6, + -4.41673835845875056359E-6, 1.64484480707288970893E-5, + -5.75419501008210370398E-5, 1.88502885095841655729E-4, + -5.76375574538582365885E-4, 1.63947561694133579842E-3, + -4.32430999505057594430E-3, 1.05464603945949983183E-2, + -2.37374148058994688156E-2, 4.93052842396707084878E-2, + -9.49010970480476444210E-2, 1.71620901522208775349E-1, + -3.04682672343198398683E-1, 6.76795274409476084995E-1}; + return std::make_tuple(coeff, 30); +} + +template +static inline std::tuple ChebyshevCoefficientsI0e_B() { + /* Chebyshev coefficients for exp(-x) sqrt(x) I0(x) + * in the inverted interval [8,infinity]. + * + * lim(x->inf){ exp(-x) sqrt(x) I0(x) } = 1/sqrt(2pi). 
+ */ + static const T coeff[] = { + -7.23318048787475395456E-18, -4.83050448594418207126E-18, + 4.46562142029675999901E-17, 3.46122286769746109310E-17, + -2.82762398051658348494E-16, -3.42548561967721913462E-16, + 1.77256013305652638360E-15, 3.81168066935262242075E-15, + -9.55484669882830764870E-15, -4.15056934728722208663E-14, + 1.54008621752140982691E-14, 3.85277838274214270114E-13, + 7.18012445138366623367E-13, -1.79417853150680611778E-12, + -1.32158118404477131188E-11, -3.14991652796324136454E-11, + 1.18891471078464383424E-11, 4.94060238822496958910E-10, + 3.39623202570838634515E-9, 2.26666899049817806459E-8, + 2.04891858946906374183E-7, 2.89137052083475648297E-6, + 6.88975834691682398426E-5, 3.36911647825569408990E-3, + 8.04490411014108831608E-1}; + + return std::make_tuple(coeff, 25); +} + +template +static inline T Chbevl(T x, const T array[], size_t len) { + T b0, b1, b2; + + b0 = array[0]; + b1 = static_cast(0.0); + + for (size_t i = 1; i < len; ++i) { + b2 = b1; + b1 = b0; + b0 = x * b1 - b2 + array[i]; + } + + return (static_cast(0.5) * (b0 - b2)); +} + +template +struct I0eFunctor { + I0eFunctor(const T* input, T* output, int64_t numel) + : input_(input), output_(output), numel_(numel) {} + + HOSTDEVICE void operator()(int64_t idx) const { + T x = std::abs(input_[idx]); + if (x <= T{8.0}) { + auto coeff_pair_A = ChebyshevCoefficientsI0e_A(); + auto A = std::get<0>(coeff_pair_A); + auto len = std::get<1>(coeff_pair_A); + T y = (x / T{2.0}) - T{2.0}; + + output_[idx] = static_cast(Chbevl(y, A, len)); + } else { + auto coeff_pair_B = ChebyshevCoefficientsI0e_B(); + auto B = std::get<0>(coeff_pair_B); + auto len = std::get<1>(coeff_pair_B); + T y = (T{32.0} / x) - T{2.0}; + + output_[idx] = static_cast(Chbevl(y, B, len) / std::sqrt(x)); + } + } + + private: + const T* input_; + T* output_; + int64_t numel_; +}; + +template +struct I0Functor { + I0Functor(const T* input, T* output, int64_t numel) + : input_(input), output_(output), numel_(numel) {} + + HOSTDEVICE void operator()(int64_t idx) const { + T x = std::abs(input_[idx]); + if (x <= T{8.0}) { + auto coeff_pair_A = ChebyshevCoefficientsI0e_A(); + auto A = std::get<0>(coeff_pair_A); + auto len = std::get<1>(coeff_pair_A); + T y = (x / T{2.0}) - T{2.0}; + + output_[idx] = static_cast(std::exp(x) * Chbevl(y, A, len)); + } else { + auto coeff_pair_B = ChebyshevCoefficientsI0e_B(); + auto B = std::get<0>(coeff_pair_B); + auto len = std::get<1>(coeff_pair_B); + T y = (T{32.0} / x) - T{2.0}; + + output_[idx] = + static_cast(std::exp(x) * Chbevl(y, B, len) / std::sqrt(x)); + } + } + + private: + const T* input_; + T* output_; + int64_t numel_; +}; + +template +static inline typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_A() { + /* Chebyshev coefficients for exp(-x) I1(x) + * in the interval [0,8]. + * + * lim(x->0){ exp(-x) I1(x) / x } = 1/2. 
+ */ + static const T coeff[] = { + 2.77791411276104639959E-18, -2.11142121435816608115E-17, + 1.55363195773620046921E-16, -1.10559694773538630805E-15, + 7.60068429473540693410E-15, -5.04218550472791168711E-14, + 3.22379336594557470981E-13, -1.98397439776494371520E-12, + 1.17361862988909016308E-11, -6.66348972350202774223E-11, + 3.62559028155211703701E-10, -1.88724975172282928790E-9, + 9.38153738649577178388E-9, -4.44505912879632808065E-8, + 2.00329475355213526229E-7, -8.56872026469545474066E-7, + 3.47025130813767847674E-6, -1.32731636560394358279E-5, + 4.78156510755005422638E-5, -1.61760815825896745588E-4, + 5.12285956168575772895E-4, -1.51357245063125314899E-3, + 4.15642294431288815669E-3, -1.05640848946261981558E-2, + 2.47264490306265168283E-2, -5.29459812080949914269E-2, + 1.02643658689847095384E-1, -1.76416518357834055153E-1, + 2.52587186443633654823E-1}; + return std::make_tuple(coeff, 29); +} + +template +static inline typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_A() { + /* Chebyshev coefficients for exp(-x) I1(x) + * in the interval [0,8]. + * + * lim(x->0){ exp(-x) I1(x) / x } = 1/2. + */ + static const T coeff[] = {9.38153738649577178388E-9f, + -4.44505912879632808065E-8f, + 2.00329475355213526229E-7f, + -8.56872026469545474066E-7f, + 3.47025130813767847674E-6f, + -1.32731636560394358279E-5f, + 4.78156510755005422638E-5f, + -1.61760815825896745588E-4f, + 5.12285956168575772895E-4f, + -1.51357245063125314899E-3f, + 4.15642294431288815669E-3f, + -1.05640848946261981558E-2f, + 2.47264490306265168283E-2f, + -5.29459812080949914269E-2f, + 1.02643658689847095384E-1f, + -1.76416518357834055153E-1f, + 2.52587186443633654823E-1f}; + return std::make_tuple(coeff, 17); +} + +template +static inline typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_B() { + /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x) + * in the inverted interval [8,infinity]. + * + * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi). + */ + static const T coeff[] = { + 7.51729631084210481353E-18, 4.41434832307170791151E-18, + -4.65030536848935832153E-17, -3.20952592199342395980E-17, + 2.96262899764595013876E-16, 3.30820231092092828324E-16, + -1.88035477551078244854E-15, -3.81440307243700780478E-15, + 1.04202769841288027642E-14, 4.27244001671195135429E-14, + -2.10154184277266431302E-14, -4.08355111109219731823E-13, + -7.19855177624590851209E-13, 2.03562854414708950722E-12, + 1.41258074366137813316E-11, 3.25260358301548823856E-11, + -1.89749581235054123450E-11, -5.58974346219658380687E-10, + -3.83538038596423702205E-9, -2.63146884688951950684E-8, + -2.51223623787020892529E-7, -3.88256480887769039346E-6, + -1.10588938762623716291E-4, -9.76109749136146840777E-3, + 7.78576235018280120474E-1}; + + return std::make_tuple(coeff, 25); +} + +template +static inline typename std::enable_if::value, + std::tuple>::type +ChebyshevCoefficientsI1e_B() { + /* Chebyshev coefficients for exp(-x) sqrt(x) I1(x) + * in the inverted interval [8,infinity]. + * + * lim(x->inf){ exp(-x) sqrt(x) I1(x) } = 1/sqrt(2pi). 
+ */ + static const T coeff[] = {-3.83538038596423702205E-9f, + -2.63146884688951950684E-8f, + -2.51223623787020892529E-7f, + -3.88256480887769039346E-6f, + -1.10588938762623716291E-4f, + -9.76109749136146840777E-3f, + 7.78576235018280120474E-1f}; + + return std::make_tuple(coeff, 7); +} + +} // namespace phi diff --git a/python/paddle/__init__.py b/python/paddle/__init__.py index 8804cfff6de..8626e5e9f37 100644 --- a/python/paddle/__init__.py +++ b/python/paddle/__init__.py @@ -304,6 +304,8 @@ from .tensor.math import trapezoid # noqa: F401 from .tensor.math import cumulative_trapezoid # noqa: F401 from .tensor.math import vander # noqa: F401 from .tensor.math import nextafter # noqa: F401 +from .tensor.math import i0 # noqa: F401 +from .tensor.math import i0e # noqa: F401 from .tensor.random import bernoulli # noqa: F401 from .tensor.random import poisson # noqa: F401 @@ -698,4 +700,6 @@ __all__ = [ # noqa 'vander', 'unflatten', 'nextafter', + 'i0', + 'i0e', ] diff --git a/python/paddle/fluid/tests/unittests/test_i0_op.py b/python/paddle/fluid/tests/unittests/test_i0_op.py new file mode 100644 index 00000000000..a30f67312c8 --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_i0_op.py @@ -0,0 +1,144 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
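+#
+# Note on the reference gradient used below: the analytic derivative is
+# d/dx i0(x) = i1(x) (scipy.special.i1). test_check_grad scales it by
+# 1 / self.case.size, matching the uniform upstream gradient OpTest applies
+# when it reduces `out` to a scalar loss (effectively a mean over all
+# elements).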
+ +import unittest + +import numpy as np +from eager_op_test import OpTest +from scipy import special + +import paddle +from paddle.fluid import core + +np.random.seed(100) +paddle.seed(100) + + +def output_i0(x): + return special.i0(x) + + +def ref_i0_grad(x, dout): + gradx = special.i1(x) + return dout * gradx + + +class TestI0API(unittest.TestCase): + DTYPE = "float64" + DATA = [0, 1, 2, 3, 4, 5] + + def setUp(self): + self.x = np.array(self.DATA).astype(self.DTYPE) + self.place = [paddle.CPUPlace()] + if core.is_compiled_with_cuda(): + self.place.append(paddle.CUDAPlace(0)) + + def test_api_static(self): + def run(place): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data( + name="x", shape=self.x.shape, dtype=self.DTYPE + ) + out = paddle.i0(x) + exe = paddle.static.Executor(place) + res = exe.run( + paddle.static.default_main_program(), + feed={"x": self.x}, + fetch_list=[out], + ) + out_ref = output_i0(self.x) + np.testing.assert_allclose(res[0], out_ref, rtol=1e-5) + paddle.disable_static() + + for place in self.place: + run(place) + + def test_api_dygraph(self): + def run(place): + paddle.disable_static(place) + x = paddle.to_tensor(self.x) + out = paddle.i0(x) + + out_ref = output_i0(self.x) + np.testing.assert_allclose(out.numpy(), out_ref, rtol=1e-5) + paddle.enable_static() + + for place in self.place: + run(place) + + def test_empty_input_error(self): + for place in self.place: + paddle.disable_static(place) + x = None + self.assertRaises(ValueError, paddle.i0, x) + paddle.enable_static() + + +class TestI0Float32Zero2EightCase(TestI0API): + DTYPE = "float32" + DATA = [0, 1, 2, 3, 4, 5, 6, 7, 8] + + +class TestI0Float32OverEightCase(TestI0API): + DTYPE = "float32" + DATA = [9, 10, 11, 12] + + +class TestI0Float64Zero2EightCase(TestI0API): + DTYPE = "float64" + DATA = [0, 1, 2, 3, 4, 5, 6, 7, 8] + + +class TestI0Float64OverEightCase(TestI0API): + DTYPE = "float64" + DATA = [9, 10, 11, 12] + + +class TestI0Op(OpTest): + def setUp(self) -> None: + self.op_type = "i0" + self.python_api = paddle.i0 + self.init_config() + self.outputs = {"out": self.target} + + def init_config(self): + self.dtype = np.float64 + zero_case = np.zeros(1).astype(self.dtype) + rand_case = np.random.randn(100).astype(self.dtype) + one2eight_case = np.random.uniform(low=1, high=8, size=100).astype( + self.dtype + ) + over_eight_case = np.random.uniform(low=9, high=15, size=100).astype( + self.dtype + ) + self.case = np.concatenate( + [zero_case, rand_case, one2eight_case, over_eight_case] + ) + self.inputs = {'x': self.case} + self.target = output_i0(self.inputs['x']) + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + ['x'], + 'out', + user_defined_grads=[ref_i0_grad(self.case, 1 / self.case.size)], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/fluid/tests/unittests/test_i0e_op.py b/python/paddle/fluid/tests/unittests/test_i0e_op.py new file mode 100644 index 00000000000..0543d53effe --- /dev/null +++ b/python/paddle/fluid/tests/unittests/test_i0e_op.py @@ -0,0 +1,148 @@ +# Copyright (c) 2023 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest + +import numpy as np +from eager_op_test import OpTest +from scipy import special + +import paddle +from paddle.fluid import core + +np.random.seed(100) +paddle.seed(100) + + +def output_i0e(x): + return special.i0e(x) + + +def ref_i0e_grad(x, dout): + eps = np.finfo(x.dtype).eps + not_tiny = abs(x) > eps + safe_x = np.where(not_tiny, x, eps) + gradx = special.i1e(x) - np.sign(x) * output_i0e(safe_x) + gradx = np.where(not_tiny, gradx, -1.0) + return dout * gradx + + +class TestI0eAPI(unittest.TestCase): + DTYPE = "float64" + DATA = [0, 1, 2, 3, 4, 5] + + def setUp(self): + self.x = np.array(self.DATA).astype(self.DTYPE) + self.place = [paddle.CPUPlace()] + if core.is_compiled_with_cuda(): + self.place.append(paddle.CUDAPlace(0)) + + def test_api_static(self): + def run(place): + paddle.enable_static() + with paddle.static.program_guard(paddle.static.Program()): + x = paddle.static.data( + name="x", shape=self.x.shape, dtype=self.DTYPE + ) + y = paddle.i0e(x) + exe = paddle.static.Executor(place) + res = exe.run( + paddle.static.default_main_program(), + feed={"x": self.x}, + fetch_list=[y], + ) + out_ref = output_i0e(self.x) + np.testing.assert_allclose(out_ref, res[0], rtol=1e-5) + paddle.disable_static() + + for place in self.place: + run(place) + + def test_api_dygraph(self): + def run(place): + paddle.disable_static(place) + x = paddle.to_tensor(self.x) + out = paddle.i0e(x) + + out_ref = output_i0e(self.x) + np.testing.assert_allclose(out_ref, out.numpy(), rtol=1e-5) + paddle.enable_static() + + for place in self.place: + run(place) + + def test_empty_input_error(self): + for place in self.place: + paddle.disable_static(place) + x = None + self.assertRaises(ValueError, paddle.i0e, x) + paddle.enable_static() + + +class TestI0eFloat32Zero2EightCase(TestI0eAPI): + DTYPE = "float32" + DATA = [0, 1, 2, 3, 4, 5, 6, 7, 8] + + +class TestI0eFloat32OverEightCase(TestI0eAPI): + DTYPE = "float32" + DATA = [9, 10, 11, 12] + + +class TestI0eFloat64Zero2EightCase(TestI0eAPI): + DTYPE = "float64" + DATA = [0, 1, 2, 3, 4, 5, 6, 7, 8] + + +class TestI0eFloat64OverEightCase(TestI0eAPI): + DTYPE = "float64" + DATA = [9, 10, 11, 12] + + +class TestI0eOp(OpTest): + def setUp(self) -> None: + self.op_type = "i0e" + self.python_api = paddle.i0e + self.init_config() + self.outputs = {"out": self.target} + + def init_config(self): + self.dtype = np.float64 + zero_case = np.zeros(1).astype(self.dtype) + rand_case = np.random.randn(100).astype(self.dtype) + one2eight_case = np.random.uniform(low=1, high=8, size=100).astype( + self.dtype + ) + over_eight_case = np.random.uniform(low=9, high=15, size=100).astype( + self.dtype + ) + self.case = np.concatenate( + [zero_case, rand_case, one2eight_case, over_eight_case] + ) + self.inputs = {'x': self.case} + self.target = output_i0e(self.inputs['x']) + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad( + ['x'], + 'out', + user_defined_grads=[ref_i0e_grad(self.case, 1 / self.case.size)], + ) + + +if __name__ == "__main__": + unittest.main() diff --git a/python/paddle/tensor/__init__.py 
b/python/paddle/tensor/__init__.py
index a399fad5a8f..1e26e4ce56d 100644
--- a/python/paddle/tensor/__init__.py
+++ b/python/paddle/tensor/__init__.py
@@ -256,6 +256,8 @@ from .math import sigmoid  # noqa: F401
 from .math import sigmoid_  # noqa: F401
 from .math import vander  # noqa: F401
 from .math import nextafter  # noqa: F401
+from .math import i0  # noqa: F401
+from .math import i0e  # noqa: F401
 
 from .random import multinomial  # noqa: F401
 from .random import standard_normal  # noqa: F401
@@ -550,6 +552,8 @@ tensor_method_func = [  # noqa
     'vander',
     'nextafter',
     'unflatten',
+    'i0',
+    'i0e',
 ]
 
 # this list used in math_op_patch.py for magic_method bind
diff --git a/python/paddle/tensor/math.py b/python/paddle/tensor/math.py
index 40163ddf5d9..cbe0474960d 100644
--- a/python/paddle/tensor/math.py
+++ b/python/paddle/tensor/math.py
@@ -5584,3 +5584,76 @@ def nextafter(x, y, name=None):
     outputs = {"out": out}
     helper.append_op(type=op_type, inputs=inputs, outputs=outputs)
     return out
+
+
+def i0(x, name=None):
+    r"""
+    Calculate the modified Bessel function of order 0.
+
+    Equation:
+        .. math::
+
+            I_0(x) = \sum^{\infty}_{k=0}\frac{(x^2/4)^k}{(k!)^2}
+
+    Args:
+        x (Tensor): The input tensor. Its data type should be float32 or float64.
+        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+
+    Returns:
+        - out (Tensor), the value of the modified Bessel function of order 0 at x.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            x = paddle.to_tensor([0, 1, 2, 3, 4], dtype="float32")
+            print(paddle.i0(x))
+            # Tensor(shape=[5], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [0.99999994 , 1.26606596 , 2.27958512 , 4.88079262 , 11.30192089])
+    """
+    if in_dygraph_mode():
+        return _C_ops.i0(x)
+    else:
+        check_variable_and_dtype(x, "x", ["float32", "float64"], "i0")
+
+        helper = LayerHelper("i0", **locals())
+        out = helper.create_variable_for_type_inference(dtype=x.dtype)
+        helper.append_op(type='i0', inputs={'x': x}, outputs={'out': out})
+    return out
+
+
+def i0e(x, name=None):
+    r"""
+    Calculate the exponentially scaled modified Bessel function of order 0.
+
+    Equation:
+        .. math::
+
+            I_0(x) = \sum^{\infty}_{k=0}\frac{(x^2/4)^k}{(k!)^2} \\
+            I_{0e}(x) = e^{-|x|}I_0(x)
+
+    Args:
+        x (Tensor): The input tensor. Its data type should be float32 or float64.
+        name (str, optional): For details, please refer to :ref:`api_guide_Name`. Generally, no setting is required. Default: None.
+
+    Returns:
+        - out (Tensor), the value of the exponentially scaled modified Bessel function of order 0 at x.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+
+            x = paddle.to_tensor([0, 1, 2, 3, 4], dtype="float32")
+            print(paddle.i0e(x))
+            # Tensor(shape=[5], dtype=float32, place=Place(cpu), stop_gradient=True,
+            #        [1., 0.46575961, 0.30850832, 0.24300035, 0.20700192])
+    """
+    if in_dygraph_mode():
+        return _C_ops.i0e(x)
+    else:
+        check_variable_and_dtype(x, "x", ["float32", "float64"], "i0e")
+
+        helper = LayerHelper("i0e", **locals())
+        out = helper.create_variable_for_type_inference(dtype=x.dtype)
+        helper.append_op(type='i0e', inputs={'x': x}, outputs={'out': out})
+    return out
--
GitLab
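Typical dygraph usage once this patch is applied, for reference (an illustrative sketch; the gradients follow the identities noted above):

    import paddle

    x = paddle.to_tensor([0.5, 1.0, 5.0, 10.0], dtype="float64",
                         stop_gradient=False)
    y0 = paddle.i0(x)    # matches scipy.special.i0 on the same values
    ye = paddle.i0e(x)   # matches scipy.special.i0e
    (y0.sum() + ye.sum()).backward()
    print(x.grad)        # i1(x) + i1e(x) - sign(x) * i0e(x), elementwise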