diff --git a/paddle/fluid/operators/controlflow/logical_op.cu b/paddle/fluid/operators/controlflow/logical_op.cu
index 4a3fc6c895174c088fc98a017515c58101cd4d70..07399cb2a684cad2f25aa084c7e7c1d1554b36a6 100644
--- a/paddle/fluid/operators/controlflow/logical_op.cu
+++ b/paddle/fluid/operators/controlflow/logical_op.cu
@@ -1,8 +1,11 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -12,9 +15,6 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/controlflow/logical_op.h"
 #include "paddle/fluid/operators/elementwise/elementwise_op_broadcast.cu.h"
 
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-
 namespace paddle {
 namespace operators {
 
@@ -22,9 +22,10 @@ template <typename Functor>
 class BinaryLogicalOpKernel<platform::CUDADeviceContext, Functor>
     : public framework::OpKernel<typename Functor::ELEMENT_TYPE> {
  public:
-  using InT = typename Functor::ELEMENT_TYPE;
-  using OutT = bool;
   void Compute(const framework::ExecutionContext& ctx) const override {
+    using InT = typename Functor::ELEMENT_TYPE;
+    using OutT = bool;
+
     auto functor = Functor();
     std::vector<const framework::Tensor*> ins;
     std::vector<framework::Tensor*> outs;
@@ -45,6 +46,9 @@
 }  // namespace operators
 }  // namespace paddle
 
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
 #define REGISTER_LOGICAL_CUDA_KERNEL(op_name, func) \
   REGISTER_OP_CUDA_KERNEL(                          \
       op_name,                                      \
diff --git a/paddle/fluid/operators/controlflow/logical_op.h b/paddle/fluid/operators/controlflow/logical_op.h
index ee63da60fcd0fea223414d10d74f84f52e9e9e45..15cd643a858cc018e3007fa90ec479900cd243be 100644
--- a/paddle/fluid/operators/controlflow/logical_op.h
+++ b/paddle/fluid/operators/controlflow/logical_op.h
@@ -1,8 +1,11 @@
 /* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
 You may obtain a copy of the License at
+
 http://www.apache.org/licenses/LICENSE-2.0
+
 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
diff --git a/paddle/fluid/operators/elementwise/elementwise_functor.h b/paddle/fluid/operators/elementwise/elementwise_functor.h
index e80dfba325937796d5539022850356bd1addd3ca..8a6cadc2413dc7c35455e6f118e3c034073e32f2 100644
--- a/paddle/fluid/operators/elementwise/elementwise_functor.h
+++ b/paddle/fluid/operators/elementwise/elementwise_functor.h
@@ -22,6 +22,7 @@ namespace paddle {
 namespace operators {
 
 // Define the binary functors used in elementwise ops.
+// Note: InverseXxxFunctor is needed when calling ElementwiseComputeEx on CPU.
 
 // Add
 template <typename T>
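The note added to elementwise_functor.h is the key to the CPU path: when ElementwiseComputeEx is handed a lower-rank x, the operands are swapped so the higher-rank tensor broadcasts, and the InverseXxxFunctor variant restores the original argument order. A minimal standalone sketch of that pairing, with illustrative functor names rather than Paddle's (subtraction makes the asymmetry visible):

    #include <iostream>

    // Hypothetical stand-ins for Paddle's functor pattern: a binary op and
    // its "inverse" twin that evaluates with the arguments swapped back.
    template <typename T>
    struct SubFunctor {
      T operator()(const T a, const T b) const { return a - b; }
    };

    template <typename T>
    struct InverseSubFunctor {
      // Called as f(y_elem, x_elem) after the operands were swapped,
      // so it computes b - a to preserve the original x - y semantics.
      T operator()(const T a, const T b) const { return b - a; }
    };

    int main() {
      // Even if x and y were swapped for broadcasting, the inverse functor
      // still yields the result of the original x - y.
      std::cout << SubFunctor<int>()(7, 3) << "\n";         // 4
      std::cout << InverseSubFunctor<int>()(3, 7) << "\n";  // 4
      return 0;
    }

The mod kernels below use exactly this pairing: ModFunctor when x_dims.size() >= y_dims.size(), InverseModFunctor otherwise.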
diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc
index bb116c9c65ac0cbd2d799d126c93ba7fc6c3c320..28cd57618ee3baddae73d93f9657881e53784c8f 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc
+++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc
@@ -66,8 +66,8 @@ REGISTER_OP_CPU_KERNEL(
     elementwise_mod,
     ops::ElementwiseModKernel<paddle::platform::CPUDeviceContext, int>,
     ops::ElementwiseModKernel<paddle::platform::CPUDeviceContext, int64_t>,
-    ops::ElementwiseModFPKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::ElementwiseModFPKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::ElementwiseModKernel<paddle::platform::CPUDeviceContext, float>,
+    ops::ElementwiseModKernel<paddle::platform::CPUDeviceContext, double>);
 
 REGISTER_OP_VERSION(elementwise_mod)
     .AddCheckpoint(
diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cu b/paddle/fluid/operators/elementwise/elementwise_mod_op.cu
index 4ef957c617870e67e880afcff022fbef73dc8e5b..f01953a9acc37250d5734f02149f9be93e1301d8 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cu
+++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cu
@@ -14,9 +14,6 @@ limitations under the License. */
 
 #include "paddle/fluid/operators/elementwise/elementwise_mod_op.h"
 
-namespace ops = paddle::operators;
-namespace plat = paddle::platform;
-
 namespace paddle {
 namespace operators {
 
@@ -38,6 +35,9 @@
 }  // namespace operators
 }  // namespace paddle
 
+namespace ops = paddle::operators;
+namespace plat = paddle::platform;
+
 REGISTER_OP_CUDA_KERNEL(
     elementwise_mod, ops::ElementwiseModKernel<plat::CUDADeviceContext, int>,
     ops::ElementwiseModKernel<plat::CUDADeviceContext, int64_t>,
diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.h b/paddle/fluid/operators/elementwise/elementwise_mod_op.h
index bfb12e44b6b94cfcdbc0b2eceb03c73733ff7774..2c001ceda4f1934ed16396ce0c450294e89c4070 100644
--- a/paddle/fluid/operators/elementwise/elementwise_mod_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.h
@@ -44,9 +44,9 @@ struct ModFunctor<
   }
 };
 
-template <typename T>
+template <typename T, typename Enable = void>
 struct InverseModFunctor {
-  inline HOSTDEVICE T operator()(T a, T b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
     T res = b % a;
     if ((res != 0) && ((res < 0) != (a < 0))) res += a;
     return res;
@@ -54,8 +54,9 @@ struct InverseModFunctor {
 };
 
 template <typename T>
-struct InverseModFunctorFP {
-  inline HOSTDEVICE T operator()(T a, T b) const {
+struct InverseModFunctor<
+    T, typename std::enable_if_t<std::is_floating_point<T>::value>> {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
     T res = fmod(b, a);
     if ((res != 0) && ((a < 0) != (res < 0))) res += a;
     return res;
@@ -78,22 +79,6 @@ void elementwise_mod(const framework::ExecutionContext &ctx,
   }
 }
 
-template <typename DeviceContext, typename T>
-void elementwise_mod_fp(const framework::ExecutionContext &ctx,
-                        const framework::Tensor *x, const framework::Tensor *y,
-                        framework::Tensor *z) {
-  int axis = ctx.Attr<int>("axis");
-  auto x_dims = x->dims();
-  auto y_dims = y->dims();
-  if (x_dims.size() >= y_dims.size()) {
-    ElementwiseComputeEx<ModFunctor<T>, DeviceContext, T>(ctx, x, y, axis,
-                                                          ModFunctor<T>(), z);
-  } else {
-    ElementwiseComputeEx<InverseModFunctorFP<T>, DeviceContext, T>(
-        ctx, x, y, axis, InverseModFunctorFP<T>(), z);
-  }
-}
-
 template <typename DeviceContext, typename T>
 class ElementwiseModKernel : public framework::OpKernel<T> {
  public:
@@ -109,20 +94,5 @@ class ElementwiseModKernel : public framework::OpKernel<T> {
   }
 };
 
-template <typename DeviceContext, typename T>
-class ElementwiseModFPKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext &ctx) const override {
-    auto *x = ctx.Input<framework::Tensor>("X");
-    auto *y = ctx.Input<framework::Tensor>("Y");
-    auto *z = ctx.Output<framework::Tensor>("Out");
-
-    z->mutable_data<T>(ctx.GetPlace());
-
-    // dtype of x and y is float or double
-    elementwise_mod_fp<DeviceContext, T>(ctx, x, y, z);
-  }
-};
-
 }  // namespace operators
 }  // namespace paddle
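The elementwise_mod_op.h hunks fold InverseModFunctorFP into InverseModFunctor through a std::enable_if_t partial specialization, which is what lets the .cc registration above use a single ElementwiseModKernel for int, int64_t, float, and double. A self-contained sketch of the same dispatch technique, using the integer/fmod split from the diff but dropping HOSTDEVICE so it builds as plain host C++14:

    #include <cmath>
    #include <iostream>
    #include <type_traits>

    // Primary template: integral types use %, with the result adjusted to
    // take the sign of the divisor (Python-style modulo).
    template <typename T, typename Enable = void>
    struct ModFunctor {
      T operator()(const T a, const T b) const {
        T res = a % b;
        if ((res != 0) && ((res < 0) != (b < 0))) res += b;
        return res;
      }
    };

    // Partial specialization selected by enable_if for floating-point types,
    // which must go through fmod instead of %.
    template <typename T>
    struct ModFunctor<
        T, typename std::enable_if_t<std::is_floating_point<T>::value>> {
      T operator()(const T a, const T b) const {
        T res = std::fmod(a, b);
        if ((res != 0) && ((b < 0) != (res < 0))) res += b;
        return res;
      }
    };

    int main() {
      std::cout << ModFunctor<int>()(-7, 3) << "\n";         // 2, % path
      std::cout << ModFunctor<double>()(-7.5, 3.0) << "\n";  // 1.5, fmod path
      return 0;
    }

Because the specialization is more specialized for floating-point T, the primary template's % expression is never instantiated for float or double, so it never has to type-check there.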
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_function.h b/paddle/fluid/operators/elementwise/elementwise_op_function.h
index 7cd04318d3f49ce07d8d34175c43562d1d602180..20544e6872a9b130690a8f8debe04cb4cb657f5b 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_function.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_function.h
@@ -199,10 +199,6 @@ void ElementwiseComputeEx(const framework::ExecutionContext &ctx,
                           const framework::Tensor *x,
                           const framework::Tensor *y, int axis, Functor func,
                           framework::Tensor *z) {
-  z->mutable_data<OutType>(ctx.GetPlace());
-  auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
-  auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
-  auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
   if (platform::is_gpu_place(ctx.GetPlace())) {
 #if defined(__NVCC__) || defined(__HIPCC__)
     std::vector<const framework::Tensor *> ins = {x, y};
@@ -217,6 +213,11 @@ void ElementwiseComputeEx(const framework::ExecutionContext &ctx,
     return;
   }
 
+  z->mutable_data<OutType>(ctx.GetPlace());
+  auto pt_x = paddle::experimental::MakePtenDenseTensor(*x);
+  auto pt_y = paddle::experimental::MakePtenDenseTensor(*y);
+  auto pt_z = paddle::experimental::MakePtenDenseTensor(*z);
+
   const auto &dev_ctx =
       ctx.template device_context<DeviceContext>();
   pten::ElementwiseCompute<Functor, T, OutType>(
diff --git a/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h b/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h
index 8c9c6a617df17b8c93565fd616d283f8d89530d0..c09d2363bb59331d1742dd4cb59df0a08ddff27f 100644
--- a/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h
+++ b/paddle/fluid/operators/elementwise/elementwise_op_impl.cu.h
@@ -16,9 +16,6 @@ limitations under the License. */
 
 #include "paddle/fluid/framework/pten_utils.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/operators/kernel_primitives/kernel_primitives.h"
-#include "paddle/fluid/platform/aligned_vector.h"
-#include "paddle/fluid/platform/function_traits.h"
 
 // only can include the headers in paddle/top/api dirs
 #include "paddle/pten/api/lib/utils/tensor_utils.h"
@@ -27,8 +24,6 @@ limitations under the License. */
 
 namespace paddle {
 namespace operators {
 
-namespace kps = paddle::operators::kernel_primitives;
-
 using ElementwiseType = pten::ElementwiseType;
 
diff --git a/paddle/fluid/operators/elementwise/elementwise_pow_op.h b/paddle/fluid/operators/elementwise/elementwise_pow_op.h
--- a/paddle/fluid/operators/elementwise/elementwise_pow_op.h
+++ b/paddle/fluid/operators/elementwise/elementwise_pow_op.h
@@ ... @@
 template <typename T>
 struct PowFunctor {
-  inline HOSTDEVICE T operator()(T a, T b) const {
+  inline HOSTDEVICE T operator()(const T a, const T b) const {
     // TODO(wujionghao): A potential speed improvement is supporting different
     // types in C++.
 #if defined(__CUDA_ARCH__) || defined(__HIPCC__)
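For context on the PowFunctor hunk: its body (unchanged context here) guards against std::pow on integral operands going through floating point and coming back as a value like 2.99..., which a plain cast would truncate to 2; rounding with llrint recovers the intended 3. A host-side sketch of that failure mode and the fix, where safe_int_pow is a hypothetical helper rather than Paddle API:

    #include <cmath>
    #include <iostream>
    #include <type_traits>

    // Round-to-nearest instead of truncating when the inputs are integral,
    // mirroring the llrint guard in PowFunctor above.
    template <typename T>
    T safe_int_pow(T a, T b) {
      if (std::is_integral<T>::value) {
        return static_cast<T>(std::llrint(std::pow(a, b)));
      }
      return static_cast<T>(std::pow(a, b));
    }

    int main() {
      double nearly = 2.9999999999999996;            // what pow can return
      std::cout << static_cast<int>(nearly) << "\n"; // 2: truncation is wrong
      std::cout << std::llrint(nearly) << "\n";      // 3: rounding is right
      std::cout << safe_int_pow(3, 4) << "\n";       // 81
      return 0;
    }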
diff --git a/paddle/fluid/operators/svd_helper.h b/paddle/fluid/operators/svd_helper.h
index 8a3622a6b1b5ef974dc3ec3fe1a04246bc3bd52f..48315980e3134aaa3939452bd3c003984ecf567a 100644
--- a/paddle/fluid/operators/svd_helper.h
+++ b/paddle/fluid/operators/svd_helper.h
@@ -26,7 +26,6 @@
 #include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
 #include "paddle/fluid/operators/math/blas.h"
 #include "paddle/fluid/operators/math/complex_functors.h"
-#include "paddle/fluid/operators/math/functors.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/device_context.h"
 #include "paddle/fluid/platform/for_range.h"
diff --git a/paddle/pten/kernels/funcs/elementwise_functor.h b/paddle/pten/kernels/funcs/elementwise_functor.h
index 7f33150739e1c50383c33d84445cff6eaa450983..a61d0de8feee0473dfee4ea379b8e848d7d1d781 100644
--- a/paddle/pten/kernels/funcs/elementwise_functor.h
+++ b/paddle/pten/kernels/funcs/elementwise_functor.h
@@ -22,6 +22,7 @@ namespace pten {
 namespace funcs {
 
 // Define the binary functors used in elementwise ops.
+// Note: InverseXxxFunctor is needed when calling ElementwiseComputeEx on CPU.
 
 // Add
 template <typename T>
@@ -48,10 +49,22 @@ template <typename T>
 struct MultiplyFunctor {
   inline HOSTDEVICE T operator()(const T a, const T b) const { return a * b; }
 };
+template <>
+struct MultiplyFunctor<bool> {
+  inline HOSTDEVICE bool operator()(const bool a, const bool b) const {
+    return a && b;
+  }
+};
 template <typename T>
 struct InverseMultiplyFunctor {
   inline HOSTDEVICE T operator()(const T a, const T b) const { return b * a; }
 };
+template <>
+struct InverseMultiplyFunctor<bool> {
+  inline HOSTDEVICE bool operator()(const bool a, const bool b) const {
+    return b && a;
+  }
+};
 
 // Divide
 #define DIV_ERROR_INFO \
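The MultiplyFunctor<bool> and InverseMultiplyFunctor<bool> specializations that close the diff replace * with &&: multiplying two bools promotes them to int and narrows the product back, whereas logical AND states the intended semantics for bool tensors directly and avoids bool-arithmetic warnings. A compilable sketch of the same full-specialization pattern, with HOSTDEVICE omitted so it builds as host-only C++:

    #include <iostream>

    // Generic multiply, as in the primary template above.
    template <typename T>
    struct MultiplyFunctor {
      T operator()(const T a, const T b) const { return a * b; }
    };

    // Full specialization for bool: multiplication of bools is logical AND.
    template <>
    struct MultiplyFunctor<bool> {
      bool operator()(const bool a, const bool b) const { return a && b; }
    };

    int main() {
      std::cout << MultiplyFunctor<float>()(2.0f, 3.5f) << "\n";  // 7
      std::cout << MultiplyFunctor<bool>()(true, false) << "\n";  // 0 (false)
      return 0;
    }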