diff --git a/paddle/fluid/framework/new_executor/standalone_executor_test.cc b/paddle/fluid/framework/new_executor/standalone_executor_test.cc index 4231c75748167a196a9b929bc3ef65548b191b4a..e7f18bdc88a110583f499a987fdda223d9d0663f 100644 --- a/paddle/fluid/framework/new_executor/standalone_executor_test.cc +++ b/paddle/fluid/framework/new_executor/standalone_executor_test.cc @@ -54,7 +54,7 @@ USE_OP(sum); USE_OP_ITSELF(slice_grad); USE_OP_ITSELF(lookup_table_grad); USE_OP(sqrt); -USE_OP(elementwise_max); +USE_OP_ITSELF(elementwise_max); USE_OP_ITSELF(elementwise_div); USE_OP_ITSELF(sgd); USE_OP(squared_l2_norm); diff --git a/paddle/fluid/operators/elementwise/elementwise_functor.h b/paddle/fluid/operators/elementwise/elementwise_functor.h index 54931d99292f9d1453e2a3deb72e75ed63c9f46f..5dfb7eece9ec77b3f10196654e976e335af890e9 100644 --- a/paddle/fluid/operators/elementwise/elementwise_functor.h +++ b/paddle/fluid/operators/elementwise/elementwise_functor.h @@ -70,75 +70,29 @@ struct InverseFloorDivFunctor { // Maximum template -struct MaxFunctor { - inline HOSTDEVICE T operator()(const T a, const T b) const { - return a > b ? a : b; - } -}; +using MaxFunctor = phi::funcs::MaximumFunctor; // Minmum template -struct MinFunctor { - inline HOSTDEVICE T operator()(const T a, const T b) const { - return a < b ? a : b; - } -}; +using MinFunctor = phi::funcs::MinimumFunctor; template using Complex = paddle::platform::complex; +// Ternary compare template -struct MinGradXFunctor { - inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { - return dout * static_cast(x < y); - } -}; +using MaxGradXFunctor = phi::funcs::MaxGradXFunctor; template -struct MinGradYFunctor { - inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { - return dout * static_cast(x >= y); - } -}; - +using MaxGradYFunctor = phi::funcs::MaxGradYFunctor; template -struct MinGradXYFunctor { - inline HOSTDEVICE phi::Array operator()(const InT x, const InT y, - const InT dout) { - phi::Array outs; - // dx = dout * (x < y) - outs[0] = static_cast(dout * static_cast(x < y)); - // dy = dout * (x >= y) - outs[1] = static_cast(dout * static_cast(x >= y)); - return outs; - } -}; +using MaxGradXYFunctor = phi::funcs::MaxGradXYFunctor; -// Ternary compare template -struct MaxGradXFunctor { - inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { - return dout * static_cast(x > y); - } -}; +using MinGradXFunctor = phi::funcs::MinGradXFunctor; template -struct MaxGradYFunctor { - inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { - return dout * static_cast(x <= y); - } -}; - +using MinGradYFunctor = phi::funcs::MinGradYFunctor; template -struct MaxGradXYFunctor { - inline HOSTDEVICE phi::Array operator()(const InT x, const InT y, - const InT dout) { - phi::Array outs; - // dx = dout * (x > y) - outs[0] = static_cast(dout * static_cast(x > y)); - // dy = dout * (x <= y) - outs[1] = static_cast(dout * static_cast(x <= y)); - return outs; - } -}; +using MinGradXYFunctor = phi::funcs::MinGradXYFunctor; } // namespace operators } // namespace paddle diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cc b/paddle/fluid/operators/elementwise/elementwise_max_op.cc index d91315cc511aa80c0e9c44ccc688b2746eac764e..f11c22f24555a4bae1d275766f964d4cb5388817 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_max_op.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 
express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/elementwise/elementwise_max_op.h" - #include #include "paddle/fluid/operators/elementwise/elementwise_op.h" @@ -119,23 +117,6 @@ REGISTER_OPERATOR(elementwise_max, ops::ElementwiseOp, REGISTER_OPERATOR(elementwise_max_grad, ops::ElementwiseOpGrad); -REGISTER_OP_CPU_KERNEL( - elementwise_max, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel); -REGISTER_OP_CPU_KERNEL( - elementwise_max_grad, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel); - REGISTER_OP_VERSION(elementwise_max) .AddCheckpoint( R"ROC(Register elementwise_max for adding the attribute of Scale_y)ROC", diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.cu b/paddle/fluid/operators/elementwise/elementwise_max_op.cu deleted file mode 100644 index 0d5f56fda17322d86ef13990e9fc2432816dc9cb..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/elementwise/elementwise_max_op.cu +++ /dev/null @@ -1,88 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/elementwise/elementwise_max_op.h" - -namespace paddle { -namespace operators { - -template -class ElementwiseMaxKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - std::vector ins; - std::vector outs; - const auto& dev_ctx = - ctx.template device_context(); - - int axis = PackTensorsIntoVector(ctx, &ins, &outs); - paddle::operators::LaunchElementwiseCudaKernel(dev_ctx, ins, &outs, axis, - MaxFunctor()); - } -}; - -template -typename std::enable_if< - std::is_same::value>::type -ElementwiseMaxGrad(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - const framework::Tensor* out, const framework::Tensor* dout, - framework::Tensor* dx, framework::Tensor* dy) { - int axis = ctx.Attr("axis"); - const auto& dev_ctx = - ctx.template device_context(); - const auto place = ctx.GetPlace(); - if (dx != nullptr && dy != nullptr) { - std::vector ins = {x, y, dout}; - GetGradXAndYOut( - dev_ctx, place, axis, ins, dout, dx, dy, MaxGradXYFunctor()); - } else if (dx != nullptr && dy == nullptr) { - std::vector ins = {x, y, dout}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dx, MaxGradXFunctor()); - } else if (dx == nullptr && dy != nullptr) { - std::vector ins = {x, y, dout}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dy, MaxGradYFunctor()); - } -} - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_CUDA_KERNEL( - elementwise_max, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel, - ops::ElementwiseMaxKernel); -REGISTER_OP_CUDA_KERNEL( - elementwise_max_grad, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel, - ops::ElementwiseMaxGradKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op.h b/paddle/fluid/operators/elementwise/elementwise_max_op.h deleted file mode 100644 index afe1073d89a06618af95490ac6d264073bd930d4..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/elementwise/elementwise_max_op.h +++ /dev/null @@ -1,93 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "paddle/fluid/operators/elementwise/elementwise_op.h" - -namespace paddle { -namespace operators { - -template -class ElementwiseMaxKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* z = ctx.Output("Out"); - - z->mutable_data(ctx.GetPlace()); - int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - MaxFunctor(), z); - } -}; - -template -struct MaxGradDx { - HOSTDEVICE T operator()(T x, T y, T out, T dout) const { - return dout * static_cast(x > y); - } -}; - -template -struct MaxGradDy { - HOSTDEVICE T operator()(T x, T y, T out, T dout) const { - return dout * static_cast(x <= y); - } -}; - -template -typename std::enable_if< - std::is_same::value>::type -ElementwiseMaxGrad(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - const framework::Tensor* out, const framework::Tensor* dout, - framework::Tensor* dx, framework::Tensor* dy) { - int axis = ctx.Attr("axis"); - ElemwiseGradCompute, MaxGradDy>( - ctx, *x, *y, *out, *dout, axis, dx, dy, MaxGradDx(), MaxGradDy()); -} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -template -typename std::enable_if< - std::is_same::value>::type -ElementwiseMaxGrad(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - const framework::Tensor* out, const framework::Tensor* dout, - framework::Tensor* dx, framework::Tensor* dy); -#endif - -template -class ElementwiseMaxGradKernel : public ElemwiseGradKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - ElemwiseGradKernel::Compute(ctx); - using Tensor = framework::Tensor; - - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* out = dout; // out is not necessary - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); - - ElementwiseMaxGrad(ctx, x, y, out, dout, dx, dy); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc index 87a0264c3ce7b219bb437c8187aaa19a3c30150f..176d1328192eaf2c3c582c5d68a0acde3d07b770 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_max_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/elementwise/elementwise_max_op.h" #include "paddle/fluid/operators/elementwise/elementwise_npu.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc index 9fb3c9d493dc04a3dda6bd4a25c0d759e0b44c20..f8cc6d2b985be30ee9b2a2e98680b55c888ff08f 100644 --- a/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_max_op_xpu.cc @@ -14,7 +14,6 @@ limitations under the License. 
*/ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/elementwise/elementwise_max_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_xpu.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cc b/paddle/fluid/operators/elementwise/elementwise_min_op.cc index dad80a2c33f3abfde457a6d750f89e47374fae13..5f552460efd42c9a95c57ad576890ce798dbd0a0 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/elementwise/elementwise_min_op.h" - #include #include "paddle/fluid/operators/elementwise/elementwise_op.h" @@ -119,19 +117,6 @@ REGISTER_OPERATOR(elementwise_min, ops::ElementwiseOp, REGISTER_OPERATOR(elementwise_min_grad, ops::ElementwiseOpGrad); -REGISTER_OP_CPU_KERNEL( - elementwise_min, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel); -REGISTER_OP_CPU_KERNEL( - elementwise_min_grad, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel); - REGISTER_OP_VERSION(elementwise_min) .AddCheckpoint( R"ROC(Register elementwise_min for adding the attribute of Scale_y)ROC", diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.cu b/paddle/fluid/operators/elementwise/elementwise_min_op.cu deleted file mode 100644 index fb8bc9ac7f83c8dd99e40685acc68eec4c77b3ce..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.cu +++ /dev/null @@ -1,84 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/elementwise/elementwise_min_op.h" - -namespace paddle { -namespace operators { - -template -class ElementwiseMinKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - std::vector ins; - std::vector outs; - const auto& dev_ctx = - ctx.template device_context(); - - int axis = PackTensorsIntoVector(ctx, &ins, &outs); - paddle::operators::LaunchElementwiseCudaKernel(dev_ctx, ins, &outs, axis, - MinFunctor()); - } -}; - -template -typename std::enable_if< - std::is_same::value>::type -ElementwiseMinGrad(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - const framework::Tensor* out, const framework::Tensor* dout, - framework::Tensor* dx, framework::Tensor* dy) { - int axis = ctx.Attr("axis"); - const auto& dev_ctx = - ctx.template device_context(); - const auto place = ctx.GetPlace(); - if (dx != nullptr && dy != nullptr) { - std::vector ins = {x, y, dout}; - GetGradXAndYOut( - dev_ctx, place, axis, ins, dout, dx, dy, MinGradXYFunctor()); - } else if (dx != nullptr && dy == nullptr) { - std::vector ins = {x, y, dout}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dx, MinGradXFunctor()); - } else if (dx == nullptr && dy != nullptr) { - std::vector ins = {x, y, dout}; - GetGradXOrYOut( - dev_ctx, place, axis, ins, dout, dy, MinGradYFunctor()); - } -} - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_CUDA_KERNEL( - elementwise_min, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel, - ops::ElementwiseMinKernel); -REGISTER_OP_CUDA_KERNEL( - elementwise_min_grad, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel, - ops::ElementwiseMinGradKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op.h b/paddle/fluid/operators/elementwise/elementwise_min_op.h deleted file mode 100644 index 283ad2adde978680d4d0c3a579d55e588368a28e..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/elementwise/elementwise_min_op.h +++ /dev/null @@ -1,113 +0,0 @@ -/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#pragma once - -#include -#include "paddle/fluid/operators/elementwise/elementwise_op.h" - -namespace paddle { -namespace operators { - -template -class ElementwiseMinKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* z = ctx.Output("Out"); - - z->mutable_data(ctx.GetPlace()); - int axis = ctx.Attr("axis"); - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - MinFunctor(), z); - } -}; - -template -struct MinGradDx { - HOSTDEVICE T operator()(T x, T y, T out, T dout) const { - return dout * (x < y); - } -}; - -template -struct MinGradDy { - HOSTDEVICE T operator()(T x, T y, T out, T dout) const { - return dout * (x >= y); - } -}; - -#ifdef PADDLE_CUDA_FP16 -template <> -struct MinGradDx { - HOSTDEVICE platform::float16 operator()(platform::float16 x, - platform::float16 y, - platform::float16 out, - platform::float16 dout) const { - return x < y ? dout : static_cast(0); - } -}; - -template <> -struct MinGradDy { - HOSTDEVICE platform::float16 operator()(platform::float16 x, - platform::float16 y, - platform::float16 out, - platform::float16 dout) const { - return x >= y ? dout : static_cast(0); - } -}; -#endif - -template -typename std::enable_if< - std::is_same::value>::type -ElementwiseMinGrad(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - const framework::Tensor* out, const framework::Tensor* dout, - framework::Tensor* dx, framework::Tensor* dy) { - int axis = ctx.Attr("axis"); - ElemwiseGradCompute, MinGradDy>( - ctx, *x, *y, *out, *dout, axis, dx, dy, MinGradDx(), MinGradDy()); -} - -#if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) -template -typename std::enable_if< - std::is_same::value>::type -ElementwiseMinGrad(const framework::ExecutionContext& ctx, - const framework::Tensor* x, const framework::Tensor* y, - const framework::Tensor* out, const framework::Tensor* dout, - framework::Tensor* dx, framework::Tensor* dy); -#endif - -template -class ElementwiseMinGradKernel : public ElemwiseGradKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - ElemwiseGradKernel::Compute(ctx); - using Tensor = framework::Tensor; - - auto* x = ctx.Input("X"); - auto* y = ctx.Input("Y"); - auto* dout = ctx.Input(framework::GradVarName("Out")); - auto* dx = ctx.Output(framework::GradVarName("X")); - auto* dy = ctx.Output(framework::GradVarName("Y")); - auto* out = dout; // Fake out, not used - ElementwiseMinGrad(ctx, x, y, out, dout, dx, dy); - } -}; -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc index d790a8f6969b5a1734f1ba8012ef916b0dde57ed..fa94f85ec6a31243ce2b2dc2a1497e98d263dbd2 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_npu.cc @@ -16,7 +16,6 @@ limitations under the License. 
*/ #include #include "paddle/fluid/framework/tensor_util.h" -#include "paddle/fluid/operators/elementwise/elementwise_min_op.h" #include "paddle/fluid/operators/elementwise/elementwise_npu.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc b/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc index 5d4ff69b06dffcf8e1391cb0bddfe4f01124c421..b501ff48c73d2c593c655727c4b34923b332c202 100644 --- a/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_min_op_xpu.cc @@ -14,7 +14,6 @@ limitations under the License. */ #ifdef PADDLE_WITH_XPU -#include "paddle/fluid/operators/elementwise/elementwise_max_op.h" #include "paddle/fluid/operators/elementwise/elementwise_op.h" #include "paddle/fluid/operators/elementwise/elementwise_xpu.h" namespace paddle { diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc index 28cd57618ee3baddae73d93f9657881e53784c8f..e693925748807ffed1c3a7cf7207ce81b8646ef8 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op.cc @@ -12,8 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -#include "paddle/fluid/operators/elementwise/elementwise_mod_op.h" - #include #include "paddle/fluid/operators/elementwise/elementwise_op.h" @@ -62,13 +60,6 @@ namespace ops = paddle::operators; REGISTER_OP_WITHOUT_GRADIENT(elementwise_mod, ops::ElementwiseOp, ops::ElementwiseModOpMaker); -REGISTER_OP_CPU_KERNEL( - elementwise_mod, - ops::ElementwiseModKernel, - ops::ElementwiseModKernel, - ops::ElementwiseModKernel, - ops::ElementwiseModKernel); - REGISTER_OP_VERSION(elementwise_mod) .AddCheckpoint( R"ROC(Register elementwise_mod for adding the attribute of Scale_y)ROC", diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.cu b/paddle/fluid/operators/elementwise/elementwise_mod_op.cu deleted file mode 100644 index 6ecff73257cdf7785b4ac45fd9ac0de581418e02..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.cu +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
*/ - -#include "paddle/fluid/operators/elementwise/elementwise_mod_op.h" - -namespace paddle { -namespace operators { - -template -class ElementwiseModKernel - : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - std::vector ins; - std::vector outs; - const auto& cuda_ctx = - ctx.template device_context(); - int axis = PackTensorsIntoVector(ctx, &ins, &outs); - paddle::operators::LaunchElementwiseCudaKernel(cuda_ctx, ins, &outs, - axis, ModFunctor()); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; - -REGISTER_OP_CUDA_KERNEL( - elementwise_mod, ops::ElementwiseModKernel, - ops::ElementwiseModKernel, - ops::ElementwiseModKernel, - ops::ElementwiseModKernel); diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op.h b/paddle/fluid/operators/elementwise/elementwise_mod_op.h deleted file mode 100644 index 2c001ceda4f1934ed16396ce0c450294e89c4070..0000000000000000000000000000000000000000 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op.h +++ /dev/null @@ -1,98 +0,0 @@ -/* Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#pragma once - -#include "paddle/fluid/operators/elementwise/elementwise_op.h" - -namespace paddle { -namespace operators { - -template -struct ModFunctor { - inline HOSTDEVICE T operator()(const T a, const T b) const { - T res = a % b; - - // Accoding to #PR26732: in dividen % divsor - // remainder shall have the same sign as divsor. - if ((res != 0) && ((b ^ res) < 0)) res += b; - return res; - } -}; - -template -struct ModFunctor::value>> { - inline HOSTDEVICE T operator()(const T a, const T b) const { - T res = fmod(a, b); - - // Accoding to #PR26732: in dividen % divsor - // remainder shall have the same sign as divsor. 
- if ((res != 0) && ((res < 0) != (b < 0))) res += b; - return res; - } -}; - -template -struct InverseModFunctor { - inline HOSTDEVICE T operator()(const T a, const T b) const { - T res = b % a; - if ((res != 0) && ((res < 0) != (a < 0))) res += a; - return res; - } -}; - -template -struct InverseModFunctor< - T, typename std::enable_if_t::value>> { - inline HOSTDEVICE T operator()(const T a, const T b) const { - T res = fmod(b, a); - if ((res != 0) && ((a < 0) != (res < 0))) res += a; - return res; - } -}; - -template -void elementwise_mod(const framework::ExecutionContext &ctx, - const framework::Tensor *x, const framework::Tensor *y, - framework::Tensor *z) { - int axis = ctx.Attr("axis"); - auto x_dims = x->dims(); - auto y_dims = y->dims(); - if (x_dims.size() >= y_dims.size()) { - ElementwiseComputeEx, DeviceContext, T>(ctx, x, y, axis, - ModFunctor(), z); - } else { - ElementwiseComputeEx, DeviceContext, T>( - ctx, x, y, axis, InverseModFunctor(), z); - } -} - -template -class ElementwiseModKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext &ctx) const override { - auto *x = ctx.Input("X"); - auto *y = ctx.Input("Y"); - auto *z = ctx.Output("Out"); - - z->mutable_data(ctx.GetPlace()); - - // dtype of x and y is int64 or int32 - elementwise_mod(ctx, x, y, z); - } -}; - -} // namespace operators -} // namespace paddle diff --git a/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc b/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc index 900bd5d6c29268f3a5af95cd606698416a5787b2..049b0710145ec627990fb75481cf1667bff84134 100644 --- a/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc +++ b/paddle/fluid/operators/elementwise/elementwise_mod_op_npu.cc @@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. 
*/ -#include "paddle/fluid/operators/elementwise/elementwise_mod_op.h" #include "paddle/fluid/operators/elementwise/elementwise_npu.h" #include "paddle/fluid/platform/device/npu/npu_op_runner.h" diff --git a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc index d5b78909e9287ee0c6cf93164a19b49733a2d76d..b617064987507339743ab145ef94bdf15bdbdca2 100644 --- a/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_grad_kernel.cc @@ -135,6 +135,32 @@ void MultiplyGradKernel(const Context& dev_ctx, dev_ctx, x, y, *out, dout, axis, dx, dy, MulGradDX(), MulGradDY()); } +template +void MaximumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + phi::funcs::ElemwiseGradCompute, MaxGradDy>( + dev_ctx, x, y, dout, dout, axis, dx, dy, MaxGradDx(), MaxGradDy()); +} + +template +void MinimumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + funcs::ElementwiseGradPreProcess(dout, dx); + phi::funcs::ElemwiseGradCompute, MinGradDy>( + dev_ctx, x, y, dout, dout, axis, dx, dy, MinGradDx(), MinGradDy()); +} + } // namespace phi PD_REGISTER_KERNEL(add_grad, @@ -259,6 +285,7 @@ PD_REGISTER_KERNEL(multiply_triple_grad, phi::dtype::bfloat16, phi::dtype::complex, phi::dtype::complex) {} + PD_REGISTER_KERNEL(fmax_grad, CPU, ALL_LAYOUT, @@ -276,3 +303,23 @@ PD_REGISTER_KERNEL(fmin_grad, double, int, int64_t) {} + +PD_REGISTER_KERNEL(maximum_grad, + CPU, + ALL_LAYOUT, + phi::MaximumGradKernel, + float, + double, + int, + int64_t, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(minimum_grad, + CPU, + ALL_LAYOUT, + phi::MinimumGradKernel, + float, + double, + int, + int64_t, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/cpu/elementwise_kernel.cc b/paddle/phi/kernels/cpu/elementwise_kernel.cc index 004f40ddedadf5e2609868478c7b0d4169b73a63..1de40cb9466b295e5656ae231cb754990e004195 100644 --- a/paddle/phi/kernels/cpu/elementwise_kernel.cc +++ b/paddle/phi/kernels/cpu/elementwise_kernel.cc @@ -70,6 +70,49 @@ void DivideRawKernel(const Context& dev_ctx, } } +template +void MaximumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + dev_ctx.template Alloc(out); + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::MaximumFunctor(), out); +} + +template +void MinimumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + dev_ctx.template Alloc(out); + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::MinimumFunctor(), out); +} + +template +void ModuloRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out) { + // allocate memory for out + dev_ctx.template Alloc(out); + auto x_dims = x.dims(); + auto y_dims = y.dims(); + if (x_dims.size() >= y_dims.size()) { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::ModuloFunctor(), out); + } else { + funcs::ElementwiseCompute, T>( + dev_ctx, x, y, axis, funcs::InverseModuloFunctor(), out); + } +} + // Create the definition of Add DEFINE_CPU_ELEMENTWISE_OP(Add) @@ -138,3 +181,29 @@ PD_REGISTER_KERNEL(multiply_raw, complex64, complex128, 
phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(maximum_raw, + CPU, + ALL_LAYOUT, + phi::MaximumRawKernel, + float, + double, + int, + int64_t, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(minimum_raw, + CPU, + ALL_LAYOUT, + phi::MinimumRawKernel, + float, + double, + int, + int64_t, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(modulo_raw, + CPU, + ALL_LAYOUT, + phi::ModuloRawKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/phi/kernels/elementwise_grad_kernel.h b/paddle/phi/kernels/elementwise_grad_kernel.h index fb2633cc9fcea7c619193ad964ad62247ed654dd..95832013caff5d366f264123262d3c05fb36b35f 100644 --- a/paddle/phi/kernels/elementwise_grad_kernel.h +++ b/paddle/phi/kernels/elementwise_grad_kernel.h @@ -142,4 +142,21 @@ void ElementwiseFMinGradKernel(const Context& dev_ctx, DenseTensor* x_grad, DenseTensor* y_grad); +template +void MaximumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); + +template +void MinimumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy); } // namespace phi diff --git a/paddle/phi/kernels/elementwise_kernel.cc b/paddle/phi/kernels/elementwise_kernel.cc index 9d10a48c9e0795d8914c0c6cfb49b7686575cfac..019d4fed5b28eaed72a370bfb51c1a75807964fd 100644 --- a/paddle/phi/kernels/elementwise_kernel.cc +++ b/paddle/phi/kernels/elementwise_kernel.cc @@ -55,6 +55,32 @@ void MultiplyKernel(const Context& dev_ctx, MultiplyRawKernel(dev_ctx, x, y, axis, out); } +template +void MaximumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MaximumRawKernel(dev_ctx, x, y, axis, out); +} + +template +void MinimumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + MinimumRawKernel(dev_ctx, x, y, axis, out); +} + +template +void ModuloKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out) { + int axis = -1; + ModuloRawKernel(dev_ctx, x, y, axis, out); +} } // namespace phi using complex64 = ::phi::dtype::complex; @@ -105,6 +131,26 @@ PD_REGISTER_KERNEL(multiply, complex64, complex128, phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(maximum, + CPU, + ALL_LAYOUT, + phi::MaximumKernel, + float, + double, + int, + int64_t, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(minimum, + CPU, + ALL_LAYOUT, + phi::MinimumKernel, + float, + double, + int, + int64_t, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL( + modulo, CPU, ALL_LAYOUT, phi::ModuloKernel, float, double, int, int64_t) {} #if defined(PADDLE_WITH_CUDA) || defined(PADDLE_WITH_HIP) @@ -158,4 +204,26 @@ PD_REGISTER_KERNEL(multiply, phi::dtype::float16, complex64, complex128) {} +PD_REGISTER_KERNEL(maximum, + GPU, + ALL_LAYOUT, + phi::MaximumKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL(minimum, + GPU, + ALL_LAYOUT, + phi::MinimumKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} +PD_REGISTER_KERNEL( + modulo, GPU, ALL_LAYOUT, phi::ModuloKernel, float, double, int, int64_t) {} #endif diff --git a/paddle/phi/kernels/elementwise_kernel.h b/paddle/phi/kernels/elementwise_kernel.h index a6ba7bdac5829f88c153496c908a6e7ac14f91d2..f9c9c7f7139f3ad2298ba0ebcae026746ab450dc 100644 --- a/paddle/phi/kernels/elementwise_kernel.h 
+++ b/paddle/phi/kernels/elementwise_kernel.h @@ -85,6 +85,45 @@ void MultiplyKernel(const Context& dev_ctx, const DenseTensor& y, DenseTensor* out); +template +void MaximumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MaximumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +void MinimumRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void MinimumKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + +template +void ModuloRawKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + int axis, + DenseTensor* out); + +template +void ModuloKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + DenseTensor* out); + template DenseTensor Add(const Context& dev_ctx, const DenseTensor& x, @@ -129,4 +168,36 @@ DenseTensor Multiply(const Context& dev_ctx, return dense_out; } +template +DenseTensor Maximum(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + MaximumKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +template +DenseTensor Minimum(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + MinimumKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} + +template +DenseTensor Modulo(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y) { + DenseTensor dense_out; + MetaTensor meta_out(&dense_out); + ElementwiseInferMeta(x, y, &meta_out); + ModuloKernel(dev_ctx, x, y, &dense_out); + return dense_out; +} } // namespace phi diff --git a/paddle/phi/kernels/funcs/elementwise_functor.h b/paddle/phi/kernels/funcs/elementwise_functor.h index ac262fe2d571e587e3bdfa6a2d4e58bd5b865e68..1e39cf55035129cc0df92e689cb44a32aeba5562 100644 --- a/paddle/phi/kernels/funcs/elementwise_functor.h +++ b/paddle/phi/kernels/funcs/elementwise_functor.h @@ -422,5 +422,121 @@ struct MultiplyGradXYFunctor, ComplexType> { } }; +// Maximum +template +struct MaximumFunctor { + inline HOSTDEVICE T operator()(const T a, const T b) const { + return a > b ? a : b; + } +}; + +template +struct MaxGradXFunctor { + inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { + return dout * static_cast(x > y); + } +}; + +template +struct MaxGradYFunctor { + inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { + return dout * static_cast(x <= y); + } +}; + +template +struct MaxGradXYFunctor { + inline HOSTDEVICE phi::Array operator()(const InT x, + const InT y, + const InT dout) { + phi::Array outs; + // dx = dout * (x > y) + outs[0] = static_cast(dout * static_cast(x > y)); + // dy = dout * (x <= y) + outs[1] = static_cast(dout * static_cast(x <= y)); + return outs; + } +}; + +// Minimum +template +struct MinimumFunctor { + inline HOSTDEVICE T operator()(const T a, const T b) const { + return a < b ? 
a : b; + } +}; +template +struct MinGradXFunctor { + inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { + return dout * static_cast(x < y); + } +}; +template +struct MinGradYFunctor { + inline HOSTDEVICE T operator()(const T x, const T y, const T dout) const { + return dout * static_cast(x >= y); + } +}; + +template +struct MinGradXYFunctor { + inline HOSTDEVICE phi::Array operator()(const InT x, + const InT y, + const InT dout) { + phi::Array outs; + // dx = dout * (x < y) + outs[0] = static_cast(dout * static_cast(x < y)); + // dy = dout * (x >= y) + outs[1] = static_cast(dout * static_cast(x >= y)); + return outs; + } +}; + +// Modulo +template +struct ModuloFunctor { + inline HOSTDEVICE T operator()(const T a, const T b) const { + T res = a % b; + + // Accoding to #PR26732: in dividen % divsor + // remainder shall have the same sign as divsor. + if ((res != 0) && ((b ^ res) < 0)) res += b; + return res; + } +}; + +template +struct ModuloFunctor< + T, + typename std::enable_if_t::value>> { + inline HOSTDEVICE T operator()(const T a, const T b) const { + T res = fmod(a, b); + + // Accoding to #PR26732: in dividen % divsor + // remainder shall have the same sign as divsor. + if ((res != 0) && ((res < 0) != (b < 0))) res += b; + return res; + } +}; + +template +struct InverseModuloFunctor { + inline HOSTDEVICE T operator()(const T a, const T b) const { + T res = b % a; + if ((res != 0) && ((res < 0) != (a < 0))) res += a; + return res; + } +}; + +template +struct InverseModuloFunctor< + T, + typename std::enable_if_t::value>> { + inline HOSTDEVICE T operator()(const T a, const T b) const { + T res = fmod(b, a); + if ((res != 0) && ((a < 0) != (res < 0))) res += a; + return res; + } +}; } // namespace funcs } // namespace phi diff --git a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu index 3392a3cec4ecad08b0442a54c3c3dbc652ebd0b6..52819fd3dee8c5a0314f4e95e563e3f3db32a136 100644 --- a/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_grad_kernel.cu @@ -148,6 +148,67 @@ void MultiplyGradKernel(const Context& dev_ctx, ElementwiseMulGrad(dev_ctx, x, y, dout, dx, dy, axis); } +template +void MaximumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + const auto place = dev_ctx.GetPlace(); + if (dx != nullptr && dy != nullptr) { + std::vector ins = {&x, &y, &dout}; + GetGradXAndYOut( + dev_ctx, + place, + axis, + ins, + dout, + dx, + dy, + funcs::MaxGradXYFunctor()); + } else if (dx != nullptr && dy == nullptr) { + std::vector ins = {&x, &y, &dout}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dx, funcs::MaxGradXFunctor()); + } else if (dy != nullptr && dx == nullptr) { + std::vector ins = {&x, &y, &dout}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dy, funcs::MaxGradYFunctor()); + } +} + +template +void MinimumGradKernel(const Context& dev_ctx, + const DenseTensor& x, + const DenseTensor& y, + const DenseTensor& dout, + int axis, + DenseTensor* dx, + DenseTensor* dy) { + const auto place = dev_ctx.GetPlace(); + if (dx != nullptr && dy != nullptr) { + std::vector ins = {&x, &y, &dout}; + GetGradXAndYOut( + dev_ctx, + place, + axis, + ins, + dout, + dx, + dy, + funcs::MinGradXYFunctor()); + } else if (dx != nullptr && dy == nullptr) { + std::vector ins = {&x, &y, &dout}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dx, funcs::MinGradXFunctor()); 
+ } else if (dy != nullptr && dx == nullptr) { + std::vector ins = {&x, &y, &dout}; + GetGradXOrYOut( + dev_ctx, place, axis, ins, dout, dy, funcs::MinGradYFunctor()); + } +} } // namespace phi PD_REGISTER_KERNEL(add_grad, @@ -299,3 +360,25 @@ PD_REGISTER_KERNEL(fmin_grad, double, int, int64_t) {} + +PD_REGISTER_KERNEL(maximum_grad, + GPU, + ALL_LAYOUT, + phi::MaximumGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} + +PD_REGISTER_KERNEL(minimum_grad, + GPU, + ALL_LAYOUT, + phi::MinimumGradKernel, + float, + double, + int, + int64_t, + phi::dtype::float16, + phi::dtype::bfloat16) {} diff --git a/paddle/phi/kernels/gpu/elementwise_kernel.cu b/paddle/phi/kernels/gpu/elementwise_kernel.cu index 8de55e8a412d36c615ed923984c1a3fadc073d0b..bd6995cb1353c71ad9a0cbda857c25f0beeccf7e 100644 --- a/paddle/phi/kernels/gpu/elementwise_kernel.cu +++ b/paddle/phi/kernels/gpu/elementwise_kernel.cu @@ -49,6 +49,12 @@ DEFINE_CUDA_ELEMENTWISE_OP(Subtract) DEFINE_CUDA_ELEMENTWISE_OP(Multiply) // Create the definition of Divide DEFINE_CUDA_ELEMENTWISE_OP(Divide) +// Create the definition of Maximum +DEFINE_CUDA_ELEMENTWISE_OP(Maximum) +// Create the definition of Minimum +DEFINE_CUDA_ELEMENTWISE_OP(Minimum) +// Create the definition of Modulo +DEFINE_CUDA_ELEMENTWISE_OP(Modulo) } // namespace phi @@ -114,3 +120,31 @@ PD_REGISTER_KERNEL(multiply_raw, complex64, complex128, bfloat16) {} +PD_REGISTER_KERNEL(maximum_raw, + GPU, + ALL_LAYOUT, + phi::MaximumRawKernel, + float, + double, + int, + int64_t, + float16, + bfloat16) {} +PD_REGISTER_KERNEL(minimum_raw, + GPU, + ALL_LAYOUT, + phi::MinimumRawKernel, + float, + double, + int, + int64_t, + float16, + bfloat16) {} +PD_REGISTER_KERNEL(modulo_raw, + GPU, + ALL_LAYOUT, + phi::ModuloRawKernel, + float, + double, + int, + int64_t) {} diff --git a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h index 0b7a5d3bcb26a360eb5f7f664ead7932f428cc64..07e5bf9ae040e10e1f339b0100ba7f8daccd438f 100644 --- a/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h +++ b/paddle/phi/kernels/impl/elementwise_grad_kernel_impl.h @@ -628,4 +628,42 @@ void MultiplyTripleGradKernel(const Context& dev_ctx, } } +/* +****************************** + Maximum Grad +****************************** +*/ + +template +struct MaxGradDx { + HOSTDEVICE T operator()(T x, T y, T out, T dout) const { + return dout * static_cast(x > y); + } +}; + +template +struct MaxGradDy { + HOSTDEVICE T operator()(T x, T y, T out, T dout) const { + return dout * static_cast(x <= y); + } +}; + +/* +****************************** + Minimum Grad +****************************** +*/ +template +struct MinGradDx { + HOSTDEVICE T operator()(T x, T y, T out, T dout) const { + return dout * static_cast(x < y); + } +}; + +template +struct MinGradDy { + HOSTDEVICE T operator()(T x, T y, T out, T dout) const { + return dout * static_cast(x >= y); + } +}; } // namespace phi diff --git a/paddle/phi/ops/compat/elementwise_sig.cc b/paddle/phi/ops/compat/elementwise_sig.cc index bb05689dee1d31e2a81bfa15793ee6de52f63120..7f00af6f9af86f35709c2d120a7a47917d8d8431 100644 --- a/paddle/phi/ops/compat/elementwise_sig.cc +++ b/paddle/phi/ops/compat/elementwise_sig.cc @@ -55,6 +55,33 @@ KernelSignature ElementwiseDivOpArgumentMapping( return KernelSignature("divide_raw", {"X", "Y"}, {"axis"}, {"Out"}); } +KernelSignature ElementwiseMaxOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = 
paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("maximum", {"X", "Y"}, {}, {"Out"}); + } + return KernelSignature("maximum_raw", {"X", "Y"}, {"axis"}, {"Out"}); +} + +KernelSignature ElementwiseMinOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("minimum", {"X", "Y"}, {}, {"Out"}); + } + return KernelSignature("minimum_raw", {"X", "Y"}, {"axis"}, {"Out"}); +} + +KernelSignature ElementwiseModOpArgumentMapping( + const ArgumentMappingContext& ctx) { + int axis = paddle::any_cast(ctx.Attr("axis")); + if (axis == -1) { + return KernelSignature("modulo", {"X", "Y"}, {}, {"Out"}); + } + return KernelSignature("modulo_raw", {"X", "Y"}, {"axis"}, {"Out"}); +} + KernelSignature ElementwiseAddGradOpArgumentMapping( const ArgumentMappingContext& ctx) { return KernelSignature("add_grad", @@ -158,12 +185,30 @@ KernelSignature ElementwiseMulTripleGradOpArgumentMapping( {"D_X", "D_Y", "D_DOut", "D_DDX", "D_DDY"}); } +KernelSignature ElementwiseMaxGradOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("maximum_grad", + {"X", "Y", GradVarName("Out")}, + {"axis"}, + {GradVarName("X"), GradVarName("Y")}); +} + +KernelSignature ElementwiseMinGradOpArgumentMapping( + const ArgumentMappingContext& ctx) { + return KernelSignature("minimum_grad", + {"X", "Y", GradVarName("Out")}, + {"axis"}, + {GradVarName("X"), GradVarName("Y")}); +} } // namespace phi PD_REGISTER_BASE_KERNEL_NAME(elementwise_add, add); PD_REGISTER_BASE_KERNEL_NAME(elementwise_sub, subtract); PD_REGISTER_BASE_KERNEL_NAME(elementwise_mul, multiply); PD_REGISTER_BASE_KERNEL_NAME(elementwise_div, divide); +PD_REGISTER_BASE_KERNEL_NAME(elementwise_max, maximum); +PD_REGISTER_BASE_KERNEL_NAME(elementwise_min, minimum); +PD_REGISTER_BASE_KERNEL_NAME(elementwise_mod, modulo); PD_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad, add_grad); PD_REGISTER_BASE_KERNEL_NAME(elementwise_add_grad_grad, add_double_grad); PD_REGISTER_BASE_KERNEL_NAME(elementwise_add_triple_grad, add_triple_grad); @@ -178,6 +223,8 @@ PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmax, fmax); PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmin, fmin); PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmax_grad, fmax_grad); PD_REGISTER_BASE_KERNEL_NAME(elementwise_fmin_grad, fmin_grad); +PD_REGISTER_BASE_KERNEL_NAME(elementwise_max_grad, maximum_grad); +PD_REGISTER_BASE_KERNEL_NAME(elementwise_min_grad, minimum_grad); PD_REGISTER_ARG_MAPPING_FN(elementwise_add, phi::ElementwiseAddOpArgumentMapping); @@ -187,6 +234,12 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_mul, phi::ElementwiseMulOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(elementwise_div, phi::ElementwiseDivOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(elementwise_max, + phi::ElementwiseMaxOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(elementwise_min, + phi::ElementwiseMinOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(elementwise_mod, + phi::ElementwiseModOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(elementwise_add_grad, phi::ElementwiseAddGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(elementwise_add_grad_grad, @@ -211,8 +264,11 @@ PD_REGISTER_ARG_MAPPING_FN(elementwise_fmax, phi::ElementwiseFMaxOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(elementwise_fmin, phi::ElementwiseFMinOpArgumentMapping); - PD_REGISTER_ARG_MAPPING_FN(elementwise_fmax_grad, phi::ElementwiseFMaxGradOpArgumentMapping); PD_REGISTER_ARG_MAPPING_FN(elementwise_fmin_grad, 
phi::ElementwiseFMinGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(elementwise_max_grad, + phi::ElementwiseMaxGradOpArgumentMapping); +PD_REGISTER_ARG_MAPPING_FN(elementwise_min_grad, + phi::ElementwiseMinGradOpArgumentMapping);
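// ---------------------------------------------------------------------------
// Note (not part of the patch): the ModuloFunctor added to
// paddle/phi/kernels/funcs/elementwise_functor.h keeps the convention that the
// remainder takes the sign of the divisor (Python-style), which differs from
// the raw C++ % / std::fmod result. A minimal standalone sketch of that rule,
// using hypothetical helper names and assuming only the standard library:

#include <cassert>
#include <cmath>

// Integral case, mirroring the patch's ModuloFunctor for integer types.
int mod_follow_divisor(int a, int b) {
  int res = a % b;                      // C++ remainder follows the dividend
  if ((res != 0) && ((b ^ res) < 0)) {  // signs differ -> shift toward divisor
    res += b;
  }
  return res;
}

// Floating-point case, mirroring the patch's ModuloFunctor for float types.
double fmod_follow_divisor(double a, double b) {
  double res = std::fmod(a, b);
  if ((res != 0) && ((res < 0) != (b < 0))) res += b;
  return res;
}

int main() {
  assert(-7 % 3 == -1);                       // plain C++ remainder
  assert(mod_follow_divisor(-7, 3) == 2);     // elementwise_mod convention
  assert(mod_follow_divisor(7, -3) == -2);
  assert(fmod_follow_divisor(-7.5, 3.0) == 1.5);
  return 0;
}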
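// ---------------------------------------------------------------------------
// Note (not part of the patch): the maximum_grad functors route the upstream
// gradient to whichever input won the comparison; on ties (x == y) both
// MaxGradXFunctor (dout * (x > y)) and MaxGradYFunctor (dout * (x <= y)) send
// the whole gradient to y. A standalone sketch with a hypothetical helper:

#include <cassert>

struct MaxGrad {
  float dx;
  float dy;
};

// Per-element equivalent of funcs::MaxGradXFunctor / funcs::MaxGradYFunctor.
MaxGrad max_grad(float x, float y, float dout) {
  return {dout * static_cast<float>(x > y), dout * static_cast<float>(x <= y)};
}

int main() {
  MaxGrad g1 = max_grad(2.0f, 1.0f, 0.5f);
  assert(g1.dx == 0.5f && g1.dy == 0.0f);  // x is the maximum -> grad to x
  MaxGrad g2 = max_grad(1.0f, 1.0f, 0.5f);
  assert(g2.dx == 0.0f && g2.dy == 0.5f);  // tie -> grad routed to y
  return 0;
}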