未验证 提交 975f99ab 编写于 作者: Y YuanRisheng 提交者: GitHub

[Phi]Move Relu/Cos/Sin/Tan/Acos/Asin/Atan/Sinh/Cosh/Asinh/Acosh/Atanh kernels...

[Phi]Move Relu/Cos/Sin/Tan/Acos/Asin/Atan/Sinh/Cosh/Asinh/Acosh/Atanh kernels in Activation to Phi (#40175)

* move activation op

* adjust code format

* fix compile bugs

* fix ci bugs

* code format adjust

* code format adjust2

* activate ci status

* modify according to comment
上级 f1fe2ad4
...@@ -478,7 +478,7 @@ function(op_library TARGET) ...@@ -478,7 +478,7 @@ function(op_library TARGET)
if (${pybind_flag} EQUAL 0) if (${pybind_flag} EQUAL 0)
# NOTE(*): activation use macro to regist the kernels, set use_op manually. # NOTE(*): activation use macro to regist the kernels, set use_op manually.
if(${TARGET} STREQUAL "activation") if(${TARGET} STREQUAL "activation")
file(APPEND ${pybind_file} "USE_OP(relu);\n") file(APPEND ${pybind_file} "USE_OP_ITSELF(relu);\n")
elseif(${TARGET} STREQUAL "fake_dequantize") elseif(${TARGET} STREQUAL "fake_dequantize")
file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n") file(APPEND ${pybind_file} "USE_OP(fake_dequantize_max_abs);\n")
elseif(${TARGET} STREQUAL "fake_quantize") elseif(${TARGET} STREQUAL "fake_quantize")
......
...@@ -27,7 +27,7 @@ USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); ...@@ -27,7 +27,7 @@ USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
USE_OP(leaky_relu); USE_OP(leaky_relu);
USE_OP_DEVICE_KERNEL(leaky_relu, MKLDNN); USE_OP_DEVICE_KERNEL(leaky_relu, MKLDNN);
USE_OP(gelu); USE_OP(gelu);
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP(tanh); USE_OP(tanh);
USE_OP_DEVICE_KERNEL(tanh, MKLDNN); USE_OP_DEVICE_KERNEL(tanh, MKLDNN);
......
...@@ -675,7 +675,7 @@ TEST(BuildCinnPassTest, NoNeedBufferInput) { ...@@ -675,7 +675,7 @@ TEST(BuildCinnPassTest, NoNeedBufferInput) {
USE_PASS(build_cinn_pass); USE_PASS(build_cinn_pass);
USE_OP(mul); USE_OP(mul);
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
USE_OP(relu_grad); USE_OP_ITSELF(relu_grad);
USE_OP_ITSELF(elementwise_add_grad); USE_OP_ITSELF(elementwise_add_grad);
...@@ -301,5 +301,5 @@ TEST(CinnCompilerTest, Compile) { ...@@ -301,5 +301,5 @@ TEST(CinnCompilerTest, Compile) {
USE_PASS(build_cinn_pass); USE_PASS(build_cinn_pass);
USE_PASS(graph_viz_pass); USE_PASS(graph_viz_pass);
USE_OP(mul); USE_OP(mul);
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
...@@ -226,7 +226,7 @@ TEST(test_prepare_op, test_prepare_data_cpu_mkldnn) { ...@@ -226,7 +226,7 @@ TEST(test_prepare_op, test_prepare_data_cpu_mkldnn) {
} // namespace paddle } // namespace paddle
USE_OP_ITSELF(split); USE_OP_ITSELF(split);
USE_OP(relu); USE_OP_ITSELF(relu);
#ifdef PADDLE_WITH_MKLDNN #ifdef PADDLE_WITH_MKLDNN
USE_OP_DEVICE_KERNEL(relu, MKLDNN); USE_OP_DEVICE_KERNEL(relu, MKLDNN);
#endif #endif
...@@ -52,7 +52,7 @@ TEST(Relu6OpConverter, main) { test_activation("relu6"); } ...@@ -52,7 +52,7 @@ TEST(Relu6OpConverter, main) { test_activation("relu6"); }
} // namespace inference } // namespace inference
} // namespace paddle } // namespace paddle
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP(sigmoid); USE_OP(sigmoid);
USE_OP(tanh); USE_OP(tanh);
USE_OP(relu6); USE_OP(relu6);
...@@ -132,7 +132,9 @@ struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> { ...@@ -132,7 +132,9 @@ struct CudnnReluGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx) explicit CudnnReluGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, GPUDNN_ACTIVATION_RELU) {} : CudnnActivationGradFunctor<T>(ctx, 0.0, GPUDNN_ACTIVATION_RELU) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
}; };
template <typename T> template <typename T>
...@@ -146,7 +148,9 @@ struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> { ...@@ -146,7 +148,9 @@ struct CudnnRelu6GradFunctor : public CudnnActivationGradFunctor<T> {
: CudnnActivationGradFunctor<T>(ctx, 6.0, : CudnnActivationGradFunctor<T>(ctx, 6.0,
GPUDNN_ACTIVATION_CLIPPED_RELU) {} GPUDNN_ACTIVATION_CLIPPED_RELU) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
}; };
template <typename T> template <typename T>
...@@ -159,7 +163,9 @@ struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> { ...@@ -159,7 +163,9 @@ struct CudnnSigmoidGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx) explicit CudnnSigmoidGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, GPUDNN_ACTIVATION_SIGMOID) {} : CudnnActivationGradFunctor<T>(ctx, 0.0, GPUDNN_ACTIVATION_SIGMOID) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
}; };
template <typename T> template <typename T>
...@@ -172,7 +178,9 @@ struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> { ...@@ -172,7 +178,9 @@ struct CudnnTanhGradFunctor : public CudnnActivationGradFunctor<T> {
explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx) explicit CudnnTanhGradFunctor(const CUDADeviceContext& ctx)
: CudnnActivationGradFunctor<T>(ctx, 0.0, GPUDNN_ACTIVATION_TANH) {} : CudnnActivationGradFunctor<T>(ctx, 0.0, GPUDNN_ACTIVATION_TANH) {}
static constexpr ActBwdOpFwdDeps FwdDeps() { return kDepOut; } static constexpr ActBwdOpFwdDeps FwdDeps() {
return ActBwdOpFwdDeps::kDepOut;
}
}; };
template <typename Functor> template <typename Functor>
...@@ -197,7 +205,8 @@ class CudnnActivationGradKernel ...@@ -197,7 +205,8 @@ class CudnnActivationGradKernel
public: public:
using T = typename Functor::ELEMENT_TYPE; using T = typename Functor::ELEMENT_TYPE;
void Compute(const framework::ExecutionContext& context) const override { void Compute(const framework::ExecutionContext& context) const override {
static_assert(Functor::FwdDeps() == kDepOut, "Forward deps must be Out."); static_assert(Functor::FwdDeps() == ActBwdOpFwdDeps::kDepOut,
"Forward deps must be Out.");
const framework::Tensor *X, *Out, *dOut; const framework::Tensor *X, *Out, *dOut;
X = Out = dOut = nullptr; X = Out = dOut = nullptr;
......
...@@ -34,7 +34,8 @@ using paddle::framework::Tensor; ...@@ -34,7 +34,8 @@ using paddle::framework::Tensor;
template <typename GradFunctor> template <typename GradFunctor>
static constexpr bool CanInplaceAct() { static constexpr bool CanInplaceAct() {
return GradFunctor::FwdDeps() == kDepOut || GradFunctor::FwdDeps() == kNoDeps; return GradFunctor::FwdDeps() == ActBwdOpFwdDeps::kDepOut ||
GradFunctor::FwdDeps() == ActBwdOpFwdDeps::kNoDeps;
} }
#define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \ #define REGISTER_ACTIVATION_OP_MAKER(OP_NAME, OP_COMMENT) \
...@@ -921,7 +922,8 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel { ...@@ -921,7 +922,8 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) { if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
if (ctx->HasOutput("DX")) { if (ctx->HasOutput("DX")) {
ctx->ShareDim("X", "DX"); ctx->ShareDim("X", "DX");
ctx->ShareLoD("X", "DX"); ctx->ShareLoD("X", "DX");
...@@ -931,7 +933,8 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel { ...@@ -931,7 +933,8 @@ class ActivationOpDoubleGrad : public framework::OperatorWithKernel {
ctx->ShareLoD("X", "DDOut"); ctx->ShareLoD("X", "DDOut");
} }
} }
if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) { if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
if (ctx->HasOutput("DOut")) { if (ctx->HasOutput("DOut")) {
ctx->ShareDim("Out", "DOut"); ctx->ShareDim("Out", "DOut");
ctx->ShareLoD("Out", "DOut"); ctx->ShareLoD("Out", "DOut");
...@@ -960,13 +963,15 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel { ...@@ -960,13 +963,15 @@ class ActivationOpDoubleGrad2 : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) { if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
if (ctx->HasOutput("DDOut")) { if (ctx->HasOutput("DDOut")) {
ctx->ShareDim("X", "DDOut"); ctx->ShareDim("X", "DDOut");
ctx->ShareLoD("X", "DDOut"); ctx->ShareLoD("X", "DDOut");
} }
} }
if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) { if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
if (ctx->HasOutput("DDOut")) { if (ctx->HasOutput("DDOut")) {
ctx->ShareDim("Out", "DDOut"); ctx->ShareDim("Out", "DDOut");
ctx->ShareLoD("Out", "DDOut"); ctx->ShareLoD("Out", "DDOut");
...@@ -987,7 +992,8 @@ class ActivationOpTripleGrad : public framework::OperatorWithKernel { ...@@ -987,7 +992,8 @@ class ActivationOpTripleGrad : public framework::OperatorWithKernel {
using framework::OperatorWithKernel::OperatorWithKernel; using framework::OperatorWithKernel::OperatorWithKernel;
void InferShape(framework::InferShapeContext* ctx) const override { void InferShape(framework::InferShapeContext* ctx) const override {
if (static_cast<int>(kDepValue) & static_cast<int>(kDepX)) { if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepX)) {
if (ctx->HasOutput("DX")) { if (ctx->HasOutput("DX")) {
ctx->ShareDim("X", "DX"); ctx->ShareDim("X", "DX");
ctx->ShareLoD("X", "DX"); ctx->ShareLoD("X", "DX");
...@@ -997,7 +1003,8 @@ class ActivationOpTripleGrad : public framework::OperatorWithKernel { ...@@ -997,7 +1003,8 @@ class ActivationOpTripleGrad : public framework::OperatorWithKernel {
ctx->ShareLoD("X", "DDOut"); ctx->ShareLoD("X", "DDOut");
} }
} }
if (static_cast<int>(kDepValue) & static_cast<int>(kDepOut)) { if (static_cast<int>(kDepValue) &
static_cast<int>(ActBwdOpFwdDeps::kDepOut)) {
if (ctx->HasOutput("D_DOut")) { if (ctx->HasOutput("D_DOut")) {
ctx->ShareDim("Out", "D_DOut"); ctx->ShareDim("Out", "D_DOut");
ctx->ShareLoD("Out", "D_DOut"); ctx->ShareLoD("Out", "D_DOut");
...@@ -1464,6 +1471,18 @@ namespace plat = paddle::platform; ...@@ -1464,6 +1471,18 @@ namespace plat = paddle::platform;
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL); FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
REGISTER_ACTIVATION_OP(cos, Cos, CosFunctor, CosGradFunctor)
REGISTER_ACTIVATION_OP(tan, Tan, TanFunctor, TanGradFunctor);
REGISTER_ACTIVATION_OP(acos, Acos, AcosFunctor, AcosGradFunctor);
REGISTER_ACTIVATION_OP(sin, Sin, SinFunctor, SinGradFunctor);
REGISTER_ACTIVATION_OP(asin, Asin, AsinFunctor, AsinGradFunctor);
REGISTER_ACTIVATION_OP(atan, Atan, AtanFunctor, AtanGradFunctor);
REGISTER_ACTIVATION_OP(sinh, Sinh, SinhFunctor, SinhGradFunctor);
REGISTER_ACTIVATION_OP(cosh, Cosh, CoshFunctor, CoshGradFunctor);
REGISTER_ACTIVATION_OP(asinh, Asinh, AsinhFunctor, AsinhGradFunctor);
REGISTER_ACTIVATION_OP(acosh, Acosh, AcoshFunctor, AcoshGradFunctor);
REGISTER_ACTIVATION_OP(atanh, Atanh, AtanhFunctor, AtanhGradFunctor);
/* ========================== sigmoid register ============================= /* ========================== sigmoid register =============================
*/ */
// 1. Register Sigmoid Operator // 1. Register Sigmoid Operator
...@@ -1584,16 +1603,6 @@ REGISTER_OPERATOR( ...@@ -1584,16 +1603,6 @@ REGISTER_OPERATOR(
ops::ActivationOpDoubleGrad2<ops::ReluGradFunctor<float>::FwdDeps()>, ops::ActivationOpDoubleGrad2<ops::ReluGradFunctor<float>::FwdDeps()>,
ops::ActivationDoubleGradOpInplaceInferer); ops::ActivationDoubleGradOpInplaceInferer);
REGISTER_ACTIVATION_CPU_KERNEL(relu, Relu, ReluCPUFunctor, ReluGradFunctor);
REGISTER_OP_CPU_KERNEL(
relu_grad_grad,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<float>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<double>>,
ops::ActivationDoubleGradKernel<plat::CPUDeviceContext,
ops::ReluGradGradFunctor<plat::float16>>);
/* ========================================================================== */ /* ========================================================================== */
/* ======================== leaky relu register ============================ */ /* ======================== leaky relu register ============================ */
......
...@@ -29,7 +29,7 @@ USE_OP_ITSELF(elementwise_add); ...@@ -29,7 +29,7 @@ USE_OP_ITSELF(elementwise_add);
USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
USE_OP(elementwise_mul); USE_OP(elementwise_mul);
USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN); USE_OP_DEVICE_KERNEL(elementwise_mul, MKLDNN);
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_DEVICE_KERNEL(relu, MKLDNN); USE_OP_DEVICE_KERNEL(relu, MKLDNN);
USE_OP_ITSELF(softmax); USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN); USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN); USE_OP_DEVICE_KERNEL(elementwise_add, MKLDNN);
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_DEVICE_KERNEL(relu, MKLDNN); USE_OP_DEVICE_KERNEL(relu, MKLDNN);
USE_OP_ITSELF(softmax); USE_OP_ITSELF(softmax);
USE_OP_DEVICE_KERNEL(softmax, MKLDNN); USE_OP_DEVICE_KERNEL(softmax, MKLDNN);
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
USE_OP(pool2d); USE_OP(pool2d);
USE_OP_DEVICE_KERNEL(pool2d, MKLDNN); USE_OP_DEVICE_KERNEL(pool2d, MKLDNN);
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_DEVICE_KERNEL(relu, MKLDNN); USE_OP_DEVICE_KERNEL(relu, MKLDNN);
USE_OP_ITSELF(transpose); USE_OP_ITSELF(transpose);
USE_OP_DEVICE_KERNEL(transpose, MKLDNN); USE_OP_DEVICE_KERNEL(transpose, MKLDNN);
......
...@@ -22,7 +22,7 @@ limitations under the License. */ ...@@ -22,7 +22,7 @@ limitations under the License. */
namespace fw = paddle::framework; namespace fw = paddle::framework;
namespace plat = paddle::platform; namespace plat = paddle::platform;
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_DEVICE_KERNEL(relu, MLU); USE_OP_DEVICE_KERNEL(relu, MLU);
// relu // relu
......
...@@ -20,7 +20,7 @@ limitations under the License. */ ...@@ -20,7 +20,7 @@ limitations under the License. */
#include "paddle/fluid/operators/common_infer_shape_functions.h" #include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/phi/core/ddim.h" #include "paddle/phi/core/ddim.h"
USE_OP(relu); USE_OP_ITSELF(relu);
USE_OP_ITSELF(elementwise_add); USE_OP_ITSELF(elementwise_add);
USE_OP_ITSELF(softmax); USE_OP_ITSELF(softmax);
......
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
namespace phi {
#define DECLARE_ACTIVATION_GRAD_KERNEL_DepX(name) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
DenseTensor* dx);
#define DECLARE_ACTIVATION_GRAD_KERNEL_DepOut(name) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
DenseTensor* dx);
template <typename T, typename Context>
void ReluDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
DenseTensor* ddout);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Cos);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Tan);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Acos);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Sin);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Asin);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Atan);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Sinh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Cosh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Asinh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Acosh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepX(Atanh);
DECLARE_ACTIVATION_GRAD_KERNEL_DepOut(Relu);
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/infermeta/unary.h"
namespace phi {
#define DECLARE_ACTIVATION_KERNEL(name) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out);
DECLARE_ACTIVATION_KERNEL(Cos)
DECLARE_ACTIVATION_KERNEL(Tan)
DECLARE_ACTIVATION_KERNEL(Acos)
DECLARE_ACTIVATION_KERNEL(Sin)
DECLARE_ACTIVATION_KERNEL(Asin)
DECLARE_ACTIVATION_KERNEL(Atan)
DECLARE_ACTIVATION_KERNEL(Sinh)
DECLARE_ACTIVATION_KERNEL(Cosh)
DECLARE_ACTIVATION_KERNEL(Asinh)
DECLARE_ACTIVATION_KERNEL(Acosh)
DECLARE_ACTIVATION_KERNEL(Atanh)
DECLARE_ACTIVATION_KERNEL(Relu)
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/activation_grad_impl.h"
namespace phi {
#define DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
DenseTensor* dx) { \
functor_class functor; \
ActivationGradImpl<T, Context, functor_class>( \
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepOut(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
DenseTensor* dx) { \
functor_class functor; \
ActivationGradImpl<T, Context, functor_class>( \
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Cos, funcs::CosGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Tan, funcs::TanGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Acos, funcs::AcosGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Sin, funcs::SinGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Asin, funcs::AsinGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Atan, funcs::AtanGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Sinh, funcs::SinhGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Cosh, funcs::CoshGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Asinh, funcs::AsinhGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Acosh, funcs::AcoshGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepX(Atanh, funcs::AtanhGradFunctor<T>);
DEFINE_CPU_ACTIVATION_GRAD_KERNEL_DepOut(Relu, funcs::ReluGradFunctor<T>);
} // namespace phi
PD_REGISTER_KERNEL(
cos_grad, CPU, ALL_LAYOUT, phi::CosGradKernel, float, double) {}
PD_REGISTER_KERNEL(
tan_grad, CPU, ALL_LAYOUT, phi::TanGradKernel, float, double) {}
PD_REGISTER_KERNEL(
acos_grad, CPU, ALL_LAYOUT, phi::AcosGradKernel, float, double) {}
PD_REGISTER_KERNEL(
sin_grad, CPU, ALL_LAYOUT, phi::SinGradKernel, float, double) {}
PD_REGISTER_KERNEL(
asin_grad, CPU, ALL_LAYOUT, phi::AsinGradKernel, float, double) {}
PD_REGISTER_KERNEL(
atan_grad, CPU, ALL_LAYOUT, phi::AtanGradKernel, float, double) {}
PD_REGISTER_KERNEL(
sinh_grad, CPU, ALL_LAYOUT, phi::SinhGradKernel, float, double) {}
PD_REGISTER_KERNEL(
cosh_grad, CPU, ALL_LAYOUT, phi::CoshGradKernel, float, double) {}
PD_REGISTER_KERNEL(
asinh_grad, CPU, ALL_LAYOUT, phi::AsinhGradKernel, float, double) {}
PD_REGISTER_KERNEL(
acosh_grad, CPU, ALL_LAYOUT, phi::AcoshGradKernel, float, double) {}
PD_REGISTER_KERNEL(
atanh_grad, CPU, ALL_LAYOUT, phi::AtanhGradKernel, float, double) {}
PD_REGISTER_KERNEL(
relu_grad, CPU, ALL_LAYOUT, phi::ReluGradKernel, float, double) {}
PD_REGISTER_KERNEL(relu_double_grad,
CPU,
ALL_LAYOUT,
phi::ReluDoubleGradKernel,
float,
double,
phi::dtype::float16) {}
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/activation_impl.h"
namespace phi {
#define DEFINE_CPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
functor_class functor; \
ActivationImpl<T, Context, functor_class>(dev_ctx, x, out, functor); \
}
DEFINE_CPU_ACTIVATION_KERNEL(Sin, funcs::SinFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Cos, funcs::CosFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Tan, funcs::TanFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Asin, funcs::AsinFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Atan, funcs::AtanFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Acos, funcs::AcosFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Sinh, funcs::SinhFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Cosh, funcs::CoshFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Asinh, funcs::AsinhFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Acosh, funcs::AcoshFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Atanh, funcs::AtanhFunctor<T>)
DEFINE_CPU_ACTIVATION_KERNEL(Relu, funcs::ReluCPUFunctor<T>)
} // namespace phi
PD_REGISTER_KERNEL(sin, CPU, ALL_LAYOUT, phi::SinKernel, float, double) {}
PD_REGISTER_KERNEL(cos, CPU, ALL_LAYOUT, phi::CosKernel, float, double) {}
PD_REGISTER_KERNEL(tan, CPU, ALL_LAYOUT, phi::TanKernel, float, double) {}
PD_REGISTER_KERNEL(acos, CPU, ALL_LAYOUT, phi::AcosKernel, float, double) {}
PD_REGISTER_KERNEL(asin, CPU, ALL_LAYOUT, phi::AsinKernel, float, double) {}
PD_REGISTER_KERNEL(atan, CPU, ALL_LAYOUT, phi::AtanKernel, float, double) {}
PD_REGISTER_KERNEL(sinh, CPU, ALL_LAYOUT, phi::SinhKernel, float, double) {}
PD_REGISTER_KERNEL(cosh, CPU, ALL_LAYOUT, phi::CoshKernel, float, double) {}
PD_REGISTER_KERNEL(asinh, CPU, ALL_LAYOUT, phi::AsinhKernel, float, double) {}
PD_REGISTER_KERNEL(acosh, CPU, ALL_LAYOUT, phi::AcoshKernel, float, double) {}
PD_REGISTER_KERNEL(atanh, CPU, ALL_LAYOUT, phi::AtanhKernel, float, double) {}
PD_REGISTER_KERNEL(relu, CPU, ALL_LAYOUT, phi::ReluKernel, float, double) {}
此差异已折叠。
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/activation_grad_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/impl/activation_grad_impl.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
namespace phi {
template <typename T, typename Context, typename Functor>
void ActivationGradGPUImpl(const Context& dev_ctx,
const DenseTensor* x,
const DenseTensor* out,
const DenseTensor* d_out,
DenseTensor* d_x,
const Functor& functor) {
if (static_cast<int>(Functor::FwdDeps()) &
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepOut)) {
PADDLE_ENFORCE_NOT_NULL(
out, errors::NotFound("The input DenseTensor Out can not be nullptr"));
}
PADDLE_ENFORCE_NOT_NULL(
d_out, errors::NotFound("The input DenseTensor dOut can not be nullptr"));
PADDLE_ENFORCE_NOT_NULL(
d_x, errors::NotFound("The output DenseTensor dX can not be nullptr"));
if (!out) {
out = d_out; // fake out
}
if (static_cast<int>(Functor::FwdDeps()) &
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepX)) {
PADDLE_ENFORCE_NOT_NULL(
x, errors::NotFound("The input DenseTensor X can not be nullptr"));
} else {
VLOG(10) << "Inplace activation of Op Functor: " << typeid(Functor).name();
x = d_x;
}
dev_ctx.template Alloc<T>(d_x);
std::vector<const DenseTensor*> ins = {d_out};
std::vector<DenseTensor*> outs = {d_x};
if (static_cast<int>(Functor::FwdDeps()) ==
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepOut)) {
// Only need forward output Out
ins.push_back(out);
funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
} else if (static_cast<int>(Functor::FwdDeps()) ==
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepX)) {
// Only need forward input X
ins.push_back(x);
funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
} else {
funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
}
}
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& x, \
const DenseTensor& dout, \
DenseTensor* dx) { \
functor_class functor; \
ActivationGradGPUImpl<T, Context, functor_class>( \
dev_ctx, &x, nullptr, &dout, dx, functor); \
}
#define DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepOut(name, functor_class) \
template <typename T, typename Context> \
void name##GradKernel(const Context& dev_ctx, \
const DenseTensor& out, \
const DenseTensor& dout, \
DenseTensor* dx) { \
functor_class functor; \
ActivationGradGPUImpl<T, Context, functor_class>( \
dev_ctx, nullptr, &out, &dout, dx, functor); \
}
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepOut(Relu, funcs::CudaReluGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Cos, funcs::CudaCosGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Tan, funcs::CudaTanGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Acos, funcs::CudaAcosGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Sin, funcs::CudaSinGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Asin, funcs::CudaAsinGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Atan, funcs::CudaAtanGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Sinh, funcs::CudaSinhGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Cosh, funcs::CudaCoshGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Asinh, funcs::CudaAsinhGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Acosh, funcs::CudaAcoshGradFunctor<T>);
DEFINE_GPU_ACTIVATION_GRAD_KERNEL_DepX(Atanh, funcs::CudaAtanhGradFunctor<T>);
} // namespace phi
PD_REGISTER_KERNEL(cos_grad,
GPU,
ALL_LAYOUT,
phi::CosGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(tan_grad,
GPU,
ALL_LAYOUT,
phi::TanGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(acos_grad,
GPU,
ALL_LAYOUT,
phi::AcosGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(sin_grad,
GPU,
ALL_LAYOUT,
phi::SinGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(asin_grad,
GPU,
ALL_LAYOUT,
phi::AsinGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(atan_grad,
GPU,
ALL_LAYOUT,
phi::AtanGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(sinh_grad,
GPU,
ALL_LAYOUT,
phi::SinhGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(cosh_grad,
GPU,
ALL_LAYOUT,
phi::CoshGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(asinh_grad,
GPU,
ALL_LAYOUT,
phi::AsinhGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(acosh_grad,
GPU,
ALL_LAYOUT,
phi::AcoshGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(atanh_grad,
GPU,
ALL_LAYOUT,
phi::AtanhGradKernel,
float,
double,
phi::dtype::float16) {}
#ifdef PADDLE_WITH_HIP
PD_REGISTER_KERNEL(relu_grad,
GPU,
ALL_LAYOUT,
phi::ReluGradKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(relu_double_grad,
GPU,
ALL_LAYOUT,
phi::ReluDoubleGradKernel,
float,
double,
phi::dtype::float16) {}
#else
PD_REGISTER_KERNEL(relu_grad,
GPU,
ALL_LAYOUT,
phi::ReluGradKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
PD_REGISTER_KERNEL(relu_double_grad,
GPU,
ALL_LAYOUT,
phi::ReluDoubleGradKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
#endif
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/kernels/activation_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/bfloat16.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/funcs/elementwise_base.h"
#include "paddle/phi/kernels/impl/activation_grad_impl.h"
#include "paddle/fluid/platform/device/gpu/gpu_device_function.h"
namespace phi {
template <typename T, typename Context, typename Functor>
void ActivationGPUImpl(const Context& dev_ctx,
const DenseTensor& x,
DenseTensor* out,
const Functor& functor) {
PADDLE_ENFORCE_NOT_NULL(out,
errors::NotFound("Output Out should not be nullptr"));
dev_ctx.template Alloc<T>(out);
std::vector<const DenseTensor*> ins = {&x};
std::vector<DenseTensor*> outs = {out};
funcs::ElementwiseKernel<T>(dev_ctx, ins, &outs, functor);
}
#define DEFINE_GPU_ACTIVATION_KERNEL(name, functor_class) \
template <typename T, typename Context> \
void name##Kernel( \
const Context& dev_ctx, const DenseTensor& x, DenseTensor* out) { \
functor_class functor; \
ActivationGPUImpl<T, Context, functor_class>(dev_ctx, x, out, functor); \
}
DEFINE_GPU_ACTIVATION_KERNEL(Cos, funcs::CudaCosFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Tan, funcs::CudaTanFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Acos, funcs::CudaAcosFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Sin, funcs::CudaSinFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Asin, funcs::CudaAsinFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Atan, funcs::CudaAtanFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Sinh, funcs::CudaSinhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Cosh, funcs::CudaCoshFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Asinh, funcs::CudaAsinhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Acosh, funcs::CudaAcoshFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Atanh, funcs::CudaAtanhFunctor<T>)
DEFINE_GPU_ACTIVATION_KERNEL(Relu, funcs::CudaReluFunctor<T>)
} // namespace phi
#ifdef PADDLE_WITH_HIP
PD_REGISTER_KERNEL(relu,
GPU,
ALL_LAYOUT,
phi::ReluKernel,
float,
double,
phi::dtype::float16) {}
#else
PD_REGISTER_KERNEL(relu,
GPU,
ALL_LAYOUT,
phi::ReluKernel,
float,
double,
phi::dtype::float16,
phi::dtype::bfloat16) {}
#endif
PD_REGISTER_KERNEL(
sin, GPU, ALL_LAYOUT, phi::SinKernel, float, double, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
cos, GPU, ALL_LAYOUT, phi::CosKernel, float, double, phi::dtype::float16) {}
PD_REGISTER_KERNEL(
tan, GPU, ALL_LAYOUT, phi::TanKernel, float, double, phi::dtype::float16) {}
PD_REGISTER_KERNEL(acos,
GPU,
ALL_LAYOUT,
phi::AcosKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(asin,
GPU,
ALL_LAYOUT,
phi::AsinKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(atan,
GPU,
ALL_LAYOUT,
phi::AtanKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(sinh,
GPU,
ALL_LAYOUT,
phi::SinhKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(cosh,
GPU,
ALL_LAYOUT,
phi::CoshKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(asinh,
GPU,
ALL_LAYOUT,
phi::AsinhKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(acosh,
GPU,
ALL_LAYOUT,
phi::AcoshKernel,
float,
double,
phi::dtype::float16) {}
PD_REGISTER_KERNEL(atanh,
GPU,
ALL_LAYOUT,
phi::AtanhKernel,
float,
double,
phi::dtype::float16) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
#include "paddle/fluid/platform/device_context.h"
namespace phi {
template <typename T, typename Context, typename Functor>
void ActivationGradImpl(const Context& dev_ctx,
const DenseTensor* X,
const DenseTensor* Out,
const DenseTensor* dOut,
DenseTensor* dX,
const Functor& functor) {
if (static_cast<int>(Functor::FwdDeps()) &
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepOut)) {
PADDLE_ENFORCE_NOT_NULL(
Out, errors::NotFound("The input DenseTensor Out can not be nullptr"));
}
PADDLE_ENFORCE_NOT_NULL(
dOut, errors::NotFound("The input DenseTensor dOut can not be nullptr"));
PADDLE_ENFORCE_NOT_NULL(
dX, errors::NotFound("The output DenseTensor dX can not be nullptr"));
if (!Out) {
Out = dOut; // fake out
}
if (static_cast<int>(Functor::FwdDeps()) &
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepX)) {
PADDLE_ENFORCE_NOT_NULL(
X, errors::NotFound("The input DenseTensor X can not be nullptr"));
} else {
VLOG(10) << "Inplace activation of Op Functor: " << typeid(Functor).name();
X = dX;
}
dev_ctx.template Alloc<T>(dX);
auto dout = phi::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dOut, "Input", "Out@GRAD", "ActivationGrad"));
auto out = phi::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Input", "Out", "ActivationGrad"));
auto dx = phi::EigenVector<T>::Flatten(
GET_DATA_SAFELY(dX, "Input", "X@GRAD", "ActivationGrad"));
auto x = phi::EigenVector<T>::Flatten(
GET_DATA_SAFELY(X, "Input", "X", "ActivationGrad"));
auto* place = dev_ctx.eigen_device();
// use 32bit index to speed up computation
bool use_32bit_index = out.size() < Eigen::NumTraits<int>::highest();
bool is_gpu_place = paddle::platform::is_gpu_place(dev_ctx.GetPlace());
if (use_32bit_index && is_gpu_place) {
functor(*place,
To32BitIndex(x),
To32BitIndex(out),
To32BitIndex(dout),
To32BitIndex(dx));
} else {
functor(*place, x, out, dout, dx);
}
}
template <typename T, typename Context, typename Functor>
void ActivationDoubleGradImpl(const Context& dev_ctx,
const DenseTensor* X,
const DenseTensor* Out,
const DenseTensor* ddX,
DenseTensor* dX,
DenseTensor* dOut,
DenseTensor* ddOut,
const Functor& functor) {
if (static_cast<int>(Functor::FwdDeps()) &
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepX)) {
PADDLE_ENFORCE_NOT_NULL(
X, errors::NotFound("The input DenseTensor X can not be nullptr"));
} else {
VLOG(10) << "Inplace activation of Op Functor: " << typeid(Functor).name();
X = ddX;
}
if (static_cast<int>(Functor::FwdDeps()) &
static_cast<int>(funcs::ActBwdOpFwdDeps::kDepOut)) {
PADDLE_ENFORCE_NOT_NULL(
Out, errors::NotFound("The input DenseTensor Out can not be nullptr"));
} else {
VLOG(10) << "Inplace activation of Op Functor: " << typeid(Functor).name();
Out = ddX;
}
if (ddOut) {
dev_ctx.template Alloc<T>(ddOut);
}
if (dOut) {
dev_ctx.template Alloc<T>(dOut);
}
if (dX) {
dX->Resize(Out->dims());
dev_ctx.template Alloc<T>(dX);
}
functor(dev_ctx, X, Out, ddX, ddOut, dOut, dX);
}
template <typename T, typename Context>
void ReluDoubleGradKernel(const Context& dev_ctx,
const DenseTensor& out,
const DenseTensor& ddx,
DenseTensor* ddout) {
funcs::ReluGradGradFunctor<T> relu_double_grad_functor;
ActivationDoubleGradImpl<T, Context, funcs::ReluGradGradFunctor<T>>(
dev_ctx,
nullptr,
&out,
&ddx,
nullptr,
nullptr,
ddout,
relu_double_grad_functor);
}
} // namespace phi
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/kernels/funcs/activation_functor.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/fluid/platform/device_context.h"
namespace phi {
#define ToString(x) #x
template <typename T, typename Context, typename Functor>
void ActivationImpl(const Context& dev_ctx,
const DenseTensor& X,
DenseTensor* Out,
const Functor& functor) {
PADDLE_ENFORCE_NOT_NULL(Out,
errors::NotFound("Output Out should not be nullptr"));
dev_ctx.template Alloc<T>(Out);
auto x = phi::EigenVector<T>::Flatten(
GET_DATA_SAFELY(&X, "Input", "X", "Activation"));
auto out = phi::EigenVector<T>::Flatten(
GET_DATA_SAFELY(Out, "Output", "Out", "Activation"));
auto* place = dev_ctx.eigen_device();
// use 32bit index to speed up computation
bool use_32bit_index = out.size() < Eigen::NumTraits<int>::highest();
bool is_gpu_place = paddle::platform::is_gpu_place(dev_ctx.GetPlace());
if (use_32bit_index && is_gpu_place) {
functor(*place, To32BitIndex(x), To32BitIndex(out));
} else {
functor(*place, x, out);
}
}
} // namespace phi
/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/phi/core/compat/op_utils.h"
namespace phi {
#define DefineActGradDepXOpArgMap(func_name, op_name) \
KernelSignature func_name##GradOpArgumentMapping( \
const ArgumentMappingContext& ctx) { \
return KernelSignature( \
op_name "_grad", {"X", GradVarName("Out")}, {}, {GradVarName("X")}); \
}
#define DefineActGradDepOutOpArgMap(func_name, op_name) \
KernelSignature func_name##GradOpArgumentMapping( \
const ArgumentMappingContext& ctx) { \
return KernelSignature( \
op_name "_grad", {"Out", GradVarName("Out")}, {}, {GradVarName("X")}); \
}
KernelSignature ReluDoubleGradOpArgumentMapping(
const ArgumentMappingContext& ctx) {
return KernelSignature("relu_double_grad", {"Out", "DDX"}, {}, {"DDOut"});
}
DefineActGradDepXOpArgMap(Cos, "cos");
DefineActGradDepXOpArgMap(Tan, "tan");
DefineActGradDepXOpArgMap(Acos, "acos");
DefineActGradDepXOpArgMap(Sin, "sin");
DefineActGradDepXOpArgMap(Asin, "asin");
DefineActGradDepXOpArgMap(Atan, "atan");
DefineActGradDepXOpArgMap(Sinh, "sinh");
DefineActGradDepXOpArgMap(Cosh, "cosh");
DefineActGradDepXOpArgMap(Asinh, "asinh");
DefineActGradDepXOpArgMap(Acosh, "acosh");
DefineActGradDepXOpArgMap(Atanh, "atanh");
DefineActGradDepOutOpArgMap(Relu, "relu");
} // namespace phi
PD_REGISTER_BASE_KERNEL_NAME(relu_grad_grad, relu_double_grad);
PD_REGISTER_ARG_MAPPING_FN(cos_grad, phi::CosGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(tan_grad, phi::TanGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(acos_grad, phi::AcosGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sin_grad, phi::SinGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(asin_grad, phi::AsinGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(atan_grad, phi::AtanGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(sinh_grad, phi::SinhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(cosh_grad, phi::CoshGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(asinh_grad, phi::AsinhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(acosh_grad, phi::AcoshGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(atanh_grad, phi::AtanhGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu_grad, phi::ReluGradOpArgumentMapping);
PD_REGISTER_ARG_MAPPING_FN(relu_grad_grad,
phi::ReluDoubleGradOpArgumentMapping);
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册