未验证 提交 859c4077 编写于 作者: C carryyu 提交者: GitHub

【PFCC算子性能优化】 SeluKernel Optimization (#44490)

* [PFCC] SeluKernel Optimization

* selu kernel optimization

* add private

Co-authored-by: carryyu <>
上级 0fd8ee63
...@@ -2158,6 +2158,50 @@ struct CudaExpFunctor<double> : public BaseActivationFunctor<double> { ...@@ -2158,6 +2158,50 @@ struct CudaExpFunctor<double> : public BaseActivationFunctor<double> {
} }
}; };
// Device-side SELU activation functor:
//   selu(x) = scale * x                     when x >  0
//   selu(x) = scale * alpha * (exp(x) - 1)  when x <= 0
// `scale` and `alpha` are stored as float and exposed through GetAttrs().
template <typename T>
struct CudaSeluFunctor : public BaseActivationFunctor<T> {
  // Exposes the addresses of `scale` and `alpha`, keyed by attribute name.
  // NOTE(review): presumably the kernel launcher writes the attribute values
  // through these pointers before the functor is used -- confirm against
  // BaseActivationFunctor / the kernel-definition macro.
  typename BaseActivationFunctor<T>::AttrPair GetAttrs() {
    return {{"scale", &scale}, {"alpha", &alpha}};
  }

  // Applies SELU to one element and returns the activated value.
  __device__ __forceinline__ T operator()(const T x) const {
    T res = x;
    if (res <= zero) {
      // Use expm1f instead of `alpha * expf(x) - alpha`: for small |x| the
      // value expf(x) rounds to 1 and the subtraction cancels to 0, losing
      // all significant digits. expm1f evaluates exp(x) - 1 directly and
      // stays accurate near zero. x == 0 still maps to 0.
      res = alpha * expm1f(res);
    }
    res *= scale;
    return res;
  }

 private:
  float scale;
  float alpha;
  // Branch threshold, kept in T so the comparison needs no conversion.
  T zero = static_cast<T>(0.0f);
};
// Double-precision specialization of the device-side SELU functor:
//   selu(x) = scale * x                     when x >  0
//   selu(x) = scale * alpha * (exp(x) - 1)  when x <= 0
// The attributes are stored as float and widened to double before use.
template <>
struct CudaSeluFunctor<double> : public BaseActivationFunctor<double> {
  // Exposes the addresses of `scale` and `alpha`, keyed by attribute name.
  // NOTE(review): presumably the kernel launcher writes the attribute values
  // through these pointers before the functor is used -- confirm against
  // BaseActivationFunctor / the kernel-definition macro.
  typename BaseActivationFunctor<double>::AttrPair GetAttrs() {
    return {{"scale", &scale}, {"alpha", &alpha}};
  }

  // Applies SELU to one element and returns the activated value.
  __device__ __forceinline__ double operator()(const double x) const {
    double res = x;
    // Widen the float attributes once so all arithmetic is done in double.
    double alpha_cast = static_cast<double>(alpha);
    double scale_cast = static_cast<double>(scale);
    if (res <= zero) {
      // Use expm1 instead of `alpha * exp(x) - alpha`: for small |x| the
      // subtraction cancels most significant digits, while expm1 evaluates
      // exp(x) - 1 directly and stays accurate near zero. x == 0 still
      // maps to 0.
      res = alpha_cast * expm1(res);
    }
    res *= scale_cast;
    return res;
  }

 private:
  float scale;
  float alpha;
  // Branch threshold for the x <= 0 case.
  double zero = static_cast<double>(0.0f);
};
template <typename T> template <typename T>
struct CudaSquareFunctor : public BaseActivationFunctor<T> { struct CudaSquareFunctor : public BaseActivationFunctor<T> {
// square(x) = x * x // square(x) = x * x
......
...@@ -132,6 +132,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid, ...@@ -132,6 +132,7 @@ DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(HardSigmoid,
CudaHardSigmoidFunctor, CudaHardSigmoidFunctor,
slope, slope,
offset) offset)
// Invokes the two-attribute GPU activation helper macro for Selu, passing
// CudaSeluFunctor as the functor and `scale` / `alpha` as the attribute names.
// NOTE(review): the macro body is not visible here; presumably it defines the
// SeluKernel entry point -- confirm against the macro definition.
DEFINE_GPU_ACT_KERNEL_WITH_TWO_ATTRS(Selu, CudaSeluFunctor, scale, alpha)
template <typename T, typename Context> template <typename T, typename Context>
void HardSwishKernel(const Context& dev_ctx, void HardSwishKernel(const Context& dev_ctx,
...@@ -265,3 +266,4 @@ PD_REGISTER_KERNEL(pow, ...@@ -265,3 +266,4 @@ PD_REGISTER_KERNEL(pow,
int, int,
int64_t, int64_t,
phi::dtype::float16) {} phi::dtype::float16) {}
// Registration of phi::SeluKernel under the name `selu`; the macro arguments
// select the GPU backend, ALL_LAYOUT, and the float and double data types.
// The trailing empty braces are the (empty) registration body.
PD_REGISTER_KERNEL(selu, GPU, ALL_LAYOUT, phi::SeluKernel, float, double) {}
// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/kernels/selu_kernel.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/kernels/impl/selu_kernel_impl.h"
// Registration of phi::SeluKernel under the name `selu`; the macro arguments
// select the GPU backend, ALL_LAYOUT, and the float and double data types.
// The trailing empty braces are the (empty) registration body.
PD_REGISTER_KERNEL(selu, GPU, ALL_LAYOUT, phi::SeluKernel, float, double) {}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册