Unverified commit aaa14780, authored by Huang Jiyi, committed by GitHub

register fluid activation kernel to phi (#51927)

* update

* update

* update

* update

* update

* fix test
Parent 2add31f4
@@ -518,6 +518,8 @@ function(op_library TARGET)
foreach(xpu_kp_src ${xpu_kp_cc_srcs})
set(op_name "")
find_register(${xpu_kp_src} "REGISTER_OP_KERNEL" op_name)
find_phi_register(${xpu_kp_src} ${pybind_file}
"PD_REGISTER_STRUCT_KERNEL")
if(NOT ${op_name} EQUAL "")
file(APPEND ${pybind_file} "USE_OP_DEVICE_KERNEL(${op_name}, KP);\n")
message(STATUS "Building KP Target: ${op_name}")
......
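A note on the op_library change above: the added find_phi_register call makes the build script scan XPU-KP sources for the phi-style PD_REGISTER_STRUCT_KERNEL macro in addition to the old REGISTER_OP_KERNEL form, presumably so that kernels registered the new way are still reflected in the generated pybind file. A minimal sketch of the registration shape the scan looks for is below; my_op and MyOpKernel are placeholders, and the real soft_relu registrations appear in the activation kernel hunks further down.

    // Hypothetical example only: my_op and ops::MyOpKernel do not exist in
    // Paddle. find_phi_register is passed the literal token
    // "PD_REGISTER_STRUCT_KERNEL", so it matches registrations of this shape.
    PD_REGISTER_STRUCT_KERNEL(
        my_op, CPU, ALL_LAYOUT, ops::MyOpKernel, float, double) {}
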
@@ -166,7 +166,7 @@ TEST(DisMultiTrainerTest, test3) {
tmp1->SetDebug(true);
ProgramDesc p;
tmp1->InitOtherEnv(p);
tmp1->Run();
// tmp1->Run();
tmp1->Finalize();
#endif
}
......
@@ -24,6 +24,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_version_registry.h"
#include "paddle/fluid/operators/common_infer_shape_functions.h"
#include "paddle/phi/backends/dynload/port.h"
#include "paddle/phi/core/kernel_registry.h"
#include "paddle/phi/infermeta/backward.h"
DECLARE_bool(use_mkldnn);
@@ -384,6 +385,18 @@ DECLARE_INPLACE_OP_INFERER(ActivationTripleGradOpInplaceInferer,
{"DDX", "D_DOut"});
DECLARE_INPLACE_OP_INFERER(ActFwdInplaceInferer, {"X", "Out"});
#define DEFINE_ACTIVATION_CPU_KERNEL(op_name, functor, grad_functor) \
template <typename T, typename DeviceContext> \
class op_name##Kernel : public ActivationKernel<DeviceContext, functor<T>> { \
}; \
\
template <typename T, typename DeviceContext> \
class op_name##GradKernel \
: public ActivationGradKernel<DeviceContext, grad_functor<T>> {};
DEFINE_ACTIVATION_CPU_KERNEL(SoftRelu, SoftReluFunctor, SoftReluGradFunctor)
} // namespace operators
} // namespace paddle
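For reference, the single DEFINE_ACTIVATION_CPU_KERNEL(SoftRelu, SoftReluFunctor, SoftReluGradFunctor) invocation above expands to roughly the following class definitions (a preprocessor-expansion sketch; it relies on the ActivationKernel and ActivationGradKernel templates already defined in this file):

    // Expansion sketch, inside namespace paddle::operators.
    // Note the struct-kernel template order: T first, then DeviceContext.
    template <typename T, typename DeviceContext>
    class SoftReluKernel
        : public ActivationKernel<DeviceContext, SoftReluFunctor<T>> {};

    template <typename T, typename DeviceContext>
    class SoftReluGradKernel
        : public ActivationGradKernel<DeviceContext, SoftReluGradFunctor<T>> {};

These are the ops::SoftReluKernel and ops::SoftReluGradKernel classes referenced by the new registration macro in the next hunk.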
@@ -407,19 +420,19 @@ namespace plat = paddle::platform;
ops::ActivationOpGrad, \
ops::ActivationGradOpInplaceInferer);
#define REGISTER_ACTIVATION_CPU_KERNEL( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CPU_KERNEL( \
act_type, \
ops::ActivationKernel<phi::CPUContext, ops::functor<float>>, \
ops::ActivationKernel<phi::CPUContext, ops::functor<double>>); \
REGISTER_OP_CPU_KERNEL( \
act_type##_grad, \
ops::ActivationGradKernel<phi::CPUContext, ops::grad_functor<float>>, \
ops::ActivationGradKernel<phi::CPUContext, ops::grad_functor<double>>);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_OP);
FOR_EACH_ACTIVATION_OP(REGISTER_ACTIVATION_CPU_KERNEL);
#define REGISTER_ACTIVATION_CPU_KERNEL(act_type, op_name) \
PD_REGISTER_STRUCT_KERNEL( \
act_type, CPU, ALL_LAYOUT, ops::op_name##Kernel, float, double) {} \
PD_REGISTER_STRUCT_KERNEL(act_type##_grad, \
CPU, \
ALL_LAYOUT, \
ops::op_name##GradKernel, \
float, \
double) {}
REGISTER_ACTIVATION_CPU_KERNEL(soft_relu, SoftRelu)
REGISTER_ACTIVATION_OP(relu6, Relu6, Relu6Functor, Relu6GradFunctor);
REGISTER_ACTIVATION_OP(mish, Mish, MishFunctor, MishGradFunctor);
......
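To make the registration change concrete: REGISTER_ACTIVATION_CPU_KERNEL(soft_relu, SoftRelu) in the hunk above is shorthand for the two phi struct-kernel registrations sketched below (a macro-expansion sketch only, mirroring the explicit GPU registrations later in this commit). Unlike the removed REGISTER_OP_CPU_KERNEL form, the supported data types are passed as a trailing list instead of one ActivationKernel instantiation per type.

    // Expansion sketch of REGISTER_ACTIVATION_CPU_KERNEL(soft_relu, SoftRelu).
    PD_REGISTER_STRUCT_KERNEL(
        soft_relu, CPU, ALL_LAYOUT, ops::SoftReluKernel, float, double) {}
    PD_REGISTER_STRUCT_KERNEL(soft_relu_grad,
                              CPU,
                              ALL_LAYOUT,
                              ops::SoftReluGradKernel,
                              float,
                              double) {}
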
@@ -192,87 +192,41 @@ template <typename T>
using CudaELUGradNegativeAlphaFunctor =
phi::funcs::CudaELUGradNegativeAlphaFunctor<T>;
#define DEFINE_ACTIVATION_CUDA_KERNEL(op_name, functor, grad_functor) \
template <typename T, typename DeviceContext> \
class op_name##CudaKernel \
: public ActivationCudaKernel<DeviceContext, functor<T>> {}; \
\
template <typename T, typename DeviceContext> \
class op_name##GradCudaKernel \
: public ActivationGradCudaKernel<DeviceContext, grad_functor<T>> {};
DEFINE_ACTIVATION_CUDA_KERNEL(SoftRelu,
CudaSoftReluFunctor,
CudaSoftReluGradFunctor)
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
namespace plat = paddle::platform;
#define REGISTER_ACTIVATION_CUDA_KERNEL( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \
act_type, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<float>>, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<double>>, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<plat::float16>>, \
ops::ActivationCudaKernel<phi::GPUContext, \
ops::functor<plat::bfloat16>>); \
REGISTER_OP_CUDA_KERNEL( \
act_type##_grad, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<float>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<double>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<plat::float16>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<plat::bfloat16>>);
#define REGISTER_ACTIVATION_CUDA_KERNEL_INT( \
act_type, op_name, functor, grad_functor) \
REGISTER_OP_CUDA_KERNEL( \
act_type, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<float>>, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<double>>, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<int>>, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<int64_t>>, \
ops::ActivationCudaKernel<phi::GPUContext, ops::functor<plat::float16>>, \
ops::ActivationCudaKernel<phi::GPUContext, \
ops::functor<plat::bfloat16>>); \
REGISTER_OP_CUDA_KERNEL( \
act_type##_grad, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<float>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<double>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, ops::grad_functor<int>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<int64_t>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<plat::float16>>, \
ops::ActivationGradCudaKernel<phi::GPUContext, \
ops::grad_functor<plat::bfloat16>>);
REGISTER_OP_CUDA_KERNEL(
relu6,
ops::ActivationCudaKernel<phi::GPUContext, ops::CudaRelu6Functor<float>>,
ops::ActivationCudaKernel<phi::GPUContext, ops::CudaRelu6Functor<double>>,
ops::ActivationCudaKernel<phi::GPUContext, ops::CudaRelu6Functor<int>>,
ops::ActivationCudaKernel<phi::GPUContext, ops::CudaRelu6Functor<int64_t>>,
ops::ActivationCudaKernel<phi::GPUContext,
ops::CudaRelu6Functor<plat::float16>>,
ops::ActivationCudaKernel<phi::GPUContext,
ops::CudaRelu6Functor<plat::bfloat16>>);
REGISTER_OP_CUDA_KERNEL(
relu6_grad,
ops::ActivationGradCudaKernel<phi::GPUContext,
ops::CudaRelu6GradFunctor<float>>,
ops::ActivationGradCudaKernel<phi::GPUContext,
ops::CudaRelu6GradFunctor<double>>,
ops::ActivationGradCudaKernel<phi::GPUContext,
ops::CudaRelu6GradFunctor<int>>,
ops::ActivationGradCudaKernel<phi::GPUContext,
ops::CudaRelu6GradFunctor<int64_t>>,
ops::ActivationGradCudaKernel<phi::GPUContext,
ops::CudaRelu6GradFunctor<plat::float16>>,
ops::ActivationGradCudaKernel<phi::GPUContext,
ops::CudaRelu6GradFunctor<plat::bfloat16>>);
#define FOR_EACH_ACTIVATION_CUDA_OP(__macro) \
__macro(soft_relu, SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); \
__macro(softsign, Softsign, CudaSoftsignFunctor, CudaSoftsignGradFunctor);
FOR_EACH_ACTIVATION_CUDA_OP(REGISTER_ACTIVATION_CUDA_KERNEL)
PD_REGISTER_STRUCT_KERNEL(soft_relu,
GPU,
ALL_LAYOUT,
ops::SoftReluCudaKernel,
float,
double,
plat::float16,
plat::bfloat16) {}
PD_REGISTER_STRUCT_KERNEL(soft_relu_grad,
GPU,
ALL_LAYOUT,
ops::SoftReluGradCudaKernel,
float,
double,
plat::float16,
plat::bfloat16) {}
#ifdef PADDLE_WITH_XPU_KP
REGISTER_OP_KERNEL(
......
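As on the CPU side, the ops::SoftReluCudaKernel and ops::SoftReluGradCudaKernel classes registered above are produced by DEFINE_ACTIVATION_CUDA_KERNEL(SoftRelu, CudaSoftReluFunctor, CudaSoftReluGradFunctor); an expansion sketch of the classes it defines is below. The GPU registration then instantiates them for float, double, float16, and bfloat16.

    // Expansion sketch, inside namespace paddle::operators.
    template <typename T, typename DeviceContext>
    class SoftReluCudaKernel
        : public ActivationCudaKernel<DeviceContext, CudaSoftReluFunctor<T>> {};

    template <typename T, typename DeviceContext>
    class SoftReluGradCudaKernel
        : public ActivationGradCudaKernel<DeviceContext,
                                          CudaSoftReluGradFunctor<T>> {};
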
@@ -2443,6 +2443,9 @@ class TestSoftRelu(TestActivation):
self.attrs = {'threshold': threshold}
self.outputs = {'Out': out}
def test_check_output(self):
self.check_output(check_dygraph=False)
def test_check_grad(self):
if self.dtype == np.float16:
return
@@ -3856,6 +3859,7 @@ def create_test_act_fp16_class(
parent,
atol=1e-3,
grad_check=True,
check_dygraph=True,
check_prim=False,
enable_cinn=True,
grad_atol=0.80,
@@ -3875,7 +3879,10 @@ def create_test_act_fp16_class(
support_fp16 = core.is_float16_supported(place)
if support_fp16:
self.check_output_with_place(
place, atol=atol, check_prim=check_prim
place,
atol=atol,
check_dygraph=check_dygraph,
check_prim=check_prim,
)
def test_check_grad(self):
@@ -3886,6 +3893,7 @@ def create_test_act_fp16_class(
place,
['X'],
'Out',
check_dygraph=check_dygraph,
check_prim=check_prim,
max_relative_error=grad_atol,
)
@@ -3925,7 +3933,7 @@ create_test_act_fp16_class(TestRelu, check_prim=True)
create_test_act_fp16_class(TestGelu, check_prim=True, enable_cinn=False)
create_test_act_fp16_class(TestBRelu)
create_test_act_fp16_class(TestRelu6)
create_test_act_fp16_class(TestSoftRelu, grad_atol=0.85)
create_test_act_fp16_class(TestSoftRelu, check_dygraph=False, grad_atol=0.85)
create_test_act_fp16_class(TestELU)
create_test_act_fp16_class(TestCELU)
create_test_act_fp16_class(TestReciprocal)
......