From 123538d77859614d19120cc1f5bd3551b329e527 Mon Sep 17 00:00:00 2001 From: ysh329 Date: Fri, 21 Aug 2020 08:30:10 +0800 Subject: [PATCH] [KERNEL][OPENCL] Add hard sigmoid opencl kernel. test=develop (#4177) * [OPENCL] Add hard sigmoid opencl kernel. test=develop * optimize hard sigmoid. test=develop --- lite/api/paddle_place.cc | 6 +++- lite/api/paddle_place.h | 3 +- .../cl_kernel/image/activation_kernel.cl | 16 ++++++++++ .../opencl/activation_image_compute.cc | 25 ++++++++++++++- .../opencl/activation_image_compute_test.cc | 31 +++++++++++++++---- lite/operators/activation_ops.cc | 1 + 6 files changed, 73 insertions(+), 9 deletions(-) diff --git a/lite/api/paddle_place.cc b/lite/api/paddle_place.cc index 6cddbc23ed..e70c09e991 100644 --- a/lite/api/paddle_place.cc +++ b/lite/api/paddle_place.cc @@ -55,8 +55,12 @@ const std::string& ActivationTypeToStr(ActivationType act) { "Tanh", "Swish", "Exp", + "Abs", + "HardSwish", + "Reciprocal", "ThresholdedRelu", - "Elu"}; + "Elu", + "HardSigmoid"}; auto x = static_cast(act); CHECK_LT(x, static_cast(ActivationType::NUM)); return act2string[x]; diff --git a/lite/api/paddle_place.h b/lite/api/paddle_place.h index 0b53c82662..9ee4a5d2e7 100644 --- a/lite/api/paddle_place.h +++ b/lite/api/paddle_place.h @@ -109,7 +109,8 @@ enum class ActivationType : int { kReciprocal = 11, kThresholdedRelu = 12, kElu = 13, - NUM = 14, + kHardSigmoid = 14, + NUM = 15, }; static size_t PrecisionTypeLength(PrecisionType type) { diff --git a/lite/backends/opencl/cl_kernel/image/activation_kernel.cl b/lite/backends/opencl/cl_kernel/image/activation_kernel.cl index a4070f747a..f7387c2026 100644 --- a/lite/backends/opencl/cl_kernel/image/activation_kernel.cl +++ b/lite/backends/opencl/cl_kernel/image/activation_kernel.cl @@ -66,6 +66,22 @@ __kernel void sigmoid(__read_only image2d_t input, WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out); } +__kernel void hard_sigmoid(__read_only image2d_t input, + __write_only image2d_t output, + __private const float value_offset, + __private const float scale) { + const int x = get_global_id(0); // image_width + const int y = get_global_id(1); // image_height + + const sampler_t sampler = + CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + + CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, sampler, (int2)(x, y)); + CL_DTYPE4 out = clamp(in * scale + value_offset, 0.0, 1.0); + + WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out); +} + __kernel void leaky_relu(__read_only image2d_t input, __write_only image2d_t output, __private const float threshold, diff --git a/lite/kernels/opencl/activation_image_compute.cc b/lite/kernels/opencl/activation_image_compute.cc index 52a0e43a1e..92ace84f85 100644 --- a/lite/kernels/opencl/activation_image_compute.cc +++ b/lite/kernels/opencl/activation_image_compute.cc @@ -72,7 +72,12 @@ class ActivationComputeImageDefault case 8: kernel_func_name_ = "exp_act"; break; - default: + case 14: + kernel_func_name_ = "hard_sigmoid"; + scale_ = act_param_->hard_sigmoid_slope; + threshold_ = act_param_->hard_sigmoid_offset; + break; + defauln: LOG(FATAL) << "This act type:" << act_type << " doesn't support."; return; } @@ -316,3 +321,21 @@ REGISTER_LITE_KERNEL( PRECISION(kFP16), DATALAYOUT(kImageDefault))}) .Finalize(); + +// Hard Sigmoid +REGISTER_LITE_KERNEL( + hard_sigmoid, + kOpenCL, + kFP16, + kImageDefault, + paddle::lite::kernels::opencl::ActivationComputeImageDefault, + ImageDefault) + .BindInput("X", + {LiteType::GetTensorTy(TARGET(kOpenCL), + PRECISION(kFP16), + DATALAYOUT(kImageDefault))}) + .BindOutput("Out", + {LiteType::GetTensorTy(TARGET(kOpenCL), + PRECISION(kFP16), + DATALAYOUT(kImageDefault))}) + .Finalize(); diff --git a/lite/kernels/opencl/activation_image_compute_test.cc b/lite/kernels/opencl/activation_image_compute_test.cc index ad0d83a8e3..ea174b6801 100644 --- a/lite/kernels/opencl/activation_image_compute_test.cc +++ b/lite/kernels/opencl/activation_image_compute_test.cc @@ -57,13 +57,22 @@ void act_compute_ref(const dtype *x_data, case 8: // exp out_data[i] = expf(x_data[i]); break; + case 14: // hard sigmoid + // scale ==> slope + { + float tmp = x_data[i] * scale + threshold; + tmp = tmp < 1.0f ? tmp : 1.0f; + tmp = tmp > 0.0f ? tmp : 0.0f; + out_data[i] = tmp; + break; + } default: break; } } } -// #define ACT_FP16_LOOP_TEST +// #define ACT_FP16_LOOP_TEST // #define ACT_FP16_PRINT_RESULT TEST(act_image2d_fp16, compute) { LOG(INFO) << "main steps of test: host -> layout(buf2img) -> relu(img) -> " @@ -75,7 +84,7 @@ TEST(act_image2d_fp16, compute) { for (auto c : {1, 3, 8, 23, 32}) { for (int h = 12; h <= 100; h += 13) { for (int w = 12; w <= 100; w += 25) { - for (auto act_type : {1, 2, 4, 5, 6, 7, 8}) { + for (auto act_type : {1, 2, 4, 5, 6, 7, 8, 14}) { for (auto scale : {0.5, 0.8}) { for (auto threshold : {6.0}) { #else @@ -83,9 +92,9 @@ TEST(act_image2d_fp16, compute) { const int c = 2; const int h = 3; const int w = 4; - const int act_type = 4; - const float scale = 0.5f; - const float threshold = 6.f; + const int act_type = 14; + const float scale = 2.0f; + const float threshold = 1.0f; #endif // ACT_FP16_LOOP_TEST @@ -117,6 +126,9 @@ TEST(act_image2d_fp16, compute) { case 8: // tanh func_name = "exp"; break; + case 14: // hard sigmoid + func_name = "hard_sigmoid"; + break; } LOG(INFO) << "func_name: " << func_name; // set layout kernels @@ -166,6 +178,9 @@ TEST(act_image2d_fp16, compute) { actParam.Relu_clipped_coef = threshold; actParam.Leaky_relu_alpha = scale; actParam.Swish_beta = scale; + // hard sigmoid + actParam.hard_sigmoid_slope = scale; + actParam.hard_sigmoid_offset = threshold; const DDim x_dim = DDim(std::vector{n, c, h, w}); @@ -191,7 +206,8 @@ TEST(act_image2d_fp16, compute) { std::default_random_engine engine; std::uniform_real_distribution dist(-1, 1); for (int i = 0; i < x_dim.production(); ++i) { - mapped_x[i] = dist(engine); + mapped_x[i] = + (i - x_dim.production() / 2) / 10.; // dist(engine); mapped_y[i] = 0.0f; } auto *act_in_data = act_in.mutable_data( @@ -316,3 +332,6 @@ USE_LITE_KERNEL(relu6, kOpenCL, kFP16, kImageDefault, ImageDefault); // sigmoid image2d fp16 USE_LITE_KERNEL(sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault); + +// hard_sigmoid image2d fp16 +USE_LITE_KERNEL(hard_sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault); diff --git a/lite/operators/activation_ops.cc b/lite/operators/activation_ops.cc index c519016aa8..9b20f4348b 100644 --- a/lite/operators/activation_ops.cc +++ b/lite/operators/activation_ops.cc @@ -60,6 +60,7 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) { param_.active_type = lite_api::ActivationType::kSwish; } else if (opdesc.Type() == "hard_sigmoid") { // hard_sigomid + param_.active_type = lite_api::ActivationType::kHardSigmoid; param_.hard_sigmoid_slope = opdesc.GetAttr("slope"); param_.hard_sigmoid_offset = opdesc.GetAttr("offset"); } else if (opdesc.Type() == "sigmoid") { -- GitLab