Unverified commit 123538d7, authored by ysh329, committed by GitHub

[KERNEL][OPENCL] Add hard sigmoid opencl kernel. test=develop (#4177)

* [OPENCL] Add hard sigmoid opencl kernel. test=develop

* optimize hard sigmoid. test=develop
Parent 9bf15fef
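For context, hard sigmoid is the piecewise-linear approximation of the sigmoid, y = clamp(slope * x + offset, 0, 1), applied elementwise; the OpenCL kernel added below receives slope and offset as runtime arguments. A minimal host-side sketch of that formula (the function name hard_sigmoid_ref and its signature are illustrative, not part of this commit):

#include <algorithm>
#include <cstddef>

// y[i] = clamp(slope * x[i] + offset, 0, 1) for every element.
void hard_sigmoid_ref(const float *x, float *y, std::size_t n,
                      float slope, float offset) {
  for (std::size_t i = 0; i < n; ++i) {
    y[i] = std::min(1.0f, std::max(0.0f, slope * x[i] + offset));
  }
}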
......@@ -55,8 +55,12 @@ const std::string& ActivationTypeToStr(ActivationType act) {
"Tanh",
"Swish",
"Exp",
"Abs",
"HardSwish",
"Reciprocal",
"ThresholdedRelu",
"Elu"};
"Elu",
"HardSigmoid"};
auto x = static_cast<int>(act);
CHECK_LT(x, static_cast<int>(ActivationType::NUM));
return act2string[x];
......
......@@ -109,7 +109,8 @@ enum class ActivationType : int {
kReciprocal = 11,
kThresholdedRelu = 12,
kElu = 13,
NUM = 14,
kHardSigmoid = 14,
NUM = 15,
};
static size_t PrecisionTypeLength(PrecisionType type) {
......
......@@ -66,6 +66,22 @@ __kernel void sigmoid(__read_only image2d_t input,
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out);
}
__kernel void hard_sigmoid(__read_only image2d_t input,
__write_only image2d_t output,
__private const float value_offset,
__private const float scale) {
const int x = get_global_id(0); // image_width
const int y = get_global_id(1); // image_height
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
CL_DTYPE4 in = READ_IMG_TYPE(CL_DTYPE_CHAR, input, sampler, (int2)(x, y));
// out = clamp(scale * x + offset, 0, 1); cast the float arguments to the image
// element type so the expression is valid for both FP16 and FP32 builds.
CL_DTYPE4 out = clamp(in * (CL_DTYPE4)(scale) + (CL_DTYPE4)(value_offset),
                      (CL_DTYPE4)(0.0f), (CL_DTYPE4)(1.0f));
WRITE_IMG_TYPE(CL_DTYPE_CHAR, output, (int2)(x, y), out);
}
__kernel void leaky_relu(__read_only image2d_t input,
__write_only image2d_t output,
__private const float threshold,
......
......@@ -72,7 +72,12 @@ class ActivationComputeImageDefault
case 8:
kernel_func_name_ = "exp_act";
break;
default:
case 14:
kernel_func_name_ = "hard_sigmoid";
scale_ = act_param_->hard_sigmoid_slope;
threshold_ = act_param_->hard_sigmoid_offset;
break;
default:
LOG(FATAL) << "This act type: " << act_type << " is not supported.";
return;
}
......@@ -316,3 +321,21 @@ REGISTER_LITE_KERNEL(
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.Finalize();
// Hard Sigmoid
REGISTER_LITE_KERNEL(
hard_sigmoid,
kOpenCL,
kFP16,
kImageDefault,
paddle::lite::kernels::opencl::ActivationComputeImageDefault,
ImageDefault)
.BindInput("X",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.BindOutput("Out",
{LiteType::GetTensorTy(TARGET(kOpenCL),
PRECISION(kFP16),
DATALAYOUT(kImageDefault))})
.Finalize();
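Targets that link the OpenCL kernels statically pull this new registration in with the matching USE_LITE_KERNEL declaration, exactly as the updated unit test does further below:

USE_LITE_KERNEL(hard_sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);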
......@@ -57,6 +57,15 @@ void act_compute_ref(const dtype *x_data,
case 8: // exp
out_data[i] = expf(x_data[i]);
break;
case 14: // hard sigmoid
// for hard_sigmoid, "scale" acts as the slope and "threshold" as the offset
{
float tmp = x_data[i] * scale + threshold;
tmp = tmp < 1.0f ? tmp : 1.0f;
tmp = tmp > 0.0f ? tmp : 0.0f;
out_data[i] = tmp;
break;
}
default:
break;
}
......@@ -75,7 +84,7 @@ TEST(act_image2d_fp16, compute) {
for (auto c : {1, 3, 8, 23, 32}) {
for (int h = 12; h <= 100; h += 13) {
for (int w = 12; w <= 100; w += 25) {
for (auto act_type : {1, 2, 4, 5, 6, 7, 8}) {
for (auto act_type : {1, 2, 4, 5, 6, 7, 8, 14}) {
for (auto scale : {0.5, 0.8}) {
for (auto threshold : {6.0}) {
#else
......@@ -83,9 +92,9 @@ TEST(act_image2d_fp16, compute) {
const int c = 2;
const int h = 3;
const int w = 4;
const int act_type = 4;
const float scale = 0.5f;
const float threshold = 6.f;
const int act_type = 14;
const float scale = 2.0f;
const float threshold = 1.0f;
#endif // ACT_FP16_LOOP_TEST
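With these non-loop constants (act_type 14, slope = scale = 2.0, offset = threshold = 1.0), the reference result for an illustrative input of x = -0.3, which occurs in the deterministic ramp written into mapped_x below, works out to:

hard_sigmoid(-0.3) = clamp(2.0 * (-0.3) + 1.0, 0, 1) = clamp(0.4, 0, 1) = 0.4
hard_sigmoid( 0.3) = clamp(2.0 * ( 0.3) + 1.0, 0, 1) = clamp(1.6, 0, 1) = 1.0  (saturated)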
......@@ -117,6 +126,9 @@ TEST(act_image2d_fp16, compute) {
case 8:  // exp
func_name = "exp";
break;
case 14: // hard sigmoid
func_name = "hard_sigmoid";
break;
}
LOG(INFO) << "func_name: " << func_name;
// set layout kernels
......@@ -166,6 +178,9 @@ TEST(act_image2d_fp16, compute) {
actParam.Relu_clipped_coef = threshold;
actParam.Leaky_relu_alpha = scale;
actParam.Swish_beta = scale;
// hard sigmoid
actParam.hard_sigmoid_slope = scale;
actParam.hard_sigmoid_offset = threshold;
const DDim x_dim =
DDim(std::vector<DDim::value_type>{n, c, h, w});
......@@ -191,7 +206,8 @@ TEST(act_image2d_fp16, compute) {
std::default_random_engine engine;
std::uniform_real_distribution<float> dist(-1, 1);
for (int i = 0; i < x_dim.production(); ++i) {
mapped_x[i] = dist(engine);
mapped_x[i] =
(i - x_dim.production() / 2) / 10.; // dist(engine);
mapped_y[i] = 0.0f;
}
auto *act_in_data = act_in.mutable_data<half_t, cl::Image2D>(
......@@ -316,3 +332,6 @@ USE_LITE_KERNEL(relu6, kOpenCL, kFP16, kImageDefault, ImageDefault);
// sigmoid image2d fp16
USE_LITE_KERNEL(sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);
// hard_sigmoid image2d fp16
USE_LITE_KERNEL(hard_sigmoid, kOpenCL, kFP16, kImageDefault, ImageDefault);
......@@ -60,6 +60,7 @@ bool ActivationOp::AttachImpl(const cpp::OpDesc& opdesc, lite::Scope* scope) {
param_.active_type = lite_api::ActivationType::kSwish;
} else if (opdesc.Type() == "hard_sigmoid") {
// hard_sigmoid
param_.active_type = lite_api::ActivationType::kHardSigmoid;
param_.hard_sigmoid_slope = opdesc.GetAttr<float>("slope");
param_.hard_sigmoid_offset = opdesc.GetAttr<float>("offset");
} else if (opdesc.Type() == "sigmoid") {
......