提交 452900fd 编写于 作者: J Jiaying Zhao 提交者: GitHub

add opencl Scale op (#1645)

* fix memcpy size in opencl fetch kernel

* add opencl Scale op
上级 6b32f9ad
......@@ -46,6 +46,7 @@ const char *G_OP_TYPE_RELU6 = "relu6";
const char *G_OP_TYPE_LEAKY_RELU = "leaky_relu";
const char *G_OP_TYPE_RESHAPE = "reshape";
const char *G_OP_TYPE_RESHAPE2 = "reshape2";
const char *G_OP_TYPE_SCALE = "scale";
const char *G_OP_TYPE_SIGMOID = "sigmoid";
const char *G_OP_TYPE_SOFTMAX = "softmax";
const char *G_OP_TYPE_TRANSPOSE = "transpose";
......@@ -128,6 +129,7 @@ std::unordered_map<
{G_OP_TYPE_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_RELU6, {{"X"}, {"Out"}}},
{G_OP_TYPE_LEAKY_RELU, {{"X"}, {"Out"}}},
{G_OP_TYPE_SCALE, {{"X"}, {"Out"}}},
{G_OP_TYPE_SOFTMAX, {{"X"}, {"Out"}}},
{G_OP_TYPE_SIGMOID, {{"X"}, {"Out"}}},
{G_OP_TYPE_MUL, {{"X"}, {"Out"}}},
......
......@@ -144,6 +144,7 @@ extern const char *G_OP_TYPE_RELU;
extern const char *G_OP_TYPE_RELU6;
extern const char *G_OP_TYPE_LEAKY_RELU;
extern const char *G_OP_TYPE_RESHAPE;
extern const char *G_OP_TYPE_SCALE;
extern const char *G_OP_TYPE_SIGMOID;
extern const char *G_OP_TYPE_SOFTMAX;
extern const char *G_OP_TYPE_TRANSPOSE;
......
......@@ -142,7 +142,7 @@ LOAD_OP1(depthwise_conv2d, CPU);
LOAD_OP1(conv2d_transpose, CPU);
#endif
#ifdef SCALE_OP
LOAD_OP1(scale, CPU);
LOAD_OP2(scale, CPU, GPU_CL);
#endif
#ifdef ELEMENTWISEADD_OP
LOAD_OP2(elementwise_add, CPU, GPU_CL);
......
......@@ -22,6 +22,11 @@ limitations under the License. */
namespace paddle_mobile {
namespace operators {
// CPU scale kernel requires no one-time setup; all work happens in Compute.
template <>
bool ScaleKernel<CPU, float>::Init(ScaleParam<CPU> *param) {
  return true;  // nothing to initialize
}
template <>
void ScaleKernel<CPU, float>::Compute(const ScaleParam<CPU> &param) {
const auto input = param.InputX();
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
// Element-wise affine transform: out = scale * in + bias.
// Each work-item processes one half4 pixel; the global work size comes from
// CLHelper::DefaultWorkSize on the output image (presumably
// (channel_blocks, width, batch*height) — confirm against CLHelper).
__kernel void scale(__read_only image2d_t input,
                    __write_only image2d_t output,
                    __private float scale,
                    __private float bias,
                    // int, not float: the host sets this argument with
                    // clSetKernelArg(..., sizeof(int), &out_width), and
                    // mad24 below requires integer operands.
                    __private int out_width) {
  const int out_c = get_global_id(0);   // channel-block index
  const int out_w = get_global_id(1);   // width index
  const int out_nh = get_global_id(2);  // fused batch*height index

  // Integer-coordinate reads require unnormalized coordinates per the
  // OpenCL spec (read_imageh with int2 + CLK_NORMALIZED_COORDS_TRUE is
  // undefined), so use CLK_NORMALIZED_COORDS_FALSE.
  const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
                            CLK_ADDRESS_CLAMP |
                            CLK_FILTER_NEAREST;

  // x pixel position within the image layout: out_c * out_width + out_w.
  const int pos_x = mad24(out_c, out_width, out_w);

  half4 in = read_imageh(input, sampler, (int2)(pos_x, out_nh));
  // Explicit half casts keep the arithmetic in half precision, matching
  // the original implicit float->half conversion.
  in = (half)scale * in + (half)bias;
  write_imageh(output, (int2)(pos_x, out_nh), in);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SCALE_OP
#include "operators/kernel/scale_kernel.h"
namespace paddle_mobile {
namespace operators {
// Compiles/caches the "scale" OpenCL kernel so Compute can retrieve it via
// KernelAt(0). Always reports success — build failures are presumably
// surfaced inside CLHelper::AddKernel; TODO confirm.
template <>
bool ScaleKernel<GPU_CL, float>::Init(ScaleParam<GPU_CL>* param) {
  this->cl_helper_.AddKernel("scale", "scale_kernel.cl");
  return true;
}
// Launches the "scale" OpenCL kernel, computing out = scale * in + bias
// element-wise over the image-backed input/output tensors.
template <>
void ScaleKernel<GPU_CL, float>::Compute(const ScaleParam<GPU_CL>& param) {
  auto kernel = this->cl_helper_.KernelAt(0);  // registered in Init
  const auto* input = param.InputX();
  auto* output = param.Out();
  const float scale = param.Scale();
  const float bias = param.Bias();
  // Global work size derived from the output image dimensions.
  auto default_work_size = this->cl_helper_.DefaultWorkSize(*output);
  auto inputImage = input->GetCLImage();
  auto outputImage = output->GetCLImage();
  // dims()[3] — presumably NCHW, so this is the tensor width; the kernel
  // uses it to compute the x pixel position (c * out_width + w). Confirm.
  int out_width = output->dims()[3];
  clSetKernelArg(kernel, 0, sizeof(cl_mem), &inputImage);
  clSetKernelArg(kernel, 1, sizeof(cl_mem), &outputImage);
  clSetKernelArg(kernel, 2, sizeof(float), &scale);
  clSetKernelArg(kernel, 3, sizeof(float), &bias);
  // Set with sizeof(int): the kernel-side parameter must be declared int
  // for this to match.
  clSetKernelArg(kernel, 4, sizeof(int), &out_width);
  // NOTE(review): the cl_int return codes of clSetKernelArg and
  // clEnqueueNDRangeKernel are ignored here — consider checking them so
  // launch failures are not silent.
  clEnqueueNDRangeKernel(this->cl_helper_.CLCommandQueue(), kernel,
                         default_work_size.size(), NULL,
                         default_work_size.data(), NULL, 0, NULL, NULL);
}
template class ScaleKernel<GPU_CL, float>;
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#ifdef SCALE_OP
#include "framework/operator.h"
#include "operators/op_param.h"
......@@ -25,6 +27,9 @@ class ScaleKernel
: public framework::OpKernelBase<DeviceType, ScaleParam<DeviceType>> {
public:
void Compute(const ScaleParam<DeviceType>& param);
bool Init(ScaleParam<DeviceType>* param);
};
} // namespace operators
} // namespace paddle_mobile
#endif
......@@ -32,5 +32,7 @@ namespace ops = paddle_mobile::operators;
#ifdef PADDLE_MOBILE_CPU
REGISTER_OPERATOR_CPU(scale, ops::ScaleOp);
#endif
#ifdef PADDLE_MOBILE_CL
REGISTER_OPERATOR_CL(scale, ops::ScaleOp);
#endif
#endif
......@@ -43,6 +43,7 @@ static const char *g_inceptionv3 =
"../models/InceptionV3_Spatial_Attention_Model";
static const char *g_nlp = "../models/nlp";
static const char *g_super = "../models/superresoltion";
static const char *g_superv2 = "../models/superv2";
static const char *g_resnet_50 = "../models/resnet_50";
static const char *g_resnet = "../models/resnet";
static const char *g_googlenet_combine = "../models/googlenet_combine";
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册