Merge branch 'opencl' into opencl

7f4daf16 · Ray Liu · GitHub · 0d62d783 · 27747953 · 7f4daf16
4 changed files
--- a/src/framework/cl/cl_image.h
+++ b/src/framework/cl/cl_image.h
@@ -124,18 +124,7 @@ class CLImage {
    tensor_dims_ = dim;
    if (tensor_data) {
      tensor_data_ = tensor_data;
-    } else {
-      int numel = 1;
-      for (int i = 0; i < dim.size(); i++) {
-        numel *= dim[i];
-      }
-      tensor_data_ = static_cast<float *>(
-          paddle_mobile::memory::Alloc(sizeof(float) * numel));
-      for (int i = 0; i < numel; i++) {
-        tensor_data_[i] = 0;
-      }
    }
    size_t new_dims[] = {1, 1, 1, 1};
    for (int j = 0; j < dim.size(); ++j) {

--- a/src/operators/kernel/cl/cl_kernel/relu.cl
+++ b/src/operators/kernel/cl/cl_kernel/relu.cl
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
 #pragma OPENCL EXTENSION cl_khr_fp16 : enable
 __kernel void relu(__read_only image2d_t input,
@@ -12,6 +24,6 @@ __kernel void relu(__read_only image2d_t input,
                            CLK_FILTER_NEAREST;
  half4 in = read_imageh(input, sampler, (int2)(x, y));
-  in = max((half4)(0.0), in);
+  in = max((half4)(0.0f,0.0f,0.0f,0.0f), in);
  write_imageh(output, (int2)(x, y), in);
 }
\ No newline at end of file
--- a/src/operators/kernel/cl/cl_kernel/reshape.cl
+++ b/src/operators/kernel/cl/cl_kernel/reshape.cl
@@ -12,6 +12,8 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
 __kernel void reshape(__read_only image2d_t input,
                      __write_only image2d_t output,
                      __private const int d0,
@@ -36,14 +38,14 @@ __kernel void reshape(__read_only image2d_t input,
    int t = obx * 4 + i;
    if (t > x1) break;
    int oindex = oby * x1 * x2 * x3 + t * x2 * x3 + ox * x3 + oy;
-    int i0, i1, i2, i3;
    int i3 = oindex % d3; oindex /= d3;
    int i2 = oindex % d2; oindex /= d2;
    int i1 = oindex % d1; oindex /= d1;
    int i0 = oindex;
    int ix = (i1 / 4) * d3 + i3;
    int iy = i0 * d2 + i2;
-    r[i] = read_imageh(input, sampler, (int2)(ix, iy))[i1%4];
+    half4 p = read_imageh(input, sampler, (int2)(ix, iy));
+    ((half*)&r)[i] = ((half*)&p)[i1%4];
  }
  write_imageh(output, (int2)(x, y), r);
 }
\ No newline at end of file
--- a/src/operators/kernel/cl/feed_kernel.cpp
+++ b/src/operators/kernel/cl/feed_kernel.cpp
@@ -61,7 +61,7 @@ void FeedKernel<GPU_CL, float>::Compute(const FeedParam<GPU_CL> &param) {
  size_t region[3] = {height, width, 1};
  clEnqueueReadImage(commandQueue, cl_image, CL_TRUE, origin, region, 0, 0, out,
                     0, NULL, NULL);
-  //  for (int i = 0; i < numel; i++) DLOG << Half2Float(out[i]);
 }
 template class FeedKernel<GPU_CL, float>;