update some cmake config.

53a3f12d · Zhen Wang · ZhenWang · 40452e72 · 53a3f12d · 53a3f12d
9 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -150,7 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
 option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
 option(LITE_WITH_X86  "Enable X86 in lite mode"  ON)
 option(LITE_WITH_ARM  "Enable ARM in lite mode"  OFF)
-option(LITE_WITH_CL   "Enable OpenCL support in lite" OFF)
+option(LITE_WITH_OPENCL   "Enable OpenCL support in lite" OFF)
 option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK  "Enable light-weight framework" OFF)
 option(LITE_WITH_PROFILE  "Enable profile mode in lite framework"  OFF)
@@ -167,12 +167,6 @@ endif()
 include_directories("${PADDLE_SOURCE_DIR}")
-# for opencl
-if (LITE_WITH_CL)
-    include(external/opencl-headers)
-    include(external/opencl-clhpp)
-endif()
 # for mobile
 if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    message(STATUS "Building the mobile framework")
@@ -188,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
    include(external/eigen)     # download eigen3
    include(ccache)             # set ccache for compilation
+    # for opencl
+    if (LITE_WITH_OPENCL)
+        include(external/opencl-headers)
+        include(external/opencl-clhpp)
+    endif()
    include(generic)            # simplify cmake module
    include(configure)          # add paddle env configuration

--- a/cmake/configure.cmake
+++ b/cmake/configure.cmake
@@ -176,8 +176,8 @@ if (LITE_WITH_ARM)
    add_definitions("-DLITE_WITH_ARM")
 endif()
-if (LITE_WITH_CL)
+if (LITE_WITH_OPENCL)
-    add_definitions("-DLITE_WITH_CL")
+    add_definitions("-DLITE_WITH_OPENCL")
 endif()
 if (LITE_WITH_PROFILE)

--- a/cmake/external/opencl-clhpp.cmake
+++ b/cmake/external/opencl-clhpp.cmake
@@ -29,6 +29,8 @@ ExternalProject_Add(
                    -DBUILD_EXAMPLES=OFF
                    -DBUILD_TESTS=OFF
                    -DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR}
+  CMAKE_CACHE_ARGS  -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR}
+                    -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
 )
 ADD_DEPENDENCIES(opencl_clhpp opencl_headers)
--- a/paddle/fluid/lite/opencl/CMakeLists.txt
+++ b/paddle/fluid/lite/opencl/CMakeLists.txt
-if (NOT LITE_WITH_CL)
+if (NOT LITE_WITH_OPENCL)
    return()
 endif()
@@ -6,13 +6,13 @@ find_library(opencl-lib
        NAMES OpenCL)
 message(STATUS "The OpenCL library path : ${opencl-lib}")
-add_compile_options(-fno-strict-aliasing -Wno-ignored-qualifiers)
 cc_library(cl_tool SRCS cl_tool.cc)
+target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
 cc_library(cl_half SRCS cl_half.cc)
+target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
 cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
 cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
-cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
+cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context proto_desc)
 cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
 cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
 cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context)

--- a/paddle/fluid/lite/opencl/cl_engine.cc
+++ b/paddle/fluid/lite/opencl/cl_engine.cc
@@ -152,13 +152,12 @@ bool CLEngine::InitializeDevice() {
    return false;
  }
  auto ext_data = device_->getInfo<CL_DEVICE_EXTENSIONS>();
+  LOG(INFO) << "The extensions supported by this device: " << ext_data;
  if (ext_data.find("cl_khr_fp16") != std::string::npos) {
    LOG(INFO) << "The chosen device supports the half data type.";
  } else {
-    LOG(INFO) << "The chosen device doesn't support the half data type!";
+    LOG(ERROR) << "The chosen device doesn't support the half data type!";
-    LOG(INFO) << "The extensions supported by this device: " << ext_data;
+    return false;
-    // LOG(ERROR) << "The chosen platform doesn't support the half data type!";
-    // return false;
  }
  auto max_units = device_->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
  LOG(INFO) << "The chosen device has " << max_units << " compute units.";

--- a/paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
+++ b/paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-    http://www.apache.org/licenses/LICENSE-2.0
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-__kernel void batchnorm(__private const int out_width,
-                        __read_only image2d_t input,
-                        __read_only image2d_t new_scale_image,
-                        __read_only image2d_t new_bias_image,
-                        __write_only image2d_t output) {
-  const int out_c = get_global_id(0);
-  const int out_w = get_global_id(1);
-  const int out_nh = get_global_id(2);
-  const sampler_t sampler =
-      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
-  float4 new_scale = read_imagef(new_scale_image, sampler, (int2)(out_c, 0));
-  float4 new_bias = read_imagef(new_bias_image, sampler, (int2)(out_c, 0));
-  int pos_x = mad24(out_c, out_width, out_w);
-  float4 in = read_imagef(input, sampler, (int2)(pos_x, out_nh));
-  float4 out = mad(in, new_scale, new_bias);
-  write_imagef(output, (int2)(pos_x, out_nh), out);
-}
--- a/paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
+++ b/paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
@@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
-__kernel void elementwise_add(__read_only image2d_t input, __read_only image2d_t bias, __write_only image2d_t outputImage) {
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+__kernel void elementwise_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage) {
     int x = get_global_id(0);
     int y = get_global_id(1);
     const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
     int2 coords;
     coords.x = x;
     coords.y = y;
-    float4 in = read_imagef(input, sampler, coords);
+     half4 in = read_imageh(input, sampler, coords);
-    float4 b = read_imagef(bias, sampler, coords);
+     half4 biase = read_imageh(bias, sampler, coords);
-    float4 output = in + b;
+     half4 output = in + biase;
-    write_imagef(outputImage, coords, output);
+     write_imageh(outputImage,coords,output);
-}
+ }
--- a/paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
+++ b/paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma OPENCL EXTENSION cl_khr_fp16 : enable
+#define MIN_VALUE -FLT_MAX
+__kernel void pool_max(
+    __private const int in_height, __private const int in_width,
+    __private const int out_height, __private const int out_width,
+    __private const int pad_top, __private const int pad_left,
+    __private const int stride_h, __private const int stride_w,
+    __private const int ksize_h, __private const int ksize_w,
+    __read_only image2d_t input, __write_only image2d_t output) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+  const int out_n = out_nh / out_height;
+  const int out_h = out_nh % out_height;
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  int start_h = out_h * stride_h - pad_top;
+  int end_h = min(start_h + ksize_h, in_height);
+  start_h = max(start_h,0);
+  int start_w = out_w * stride_w - pad_left;
+  int end_w = min(start_w + ksize_w, in_width);
+  start_w = max(start_w,0);
+  const int pos_in_x = out_c * in_width;
+  const int pos_in_y = out_n * in_height;
+  half4 max_value = (half4)(MIN_VALUE);
+  for (int y = start_h; y < end_h; ++y) {
+    for (int x = start_w; x < end_w; ++x) {
+      half4 tmp = read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
+      max_value = max(max_value, tmp);
+    }
+  }
+  const int pos_out_x = mad24(out_c, out_width, out_w);
+  write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
+}
+__kernel void pool_avg(
+    __private const int in_height, __private const int in_width,
+    __private const int out_height, __private const int out_width,
+    __private const int pad_top, __private const int pad_left,
+    __private const int stride_h, __private const int stride_w,
+    __private const int ksize_h, __private const int ksize_w,
+    __read_only image2d_t input, __write_only image2d_t output) {
+  const int out_c = get_global_id(0);
+  const int out_w = get_global_id(1);
+  const int out_nh = get_global_id(2);
+  const int out_n = out_nh / out_height;
+  const int out_h = out_nh % out_height;
+  const sampler_t sampler =
+      CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
+  int start_h = max(out_h * stride_h - pad_top, 0);
+  int end_h = min(start_h + ksize_h, in_height);
+  int start_w = max(out_w * stride_w - pad_left, 0);
+  int end_w = min(start_w + ksize_w, in_width);
+  const int pos_in_x = out_c * in_width;
+  const int pos_in_y = out_n * in_height;
+  half4 sum = (half4)(0.0f);
+  int num = 0;
+  for (int y = start_h; y < end_h; ++y) {
+    for (int x = start_w; x < end_w; ++x) {
+      sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
+      num++;
+    }
+  }
+  half4 avg = sum / num;
+  const int pos_out_x = mad24(out_c, out_width, out_w);
+  write_imageh(output, (int2)(pos_out_x, out_nh), avg);
+}
--- a/paddle/fluid/lite/tools/build.sh
+++ b/paddle/fluid/lite/tools/build.sh
@@ -25,9 +25,18 @@ function cmake_x86 {
    cmake ..  -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
 }
-function cmake_cl {
+function cmake_opencl {
-    prepare_for_codegen
+    cmake .. \
-    cmake ..  -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON -DLITE_WITH_CL=ON ${common_flags}
+        -DLITE_WITH_OPENCL=ON \
+        -DWITH_GPU=OFF \
+        -DWITH_MKL=OFF \
+        -DWITH_LITE=ON \
+        -DLITE_WITH_CUDA=OFF \
+        -DLITE_WITH_X86=OFF \
+        -DLITE_WITH_ARM=ON \
+        -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
+        -DWITH_TESTING=ON \
+        -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
 }
@@ -428,8 +437,8 @@ function main {
                cmake_x86
                shift
                ;;
-            cmake_cl)
+            cmake_opencl)
-                cmake_cl
+                cmake_opencl
                shift
                ;;
            cmake_cuda)