diff --git a/CMakeLists.txt b/CMakeLists.txt index 09e8d542a04ca528f278e4d490d85abc4850daee..59f565014b59f1393243a892f81f2069edd6eb9e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,7 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF) option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF) option(LITE_WITH_X86 "Enable X86 in lite mode" ON) option(LITE_WITH_ARM "Enable ARM in lite mode" OFF) -option(LITE_WITH_CL "Enable OpenCL support in lite" OFF) +option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF) option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF) option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF) @@ -167,12 +167,6 @@ endif() include_directories("${PADDLE_SOURCE_DIR}") -# for opencl -if (LITE_WITH_CL) - include(external/opencl-headers) - include(external/opencl-clhpp) -endif() - # for mobile if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) message(STATUS "Building the mobile framework") @@ -188,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK) include(external/eigen) # download eigen3 include(ccache) # set ccache for compilation + # for opencl + if (LITE_WITH_OPENCL) + include(external/opencl-headers) + include(external/opencl-clhpp) + endif() + include(generic) # simplify cmake module include(configure) # add paddle env configuration diff --git a/cmake/configure.cmake b/cmake/configure.cmake index 707d859f9fccb886e31e92beaca19ced64da6e4e..95ae0be6384855256644eacb09369a004f999c51 100644 --- a/cmake/configure.cmake +++ b/cmake/configure.cmake @@ -176,8 +176,8 @@ if (LITE_WITH_ARM) add_definitions("-DLITE_WITH_ARM") endif() -if (LITE_WITH_CL) - add_definitions("-DLITE_WITH_CL") +if (LITE_WITH_OPENCL) + add_definitions("-DLITE_WITH_OPENCL") endif() if (LITE_WITH_PROFILE) diff --git a/cmake/external/opencl-clhpp.cmake b/cmake/external/opencl-clhpp.cmake index 0d10e09da4f5039155ff3ba70fb44401d2a6755e..ea724860d9b40ab5669975cebc6d5e1d7b662fb4 100644 --- a/cmake/external/opencl-clhpp.cmake +++ b/cmake/external/opencl-clhpp.cmake @@ -29,6 +29,8 @@ ExternalProject_Add( -DBUILD_EXAMPLES=OFF -DBUILD_TESTS=OFF -DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR} + CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR} + -DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE} ) ADD_DEPENDENCIES(opencl_clhpp opencl_headers) diff --git a/paddle/fluid/lite/opencl/CMakeLists.txt b/paddle/fluid/lite/opencl/CMakeLists.txt index 740d51128d52fdecab49ab561159950531d9f0ea..309f2a305888f594cd6b95bd3eaafda6e1bc1a78 100644 --- a/paddle/fluid/lite/opencl/CMakeLists.txt +++ b/paddle/fluid/lite/opencl/CMakeLists.txt @@ -1,4 +1,4 @@ -if (NOT LITE_WITH_CL) +if (NOT LITE_WITH_OPENCL) return() endif() @@ -6,13 +6,13 @@ find_library(opencl-lib NAMES OpenCL) message(STATUS "The OpenCL library path : ${opencl-lib}") -add_compile_options(-fno-strict-aliasing -Wno-ignored-qualifiers) - cc_library(cl_tool SRCS cl_tool.cc) +target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers) cc_library(cl_half SRCS cl_half.cc) +target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing) cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool) cc_library(cl_context SRCS cl_context.cc DEPS cl_engine) -cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context) +cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context proto_desc) cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor) cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine) cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context) diff --git a/paddle/fluid/lite/opencl/cl_engine.cc b/paddle/fluid/lite/opencl/cl_engine.cc index 6a2cf674f9e5e70dac309d8e52caf436cf93bcff..97301ecba281077a23653e8ceac1d6e6e3907aa3 100644 --- a/paddle/fluid/lite/opencl/cl_engine.cc +++ b/paddle/fluid/lite/opencl/cl_engine.cc @@ -152,13 +152,12 @@ bool CLEngine::InitializeDevice() { return false; } auto ext_data = device_->getInfo(); + LOG(INFO) << "The extensions supported by this device: " << ext_data; if (ext_data.find("cl_khr_fp16") != std::string::npos) { LOG(INFO) << "The chosen device supports the half data type."; } else { - LOG(INFO) << "The chosen device doesn't support the half data type!"; - LOG(INFO) << "The extensions supported by this device: " << ext_data; - // LOG(ERROR) << "The chosen platform doesn't support the half data type!"; - // return false; + LOG(ERROR) << "The chosen device doesn't support the half data type!"; + return false; } auto max_units = device_->getInfo(); LOG(INFO) << "The chosen device has " << max_units << " compute units."; diff --git a/paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl b/paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl deleted file mode 100644 index 5453e9c451bb36b0aad5d52f93849d10b4273003..0000000000000000000000000000000000000000 --- a/paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl +++ /dev/null @@ -1,35 +0,0 @@ -/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -__kernel void batchnorm(__private const int out_width, - __read_only image2d_t input, - __read_only image2d_t new_scale_image, - __read_only image2d_t new_bias_image, - __write_only image2d_t output) { - const int out_c = get_global_id(0); - const int out_w = get_global_id(1); - const int out_nh = get_global_id(2); - - const sampler_t sampler = - CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; - - float4 new_scale = read_imagef(new_scale_image, sampler, (int2)(out_c, 0)); - float4 new_bias = read_imagef(new_bias_image, sampler, (int2)(out_c, 0)); - - int pos_x = mad24(out_c, out_width, out_w); - float4 in = read_imagef(input, sampler, (int2)(pos_x, out_nh)); - float4 out = mad(in, new_scale, new_bias); - - write_imagef(output, (int2)(pos_x, out_nh), out); -} diff --git a/paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl b/paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl index 6a8d07cd39cb7caa0d3dc93b1a9c62b6b479ef73..f304764868959ce028a8448c4d311db878cc1f6e 100644 --- a/paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl +++ b/paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl @@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. */ -__kernel void elementwise_add(__read_only image2d_t input, __read_only image2d_t bias, __write_only image2d_t outputImage) { - int x = get_global_id(0); - int y = get_global_id(1); - const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; - int2 coords; - coords.x = x; - coords.y = y; - float4 in = read_imagef(input, sampler, coords); - float4 b = read_imagef(bias, sampler, coords); - float4 output = in + b; - write_imagef(outputImage, coords, output); -} +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +__kernel void elementwise_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage) { + int x = get_global_id(0); + int y = get_global_id(1); + const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + int2 coords; + coords.x = x; + coords.y = y; + half4 in = read_imageh(input, sampler, coords); + half4 biase = read_imageh(bias, sampler, coords); + half4 output = in + biase; + write_imageh(outputImage,coords,output); + } diff --git a/paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl b/paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl new file mode 100644 index 0000000000000000000000000000000000000000..a6a4da690fa921d281786fcddebf7362d3c52119 --- /dev/null +++ b/paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl @@ -0,0 +1,91 @@ +/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma OPENCL EXTENSION cl_khr_fp16 : enable +#define MIN_VALUE -FLT_MAX + +__kernel void pool_max( + __private const int in_height, __private const int in_width, + __private const int out_height, __private const int out_width, + __private const int pad_top, __private const int pad_left, + __private const int stride_h, __private const int stride_w, + __private const int ksize_h, __private const int ksize_w, + __read_only image2d_t input, __write_only image2d_t output) { + const int out_c = get_global_id(0); + const int out_w = get_global_id(1); + const int out_nh = get_global_id(2); + const int out_n = out_nh / out_height; + const int out_h = out_nh % out_height; + + const sampler_t sampler = + CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + + int start_h = out_h * stride_h - pad_top; + int end_h = min(start_h + ksize_h, in_height); + start_h = max(start_h,0); + + int start_w = out_w * stride_w - pad_left; + int end_w = min(start_w + ksize_w, in_width); + start_w = max(start_w,0); + + const int pos_in_x = out_c * in_width; + const int pos_in_y = out_n * in_height; + half4 max_value = (half4)(MIN_VALUE); + for (int y = start_h; y < end_h; ++y) { + for (int x = start_w; x < end_w; ++x) { + half4 tmp = read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y)); + max_value = max(max_value, tmp); + } + } + + const int pos_out_x = mad24(out_c, out_width, out_w); + write_imageh(output, (int2)(pos_out_x, out_nh), max_value); +} + +__kernel void pool_avg( + __private const int in_height, __private const int in_width, + __private const int out_height, __private const int out_width, + __private const int pad_top, __private const int pad_left, + __private const int stride_h, __private const int stride_w, + __private const int ksize_h, __private const int ksize_w, + __read_only image2d_t input, __write_only image2d_t output) { + const int out_c = get_global_id(0); + const int out_w = get_global_id(1); + const int out_nh = get_global_id(2); + const int out_n = out_nh / out_height; + const int out_h = out_nh % out_height; + + const sampler_t sampler = + CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST; + + int start_h = max(out_h * stride_h - pad_top, 0); + int end_h = min(start_h + ksize_h, in_height); + + int start_w = max(out_w * stride_w - pad_left, 0); + int end_w = min(start_w + ksize_w, in_width); + + const int pos_in_x = out_c * in_width; + const int pos_in_y = out_n * in_height; + half4 sum = (half4)(0.0f); + int num = 0; + for (int y = start_h; y < end_h; ++y) { + for (int x = start_w; x < end_w; ++x) { + sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y)); + num++; + } + } + half4 avg = sum / num; + const int pos_out_x = mad24(out_c, out_width, out_w); + write_imageh(output, (int2)(pos_out_x, out_nh), avg); +} diff --git a/paddle/fluid/lite/tools/build.sh b/paddle/fluid/lite/tools/build.sh index 94d964b0a15a4f891c6736f3d8348970d1fa2a0f..1c43cc661b2d1390becf5cae13044a185b2a7311 100755 --- a/paddle/fluid/lite/tools/build.sh +++ b/paddle/fluid/lite/tools/build.sh @@ -25,9 +25,18 @@ function cmake_x86 { cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags} } -function cmake_cl { - prepare_for_codegen - cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON -DLITE_WITH_CL=ON ${common_flags} +function cmake_opencl { + cmake .. \ + -DLITE_WITH_OPENCL=ON \ + -DWITH_GPU=OFF \ + -DWITH_MKL=OFF \ + -DWITH_LITE=ON \ + -DLITE_WITH_CUDA=OFF \ + -DLITE_WITH_X86=OFF \ + -DLITE_WITH_ARM=ON \ + -DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \ + -DWITH_TESTING=ON \ + -DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2 } @@ -428,8 +437,8 @@ function main { cmake_x86 shift ;; - cmake_cl) - cmake_cl + cmake_opencl) + cmake_opencl shift ;; cmake_cuda)