提交 53a3f12d 编写于 作者: Z Zhen Wang 提交者: ZhenWang

update some cmake config.

上级 40452e72
......@@ -150,7 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
option(LITE_WITH_CUDA "Enable CUDA in lite mode" OFF)
option(LITE_WITH_X86 "Enable X86 in lite mode" ON)
option(LITE_WITH_ARM "Enable ARM in lite mode" OFF)
option(LITE_WITH_CL "Enable OpenCL support in lite" OFF)
option(LITE_WITH_OPENCL "Enable OpenCL support in lite" OFF)
option(LITE_WITH_LIGHT_WEIGHT_FRAMEWORK "Enable light-weight framework" OFF)
option(LITE_WITH_PROFILE "Enable profile mode in lite framework" OFF)
......@@ -167,12 +167,6 @@ endif()
include_directories("${PADDLE_SOURCE_DIR}")
# for opencl
if (LITE_WITH_CL)
include(external/opencl-headers)
include(external/opencl-clhpp)
endif()
# for mobile
if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
message(STATUS "Building the mobile framework")
......@@ -188,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include(external/eigen) # download eigen3
include(ccache) # set ccache for compilation
# for opencl
if (LITE_WITH_OPENCL)
include(external/opencl-headers)
include(external/opencl-clhpp)
endif()
include(generic) # simplify cmake module
include(configure) # add paddle env configuration
......
......@@ -176,8 +176,8 @@ if (LITE_WITH_ARM)
add_definitions("-DLITE_WITH_ARM")
endif()
if (LITE_WITH_CL)
add_definitions("-DLITE_WITH_CL")
if (LITE_WITH_OPENCL)
add_definitions("-DLITE_WITH_OPENCL")
endif()
if (LITE_WITH_PROFILE)
......
......@@ -29,6 +29,8 @@ ExternalProject_Add(
-DBUILD_EXAMPLES=OFF
-DBUILD_TESTS=OFF
-DCMAKE_INSTALL_PREFIX=${OPENCL_CLHPP_INSTALL_DIR}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=${OPENCL_CLHPP_INSTALL_DIR}
-DCMAKE_BUILD_TYPE:STRING=${THIRD_PARTY_BUILD_TYPE}
)
ADD_DEPENDENCIES(opencl_clhpp opencl_headers)
if (NOT LITE_WITH_CL)
if (NOT LITE_WITH_OPENCL)
return()
endif()
......@@ -6,13 +6,13 @@ find_library(opencl-lib
NAMES OpenCL)
message(STATUS "The OpenCL library path : ${opencl-lib}")
add_compile_options(-fno-strict-aliasing -Wno-ignored-qualifiers)
cc_library(cl_tool SRCS cl_tool.cc)
target_compile_options(cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers)
cc_library(cl_half SRCS cl_half.cc)
target_compile_options(cl_half BEFORE PUBLIC -fno-strict-aliasing)
cc_library(cl_engine SRCS cl_engine.cc DEPS cl_tool)
cc_library(cl_context SRCS cl_context.cc DEPS cl_engine)
cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context)
cc_library(cl_helper SRCS cl_helper.cc DEPS cl_context proto_desc)
cc_library(cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor)
cc_library(cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine)
cc_test(test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context)
......
......@@ -152,13 +152,12 @@ bool CLEngine::InitializeDevice() {
return false;
}
auto ext_data = device_->getInfo<CL_DEVICE_EXTENSIONS>();
LOG(INFO) << "The extensions supported by this device: " << ext_data;
if (ext_data.find("cl_khr_fp16") != std::string::npos) {
LOG(INFO) << "The chosen device supports the half data type.";
} else {
LOG(INFO) << "The chosen device doesn't support the half data type!";
LOG(INFO) << "The extensions supported by this device: " << ext_data;
// LOG(ERROR) << "The chosen platform doesn't support the half data type!";
// return false;
LOG(ERROR) << "The chosen device doesn't support the half data type!";
return false;
}
auto max_units = device_->getInfo<CL_DEVICE_MAX_COMPUTE_UNITS>();
LOG(INFO) << "The chosen device has " << max_units << " compute units.";
......
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
__kernel void batchnorm(__private const int out_width,
__read_only image2d_t input,
__read_only image2d_t new_scale_image,
__read_only image2d_t new_bias_image,
__write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
float4 new_scale = read_imagef(new_scale_image, sampler, (int2)(out_c, 0));
float4 new_bias = read_imagef(new_bias_image, sampler, (int2)(out_c, 0));
int pos_x = mad24(out_c, out_width, out_w);
float4 in = read_imagef(input, sampler, (int2)(pos_x, out_nh));
float4 out = mad(in, new_scale, new_bias);
write_imagef(output, (int2)(pos_x, out_nh), out);
}
......@@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
__kernel void elementwise_add(__read_only image2d_t input, __read_only image2d_t bias, __write_only image2d_t outputImage) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
float4 in = read_imagef(input, sampler, coords);
float4 b = read_imagef(bias, sampler, coords);
float4 output = in + b;
write_imagef(outputImage, coords, output);
}
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
__kernel void elementwise_add(__global image2d_t input, __global image2d_t bias,__write_only image2d_t outputImage) {
int x = get_global_id(0);
int y = get_global_id(1);
const sampler_t sampler = CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int2 coords;
coords.x = x;
coords.y = y;
half4 in = read_imageh(input, sampler, coords);
half4 biase = read_imageh(bias, sampler, coords);
half4 output = in + biase;
write_imageh(outputImage,coords,output);
}
/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#define MIN_VALUE -FLT_MAX
__kernel void pool_max(
__private const int in_height, __private const int in_width,
__private const int out_height, __private const int out_width,
__private const int pad_top, __private const int pad_left,
__private const int stride_h, __private const int stride_w,
__private const int ksize_h, __private const int ksize_w,
__read_only image2d_t input, __write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_n = out_nh / out_height;
const int out_h = out_nh % out_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int start_h = out_h * stride_h - pad_top;
int end_h = min(start_h + ksize_h, in_height);
start_h = max(start_h,0);
int start_w = out_w * stride_w - pad_left;
int end_w = min(start_w + ksize_w, in_width);
start_w = max(start_w,0);
const int pos_in_x = out_c * in_width;
const int pos_in_y = out_n * in_height;
half4 max_value = (half4)(MIN_VALUE);
for (int y = start_h; y < end_h; ++y) {
for (int x = start_w; x < end_w; ++x) {
half4 tmp = read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
max_value = max(max_value, tmp);
}
}
const int pos_out_x = mad24(out_c, out_width, out_w);
write_imageh(output, (int2)(pos_out_x, out_nh), max_value);
}
__kernel void pool_avg(
__private const int in_height, __private const int in_width,
__private const int out_height, __private const int out_width,
__private const int pad_top, __private const int pad_left,
__private const int stride_h, __private const int stride_w,
__private const int ksize_h, __private const int ksize_w,
__read_only image2d_t input, __write_only image2d_t output) {
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_n = out_nh / out_height;
const int out_h = out_nh % out_height;
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
int start_h = max(out_h * stride_h - pad_top, 0);
int end_h = min(start_h + ksize_h, in_height);
int start_w = max(out_w * stride_w - pad_left, 0);
int end_w = min(start_w + ksize_w, in_width);
const int pos_in_x = out_c * in_width;
const int pos_in_y = out_n * in_height;
half4 sum = (half4)(0.0f);
int num = 0;
for (int y = start_h; y < end_h; ++y) {
for (int x = start_w; x < end_w; ++x) {
sum += read_imageh(input, sampler, (int2)(pos_in_x + x, pos_in_y + y));
num++;
}
}
half4 avg = sum / num;
const int pos_out_x = mad24(out_c, out_width, out_w);
write_imageh(output, (int2)(pos_out_x, out_nh), avg);
}
......@@ -25,9 +25,18 @@ function cmake_x86 {
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON ${common_flags}
}
function cmake_cl {
prepare_for_codegen
cmake .. -DWITH_GPU=OFF -DWITH_MKLDNN=OFF -DLITE_WITH_X86=ON -DLITE_WITH_CL=ON ${common_flags}
function cmake_opencl {
cmake .. \
-DLITE_WITH_OPENCL=ON \
-DWITH_GPU=OFF \
-DWITH_MKL=OFF \
-DWITH_LITE=ON \
-DLITE_WITH_CUDA=OFF \
-DLITE_WITH_X86=OFF \
-DLITE_WITH_ARM=ON \
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK=ON \
-DWITH_TESTING=ON \
-DARM_TARGET_OS=$1 -DARM_TARGET_ARCH_ABI=$2
}
......@@ -428,8 +437,8 @@ function main {
cmake_x86
shift
;;
cmake_cl)
cmake_cl
cmake_opencl)
cmake_opencl
shift
;;
cmake_cuda)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册