Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
53a3f12d
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
53a3f12d
编写于
6月 20, 2019
作者:
Z
Zhen Wang
提交者:
ZhenWang
6月 21, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update some cmake config.
上级
40452e72
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
136 addition
and
69 deletion
+136
-69
CMakeLists.txt
CMakeLists.txt
+7
-7
cmake/configure.cmake
cmake/configure.cmake
+2
-2
cmake/external/opencl-clhpp.cmake
cmake/external/opencl-clhpp.cmake
+2
-0
paddle/fluid/lite/opencl/CMakeLists.txt
paddle/fluid/lite/opencl/CMakeLists.txt
+4
-4
paddle/fluid/lite/opencl/cl_engine.cc
paddle/fluid/lite/opencl/cl_engine.cc
+3
-4
paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
+0
-35
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
+13
-12
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
+91
-0
paddle/fluid/lite/tools/build.sh
paddle/fluid/lite/tools/build.sh
+14
-5
未找到文件。
CMakeLists.txt
浏览文件 @
53a3f12d
...
...
@@ -150,7 +150,7 @@ option(WITH_LITE "Enable lite framework" OFF)
option
(
LITE_WITH_CUDA
"Enable CUDA in lite mode"
OFF
)
option
(
LITE_WITH_X86
"Enable X86 in lite mode"
ON
)
option
(
LITE_WITH_ARM
"Enable ARM in lite mode"
OFF
)
option
(
LITE_WITH_CL
"Enable OpenCL support in lite"
OFF
)
option
(
LITE_WITH_
OPEN
CL
"Enable OpenCL support in lite"
OFF
)
option
(
LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
"Enable light-weight framework"
OFF
)
option
(
LITE_WITH_PROFILE
"Enable profile mode in lite framework"
OFF
)
...
...
@@ -167,12 +167,6 @@ endif()
include_directories
(
"
${
PADDLE_SOURCE_DIR
}
"
)
# for opencl
if
(
LITE_WITH_CL
)
include
(
external/opencl-headers
)
include
(
external/opencl-clhpp
)
endif
()
# for mobile
if
(
WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK
)
message
(
STATUS
"Building the mobile framework"
)
...
...
@@ -188,6 +182,12 @@ if (WITH_LITE AND LITE_WITH_LIGHT_WEIGHT_FRAMEWORK)
include
(
external/eigen
)
# download eigen3
include
(
ccache
)
# set ccache for compilation
# for opencl
if
(
LITE_WITH_OPENCL
)
include
(
external/opencl-headers
)
include
(
external/opencl-clhpp
)
endif
()
include
(
generic
)
# simplify cmake module
include
(
configure
)
# add paddle env configuration
...
...
cmake/configure.cmake
浏览文件 @
53a3f12d
...
...
@@ -176,8 +176,8 @@ if (LITE_WITH_ARM)
add_definitions
(
"-DLITE_WITH_ARM"
)
endif
()
if
(
LITE_WITH_CL
)
add_definitions
(
"-DLITE_WITH_CL"
)
if
(
LITE_WITH_
OPEN
CL
)
add_definitions
(
"-DLITE_WITH_
OPEN
CL"
)
endif
()
if
(
LITE_WITH_PROFILE
)
...
...
cmake/external/opencl-clhpp.cmake
浏览文件 @
53a3f12d
...
...
@@ -29,6 +29,8 @@ ExternalProject_Add(
-DBUILD_EXAMPLES=OFF
-DBUILD_TESTS=OFF
-DCMAKE_INSTALL_PREFIX=
${
OPENCL_CLHPP_INSTALL_DIR
}
CMAKE_CACHE_ARGS -DCMAKE_INSTALL_PREFIX:PATH=
${
OPENCL_CLHPP_INSTALL_DIR
}
-DCMAKE_BUILD_TYPE:STRING=
${
THIRD_PARTY_BUILD_TYPE
}
)
ADD_DEPENDENCIES
(
opencl_clhpp opencl_headers
)
paddle/fluid/lite/opencl/CMakeLists.txt
浏览文件 @
53a3f12d
if
(
NOT LITE_WITH_CL
)
if
(
NOT LITE_WITH_
OPEN
CL
)
return
()
endif
()
...
...
@@ -6,13 +6,13 @@ find_library(opencl-lib
NAMES OpenCL
)
message
(
STATUS
"The OpenCL library path :
${
opencl-lib
}
"
)
add_compile_options
(
-fno-strict-aliasing -Wno-ignored-qualifiers
)
cc_library
(
cl_tool SRCS cl_tool.cc
)
target_compile_options
(
cl_tool BEFORE PUBLIC -Wno-ignored-qualifiers
)
cc_library
(
cl_half SRCS cl_half.cc
)
target_compile_options
(
cl_half BEFORE PUBLIC -fno-strict-aliasing
)
cc_library
(
cl_engine SRCS cl_engine.cc DEPS cl_tool
)
cc_library
(
cl_context SRCS cl_context.cc DEPS cl_engine
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
)
cc_library
(
cl_helper SRCS cl_helper.cc DEPS cl_context
proto_desc
)
cc_library
(
cl_image_converter SRCS cl_image_converter.cc DEPS cl_half lite_tensor
)
cc_library
(
cl_image SRCS cl_image.cc DEPS cl_half lite_tensor cl_image_converter cl_engine
)
cc_test
(
test_cl_runtime SRCS cl_test.cc DEPS cl_engine cl_context
)
...
...
paddle/fluid/lite/opencl/cl_engine.cc
浏览文件 @
53a3f12d
...
...
@@ -152,13 +152,12 @@ bool CLEngine::InitializeDevice() {
return
false
;
}
auto
ext_data
=
device_
->
getInfo
<
CL_DEVICE_EXTENSIONS
>
();
LOG
(
INFO
)
<<
"The extensions supported by this device: "
<<
ext_data
;
if
(
ext_data
.
find
(
"cl_khr_fp16"
)
!=
std
::
string
::
npos
)
{
LOG
(
INFO
)
<<
"The chosen device supports the half data type."
;
}
else
{
LOG
(
INFO
)
<<
"The chosen device doesn't support the half data type!"
;
LOG
(
INFO
)
<<
"The extensions supported by this device: "
<<
ext_data
;
// LOG(ERROR) << "The chosen platform doesn't support the half data type!";
// return false;
LOG
(
ERROR
)
<<
"The chosen device doesn't support the half data type!"
;
return
false
;
}
auto
max_units
=
device_
->
getInfo
<
CL_DEVICE_MAX_COMPUTE_UNITS
>
();
LOG
(
INFO
)
<<
"The chosen device has "
<<
max_units
<<
" compute units."
;
...
...
paddle/fluid/lite/opencl/cl_kernel/batchnorm_kernel.cl
已删除
100644 → 0
浏览文件 @
40452e72
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
batchnorm
(
__private
const
int
out_width,
__read_only
image2d_t
input,
__read_only
image2d_t
new_scale_image,
__read_only
image2d_t
new_bias_image,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
float4
new_scale
=
read_imagef
(
new_scale_image,
sampler,
(
int2
)(
out_c,
0
))
;
float4
new_bias
=
read_imagef
(
new_bias_image,
sampler,
(
int2
)(
out_c,
0
))
;
int
pos_x
=
mad24
(
out_c,
out_width,
out_w
)
;
float4
in
=
read_imagef
(
input,
sampler,
(
int2
)(
pos_x,
out_nh
))
;
float4
out
=
mad
(
in,
new_scale,
new_bias
)
;
write_imagef
(
output,
(
int2
)(
pos_x,
out_nh
)
,
out
)
;
}
paddle/fluid/lite/opencl/cl_kernel/elementwise_add_kernel.cl
浏览文件 @
53a3f12d
...
...
@@ -12,15 +12,16 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
__kernel
void
elementwise_add
(
__read_only
image2d_t
input,
__read_only
image2d_t
bias,
__write_only
image2d_t
outputImage
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int2
coords
;
coords.x
=
x
;
coords.y
=
y
;
float4
in
=
read_imagef
(
input,
sampler,
coords
)
;
float4
b
=
read_imagef
(
bias,
sampler,
coords
)
;
float4
output
=
in
+
b
;
write_imagef
(
outputImage,
coords,
output
)
;
}
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
__kernel
void
elementwise_add
(
__global
image2d_t
input,
__global
image2d_t
bias,__write_only
image2d_t
outputImage
)
{
int
x
=
get_global_id
(
0
)
;
int
y
=
get_global_id
(
1
)
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int2
coords
;
coords.x
=
x
;
coords.y
=
y
;
half4
in
=
read_imageh
(
input,
sampler,
coords
)
;
half4
biase
=
read_imageh
(
bias,
sampler,
coords
)
;
half4
output
=
in
+
biase
;
write_imageh
(
outputImage,coords,output
)
;
}
paddle/fluid/lite/opencl/cl_kernel/pool_kernel.cl
0 → 100644
浏览文件 @
53a3f12d
/*
Copyright
(
c
)
2018
PaddlePaddle
Authors.
All
Rights
Reserved.
Licensed
under
the
Apache
License,
Version
2.0
(
the
"License"
)
;
you
may
not
use
this
file
except
in
compliance
with
the
License.
You
may
obtain
a
copy
of
the
License
at
http://www.apache.org/licenses/LICENSE-2.0
Unless
required
by
applicable
law
or
agreed
to
in
writing,
software
distributed
under
the
License
is
distributed
on
an
"AS IS"
BASIS,
WITHOUT
WARRANTIES
OR
CONDITIONS
OF
ANY
KIND,
either
express
or
implied.
See
the
License
for
the
specific
language
governing
permissions
and
limitations
under
the
License.
*/
#
pragma
OPENCL
EXTENSION
cl_khr_fp16
:
enable
#
define
MIN_VALUE
-FLT_MAX
__kernel
void
pool_max
(
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
pad_top,
__private
const
int
pad_left,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
ksize_h,
__private
const
int
ksize_w,
__read_only
image2d_t
input,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
int
out_n
=
out_nh
/
out_height
;
const
int
out_h
=
out_nh
%
out_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int
start_h
=
out_h
*
stride_h
-
pad_top
;
int
end_h
=
min
(
start_h
+
ksize_h,
in_height
)
;
start_h
=
max
(
start_h,0
)
;
int
start_w
=
out_w
*
stride_w
-
pad_left
;
int
end_w
=
min
(
start_w
+
ksize_w,
in_width
)
;
start_w
=
max
(
start_w,0
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
half4
max_value
=
(
half4
)(
MIN_VALUE
)
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
half4
tmp
=
read_imageh
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
max_value
=
max
(
max_value,
tmp
)
;
}
}
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
write_imageh
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
max_value
)
;
}
__kernel
void
pool_avg
(
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
pad_top,
__private
const
int
pad_left,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
ksize_h,
__private
const
int
ksize_w,
__read_only
image2d_t
input,
__write_only
image2d_t
output
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
;
const
int
out_nh
=
get_global_id
(
2
)
;
const
int
out_n
=
out_nh
/
out_height
;
const
int
out_h
=
out_nh
%
out_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
int
start_h
=
max
(
out_h
*
stride_h
-
pad_top,
0
)
;
int
end_h
=
min
(
start_h
+
ksize_h,
in_height
)
;
int
start_w
=
max
(
out_w
*
stride_w
-
pad_left,
0
)
;
int
end_w
=
min
(
start_w
+
ksize_w,
in_width
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
half4
sum
=
(
half4
)(
0.0f
)
;
int
num
=
0
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
sum
+=
read_imageh
(
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
num++
;
}
}
half4
avg
=
sum
/
num
;
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
write_imageh
(
output,
(
int2
)(
pos_out_x,
out_nh
)
,
avg
)
;
}
paddle/fluid/lite/tools/build.sh
浏览文件 @
53a3f12d
...
...
@@ -25,9 +25,18 @@ function cmake_x86 {
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
${
common_flags
}
}
function
cmake_cl
{
prepare_for_codegen
cmake ..
-DWITH_GPU
=
OFF
-DWITH_MKLDNN
=
OFF
-DLITE_WITH_X86
=
ON
-DLITE_WITH_CL
=
ON
${
common_flags
}
function
cmake_opencl
{
cmake ..
\
-DLITE_WITH_OPENCL
=
ON
\
-DWITH_GPU
=
OFF
\
-DWITH_MKL
=
OFF
\
-DWITH_LITE
=
ON
\
-DLITE_WITH_CUDA
=
OFF
\
-DLITE_WITH_X86
=
OFF
\
-DLITE_WITH_ARM
=
ON
\
-DLITE_WITH_LIGHT_WEIGHT_FRAMEWORK
=
ON
\
-DWITH_TESTING
=
ON
\
-DARM_TARGET_OS
=
$1
-DARM_TARGET_ARCH_ABI
=
$2
}
...
...
@@ -428,8 +437,8 @@ function main {
cmake_x86
shift
;;
cmake_cl
)
cmake_cl
cmake_
open
cl
)
cmake_
open
cl
shift
;;
cmake_cuda
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录