Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
fbce6342
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
332
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
fbce6342
编写于
2月 28, 2020
作者:
X
xiebaiyuan
提交者:
GitHub
2月 28, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[LITE][OPENCL][Image] fix global avg pool issue ,test=develop (#3028)
上级
f6c4370d
变更
2
显示空白变更内容
内联
并排
Showing
2 changed file
with
162 addition
and
27 deletion
+162
-27
lite/backends/opencl/cl_kernel/image/pool_kernel.cl
lite/backends/opencl/cl_kernel/image/pool_kernel.cl
+138
-26
lite/kernels/opencl/pool_image_compute.cc
lite/kernels/opencl/pool_image_compute.cc
+24
-1
未找到文件。
lite/backends/opencl/cl_kernel/image/pool_kernel.cl
浏览文件 @
fbce6342
...
...
@@ -37,18 +37,19 @@ __kernel void pool_max(__read_only image2d_t input,
int
start_h
=
out_h
*
stride_h
-
pad_top
;
int
end_h
=
min
(
start_h
+
ksize_h,
in_height
)
;
start_h
=
max
(
start_h,0
)
;
start_h
=
max
(
start_h,
0
)
;
int
start_w
=
out_w
*
stride_w
-
pad_left
;
int
end_w
=
min
(
start_w
+
ksize_w,
in_width
)
;
start_w
=
max
(
start_w,0
)
;
start_w
=
max
(
start_w,
0
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
CL_DTYPE4
max_value
=
(
CL_DTYPE4
)(
MIN_VALUE
)
;
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
CL_DTYPE4
tmp
=
READ_IMG_TYPE
(
CL_DTYPE_CHAR,
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
CL_DTYPE4
tmp
=
READ_IMG_TYPE
(
CL_DTYPE_CHAR,
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
max_value
=
max
(
max_value,
tmp
)
;
}
}
...
...
@@ -90,10 +91,121 @@ __kernel void pool_avg(__read_only image2d_t input,
for
(
int
y
=
start_h
; y < end_h; ++y) {
for
(
int
x
=
start_w
; x < end_w; ++x) {
sum
+=
READ_IMG_TYPE
(
CL_DTYPE_CHAR,
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
sum
+=
READ_IMG_TYPE
(
CL_DTYPE_CHAR,
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
}
}
CL_DTYPE4
avg
=
sum
/
(
ksize_h
*
ksize_w
)
;
const
int
pos_out_x
=
mad24
(
out_c,
out_width,
out_w
)
;
WRITE_IMG_TYPE
(
CL_DTYPE_CHAR,
output,
(
int2
)(
pos_out_x,
out_nh
)
,
avg
)
;
}
__kernel
void
pool_avg_global
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
ksize_h,
__private
const
int
ksize_w,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
pad_top,
__private
const
int
pad_left
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
; // =1
const
int
out_nh
=
get_global_id
(
2
)
; // = n*1
const
int
out_n
=
out_nh
/
out_height
;
const
int
out_h
=
out_nh
%
out_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
//
do
not
use
dtype4
here
//
skip
issue
for
half
2048
float4
sum
=
(
float4
)(
0.0f
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
for
(
int
y
=
0
; y < in_height; ++y) {
for
(
int
x
=
0
; x < in_width; ++x) {
half4
tmp
=
READ_IMG_TYPE
(
CL_DTYPE_CHAR,
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
))
;
sum.x
=
convert_float
(
tmp.x
)
+
sum.x
;
sum.y
=
convert_float
(
tmp.y
)
+
sum.y
;
sum.z
=
convert_float
(
tmp.z
)
+
sum.z
;
sum.w
=
convert_float
(
tmp.w
)
+
sum.w
;
}
}
const
float
global_size_div
=
1.0f
/
(
in_height
*
in_width
)
;
half4
avg
;
avg.x
=
convert_half
((
sum.x
*
global_size_div
))
;
avg.y
=
convert_half
((
sum.y
*
global_size_div
))
;
avg.z
=
convert_half
((
sum.z
*
global_size_div
))
;
avg.w
=
convert_half
((
sum.w
*
global_size_div
))
;
#
ifdef
DEBUG
if
(
out_c
==
0
)
{
printf
(
"\033[31msum.x= %f \033 \n "
,
sum.x
)
;
printf
(
"sum.y=%f \n "
,
sum.y
)
;
printf
(
"sum.z=%f \n "
,
sum.z
)
;
printf
(
"sum.w=%f \n "
,
sum.w
)
;
printf
(
"one4.x=%f \n "
,
convert_float
(
one4.x
))
;
printf
(
"in_height=%d \n "
,
in_height
)
;
printf
(
"in_width=%d \n "
,
in_width
)
;
printf
(
"ksize_h=%d \n "
,
ksize_h
)
;
printf
(
"ksize_w=%d \n "
,
ksize_w
)
;
printf
(
"stride_h=%d \n "
,
stride_h
)
;
printf
(
"stride_w=%d \n "
,
stride_w
)
;
printf
(
"pad_top=%d \n "
,
pad_top
)
;
printf
(
"pad_left=%d \n "
,
pad_left
)
;
printf
(
"out_width=%d \n "
,
out_width
)
;
printf
(
"out_height=%d \n "
,
out_height
)
;
printf
(
"i++=%d \n "
,
i++
)
;
printf
(
"avg.x=%f \n "
,
convert_float
(
avg.x
))
;
printf
(
"avg.y=%f \n "
,
convert_float
(
avg.y
))
;
printf
(
"avg.z=%f \n "
,
convert_float
(
avg.z
))
;
printf
(
"avg.w=%f \n "
,
convert_float
(
avg.w
))
;
}
#
endif
WRITE_IMG_TYPE
(
CL_DTYPE_CHAR,
output,
(
int2
)(
out_c,
out_nh
)
,
avg
)
;
}
__kernel
void
pool_max_global
(
__read_only
image2d_t
input,
__write_only
image2d_t
output,
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
out_height,
__private
const
int
out_width,
__private
const
int
ksize_h,
__private
const
int
ksize_w,
__private
const
int
stride_h,
__private
const
int
stride_w,
__private
const
int
pad_top,
__private
const
int
pad_left
)
{
const
int
out_c
=
get_global_id
(
0
)
;
const
int
out_w
=
get_global_id
(
1
)
; // =1
const
int
out_nh
=
get_global_id
(
2
)
; // = n*1
const
int
out_n
=
out_nh
/
out_height
;
const
int
out_h
=
out_nh
%
out_height
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_TRUE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
CL_DTYPE4
max_value
=
(
CL_DTYPE4
)(
MIN_VALUE
)
;
const
int
pos_in_x
=
out_c
*
in_width
;
const
int
pos_in_y
=
out_n
*
in_height
;
for
(
int
y
=
0
; y < in_height; ++y) {
for
(
int
x
=
0
; x < in_width; ++x) {
max_value
=
max
(
max_value,
READ_IMG_TYPE
(
CL_DTYPE_CHAR,
input,
sampler,
(
int2
)(
pos_in_x
+
x,
pos_in_y
+
y
)))
;
}
}
WRITE_IMG_TYPE
(
CL_DTYPE_CHAR,
output,
(
int2
)(
out_c,
out_nh
)
,
max_value
)
;
}
\ No newline at end of file
lite/kernels/opencl/pool_image_compute.cc
浏览文件 @
fbce6342
...
...
@@ -13,6 +13,7 @@
// limitations under the License.
#include <vector>
#include "lite/backends/opencl/cl_half.h"
#include "lite/backends/opencl/cl_include.h"
#include "lite/core/kernel.h"
...
...
@@ -37,7 +38,12 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
void
PrepareForRun
()
override
{
const
auto
&
param
=
*
param_
.
get_mutable
<
param_t
>
();
kernel_func_name_
+=
param
.
pooling_type
;
const
bool
global_pooling
=
param
.
global_pooling
;
if
(
global_pooling
)
{
kernel_func_name_
+=
"_global"
;
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
context
.
cl_context
()
->
AddKernel
(
kernel_func_name_
,
"image/pool_kernel.cl"
,
build_options_
);
...
...
@@ -52,6 +58,10 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
std
::
vector
<
int
>
paddings
=
*
param
.
paddings
;
std
::
vector
<
int
>
strides
=
param
.
strides
;
std
::
vector
<
int
>
ksize
=
param
.
ksize
;
VLOG
(
4
)
<<
"global_pooling: "
<<
global_pooling
;
VLOG
(
4
)
<<
"pooling_type: "
<<
pooling_type
;
VLOG
(
4
)
<<
"paddings : "
<<
paddings
[
0
]
<<
" "
<<
paddings
[
1
]
<<
" "
<<
paddings
[
2
]
<<
" "
<<
paddings
[
3
]
<<
" "
;
if
(
global_pooling
)
{
for
(
size_t
i
=
0
;
i
<
ksize
.
size
();
++
i
)
{
paddings
[
2
*
i
]
=
0
;
...
...
@@ -59,6 +69,18 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
ksize
[
i
]
=
static_cast
<
int
>
(
in_dims
[
i
+
2
]);
}
}
VLOG
(
4
)
<<
"in_dims : ["
<<
in_dims
.
size
()
<<
"]"
<<
in_dims
[
0
]
<<
" "
<<
in_dims
[
1
]
<<
" "
<<
in_dims
[
2
]
<<
" "
<<
in_dims
[
3
];
VLOG
(
4
)
<<
"out_dims : ["
<<
out_dims
.
size
()
<<
"]"
<<
out_dims
[
0
]
<<
" "
<<
out_dims
[
1
]
<<
" "
<<
out_dims
[
2
]
<<
" "
<<
out_dims
[
3
];
VLOG
(
4
)
<<
"paddings fixed : "
<<
paddings
[
0
]
<<
" "
<<
paddings
[
1
]
<<
" "
<<
paddings
[
2
]
<<
" "
<<
paddings
[
3
]
<<
" "
;
VLOG
(
4
)
<<
"strides : ["
<<
strides
.
size
()
<<
"]"
<<
strides
[
0
]
<<
" "
<<
strides
[
1
];
VLOG
(
4
)
<<
"ksize : ["
<<
ksize
.
size
()
<<
"]"
<<
ksize
[
0
]
<<
" "
<<
ksize
[
1
]
<<
" "
<<
ksize
[
2
]
<<
" "
<<
ksize
[
3
];
VLOG
(
4
)
<<
"paddings : ["
<<
paddings
.
size
()
<<
"]"
<<
paddings
[
0
]
<<
" "
<<
paddings
[
1
]
<<
" "
<<
paddings
[
2
]
<<
" "
<<
paddings
[
3
];
bool
pads_equal
=
(
paddings
[
0
]
==
paddings
[
1
])
&&
(
paddings
[
2
]
==
paddings
[
3
]);
if
(
!
pads_equal
)
{
...
...
@@ -86,7 +108,8 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
int
w
=
out_dims
[
3
];
int
nh
=
out_dims
[
0
]
*
out_dims
[
2
];
auto
global_work_size
=
cl
::
NDRange
(
c_block
,
w
,
nh
);
VLOG
(
4
)
<<
"global_work_size : ["
<<
3
<<
"]"
<<
c_block
<<
" "
<<
w
<<
" "
<<
nh
<<
" "
;
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录