Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
9562d42a
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9562d42a
编写于
4月 12, 2020
作者:
xiebaiyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[LITE][OPENCL]use shared_ptr with cl::kernel , init cl::event when use ,test=develop
上级
c8918d89
变更
31
隐藏空白更改
内联
并排
Showing
31 changed file
with
551 addition
and
521 deletion
+551
-521
lite/backends/opencl/cl_context.cc
lite/backends/opencl/cl_context.cc
+3
-3
lite/backends/opencl/cl_context.h
lite/backends/opencl/cl_context.h
+2
-2
lite/kernels/opencl/activation_buffer_compute.cc
lite/kernels/opencl/activation_buffer_compute.cc
+8
-8
lite/kernels/opencl/activation_image_compute.cc
lite/kernels/opencl/activation_image_compute.cc
+13
-11
lite/kernels/opencl/bilinear_interp_image_compute.cc
lite/kernels/opencl/bilinear_interp_image_compute.cc
+10
-10
lite/kernels/opencl/box_coder_image_compute.cc
lite/kernels/opencl/box_coder_image_compute.cc
+7
-7
lite/kernels/opencl/concat_buffer_compute.cc
lite/kernels/opencl/concat_buffer_compute.cc
+20
-20
lite/kernels/opencl/concat_image_compute.cc
lite/kernels/opencl/concat_image_compute.cc
+18
-18
lite/kernels/opencl/conv_buffer_compute.cc
lite/kernels/opencl/conv_buffer_compute.cc
+9
-9
lite/kernels/opencl/conv_image_compute.cc
lite/kernels/opencl/conv_image_compute.cc
+204
-184
lite/kernels/opencl/conv_image_compute.h
lite/kernels/opencl/conv_image_compute.h
+0
-1
lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc
lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc
+17
-17
lite/kernels/opencl/dropout_image_compute.cc
lite/kernels/opencl/dropout_image_compute.cc
+5
-5
lite/kernels/opencl/elementwise_add_buffer_compute.cc
lite/kernels/opencl/elementwise_add_buffer_compute.cc
+7
-7
lite/kernels/opencl/elementwise_add_image_compute.cc
lite/kernels/opencl/elementwise_add_image_compute.cc
+15
-12
lite/kernels/opencl/elementwise_mul_compute.cc
lite/kernels/opencl/elementwise_mul_compute.cc
+17
-17
lite/kernels/opencl/elementwise_mul_image_compute.cc
lite/kernels/opencl/elementwise_mul_image_compute.cc
+20
-20
lite/kernels/opencl/elementwise_sub_image_compute.cc
lite/kernels/opencl/elementwise_sub_image_compute.cc
+8
-8
lite/kernels/opencl/fc_buffer_compute.cc
lite/kernels/opencl/fc_buffer_compute.cc
+16
-14
lite/kernels/opencl/grid_sampler_image_compute.cc
lite/kernels/opencl/grid_sampler_image_compute.cc
+12
-11
lite/kernels/opencl/instance_norm_image_compute.cc
lite/kernels/opencl/instance_norm_image_compute.cc
+17
-17
lite/kernels/opencl/layout_image_compute.cc
lite/kernels/opencl/layout_image_compute.cc
+27
-27
lite/kernels/opencl/lrn_image_compute.cc
lite/kernels/opencl/lrn_image_compute.cc
+9
-9
lite/kernels/opencl/mul_buffer_compute.cc
lite/kernels/opencl/mul_buffer_compute.cc
+7
-7
lite/kernels/opencl/nearest_interp_image_compute.cc
lite/kernels/opencl/nearest_interp_image_compute.cc
+9
-9
lite/kernels/opencl/pad2d_image_compute.cc
lite/kernels/opencl/pad2d_image_compute.cc
+12
-12
lite/kernels/opencl/pool_buffer_compute.cc
lite/kernels/opencl/pool_buffer_compute.cc
+15
-15
lite/kernels/opencl/pool_image_compute.cc
lite/kernels/opencl/pool_image_compute.cc
+13
-13
lite/kernels/opencl/reshape_image_compute.cc
lite/kernels/opencl/reshape_image_compute.cc
+14
-14
lite/kernels/opencl/scale_image_compute.cc
lite/kernels/opencl/scale_image_compute.cc
+11
-8
lite/kernels/opencl/slice_image_compute.cc
lite/kernels/opencl/slice_image_compute.cc
+6
-6
未找到文件。
lite/backends/opencl/cl_context.cc
浏览文件 @
9562d42a
...
...
@@ -68,16 +68,16 @@ void CLContext::AddKernel(const std::string &kernel_name,
kernel_offset_
[
kernel_key
.
str
()]
=
kernels_
.
size
()
-
1
;
}
cl
::
Kernel
&
CLContext
::
GetKernel
(
const
int
index
)
{
std
::
shared_ptr
<
cl
::
Kernel
>
&
CLContext
::
GetKernel
(
const
int
index
)
{
VLOG
(
3
)
<<
" --- kernel count: "
<<
kernels_
.
size
()
<<
" --- "
;
CHECK
(
static_cast
<
size_t
>
(
index
)
<
kernels_
.
size
())
<<
"The index must be less than the size of kernels."
;
CHECK
(
kernels_
[
index
]
!=
nullptr
)
<<
"The target kernel pointer cannot be null."
;
return
*
(
kernels_
[
index
])
;
return
kernels_
[
index
]
;
}
cl
::
Kernel
&
CLContext
::
GetKernel
(
const
std
::
string
&
name
)
{
std
::
shared_ptr
<
cl
::
Kernel
>
&
CLContext
::
GetKernel
(
const
std
::
string
&
name
)
{
auto
it
=
kernel_offset_
.
find
(
name
);
CHECK
(
it
!=
kernel_offset_
.
end
())
<<
"Cannot find the kernel function: "
<<
name
;
...
...
lite/backends/opencl/cl_context.h
浏览文件 @
9562d42a
...
...
@@ -54,9 +54,9 @@ class CLContext {
const
std
::
string
&
options
=
""
,
const
std
::
string
&
time_stamp
=
""
);
cl
::
Kernel
&
GetKernel
(
const
int
index
);
std
::
shared_ptr
<
cl
::
Kernel
>
&
GetKernel
(
const
int
index
);
cl
::
Kernel
&
GetKernel
(
const
std
::
string
&
name
);
std
::
shared_ptr
<
cl
::
Kernel
>
&
GetKernel
(
const
std
::
string
&
name
);
cl
::
NDRange
DefaultWorkSize
(
const
CLImage
&
image
);
...
...
lite/kernels/opencl/activation_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -54,16 +54,16 @@ class ReluCompute
VLOG
(
4
)
<<
TargetToStr
(
param
.
Out
->
target
());
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
(
const
int
)
count
);
status
=
kernel
->
setArg
(
++
arg_idx
,
(
const
int
)
count
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
{
count
};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
@@ -112,16 +112,16 @@ class SigmoidCompute
VLOG
(
4
)
<<
TargetToStr
(
param
.
Out
->
target
());
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
(
const
int
)
count
);
status
=
kernel
->
setArg
(
++
arg_idx
,
(
const
int
)
count
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
{
count
};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/activation_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -84,7 +84,7 @@ class ActivationComputeImageDefault
STL
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
kernel_
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
}
void
ReInitWhenNeeded
()
override
{
...
...
@@ -117,16 +117,20 @@ class ActivationComputeImageDefault
auto
*
x_img
=
act_param_
->
X
->
data
<
half_t
,
cl
::
Image2D
>
();
auto
*
out_img
=
act_param_
->
Out
->
mutable_data
<
half_t
,
cl
::
Image2D
>
(
out_img_shape_
[
0
],
out_img_shape_
[
1
]);
auto
kernel
=
kernel_
;
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
;
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
x_img
);
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
out_img
);
status
=
kernel
->
setArg
(
1
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
threshold_
);
status
=
kernel
->
setArg
(
2
,
threshold_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
scale_
);
status
=
kernel
->
setArg
(
3
,
scale_
);
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -145,10 +149,8 @@ class ActivationComputeImageDefault
VLOG
(
4
)
<<
"kernel func name:"
<<
kernel_func_name_
;
#endif
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -168,7 +170,7 @@ class ActivationComputeImageDefault
std
::
string
kernel_func_name_
{};
float
threshold_
{
6.
f
};
float
scale_
{
1.
f
};
cl
::
Kernel
kernel
_
;
cl
::
Kernel
kernel
;
bool
first_epoch_for_reinit_
{
true
};
cl
::
NDRange
global_work_size_
=
cl
::
NDRange
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
1
)};
...
...
lite/kernels/opencl/bilinear_interp_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -118,23 +118,23 @@ class BilinearInterpImageCompute
VLOG
(
4
)
<<
"default_work_size: "
<<
default_work_size
[
0
]
<<
", "
<<
default_work_size
[
1
]
<<
", "
<<
default_work_size
[
2
];
#endif
cl_int
status
=
kernel
.
setArg
(
arg_idx
++
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
++
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
out_img
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
scale_h
);
status
=
kernel
->
setArg
(
arg_idx
++
,
scale_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
scale_w
);
status
=
kernel
->
setArg
(
arg_idx
++
,
scale_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
align_delta
);
status
=
kernel
->
setArg
(
arg_idx
++
,
align_delta
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
in_h
);
status
=
kernel
->
setArg
(
arg_idx
++
,
in_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
in_w
);
status
=
kernel
->
setArg
(
arg_idx
++
,
in_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_h
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_w
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_w
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
...
...
@@ -143,7 +143,7 @@ class BilinearInterpImageCompute
static_cast
<
cl
::
size_type
>
(
default_work_size
[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/box_coder_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -104,24 +104,24 @@ class BoxCoderComputeImage : public KernelLite<TARGET(kOpenCL),
<<
default_work_size
[
1
]
<<
", "
<<
default_work_size
[
2
];
#endif
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
++
,
*
prior_box_image
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
++
,
*
prior_box_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
prior_box_var_image
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
prior_box_var_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
target_box_image
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
target_box_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
out_buf
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_C
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_C
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_H
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_H
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
{
static_cast
<
cl
::
size_type
>
(
default_work_size
[
0
]),
static_cast
<
cl
::
size_type
>
(
default_work_size
[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/concat_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -103,28 +103,28 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
auto
axis0
=
inputs
[
0
]
->
dims
()[
axis_
];
int
total0
=
axis0
*
post_size_
;
int
total1
=
(
axis_size_
-
axis0
)
*
post_size_
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf0
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
x_buf1
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
x_buf1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
int
>
(
axis0
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
int
>
(
axis0
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
axis_size_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
axis_size_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
pre_size_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
pre_size_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
post_size_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
post_size_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
total
);
status
=
kernel
->
setArg
(
++
arg_idx
,
total
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
total0
);
status
=
kernel
->
setArg
(
++
arg_idx
,
total0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
total1
);
status
=
kernel
->
setArg
(
++
arg_idx
,
total1
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
@@ -140,24 +140,24 @@ class ConcatCompute : public KernelLite<TARGET(kOpenCL),
auto
*
x_buf
=
inputs
[
i
]
->
data
<
float
,
cl
::
Buffer
>
();
global_work_size
=
cl
::
NDRange
{
static_cast
<
size_t
>
(
size
)};
int
total0
=
size
*
post_size_
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
int
>
(
size
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
int
>
(
size
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
pre_size_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
pre_size_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
post_size_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
post_size_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
start
);
status
=
kernel
->
setArg
(
++
arg_idx
,
start
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
total
);
status
=
kernel
->
setArg
(
++
arg_idx
,
total
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
total0
);
status
=
kernel
->
setArg
(
++
arg_idx
,
total0
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/concat_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -170,25 +170,25 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
if
(
inputs
.
size
()
==
2
)
{
auto
*
x_buf0
=
inputs
[
0
]
->
data
<
half_t
,
cl
::
Image2D
>
();
auto
*
x_buf1
=
inputs
[
1
]
->
data
<
half_t
,
cl
::
Image2D
>
();
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf0
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
x_buf1
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
x_buf1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
flag_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
flag_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
int
>
(
inputs
[
0
]
->
dims
()[
axis_
]));
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
int
>
(
inputs
[
0
]
->
dims
()[
axis_
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_c
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_c
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_w
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
width_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
width_
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
@@ -213,25 +213,25 @@ class ConcatComputeImage : public KernelLite<TARGET(kOpenCL),
static_cast
<
cl
::
size_type
>
(
image_shape
[
"width"
]
/
in_dims
[
in_dims
.
size
()
-
1
]),
static_cast
<
cl
::
size_type
>
(
image_shape
[
"height"
])};
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
flag_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
flag_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
start
);
status
=
kernel
->
setArg
(
++
arg_idx
,
start
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_c
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_c
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_w
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
in_w
);
status
=
kernel
->
setArg
(
++
arg_idx
,
in_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
width_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
width_
);
CL_CHECK_FATAL
(
status
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/conv_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -283,25 +283,25 @@ void ConvCompute::GemmBatched(cl::Kernel& kernel,
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
*
filter_d
);
status
=
kernel
->
setArg
(
arg_idx
,
*
filter_d
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
x_d
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
x_d
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_d
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_d
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
output_d
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
output_d
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
m
);
status
=
kernel
->
setArg
(
++
arg_idx
,
m
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
n
);
status
=
kernel
->
setArg
(
++
arg_idx
,
n
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
k
);
status
=
kernel
->
setArg
(
++
arg_idx
,
k
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
batch_size
);
status
=
kernel
->
setArg
(
++
arg_idx
,
batch_size
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
local_work_size
,
...
...
lite/kernels/opencl/conv_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -382,11 +382,11 @@ void ConvImageCompute::PrepareForRun() {
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
kernel_
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
VLOG
(
4
)
<<
"kernel_key: "
<<
kernel_key
.
str
();
VLOG
(
4
)
<<
"kernel ready ... "
<<
kernel_key
.
str
();
size_t
max_work_group_size
=
0
;
kernel
_
.
getWorkGroupInfo
<
size_t
>
(
CLRuntime
::
Global
()
->
device
(),
kernel
->
getWorkGroupInfo
<
size_t
>
(
CLRuntime
::
Global
()
->
device
(),
CL_KERNEL_WORK_GROUP_SIZE
,
&
max_work_group_size
);
...
...
@@ -503,49 +503,51 @@ void ConvImageCompute::Conv2d1x1opt(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
offset
);
status
=
kernel
->
setArg
(
++
arg_idx
,
offset
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_c_block
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_c_block
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_c
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_c
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
default_w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
default_w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
local_work_size_
,
...
...
@@ -651,56 +653,58 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
if
(
has_bias
)
{
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
offset
);
status
=
kernel
->
setArg
(
++
arg_idx
,
offset
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_c_block
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_c_block
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_channel
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_channel
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
filter_channel
);
status
=
kernel
->
setArg
(
++
arg_idx
,
filter_channel
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
filter_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
filter_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
filter_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
filter_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
new_groups
);
status
=
kernel
->
setArg
(
++
arg_idx
,
new_groups
);
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -710,7 +714,7 @@ void ConvImageCompute::Conv2d3x3(bool is_turn) {
#endif
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -786,48 +790,50 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
paddings
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
paddings
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
batch
);
status
=
kernel
->
setArg
(
++
arg_idx
,
batch
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_channel
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_channel
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -837,7 +843,7 @@ void ConvImageCompute::Conv2d3x3opt(bool is_turn) {
#endif
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
local_work_size_
,
...
...
@@ -919,46 +925,48 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
offset
);
status
=
kernel
->
setArg
(
++
arg_idx
,
offset
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_c_block
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_c_block
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -968,7 +976,7 @@ void ConvImageCompute::Conv2d5x5(bool is_turn) {
#endif
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -1044,50 +1052,52 @@ void ConvImageCompute::Conv2d5x5opt(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
paddings
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
paddings
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
batch
);
status
=
kernel
->
setArg
(
++
arg_idx
,
batch
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_channel
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_channel
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
// VLOG(4) << "out_image: " << out_image;
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
local_work_size_
,
...
...
@@ -1169,46 +1179,48 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
offset
);
status
=
kernel
->
setArg
(
++
arg_idx
,
offset
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_c_block
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_c_block
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -1218,7 +1230,7 @@ void ConvImageCompute::Conv2d7x7(bool is_turn) {
#endif
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -1292,49 +1304,51 @@ void ConvImageCompute::Conv2d7x7opt(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
paddings
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
paddings
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
batch
);
status
=
kernel
->
setArg
(
++
arg_idx
,
batch
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_channel
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_channel
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
local_work_size_
,
...
...
@@ -1371,19 +1385,21 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) {
auto
*
output_img
=
param
.
output
->
mutable_data
<
half_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_img
);
CL_CHECK_FATAL
(
status
);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
...
...
@@ -1395,30 +1411,30 @@ void ConvImageCompute::DepthwiseConv2d3x3s1(bool is_turn) {
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
output_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
output_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
dilations
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
dilations
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
local_work_size_
,
...
...
@@ -1458,7 +1474,9 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) {
auto
*
output_img
=
param
.
output
->
mutable_data
<
half_t
,
cl
::
Image2D
>
(
image_shape
[
"width"
],
image_shape
[
"height"
]);
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"setArg"
;
...
...
@@ -1474,15 +1492,15 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) {
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_img
);
CL_CHECK_FATAL
(
status
);
const
bool
has_bias
=
param
.
bias
!=
nullptr
;
const
bool
is_element_wise_bias
=
...
...
@@ -1493,30 +1511,30 @@ void ConvImageCompute::DepthwiseConv2d3x3(bool is_turn) {
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
output_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
output_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
offset
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
offset
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
dilations
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
dilations
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
input_c_block
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
input_c_block
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -1600,50 +1618,52 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) {
bias_image
=
bias_gpu_image_
->
data
<
half_t
,
cl
::
Image2D
>
();
}
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_names_
[
0
]
<<
build_options_
[
0
]
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
c_blk_
);
status
=
kernel
->
setArg
(
arg_idx
,
c_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
w_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
w_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
nh_blk_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
nh_blk_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_image
);
CL_CHECK_FATAL
(
status
);
if
(
has_bias
)
{
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"set bias_image: "
;
#endif
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_image
);
CL_CHECK_FATAL
(
status
);
}
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
strides
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
strides
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
offset
);
status
=
kernel
->
setArg
(
++
arg_idx
,
offset
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_c_block
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_c_block
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dilations
[
0
]);
status
=
kernel
->
setArg
(
++
arg_idx
,
dilations
[
0
]);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
input_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
input_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
output_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
output_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
filter_width
);
status
=
kernel
->
setArg
(
++
arg_idx
,
filter_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
filter_height
);
status
=
kernel
->
setArg
(
++
arg_idx
,
filter_height
);
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -1653,7 +1673,7 @@ void ConvImageCompute::DepthwiseConv2d(bool is_turn) {
#endif
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/conv_image_compute.h
浏览文件 @
9562d42a
...
...
@@ -71,7 +71,6 @@ class ConvImageCompute : public KernelLite<TARGET(kOpenCL),
int
default_w_blk_
=
1
;
int
default_nh_blk_
=
1
;
cl
::
Kernel
kernel_
;
cl
::
NDRange
local_work_size_
=
cl
::
NDRange
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
1
)};
bool
use_lws_
{
true
};
...
...
lite/kernels/opencl/depthwise_conv2d_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -75,41 +75,41 @@ class DepthwiseConv2dCompute
cl_int
status
;
auto
numel
=
output_dims
.
production
();
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
static_cast
<
const
int
>
(
numel
));
status
=
kernel
->
setArg
(
arg_idx
,
static_cast
<
const
int
>
(
numel
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
x_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
output_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
filter_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
filter_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
filter_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
filter_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
output_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
output_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
filter_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
filter_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
bias_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
bias_buf
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
(
static_cast
<
size_t
>
(
numel
));
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/dropout_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -70,13 +70,13 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_w
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dropout_prob
);
status
=
kernel
->
setArg
(
++
arg_idx
,
dropout_prob
);
CL_CHECK_FATAL
(
status
);
const
std
::
vector
<
size_t
>&
default_work_size
=
...
...
@@ -90,7 +90,7 @@ class DropoutComputeImage2D : public KernelLite<TARGET(kOpenCL),
static_cast
<
cl
::
size_type
>
(
default_work_size
.
data
()[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/elementwise_add_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -49,22 +49,22 @@ void ElementwiseAddCompute::Run() {
VLOG
(
4
)
<<
TargetToStr
(
ele_param_
->
Out
->
target
());
#endif
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
(
const
int
)
batch_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
(
const
int
)
batch_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
(
const
int
)
channels_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
(
const
int
)
channels_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
(
const
int
)
num_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
(
const
int
)
num_
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
{
channels_
,
batch_
};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/elementwise_add_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -66,7 +66,7 @@ void ElementwiseAddImageCompute::ReInitWhenNeeded() {
STL
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
kernel_
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
// compute image shape
paddle
::
lite
::
CLImageConverterDefault
default_convertor
;
...
...
@@ -90,6 +90,8 @@ void ElementwiseAddImageCompute::GetGlobalWorkSize() {
}
void
ElementwiseAddImageCompute
::
Run
()
{
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
auto
*
x
=
ele_param_
->
X
;
auto
*
y
=
ele_param_
->
Y
;
auto
*
out
=
ele_param_
->
Out
;
...
...
@@ -118,13 +120,16 @@ void ElementwiseAddImageCompute::Run() {
#endif
cl_int
status
;
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
if
(
y_dims
.
size
()
==
4
)
{
status
=
kernel
.
setArg
(
0
,
*
x_img
);
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
}
else
if
(
y_dims
.
size
()
==
1
)
{
if
(
axis
==
x_dims
.
size
()
-
1
||
axis
==
x_dims
.
size
()
-
3
)
{
...
...
@@ -132,13 +137,13 @@ void ElementwiseAddImageCompute::Run() {
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"tensor_w:"
<<
tensor_w
;
#endif
status
=
kernel
.
setArg
(
0
,
*
x_img
);
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
tensor_w
);
status
=
kernel
->
setArg
(
3
,
tensor_w
);
CL_CHECK_FATAL
(
status
);
}
else
{
LOG
(
FATAL
)
<<
"ElementwiseAddImage doesn't support axis:"
<<
axis
...
...
@@ -151,10 +156,8 @@ void ElementwiseAddImageCompute::Run() {
<<
", y->dims.size():"
<<
y_dims
.
size
();
}
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/elementwise_mul_compute.cc
浏览文件 @
9562d42a
...
...
@@ -96,51 +96,51 @@ void ElementwiseMulFloatImageCompute::Run() {
auto
x_dims
=
x
->
dims
();
if
(
y_dims
==
x_dims
)
{
// kernel: elementwise_mul(channel_mul_d4)
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
}
else
if
(
y_dims
.
size
()
==
1
||
y_dims
.
size
()
==
4
)
{
auto
tensor_w
=
x_dims
[
x_dims
.
size
()
-
1
];
VLOG
(
4
)
<<
"tensor_w:"
<<
tensor_w
;
// kernel: channel_mul_d1 / channel_mul_d4
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
tensor_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
tensor_w
));
CL_CHECK_FATAL
(
status
);
}
else
if
(
y_dims
.
size
()
==
2
)
{
if
(
x_dims
[
0
]
==
y_dims
[
0
]
&&
x_dims
[
1
]
==
y_dims
[
1
])
{
auto
tensor_w
=
x_dims
[
x_dims
.
size
()
-
1
];
VLOG
(
4
)
<<
"tensor_w:"
<<
tensor_w
;
// kernel: channel_mul_d2_nc
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
tensor_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
tensor_w
));
CL_CHECK_FATAL
(
status
);
}
else
{
auto
y_tensor_h
=
y
->
dims
()[
0
];
auto
y_tensor_w
=
y
->
dims
()[
1
];
VLOG
(
4
)
<<
"y_tensor_w:"
<<
y_tensor_w
<<
" y_tensor_h:"
<<
y_tensor_h
;
// kernel: channel_mul_d2_hw
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
y_tensor_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
y_tensor_w
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
y_tensor_h
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
y_tensor_h
));
CL_CHECK_FATAL
(
status
);
}
}
else
{
...
...
@@ -151,7 +151,7 @@ void ElementwiseMulFloatImageCompute::Run() {
auto
global_work_size
=
cl
::
NDRange
{
static_cast
<
cl
::
size_type
>
(
x_img_width
),
static_cast
<
cl
::
size_type
>
(
x_img_height
)};
auto
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/elementwise_mul_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -124,57 +124,57 @@ class ElementwiseMulImageCompute
if
(
bias_dims
==
x_dims
)
{
// kernel_func_name_ = "elementwise_mul";
cl_int
status
=
kernel
.
setArg
(
0
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
}
else
{
const
int
bias_dim_size
=
bias_dims
.
size
();
if
(
bias_dim_size
==
1
)
{
// kernel_func_name_ = "channel_mul_d1";
const
int
tensor_w
=
x_dims
[
x_dims
.
size
()
-
1
];
cl_int
status
=
kernel
.
setArg
(
0
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
tensor_w
);
status
=
kernel
->
setArg
(
3
,
tensor_w
);
CL_CHECK_FATAL
(
status
);
}
else
if
(
bias_dim_size
==
2
)
{
// kernel_func_name_ = "channel_mul_d2";
const
int
tensor_w
=
x_dims
[
x_dims
.
size
()
-
1
];
cl_int
status
=
kernel
.
setArg
(
0
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
tensor_w
);
status
=
kernel
->
setArg
(
3
,
tensor_w
);
CL_CHECK_FATAL
(
status
);
}
else
if
(
bias_dim_size
==
3
)
{
// kernel_func_name_ = "channel_mul_d3";
const
int
tensor_w
=
x_dims
[
x_dims
.
size
()
-
1
];
cl_int
status
=
kernel
.
setArg
(
0
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
tensor_w
);
status
=
kernel
->
setArg
(
3
,
tensor_w
);
CL_CHECK_FATAL
(
status
);
}
else
if
(
bias_dim_size
==
4
)
{
// kernel_func_name_ = "channel_mul_d4";
const
int
tensor_w
=
x_dims
[
x_dims
.
size
()
-
1
];
cl_int
status
=
kernel
.
setArg
(
0
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
y_img
);
status
=
kernel
->
setArg
(
1
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
tensor_w
);
status
=
kernel
->
setArg
(
3
,
tensor_w
);
CL_CHECK_FATAL
(
status
);
}
else
{
LOG
(
FATAL
)
<<
"Unsupported ElementwiseMul with x_dims:"
<<
x_dims
...
...
@@ -186,7 +186,7 @@ class ElementwiseMulImageCompute
cl
::
NDRange
{
static_cast
<
cl
::
size_type
>
(
x_img_width
),
static_cast
<
cl
::
size_type
>
(
x_img_height
)};
auto
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/elementwise_sub_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -101,11 +101,11 @@ void ElementwiseSubImageCompute::Run() {
int
arg_idx
=
0
;
auto
y_dims
=
y
->
dims
();
if
(
y_dims
.
size
()
==
4
)
{
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
}
else
if
(
y_dims
.
size
()
==
1
)
{
if
(
axis
==
x
->
dims
().
size
()
-
1
||
axis
==
x
->
dims
().
size
()
-
3
)
{
...
...
@@ -113,13 +113,13 @@ void ElementwiseSubImageCompute::Run() {
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
4
)
<<
"tensor_w:"
<<
tensor_w
;
#endif
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
tensor_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
tensor_w
));
CL_CHECK_FATAL
(
status
);
}
else
{
LOG
(
FATAL
)
<<
"ElementwiseSubImage doesn't support axis:"
<<
axis
...
...
@@ -139,7 +139,7 @@ void ElementwiseSubImageCompute::Run() {
#endif
auto
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/fc_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -81,7 +81,7 @@ class FcCompute
time_stamp_
);
STL
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
kernel_
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
// compute global work size
GetGlobalWorkSize
();
...
...
@@ -103,28 +103,30 @@ class FcCompute
auto
*
bias_buf
=
fc_param_
->
bias
->
data
<
float
,
cl
::
Buffer
>
();
auto
*
out_buf
=
fc_param_
->
output
->
mutable_data
<
float
,
cl
::
Buffer
>
(
TARGET
(
kOpenCL
));
auto
kernel
=
kernel_
;
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
;
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
x_buf
);
status
=
kernel
->
setArg
(
0
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
w_buf
);
status
=
kernel
->
setArg
(
1
,
*
w_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
bias_buf
);
status
=
kernel
->
setArg
(
2
,
*
bias_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
*
out_buf
);
status
=
kernel
->
setArg
(
3
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
4
,
static_cast
<
const
int
>
(
m_
));
status
=
kernel
->
setArg
(
4
,
static_cast
<
const
int
>
(
m_
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
5
,
static_cast
<
const
int
>
(
n_
));
status
=
kernel
->
setArg
(
5
,
static_cast
<
const
int
>
(
n_
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
6
,
static_cast
<
const
int
>
(
k_
));
status
=
kernel
->
setArg
(
6
,
static_cast
<
const
int
>
(
k_
));
CL_CHECK_FATAL
(
status
);
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -143,7 +145,7 @@ class FcCompute
bool
first_epoch_for_reinit_
{
true
};
DDim
last_x_dims_
;
cl
::
NDRange
global_work_size_
;
cl
::
Kernel
kernel
_
;
cl
::
Kernel
kernel
;
std
::
shared_ptr
<
cl
::
Event
>
event_
{
new
cl
::
Event
};
};
...
...
lite/kernels/opencl/grid_sampler_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -48,7 +48,7 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
STL
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
kernel_
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
VLOG
(
4
)
<<
"kernel_key: "
<<
kernel_key
.
str
();
}
...
...
@@ -116,22 +116,24 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
#endif
cl_int
status
;
auto
kernel
=
kernel_
;
status
=
kernel
.
setArg
(
0
,
*
x_img
);
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
grid_img
);
status
=
kernel
->
setArg
(
1
,
*
grid_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
*
out_img
);
status
=
kernel
->
setArg
(
2
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
out_height
);
status
=
kernel
->
setArg
(
3
,
out_height
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
4
,
out_width
);
status
=
kernel
->
setArg
(
4
,
out_width
);
CL_CHECK_FATAL
(
status
);
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -148,7 +150,6 @@ class GridSamplerImageCompute : public KernelLite<TARGET(kOpenCL),
DDim
out_img_shape_
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
(
{
static_cast
<
DDim
::
value_type
>
(
1
),
static_cast
<
DDim
::
value_type
>
(
1
)}));
std
::
string
kernel_func_name_
{
"grid_sampler"
};
cl
::
Kernel
kernel_
;
cl
::
NDRange
global_work_size_
=
cl
::
NDRange
{
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
1
),
static_cast
<
size_t
>
(
1
)};
std
::
string
build_options_
{
"-DCL_DTYPE_half"
};
...
...
lite/kernels/opencl/instance_norm_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -120,25 +120,25 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
cl_int
status
=
kernel
.
setArg
(
0
,
out_w
);
cl_int
status
=
kernel
->
setArg
(
0
,
out_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
out_h
);
status
=
kernel
->
setArg
(
1
,
out_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
out_c_group
);
status
=
kernel
->
setArg
(
2
,
out_c_group
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
lws1
);
status
=
kernel
->
setArg
(
3
,
lws1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
4
,
lws2
);
status
=
kernel
->
setArg
(
4
,
lws2
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
5
,
epsilon
);
status
=
kernel
->
setArg
(
5
,
epsilon
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
6
,
*
x_img
);
status
=
kernel
->
setArg
(
6
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
7
,
*
out_img
);
status
=
kernel
->
setArg
(
7
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
local_work_size
,
...
...
@@ -244,23 +244,23 @@ class InstanceNormImageCompute : public KernelLite<TARGET(kOpenCL),
auto
*
bias_img
=
bias_image_
.
data
<
half_t
,
cl
::
Image2D
>
();
float
epsilon
=
instance_norm_param_
->
epsilon
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
++
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
++
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
out_img
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
scale_img
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
scale_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
bias_img
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
bias_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
epsilon
);
status
=
kernel
->
setArg
(
arg_idx
++
,
epsilon
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
in_h
);
status
=
kernel
->
setArg
(
arg_idx
++
,
in_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
in_w
);
status
=
kernel
->
setArg
(
arg_idx
++
,
in_w
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
local_work_size
,
...
...
lite/kernels/opencl/layout_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -99,21 +99,21 @@ class LayoutComputeBufferChwToImageDefault
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_data
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_data
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_data
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_data
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_H
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_H
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_W
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_W
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_C
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_C
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride0
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride0
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride1
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride1
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride2
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride2
));
CL_CHECK_FATAL
(
status
);
VLOG
(
2
)
<<
"gws:[3D]"
<<
((
new_dims
[
1
]
+
3
)
/
4
)
<<
" "
<<
new_dims
[
3
]
...
...
@@ -123,7 +123,7 @@ class LayoutComputeBufferChwToImageDefault
static_cast
<
cl
::
size_type
>
(
new_dims
[
3
]),
static_cast
<
cl
::
size_type
>
(
new_dims
[
0
]
*
new_dims
[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
@@ -205,21 +205,21 @@ class LayoutComputeImageDefaultToBufferChw
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_data
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_data
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_width
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_width
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_height
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_height
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_data
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_data
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
size_ch
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
size_ch
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
size_block
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
size_block
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
size_batch
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
size_batch
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
C
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
C
));
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
VLOG
(
2
)
<<
"gws:[3D]"
<<
((
new_dims
[
1
]
+
3
)
/
4
)
<<
" "
<<
new_dims
[
3
]
...
...
@@ -230,7 +230,7 @@ class LayoutComputeImageDefaultToBufferChw
static_cast
<
cl
::
size_type
>
(
new_dims
[
3
]),
static_cast
<
cl
::
size_type
>
(
new_dims
[
0
]
*
new_dims
[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
@@ -300,21 +300,21 @@ class LayoutComputeBufferChwToImage2DNw
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_data
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_data
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_data
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_data
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_H
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_H
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_W
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_W
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_N
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_N
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride0
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride0
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride1
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride1
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride2
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
Stride2
));
CL_CHECK_FATAL
(
status
);
VLOG
(
2
)
<<
"gws:[3D]"
<<
((
out_N
+
3
)
/
4
)
<<
" "
<<
out_W
<<
" "
...
...
@@ -324,7 +324,7 @@ class LayoutComputeBufferChwToImage2DNw
static_cast
<
cl
::
size_type
>
(
out_W
),
// w
static_cast
<
cl
::
size_type
>
(
out_C
*
out_H
)};
// ch
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/lrn_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -106,21 +106,21 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
VLOG
(
4
)
<<
"default_work_size: "
<<
default_work_size
[
0
]
<<
", "
<<
default_work_size
[
1
]
<<
", "
<<
default_work_size
[
3
];
#endif
cl_int
status
=
kernel
.
setArg
(
arg_idx
++
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
++
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
out_img
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_channel
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_channel
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_width
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_width
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
n_
);
status
=
kernel
->
setArg
(
arg_idx
++
,
n_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
k_
);
status
=
kernel
->
setArg
(
arg_idx
++
,
k_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
alpha_
);
status
=
kernel
->
setArg
(
arg_idx
++
,
alpha_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
beta_
);
status
=
kernel
->
setArg
(
arg_idx
++
,
beta_
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
...
...
@@ -129,7 +129,7 @@ class LrnImageCompute : public KernelLite<TARGET(kOpenCL),
static_cast
<
cl
::
size_type
>
(
default_work_size
[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/mul_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -76,23 +76,23 @@ class MulCompute
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
*
x_buf
);
status
=
kernel
->
setArg
(
arg_idx
,
*
x_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
y_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
y_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
m_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
m_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
n_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
n_
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
k_
);
status
=
kernel
->
setArg
(
++
arg_idx
,
k_
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
{
static_cast
<
size_t
>
((
m_
+
3
)
/
4
),
static_cast
<
size_t
>
((
n_
+
3
)
/
4
)};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/nearest_interp_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -72,21 +72,21 @@ class NearestInterpComputeImageDefault
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
int
arg_idx
=
0
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
float
>
(
scale_h
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
float
>
(
scale_h
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
float
>
(
scale_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
float
>
(
scale_w
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims_h
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims_h
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims_h
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims_h
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims_w
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims_w
));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims_w
));
CL_CHECK_FATAL
(
status
);
#ifndef LITE_SHUTDOWN_LOG
...
...
@@ -110,7 +110,7 @@ class NearestInterpComputeImageDefault
static_cast
<
cl
::
size_type
>
(
default_work_size
.
data
()[
1
]),
static_cast
<
cl
::
size_type
>
(
default_work_size
.
data
()[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/pad2d_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -114,27 +114,27 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
int
pad_w1
=
pad2d_param_
->
paddings
[
3
];
float
pad_value
=
pad2d_param_
->
pad_value
;
cl_int
status
=
kernel
.
setArg
(
arg_idx
++
,
*
x_img
);
cl_int
status
=
kernel
->
setArg
(
arg_idx
++
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
*
out_img
);
status
=
kernel
->
setArg
(
arg_idx
++
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
in_h
);
status
=
kernel
->
setArg
(
arg_idx
++
,
in_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
in_w
);
status
=
kernel
->
setArg
(
arg_idx
++
,
in_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_h
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_h
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
out_w
);
status
=
kernel
->
setArg
(
arg_idx
++
,
out_w
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
pad_h0
);
status
=
kernel
->
setArg
(
arg_idx
++
,
pad_h0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
pad_h1
);
status
=
kernel
->
setArg
(
arg_idx
++
,
pad_h1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
pad_w0
);
status
=
kernel
->
setArg
(
arg_idx
++
,
pad_w0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
pad_w1
);
status
=
kernel
->
setArg
(
arg_idx
++
,
pad_w1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
arg_idx
++
,
pad_value
);
status
=
kernel
->
setArg
(
arg_idx
++
,
pad_value
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
...
...
@@ -143,7 +143,7 @@ class Pad2dCompute : public KernelLite<TARGET(kOpenCL),
static_cast
<
cl
::
size_type
>
(
default_work_size
[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/pool_buffer_compute.cc
浏览文件 @
9562d42a
...
...
@@ -76,37 +76,37 @@ class PoolCompute
cl_int
status
;
auto
numel
=
out_dims
.
production
();
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
static_cast
<
const
int
>
(
numel
));
status
=
kernel
->
setArg
(
arg_idx
,
static_cast
<
const
int
>
(
numel
));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
input_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
input_buf
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
output_buf
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
output_buf
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
cl
::
NDRange
(
static_cast
<
size_t
>
(
numel
));
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/pool_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -125,33 +125,33 @@ class PoolComputeImage2D : public KernelLite<TARGET(kOpenCL),
#endif
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
in_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
3
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
out_dims
[
3
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
ksize
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
1
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
strides
[
1
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
2
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
2
]));
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
status
=
kernel
->
setArg
(
++
arg_idx
,
static_cast
<
const
int
>
(
paddings
[
0
]));
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/reshape_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -122,31 +122,31 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
int
arg_idx
=
0
;
cl_int
status
;
status
=
kernel
.
setArg
(
arg_idx
,
*
x_image
);
status
=
kernel
->
setArg
(
arg_idx
,
*
x_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_image
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_image
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_C
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_C
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_H
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_H
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_W
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_W
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
in_W
);
status
=
kernel
->
setArg
(
++
arg_idx
,
in_W
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
in_H
);
status
=
kernel
->
setArg
(
++
arg_idx
,
in_H
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
in_Stride0
);
status
=
kernel
->
setArg
(
++
arg_idx
,
in_Stride0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
in_Stride1
);
status
=
kernel
->
setArg
(
++
arg_idx
,
in_Stride1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
in_Stride2
);
status
=
kernel
->
setArg
(
++
arg_idx
,
in_Stride2
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_Stride0
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_Stride0
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_Stride1
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_Stride1
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
out_Stride2
);
status
=
kernel
->
setArg
(
++
arg_idx
,
out_Stride2
);
CL_CHECK_FATAL
(
status
);
auto
global_work_size
=
...
...
@@ -155,7 +155,7 @@ class ReshapeComputeFloatImage : public KernelLite<TARGET(kOpenCL),
static_cast
<
size_t
>
(
default_work_size
.
data
()[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
lite/kernels/opencl/scale_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -45,7 +45,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
STL
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
kernel_
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
}
void
ReInitWhenNeeded
()
override
{
...
...
@@ -82,19 +82,22 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
auto
&
context
=
ctx_
->
As
<
OpenCLContext
>
();
CHECK
(
context
.
cl_context
()
!=
nullptr
);
auto
kernel
=
kernel_
;
std
::
stringstream
kernel_key
;
kernel_key
<<
kernel_func_name_
<<
build_options_
<<
time_stamp_
;
auto
kernel
=
context
.
cl_context
()
->
GetKernel
(
kernel_key
.
str
());
;
cl_int
status
;
status
=
kernel
.
setArg
(
0
,
*
x_img
);
status
=
kernel
->
setArg
(
0
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
1
,
*
out_img
);
status
=
kernel
->
setArg
(
1
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
2
,
scale
);
status
=
kernel
->
setArg
(
2
,
scale
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
3
,
bias
);
status
=
kernel
->
setArg
(
3
,
bias
);
CL_CHECK_FATAL
(
status
);
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size_
,
cl
::
NullRange
,
...
...
@@ -111,7 +114,7 @@ class ScaleComputeImage2D : public KernelLite<TARGET(kOpenCL),
std
::
shared_ptr
<
cl
::
Event
>
event_
{
new
cl
::
Event
};
param_t
*
scale_param_
{
nullptr
};
cl
::
Kernel
kernel
_
;
cl
::
Kernel
kernel
;
bool
first_epoch_for_reinit_
{
true
};
DDim
last_x_dims_
;
DDim
out_img_shape_
=
DDim
(
std
::
vector
<
DDim
::
value_type
>
(
...
...
lite/kernels/opencl/slice_image_compute.cc
浏览文件 @
9562d42a
...
...
@@ -75,15 +75,15 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
cl_int
status
;
int
arg_idx
=
0
;
status
=
kernel
.
setArg
(
arg_idx
,
*
x_img
);
status
=
kernel
->
setArg
(
arg_idx
,
*
x_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
*
out_img
);
status
=
kernel
->
setArg
(
++
arg_idx
,
*
out_img
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
start
);
status
=
kernel
->
setArg
(
++
arg_idx
,
start
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
end
);
status
=
kernel
->
setArg
(
++
arg_idx
,
end
);
CL_CHECK_FATAL
(
status
);
status
=
kernel
.
setArg
(
++
arg_idx
,
dim_w
);
status
=
kernel
->
setArg
(
++
arg_idx
,
dim_w
);
CL_CHECK_FATAL
(
status
);
const
std
::
vector
<
size_t
>&
default_work_size
=
...
...
@@ -97,7 +97,7 @@ class SliceComputeImage2D : public KernelLite<TARGET(kOpenCL),
static_cast
<
cl
::
size_type
>
(
default_work_size
.
data
()[
2
])};
status
=
context
.
cl_context
()
->
GetCommandQueue
().
enqueueNDRangeKernel
(
kernel
,
*
kernel
.
get
()
,
cl
::
NullRange
,
global_work_size
,
cl
::
NullRange
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录