Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
6f7369b9
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6f7369b9
编写于
3月 04, 2019
作者:
xiebaiyuan
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
opencl opt
上级
4782257a
变更
5
展开全部
隐藏空白更改
内联
并排
Showing
5 changed file
with
1123 addition
and
93 deletion
+1123
-93
src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
+851
-0
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+165
-55
src/operators/kernel/cl/conv_add_kernel.cpp
src/operators/kernel/cl/conv_add_kernel.cpp
+101
-38
src/operators/kernel/conv_add_bn_relu_kernel.h
src/operators/kernel/conv_add_bn_relu_kernel.h
+3
-0
src/operators/kernel/conv_add_kernel.h
src/operators/kernel/conv_add_kernel.h
+3
-0
未找到文件。
src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
浏览文件 @
6f7369b9
此差异已折叠。
点击以展开。
src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
浏览文件 @
6f7369b9
...
...
@@ -21,7 +21,7 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
operators
{
bool
optimise
=
true
;
template
<
>
bool
ConvAddBNReluKernel
<
GPU_CL
,
float
>::
Init
(
FusionConvAddBNReluParam
<
GPU_CL
>
*
param
)
{
...
...
@@ -139,7 +139,12 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
if
(
param
->
Filter
()
->
dims
()[
2
]
==
1
&&
param
->
Filter
()
->
dims
()[
3
]
==
1
)
{
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
if
(
optimise
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
"conv_add_bn_relu_kernel.cl"
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_bn_relu_kernel.cl"
);
}
DLOG
<<
" conv add bn relu conv 1x1"
;
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
...
...
@@ -205,81 +210,186 @@ void ConvAddBNReluKernel<GPU_CL, float>::Compute(
cl_int
status
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
if
(
optimise
)
{
if
(
param
.
Filter
()
->
dims
()[
2
]
==
1
&&
param
.
Filter
()
->
dims
()[
3
]
==
1
)
{
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
int
maped_w
=
maptofactor
(
w
,
4
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
maped_w
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
new_scale
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
new_scale
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
&
new_bias
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
&
new_bias
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
16
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
16
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
// cl_event out_event = param.Output()->GetClEvent(
);
// cl_event wait_event = param.Input()->GetClEvent(
);
status
=
clSetKernelArg
(
kernel
,
17
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
/*
if (param.Filter()->dims()[2] == 1 &&
param.Filter()->dims()[3] == 1 &&
param.Filter()->dims()[0] % 16 == 0) {
DLOG << " before modifi work size: " << default_work_size;
const
size_t
work_size
[
3
]
=
{
static_cast
<
const
uint32_t
>
(
default_work_size
.
data
()[
0
]),
static_cast
<
const
uint32_t
>
(
maped_w
),
static_cast
<
const
uint32_t
>
(
default_work_size
.
data
()[
2
])};
default_work_size[0] = default_work_size[0] / 4;
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
work_size
,
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
else
{
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
DLOG << " modification work size: " << default_work_size;
DLOG << " input dims " << param.Input()->dims();
DLOG << " output dims " << param.Output()->dims();
DLOG << " filter dims: " << param.Filter()->dims();
DLOG << " biase dims : " << param.Bias()->dims();
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
}
*/
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
new_scale
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
&
new_bias
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
16
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
}
else
{
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
new_scale
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
cl_mem
),
&
new_bias
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
cl_mem
),
&
output
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
input_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
16
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
}
template
class
ConvAddBNReluKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/cl/conv_add_kernel.cpp
浏览文件 @
6f7369b9
...
...
@@ -18,6 +18,7 @@ limitations under the License. */
namespace
paddle_mobile
{
namespace
operators
{
bool
optimise_convadd
=
true
;
template
<
>
bool
ConvAddKernel
<
GPU_CL
,
float
>::
Init
(
FusionConvAddParam
<
GPU_CL
>
*
param
)
{
...
...
@@ -35,8 +36,11 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
if
(
param
->
Filter
()
->
dims
()[
2
]
==
1
&&
param
->
Filter
()
->
dims
()[
3
]
==
1
)
{
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_kernel.cl"
);
if
(
optimise_convadd
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
"conv_add_kernel.cl"
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1"
,
"conv_add_kernel.cl"
);
}
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
...
...
@@ -95,58 +99,117 @@ void ConvAddKernel<GPU_CL, float>::Compute(
cl_int
status
;
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
if
(
optimise_convadd
&&
param
.
Filter
()
->
dims
()[
2
]
==
1
&&
param
.
Filter
()
->
dims
()[
3
]
==
1
)
{
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
int
maped_w
=
maptofactor
(
w
,
4
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
maped_w
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
output
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
nh
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
input
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
outpu
t
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
output_heigh
t
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
15
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
const
size_t
work_size
[
3
]
=
{
static_cast
<
const
uint32_t
>
(
default_work_size
.
data
()[
0
]),
static_cast
<
const
uint32_t
>
(
maped_w
),
static_cast
<
const
uint32_t
>
(
default_work_size
.
data
()[
2
])};
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
work_size
,
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
else
{
status
=
clSetKernelArg
(
kernel
,
0
,
sizeof
(
int
),
&
c_block
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
1
,
sizeof
(
int
),
&
w
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_widt
h
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
2
,
sizeof
(
int
),
&
n
h
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
input_heigh
t
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
3
,
sizeof
(
cl_mem
),
&
inpu
t
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
4
,
sizeof
(
cl_mem
),
&
filter
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
5
,
sizeof
(
cl_mem
),
&
biase
);
CL_CHECK_ERRORS
(
status
);
// cl_event out_event = param.Output()->GetClEvent(
);
// cl_event wait_event = param.Input()->GetClEvent(
);
status
=
clSetKernelArg
(
kernel
,
6
,
sizeof
(
cl_mem
),
&
output
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
7
,
sizeof
(
int
),
&
stride
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
8
,
sizeof
(
int
),
&
offset
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
9
,
sizeof
(
int
),
&
input_c
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
10
,
sizeof
(
int
),
&
dilation
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
11
,
sizeof
(
int
),
&
input_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
12
,
sizeof
(
int
),
&
input_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
13
,
sizeof
(
int
),
&
output_width
);
CL_CHECK_ERRORS
(
status
);
status
=
clSetKernelArg
(
kernel
,
14
,
sizeof
(
int
),
&
output_height
);
CL_CHECK_ERRORS
(
status
);
status
=
clEnqueueNDRangeKernel
(
this
->
cl_helper_
.
CLCommandQueue
(),
kernel
,
default_work_size
.
size
(),
NULL
,
default_work_size
.
data
(),
NULL
,
0
,
NULL
,
NULL
);
CL_CHECK_ERRORS
(
status
);
}
}
template
class
ConvAddKernel
<
GPU_CL
,
float
>;
...
...
src/operators/kernel/conv_add_bn_relu_kernel.h
浏览文件 @
6f7369b9
...
...
@@ -36,6 +36,9 @@ class ConvAddBNReluKernel
public:
void
Compute
(
const
FusionConvAddBNReluParam
<
DeviceType
>
&
param
);
bool
Init
(
FusionConvAddBNReluParam
<
DeviceType
>
*
param
);
inline
int
maptofactor
(
int
i
,
int
factor
)
{
return
(
i
+
factor
-
1
)
/
factor
;
}
};
}
// namespace operators
...
...
src/operators/kernel/conv_add_kernel.h
浏览文件 @
6f7369b9
...
...
@@ -41,6 +41,9 @@ class ConvAddKernel
public:
void
Compute
(
const
FusionConvAddParam
<
DeviceType
>
&
param
);
bool
Init
(
FusionConvAddParam
<
DeviceType
>
*
param
);
inline
int
maptofactor
(
int
i
,
int
factor
)
{
return
(
i
+
factor
-
1
)
/
factor
;
}
};
}
// namespace operators
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录