Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
f077276f
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
338
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
f077276f
编写于
11月 06, 2019
作者:
X
xiebaiyuan
提交者:
GitHub
11月 06, 2019
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimise conv 1x1 ... ,test=develop (#2388)
optimise conv 1x1 ... ,test=develop (#2388)
上级
e74609b7
变更
7
显示空白变更内容
内联
并排
Showing
7 changed file
with
227 addition
and
75 deletion
+227
-75
mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
+187
-65
mobile/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
mobile/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
+7
-2
mobile/src/operators/kernel/cl/conv_add_kernel.cpp
mobile/src/operators/kernel/cl/conv_add_kernel.cpp
+7
-2
mobile/src/operators/kernel/cl/conv_add_relu_kernel.cpp
mobile/src/operators/kernel/cl/conv_add_relu_kernel.cpp
+7
-2
mobile/src/operators/kernel/cl/conv_bn_relu_kernel.cpp
mobile/src/operators/kernel/cl/conv_bn_relu_kernel.cpp
+7
-2
mobile/src/operators/kernel/cl/conv_kernel.cpp
mobile/src/operators/kernel/cl/conv_kernel.cpp
+5
-1
mobile/src/operators/kernel/cl/conv_relu_kernel.cpp
mobile/src/operators/kernel/cl/conv_relu_kernel.cpp
+7
-1
未找到文件。
mobile/src/operators/kernel/cl/cl_kernel/conv_kernel.inc.cl
浏览文件 @
f077276f
...
@@ -1006,8 +1006,160 @@ __kernel void conv_1x1(__private const int global_size_dim0,
...
@@ -1006,8 +1006,160 @@ __kernel void conv_1x1(__private const int global_size_dim0,
write_imageh(output_image, output_pos, output);
write_imageh(output_image, output_pos, output);
}
}
__kernel void conv_1x1_simple(
__private const int global_size_dim0, __private const int global_size_dim1,
__private const int global_size_dim2, __read_only image2d_t input_image,
__read_only image2d_t filter,
#if defined(BIASE_CH) || defined(BIASE_ELE)
__read_only image2d_t bias,
#endif
#ifdef BATCH_NORM
__read_only image2d_t new_scale, __read_only image2d_t new_biase,
#endif
__write_only image2d_t output_image, __private const int stride,
__private const int offset, __private const int input_c,__private const int input_c_origin,
__private const int dilation,
__private const int input_width, /* of one block */
__private const int input_height, /* of one block */
__private const int output_width,
__private const int output_height,
__private const int old_w
) {
half zero = 0.0f;
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
int out_w0 = out_w;
int out_w1 = out_w + global_size_dim1;
int out_w2 = out_w + global_size_dim1 * 2;
int out_w3 = out_w + global_size_dim1 * 3;
int outpos_main = mul24(out_c , old_w);
int2 output_pos0 = (int2)(outpos_main + out_w0, out_nh);
int2 output_pos1 = (int2)(outpos_main + out_w1, out_nh);
int2 output_pos2 = (int2)(outpos_main + out_w2, out_nh);
int2 output_pos3 = (int2)(outpos_main + out_w3, out_nh);
const sampler_t sampler =
CLK_NORMALIZED_COORDS_TRUE |
CLK_ADDRESS_CLAMP
| CLK_FILTER_NEAREST;
int2 stride_xy = (int2)(stride, stride);
int2 ouput_pos_in_one_block0 = (int2)(out_w0, out_nh);
int2 in_pos_in_one_block0 =
ouput_pos_in_one_block0 * stride_xy + (int2)(offset, offset);
int2 ouput_pos_in_one_block1 = (int2)(out_w1, out_nh);
int2 in_pos_in_one_block1 =
ouput_pos_in_one_block1 * stride_xy + (int2)(offset, offset);
int2 ouput_pos_in_one_block2 = (int2)(out_w2, out_nh);
int2 in_pos_in_one_block2 =
ouput_pos_in_one_block2 * stride_xy + (int2)(offset, offset);
int2 ouput_pos_in_one_block3 = (int2)(out_w3, out_nh);
int2 in_pos_in_one_block3 =
ouput_pos_in_one_block3 * stride_xy + (int2)(offset, offset);
#ifdef BIASE_CH
half4 output0 = read_imageh(bias, sampler, (int2)(out_c, 0));
half4 output1 = output0;
half4 output2 = output0;
half4 output3 = output0;
#elif defined(BIASE_ELE)
half4 output0 = read_imageh(bias, sampler, output_pos0);
half4 output1 = output0;
half4 output2 = output0;
half4 output3 = output0;
#else
half4 output0 = 0.0f;
half4 output1 = 0.0f;
half4 output2 = 0.0f;
half4 output3 = 0.0f;
#endif
for (int i = 0; i < input_c; ++i) {
// ------------0---------------
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block0.x, in_pos_in_one_block0.y);
half4 input0 = read_imageh(input_image, sampler, pos_in);
half4 weight0 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 0));
half4 weight1 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 1));
half4 weight2 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 2));
half4 weight3 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 3));
output0 = mad(input0.x, weight0, output0);
output0 = mad(input0.y, weight1, output0);
output0 = mad(input0.z, weight2, output0);
output0 = mad(input0.w, weight3, output0);
// -------------1--------------
pos_in = (int2)(i * input_width + in_pos_in_one_block1.x, in_pos_in_one_block1.y);
half4 input1 = read_imageh(input_image, sampler, pos_in);
output1 = mad(input1.x, weight0, output1);
output1 = mad(input1.y, weight1, output1);
output1 = mad(input1.z, weight2, output1);
output1 = mad(input1.w, weight3, output1);
// -------------2--------------
pos_in = (int2)(i * input_width + in_pos_in_one_block2.x, in_pos_in_one_block2.y);
half4 input2 = read_imageh(input_image, sampler, pos_in);
__kernel void conv_1x1_spl(
output2 = mad(input2.x, weight0, output2);
output2 = mad(input2.y, weight1, output2);
output2 = mad(input2.z, weight2, output2);
output2 = mad(input2.w, weight3, output2);
// -------------3--------------
pos_in = (int2)(i * input_width + in_pos_in_one_block3.x, in_pos_in_one_block3.y);
half4 input3 = read_imageh(input_image, sampler, pos_in);
output3 = mad(input3.x, weight0, output3);
output3 = mad(input3.y, weight1, output3);
output3 = mad(input3.z, weight2, output3);
output3 = mad(input3.w, weight3, output3);
}
#ifdef BATCH_NORM
output0 = output0 * read_imageh(new_scale, sampler, (int2)(out_c, 0)) +
read_imageh(new_biase, sampler, (int2)(out_c, 0));
output1 = output1 * read_imageh(new_scale, sampler, (int2)(out_c, 0)) +
read_imageh(new_biase, sampler, (int2)(out_c, 0));
output2 = output2 * read_imageh(new_scale, sampler, (int2)(out_c, 0)) +
read_imageh(new_biase, sampler, (int2)(out_c, 0));
output3 = output3 * read_imageh(new_scale, sampler, (int2)(out_c, 0)) +
read_imageh(new_biase, sampler, (int2)(out_c, 0));
#endif
#ifdef RELU
output0 = activation(output0);
output1 = activation(output1);
output2 = activation(output2);
output3 = activation(output3);
#endif
if (out_w0 < old_w) {
write_imageh(output_image, output_pos0, output0);
}
if (out_w1 < old_w){
write_imageh(output_image, output_pos1, output1);
}
if (out_w2 < old_w){
write_imageh(output_image, output_pos2, output2);
}
if (out_w3 < old_w){
write_imageh(output_image, output_pos3, output3);
}
}
__kernel void conv_1x1_wrapped(
__private const int global_size_dim0, __private const int global_size_dim1,
__private const int global_size_dim0, __private const int global_size_dim1,
__private const int global_size_dim2, __read_only image2d_t input_image,
__private const int global_size_dim2, __read_only image2d_t input_image,
__read_only image2d_t filter,
__read_only image2d_t filter,
...
@@ -1026,7 +1178,7 @@ __kernel void conv_1x1_spl(
...
@@ -1026,7 +1178,7 @@ __kernel void conv_1x1_spl(
__private const int output_height,
__private const int output_height,
__private const int old_w
__private const int old_w
) {
) {
half zero = 0.0f;
const int out_c = get_global_id(0);
const int out_c = get_global_id(0);
const int out_w = get_global_id(1);
const int out_w = get_global_id(1);
const int out_nh = get_global_id(2);
const int out_nh = get_global_id(2);
...
@@ -1065,14 +1217,14 @@ __kernel void conv_1x1_spl(
...
@@ -1065,14 +1217,14 @@ __kernel void conv_1x1_spl(
#ifdef BIASE_CH
#ifdef BIASE_CH
half4 output0 = read_imageh(bias, sampler, (int2)(out_c, 0));
half4 output0 = read_imageh(bias, sampler, (int2)(out_c, 0));
half4 output1 =
read_imageh(bias, sampler, (int2)(out_c, 0))
;
half4 output1 =
output0
;
half4 output2 =
read_imageh(bias, sampler, (int2)(out_c, 0))
;
half4 output2 =
output0
;
half4 output3 =
read_imageh(bias, sampler, (int2)(out_c, 0))
;
half4 output3 =
output0
;
#elif defined(BIASE_ELE)
#elif defined(BIASE_ELE)
half4 output0 = read_imageh(bias, sampler, output_pos0);
half4 output0 = read_imageh(bias, sampler, output_pos0);
half4 output1 =
read_imageh(bias, sampler, output_pos1)
;
half4 output1 =
output0
;
half4 output2 =
read_imageh(bias, sampler, output_pos2)
;
half4 output2 =
output0
;
half4 output3 =
read_imageh(bias, sampler, output_pos3)
;
half4 output3 =
output0
;
#else
#else
half4 output0 = 0.0f;
half4 output0 = 0.0f;
...
@@ -1083,6 +1235,10 @@ __kernel void conv_1x1_spl(
...
@@ -1083,6 +1235,10 @@ __kernel void conv_1x1_spl(
int max_w_bound = input_c * input_width;
int max_w_bound = input_c * input_width;
int burndary_index = input_c * 4 - input_c_origin;
int burndary_index = input_c * 4 - input_c_origin;
bool burndary_index_w = burndary_index==1||burndary_index==2||burndary_index==3;
bool burndary_index_z = burndary_index==2||burndary_index==3;
bool burndary_index_y = burndary_index==3;
for (int i = 0; i < input_c; ++i) {
for (int i = 0; i < input_c; ++i) {
// ------------0---------------
// ------------0---------------
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block0.x, in_pos_in_one_block0.y);
int2 pos_in = (int2)(i * input_width + in_pos_in_one_block0.x, in_pos_in_one_block0.y);
...
@@ -1092,22 +1248,13 @@ __kernel void conv_1x1_spl(
...
@@ -1092,22 +1248,13 @@ __kernel void conv_1x1_spl(
half4 weight1 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 1));
half4 weight1 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 1));
half4 weight2 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 2));
half4 weight2 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 2));
half4 weight3 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 3));
half4 weight3 = read_imageh(filter, sampler, (int2)(out_c, i * 4 + 3));
int bound_gap = max_w_bound - pos_in.x - 1;
int bound_gap = max_w_bound - pos_in.x - 1;
if (bound_gap < input_width && bound_gap >= 0){
if (burndary_index==0){
bool outof_bound = bound_gap < input_width && bound_gap >= 0;
// do nothing
input0.w = select(input0.w,zero,outof_bound && burndary_index_w);
} else if (burndary_index==1){
input0.z = select(input0.z,zero,outof_bound && burndary_index_z);
input0.w = 0.0f;
input0.y = select(input0.y,zero,outof_bound && burndary_index_y);
} else if (burndary_index==2){
input0.z = 0.0f;
input0.w = 0.0f;
} else if (burndary_index==3){
input0.y = 0.0f;
input0.z = 0.0f;
input0.w = 0.0f;
}
}
output0 = mad(input0.x, weight0, output0);
output0 = mad(input0.x, weight0, output0);
output0 = mad(input0.y, weight1, output0);
output0 = mad(input0.y, weight1, output0);
output0 = mad(input0.z, weight2, output0);
output0 = mad(input0.z, weight2, output0);
...
@@ -1117,20 +1264,12 @@ __kernel void conv_1x1_spl(
...
@@ -1117,20 +1264,12 @@ __kernel void conv_1x1_spl(
half4 input1 = read_imageh(input_image, sampler, pos_in);
half4 input1 = read_imageh(input_image, sampler, pos_in);
bound_gap = max_w_bound - pos_in.x - 1;
bound_gap = max_w_bound - pos_in.x - 1;
if (bound_gap < input_width && bound_gap >= 0){
if (burndary_index==0){
outof_bound = bound_gap < input_width && bound_gap >= 0;
// do nothing
input1.w = select(input1.w,zero,outof_bound && burndary_index_w);
} else if (burndary_index==1){
input1.z = select(input1.z,zero,outof_bound && burndary_index_z);
input1.w = 0.0f;
input1.y = select(input1.y,zero,outof_bound && burndary_index_y);
} else if (burndary_index==2){
input1.z = 0.0f;
input1.w = 0.0f;
} else if (burndary_index==3){
input1.y = 0.0f;
input1.z = 0.0f;
input1.w = 0.0f;
}
}
output1 = mad(input1.x, weight0, output1);
output1 = mad(input1.x, weight0, output1);
output1 = mad(input1.y, weight1, output1);
output1 = mad(input1.y, weight1, output1);
output1 = mad(input1.z, weight2, output1);
output1 = mad(input1.z, weight2, output1);
...
@@ -1141,20 +1280,12 @@ __kernel void conv_1x1_spl(
...
@@ -1141,20 +1280,12 @@ __kernel void conv_1x1_spl(
half4 input2 = read_imageh(input_image, sampler, pos_in);
half4 input2 = read_imageh(input_image, sampler, pos_in);
bound_gap = max_w_bound - pos_in.x - 1;
bound_gap = max_w_bound - pos_in.x - 1;
if (bound_gap < input_width && bound_gap >= 0){
if (burndary_index==0){
outof_bound = bound_gap < input_width && bound_gap >= 0;
// do nothing
input2.w = select(input2.w,zero,outof_bound && burndary_index_w);
} else if (burndary_index==1){
input2.z = select(input2.z,zero,outof_bound && burndary_index_z);
input2.w = 0.0f;
input2.y = select(input2.y,zero,outof_bound && burndary_index_y);
} else if (burndary_index==2){
input2.z = 0.0f;
input2.w = 0.0f;
} else if (burndary_index==3){
input2.y = 0.0f;
input2.z = 0.0f;
input2.w = 0.0f;
}
}
output2 = mad(input2.x, weight0, output2);
output2 = mad(input2.x, weight0, output2);
output2 = mad(input2.y, weight1, output2);
output2 = mad(input2.y, weight1, output2);
output2 = mad(input2.z, weight2, output2);
output2 = mad(input2.z, weight2, output2);
...
@@ -1164,20 +1295,11 @@ __kernel void conv_1x1_spl(
...
@@ -1164,20 +1295,11 @@ __kernel void conv_1x1_spl(
pos_in = (int2)(i * input_width + in_pos_in_one_block3.x, in_pos_in_one_block3.y);
pos_in = (int2)(i * input_width + in_pos_in_one_block3.x, in_pos_in_one_block3.y);
half4 input3 = read_imageh(input_image, sampler, pos_in);
half4 input3 = read_imageh(input_image, sampler, pos_in);
bound_gap = max_w_bound - pos_in.x - 1;
bound_gap = max_w_bound - pos_in.x - 1;
if (bound_gap < input_width && bound_gap >= 0){
if (burndary_index==0){
outof_bound = bound_gap < input_width && bound_gap >= 0;
// do nothing
input3.w = select(input3.w,zero,outof_bound && (burndary_index==1||burndary_index==2||burndary_index==3));
} else if (burndary_index==1){
input3.z = select(input3.z,zero,outof_bound && (burndary_index==2||burndary_index==3));
input3.w = 0.0f;
input3.y = select(input3.y,zero,outof_bound && burndary_index==3);
} else if (burndary_index==2){
input3.z = 0.0f;
input3.w = 0.0f;
} else if (burndary_index==3){
input3.y = 0.0f;
input3.z = 0.0f;
input3.w = 0.0f;
}
}
output3 = mad(input3.x, weight0, output3);
output3 = mad(input3.x, weight0, output3);
output3 = mad(input3.y, weight1, output3);
output3 = mad(input3.y, weight1, output3);
...
...
mobile/src/operators/kernel/cl/conv_add_bn_relu_kernel.cpp
浏览文件 @
f077276f
...
@@ -152,8 +152,13 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
...
@@ -152,8 +152,13 @@ bool ConvAddBNReluKernel<GPU_CL, float>::Init(
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
conv_kernel_file
,
build_options
);
if
(
param
->
Input
()
->
dims
()[
1
]
%
4
==
0
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_simple"
,
conv_kernel_file
,
build_options
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_wrapped"
,
conv_kernel_file
,
build_options
);
}
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
...
...
mobile/src/operators/kernel/cl/conv_add_kernel.cpp
浏览文件 @
f077276f
...
@@ -48,8 +48,13 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
...
@@ -48,8 +48,13 @@ bool ConvAddKernel<GPU_CL, float>::Init(FusionConvAddParam<GPU_CL> *param) {
param
->
ExecMode
()
=
ConvParam
<
GPU_CL
>::
EXEC_SLIDINGWINDOW1x1_FLOAT
;
param
->
ExecMode
()
=
ConvParam
<
GPU_CL
>::
EXEC_SLIDINGWINDOW1x1_FLOAT
;
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
cl_helper_
.
CLCommandQueue
());
if
(
param
->
Input
()
->
dims
()[
1
]
%
4
==
0
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
conv_kernel_file
,
build_options
);
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_simple"
,
conv_kernel_file
,
build_options
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_wrapped"
,
conv_kernel_file
,
build_options
);
}
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
...
...
mobile/src/operators/kernel/cl/conv_add_relu_kernel.cpp
浏览文件 @
f077276f
...
@@ -50,8 +50,13 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
...
@@ -50,8 +50,13 @@ bool ConvAddReluKernel<GPU_CL, float>::Init(
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
conv_kernel_file
,
build_options
);
if
(
param
->
Input
()
->
dims
()[
1
]
%
4
==
0
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_simple"
,
conv_kernel_file
,
build_options
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_wrapped"
,
conv_kernel_file
,
build_options
);
}
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
...
...
mobile/src/operators/kernel/cl/conv_bn_relu_kernel.cpp
浏览文件 @
f077276f
...
@@ -107,8 +107,13 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
...
@@ -107,8 +107,13 @@ bool ConvBNReluKernel<GPU_CL, float>::Init(
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
conv_kernel_file
,
build_options
);
if
(
param
->
Input
()
->
dims
()[
1
]
%
4
==
0
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_simple"
,
conv_kernel_file
,
build_options
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_wrapped"
,
conv_kernel_file
,
build_options
);
}
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Input
()
->
dims
()[
1
]
==
param
->
Output
()
->
dims
()[
1
]
&&
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
param
->
Filter
()
->
dims
()[
2
]
==
3
)
{
...
...
mobile/src/operators/kernel/cl/conv_kernel.cpp
100755 → 100644
浏览文件 @
f077276f
...
@@ -45,7 +45,11 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
...
@@ -45,7 +45,11 @@ bool ConvKernel<GPU_CL, float>::Init(ConvParam<GPU_CL> *param) {
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
conv_kernel_file
);
if
(
param
->
Input
()
->
dims
()[
1
]
%
4
==
0
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_simple"
,
conv_kernel_file
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_wrapped"
,
conv_kernel_file
);
}
DLOG
<<
"conv 1x1"
;
DLOG
<<
"conv 1x1"
;
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
...
...
mobile/src/operators/kernel/cl/conv_relu_kernel.cpp
浏览文件 @
f077276f
...
@@ -46,7 +46,13 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) {
...
@@ -46,7 +46,13 @@ bool ConvReluKernel<GPU_CL, float>::Init(FusionConvReluParam<GPU_CL> *param) {
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
param
->
Filter
()
->
InitNImage
(
cl_helper_
.
CLContext
(),
cl_helper_
.
CLCommandQueue
());
cl_helper_
.
CLCommandQueue
());
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_spl"
,
conv_kernel_file
,
build_options
);
if
(
param
->
Input
()
->
dims
()[
1
]
%
4
==
0
)
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_simple"
,
conv_kernel_file
,
build_options
);
}
else
{
this
->
cl_helper_
.
AddKernel
(
"conv_1x1_wrapped"
,
conv_kernel_file
,
build_options
);
}
DLOG
<<
"conv 1x1"
;
DLOG
<<
"conv 1x1"
;
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
}
else
if
(
param
->
Filter
()
->
dims
()[
1
]
==
1
&&
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录