Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
e52c49b3
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
e52c49b3
编写于
11月 30, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support conv 1x1 with stride == 2 and padding == SAME.
上级
29c3f0f7
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
96 addition
and
57 deletion
+96
-57
mace/kernels/opencl/cl/conv_2d_1x1.cl
mace/kernels/opencl/cl/conv_2d_1x1.cl
+42
-20
mace/kernels/opencl/cl/conv_2d_3x3.cl
mace/kernels/opencl/cl/conv_2d_3x3.cl
+11
-10
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+8
-4
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+1
-1
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+34
-15
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+0
-7
未找到文件。
mace/kernels/opencl/cl/conv_2d_1x1.cl
浏览文件 @
e52c49b3
...
...
@@ -10,8 +10,13 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
__read_only
image2d_t
bn_offset,
/*
cout%4
*
cout/4
*/
#
endif
__write_only
image2d_t
output,
__private
const
int
in_height,
__private
const
int
in_width,
__private
const
int
in_ch_blks,
__private
const
int
width
)
{
__private
const
int
height,
__private
const
int
width,
__private
const
int
padding_top,
__private
const
int
padding_left
)
{
const
int
out_ch_blk
=
get_global_id
(
0
)
;
const
int
out_w_blk
=
get_global_id
(
1
)
;
const
int
out_w_blks
=
get_global_size
(
1
)
;
...
...
@@ -32,24 +37,37 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
#
endif
int4
w
;
w.x
=
out_w_blk
;
#
if
STRIDE
==
1
w.x
=
out_w_blk
-
padding_left
;
w.y
=
w.x
+
out_w_blks
;
w.z
=
w.y
+
out_w_blks
;
w.w
=
w.z
+
out_w_blks
;
int
out_hb_idx
=
(
out_hb
%
height
)
-
padding_top
;
#
else
w.x
=
out_w_blk
*
2
-
padding_left
;
w.y
=
(
out_w_blk
+
out_w_blks
)
*
2
-
padding_left
;
w.z
=
(
out_w_blk
+
2
*
out_w_blks
)
*
2
-
padding_left
;
w.w
=
(
out_w_blk
+
3
*
out_w_blks
)
*
2
-
padding_left
;
int
out_hb_idx
=
(
out_hb
%
height
)
*
2
-
padding_top
;
#
endif
w.x
=
select
(
w.x,
INT_MIN,
(
w.x
<
0
|
| w.x >= in_width));
w.y = select(w.y, INT_MIN, (w.y < 0 || w.y >= in_width));
w.z = select(w.z, INT_MIN, (w.z < 0 || w.z >= in_width));
w.w = select(w.w, INT_MIN, (w.w < 0 |
|
w.w
>=
in_width
))
;
out_hb_idx
=
select
(
out_hb_idx
+
(
out_hb
/
height
)
*
in_height,
-1
,
out_hb_idx
>=
in_height
)
;
//
Unrolling
this
loop
hurt
perfmance
int
in_x_base
=
0
;
for
(
int
in_ch_blk
=
0
; in_ch_blk < in_ch_blks; ++in_ch_blk) {
DATA_TYPE4
in0
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.x,
out_hb
))
;
DATA_TYPE4
in1
=
0
;
DATA_TYPE4
in2
=
0
;
DATA_TYPE4
in3
=
0
;
if
(
w.y
<
width
)
{
//
conditional
load
hurt
perf,
this
branching
helps
sometimes
in1
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.y,
out_hb
))
;
in2
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.z,
out_hb
))
;
in3
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.w,
out_hb
))
;
}
DATA_TYPE4
in0
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.x,
out_hb_idx
))
;
DATA_TYPE4
in1
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.y,
out_hb_idx
))
;
DATA_TYPE4
in2
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.z,
out_hb_idx
))
;
DATA_TYPE4
in3
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_x_base
+
w.w,
out_hb_idx
))
;
const
int
filter_x0
=
in_ch_blk
<<
2
;
DATA_TYPE4
weights0
=
READ_IMAGET
(
filter,
sampler,
(
int2
)(
filter_x0,
out_ch_blk
))
;
...
...
@@ -78,7 +96,7 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
out3
+=
in3.z
*
weights2
;
out3
+=
in3.w
*
weights3
;
in_x_base
+=
width
;
in_x_base
+=
in_
width
;
}
#
ifdef
FUSED_BATCH_NORM
...
...
@@ -111,14 +129,18 @@ __kernel void conv_2d_1x1(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
#
endif
const
int
out_x_base
=
out_ch_blk
*
width
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w.x,
out_hb
)
,
out0
)
;
int
out_x_idx
=
out_w_blk
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
out0
)
;
if
(
w.y
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w.y,
out_hb
)
,
out1
)
;
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
out1
)
;
if
(
w.z
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w.z,
out_hb
)
,
out2
)
;
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
out2
)
;
if
(
w.w
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
w.w,
out_hb
)
,
out3
)
;
out_x_idx
+=
out_w_blks
;
if
(
out_x_idx
>=
width
)
return
;
WRITE_IMAGET
(
output,
(
int2
)(
out_x_base
+
out_x_idx,
out_hb
)
,
out3
)
;
}
mace/kernels/opencl/cl/conv_2d_3x3.cl
浏览文件 @
e52c49b3
...
...
@@ -19,23 +19,24 @@ __kernel void conv_2d_3x3(__read_only image2d_t input, /* [c%4 * w * c/4, h * b]
const
int
out_hb
=
get_global_id
(
2
)
;
const
int
rounded_in_ch
=
in_ch_blks
*
4
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
#
ifdef
BIAS
float4
out0
=
convert_float4
(
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
)))
;
float4
out1
=
out0
;
float4
out2
=
out0
;
float4
out3
=
out0
;
float4
out4
=
out0
;
#
else
float4
out0
=
0
;
float4
out1
=
0
;
float4
out2
=
0
;
float4
out3
=
0
;
float4
out4
=
0
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
#
ifdef
BIAS
out0
=
convert_float4
(
READ_IMAGET
(
bias,
sampler,
(
int2
)(
out_ch_blk,
0
)))
;
out1
=
out0
;
out2
=
out0
;
out3
=
out0
;
out4
=
out0
;
#
endif
#
if
def
STRIDE_
1
#
if
STRIDE
==
1
int
in_width0
=
out_w_blk
-
padding_left
;
int
in_width1
=
in_width0
+
out_w_blks
;
int
in_width2
=
in_width1
+
out_w_blks
;
...
...
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
e52c49b3
...
...
@@ -15,6 +15,7 @@ void Conv1x1(const Tensor *input,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
const
int
stride
,
const
int
*
padding
,
Tensor
*
output
)
{
const
index_t
batch
=
output
->
dim
(
0
);
const
index_t
height
=
output
->
dim
(
1
);
...
...
@@ -29,9 +30,7 @@ void Conv1x1(const Tensor *input,
const
index_t
width_blocks
=
RoundUpDiv4
(
width
);
const
index_t
input_channel_blocks
=
RoundUpDiv4
(
input_channels
);
MACE_CHECK
(
stride
==
1
);
MACE_CHECK
(
input_batch
==
batch
);
MACE_CHECK
(
stride
!=
1
||
(
input_height
==
height
&&
input_width
==
width
));
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
input
->
dtype
()));
...
...
@@ -54,8 +53,13 @@ void Conv1x1(const Tensor *input,
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
bias
->
buffer
())));
}
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
output
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input_height
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input_width
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input_channel_blocks
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
conv_2d_kernel
.
setArg
(
idx
++
,
padding
[
0
]
/
2
);
conv_2d_kernel
.
setArg
(
idx
++
,
padding
[
1
]
/
2
);
auto
command_queue
=
runtime
->
command_queue
();
cl_int
error
;
...
...
@@ -74,7 +78,7 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
)
{
Conv1x1
(
input
,
filter
,
bias
,
1
,
output
);
Conv1x1
(
input
,
filter
,
bias
,
1
,
padding
,
output
);
};
extern
void
Conv2dOpenclK1x1S2
(
const
Tensor
*
input
,
...
...
@@ -82,7 +86,7 @@ extern void Conv2dOpenclK1x1S2(const Tensor *input,
const
Tensor
*
bias
,
const
int
*
padding
,
Tensor
*
output
)
{
Conv1x1
(
input
,
filter
,
bias
,
2
,
output
);
Conv1x1
(
input
,
filter
,
bias
,
2
,
padding
,
output
);
};
}
// namespace kernels
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
e52c49b3
...
...
@@ -28,7 +28,7 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
built_options
.
emplace
(
input
->
dtype
()
==
DT_FLOAT
?
"-DTYPE_FLOAT"
:
""
);
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
input
->
dtype
()));
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
built_options
.
emplace
(
stride
==
1
?
"-DSTRIDE_1"
:
""
);
built_options
.
emplace
(
"-DSTRIDE="
+
ToString
(
stride
)
);
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
...
...
mace/ops/conv_2d_test.cc
浏览文件 @
e52c49b3
...
...
@@ -420,15 +420,6 @@ template<DeviceType D>
void
TestConv1x1
()
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
...
...
@@ -445,8 +436,37 @@ void TestConv1x1() {
{
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
,
1.0
f
,
2.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Run
net
.
RunOp
(
D
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
// Check
auto
expected
=
CreateTensor
<
float
>
(
...
...
@@ -465,9 +485,9 @@ TEST_F(Conv2dOpTest, CPUConv1x1) {
TestConv1x1
<
DeviceType
::
CPU
>
();
}
//
TEST_F(Conv2dOpTest, OPENCLConv1x1) {
//
TestConv1x1<DeviceType::OPENCL>();
//
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
...
...
@@ -631,4 +651,3 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
//TEST_F(Conv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
// TestHalfComplexConvNxNS12<DeviceType::OPENCL, half>({32, 32, 64, 128});
//}
mace/ops/ops_test_util.h
浏览文件 @
e52c49b3
...
...
@@ -351,13 +351,6 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
Expector
<
EXP_TYPE
,
RES_TYPE
>::
Near
(
x
,
y
,
abs_err
);
}
template
<
typename
T
>
std
::
string
ToString
(
const
T
&
input
)
{
std
::
stringstream
ss
;
ss
<<
input
;
return
ss
.
str
();
}
template
<
DeviceType
D
,
typename
T
>
void
BufferToImage
(
OpsTestNet
&
net
,
const
std
::
string
&
input_name
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录