Xiaomi / Mace

Commit 8bb5716a
Authored Sep 18, 2017 by Liangliang He

Merge branch 'conv2d-neon' into 'master'

Neon conv2d 3x3 stride 2 kernel. See merge request !44

Parents: 291a5ee6, 89b4b039

Showing 5 changed files with 287 additions and 209 deletions (+287 −209)
Changed files:

  mace/kernels/neon/conv_2d_neon.cc      +23  −15
  mace/kernels/neon/conv_2d_neon_3x3.cc  +253 −186
  mace/ops/conv_2d_benchmark.cc          +5   −2
  mace/ops/conv_2d_test.cc               +2   −2
  mace/ops/pooling_test.cc               +4   −4
mace/kernels/neon/conv_2d_neon.cc

@@ -22,6 +22,13 @@ extern void Conv2dNeonK3x3S1(const float *input,
                              float *output,
                              const index_t *output_shape);
 
+extern void Conv2dNeonK3x3S2(const float *input,
+                             const index_t *input_shape,
+                             const float *filter,
+                             const float *bias,
+                             float *output,
+                             const index_t *output_shape);
+
 extern void Conv2dNeonK5x5S1(const float *input,
                              const index_t *input_shape,
                              const float *filter,

@@ -30,27 +37,25 @@ extern void Conv2dNeonK5x5S1(const float *input,
                              const index_t *output_shape);
 
 template <>
 void Conv2dFunctor<DeviceType::NEON,
-                   float>::operator()(const float *input,
-                                      const index_t *input_shape,
-                                      const float *filter,
-                                      const index_t *filter_shape,
-                                      const float *bias,
-                                      float *output,
-                                      const index_t *output_shape) {
+                   float>::
+operator()(const float *input,   // NCHW
+           const index_t *input_shape,
+           const float *filter,  // c_out, c_in, kernel_h, kernel_w
+           const index_t *filter_shape,
+           const float *bias,    // c_out
+           float *output,        // NCHW
+           const index_t *output_shape) {
   typedef void (*Conv2dNeonFunction)(
-      const float *input,
+      const float *input,   // NCHW
       const index_t *input_shape,
-      const float *filter,
-      const float *bias,
-      float *output,
+      const float *filter,  // c_out, c_in, kernel_h, kernel_w
+      const float *bias,    // c_out
+      float *output,        // NCHW
       const index_t *output_shape);
   // Selection matrix: kernel_size x stride_size
   static const Conv2dNeonFunction selector[5][2] = {
       {Conv2dNeonK1x1S1, nullptr},
       {nullptr, nullptr},
-      {Conv2dNeonK3x3S1, nullptr},
+      {Conv2dNeonK3x3S1, Conv2dNeonK3x3S2},
       {nullptr, nullptr},
       {Conv2dNeonK5x5S1, nullptr}};
   // not implement yet

@@ -59,7 +64,10 @@ operator()(const float *input, // NCHW
   if (kernel_h != kernel_w || kernel_h > 5 || strides_[0] != strides_[1] ||
       strides_[0] > 2 || dilations_[0] != 1 || dilations_[1] != 1 ||
       selector[kernel_h - 1][strides_[0] - 1] == nullptr) {
-    LOG(WARNING) << "NEON conv2d kernel not implementated, using slow vesion";
+    LOG(WARNING) << "NEON conv2d kernel with "
+                 << "filter" << kernel_h << "x" << kernel_w << ","
+                 << " stride " << strides_[0] << "x" << strides_[1]
+                 << " is not implemented yet, using slow version";
     Conv2dFunctor<DeviceType::CPU, float>(strides_, paddings_, dilations_)(
         input, input_shape, filter, filter_shape, bias, output, output_shape);
     return;
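To make the dispatch in this file easier to follow outside diff form, here is a minimal, stand-alone sketch of the selection-matrix pattern shown above. It is not MACE code: KernelFn and the K*x*S* functions are simplified stand-ins for the real Conv2dNeonK*S* kernels (which take input/filter/bias/output tensor arguments), and Pick condenses the condition guarding the warning branch.

#include <cstdio>

// Simplified stand-ins for the real NEON kernels (illustrative only).
using KernelFn = void (*)();
void K1x1S1() { std::puts("NEON 1x1 stride-1 kernel"); }
void K3x3S1() { std::puts("NEON 3x3 stride-1 kernel"); }
void K3x3S2() { std::puts("NEON 3x3 stride-2 kernel (added by this commit)"); }
void K5x5S1() { std::puts("NEON 5x5 stride-1 kernel"); }

// Selection matrix: kernel_size x stride_size, mirroring the diff above.
static const KernelFn selector[5][2] = {
    {K1x1S1, nullptr},
    {nullptr, nullptr},
    {K3x3S1, K3x3S2},
    {nullptr, nullptr},
    {K5x5S1, nullptr}};

// Returns the specialized kernel, or nullptr when the caller must fall back
// to the generic (slow) CPU implementation, as the warning branch does.
KernelFn Pick(int kernel_h, int kernel_w, int stride_h, int stride_w,
              int dilation_h, int dilation_w) {
  if (kernel_h != kernel_w || kernel_h > 5 || stride_h != stride_w ||
      stride_h > 2 || dilation_h != 1 || dilation_w != 1) {
    return nullptr;
  }
  return selector[kernel_h - 1][stride_h - 1];
}

int main() {
  if (KernelFn fn = Pick(3, 3, 2, 2, 1, 1)) fn();  // hits the new 3x3 s2 entry
  if (Pick(4, 4, 1, 1, 1, 1) == nullptr) std::puts("fallback to CPU functor");
  return 0;
}

Indexing selector[kernel_h - 1][strides_[0] - 1] is why the table only covers square kernels up to 5x5 with strides 1 and 2; anything outside that range, or any nullptr entry, falls through to the generic CPU functor.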
mace/kernels/neon/conv_2d_neon_3x3.cc  (+253 −186)

This diff is collapsed in the page capture; its contents are not shown.
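The collapsed file contains the new Conv2dNeonK3x3S2 implementation, so nothing from it is reproduced here. For orientation only, a plain scalar reference for the operation that kernel computes, a 3x3, stride-2 convolution over NCHW data with a per-output-channel bias, is sketched below (single batch, no padding handling); the real kernel vectorizes this loop nest with NEON intrinsics and is not shown in this capture.

// Scalar reference for a 3x3, stride-2 convolution (illustrative only; this
// is NOT the NEON kernel from the collapsed diff).
// input:  [in_c][in_h][in_w]   filter: [out_c][in_c][3][3]   bias: [out_c]
// output: [out_c][out_h][out_w], out_h = (in_h - 3) / 2 + 1, likewise out_w.
void Conv3x3S2Reference(const float *input, int in_c, int in_h, int in_w,
                        const float *filter, const float *bias,
                        float *output, int out_c) {
  const int out_h = (in_h - 3) / 2 + 1;
  const int out_w = (in_w - 3) / 2 + 1;
  for (int oc = 0; oc < out_c; ++oc) {
    for (int oh = 0; oh < out_h; ++oh) {
      for (int ow = 0; ow < out_w; ++ow) {
        float sum = bias ? bias[oc] : 0.0f;
        for (int ic = 0; ic < in_c; ++ic) {
          for (int kh = 0; kh < 3; ++kh) {
            for (int kw = 0; kw < 3; ++kw) {
              const int ih = oh * 2 + kh;  // stride 2 in height
              const int iw = ow * 2 + kw;  // stride 2 in width
              sum += input[(ic * in_h + ih) * in_w + iw] *
                     filter[((oc * in_c + ic) * 3 + kh) * 3 + kw];
            }
          }
        }
        output[(oc * out_h + oh) * out_w + ow] = sum;
      }
    }
  }
}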
mace/ops/conv_2d_benchmark.cc

@@ -61,8 +61,7 @@ static void Conv2d(int iters,
     const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
     mace::testing::ItemsProcessed(tot); \
     mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \
-    Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
-                         OC); \
+    Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \
   } \
   BENCHMARK( \
       BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE)

@@ -77,6 +76,10 @@ BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
 BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
 BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
 BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
+BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
+BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
+BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
+BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
 BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
 BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
 BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
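The first hunk only reflows the Conv2d call inside the BM_CONV_2D macro onto a single line; the second stamps out four new stride-2 benchmark cases. For readers unfamiliar with the token-pasting idiom these macros rely on, a stripped-down sketch with hypothetical names follows; the real macro also reports bytes processed and registers the function through the project's BENCHMARK macro.

#include <cstdint>
#include <cstdio>

// Hypothetical stand-in for mace::testing::ItemsProcessed (illustrative only).
static void ItemsProcessed(int64_t n) {
  std::printf("items processed: %lld\n", static_cast<long long>(n));
}

// Token-pasting macro that generates one uniquely named benchmark function per
// (N, C, H, W, STRIDE) combination, mirroring the BM_CONV_2D naming scheme.
#define BM_CONV_SKETCH(N, C, H, W, STRIDE)                            \
  static void BM_CONV_##N##_##C##_##H##_##W##_S##STRIDE(int iters) {  \
    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;  \
    ItemsProcessed(tot); /* run the conv2d op `iters` times here */   \
  }

BM_CONV_SKETCH(1, 64, 32, 32, 2)  // defines BM_CONV_1_64_32_32_S2

int main() { BM_CONV_1_64_32_32_S2(10); }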
mace/ops/conv_2d_test.cc

@@ -174,8 +174,8 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
     // generate random input
     index_t batch = 1 + rand() % 10;
     index_t input_channels = 1 + rand() % 50;
-    index_t height = 7 + rand() % 100;
-    index_t width = 7 + rand() % 100;
+    index_t height = 11 + rand() % 100;
+    index_t width = 11 + rand() % 100;
     index_t output_channels = 1 + rand() % 50;
     // Construct graph
     auto &net = test_net();
mace/ops/pooling_test.cc

@@ -155,9 +155,9 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
   net.RunOp(DeviceType::NEON);
 
   // Check
-  Tensor expected = CreateTensor<float>({1, 1, 2, 3}, {6, 8, 9, 16, 18, 19});
+  auto expected = CreateTensor<float>({1, 1, 2, 3}, {6, 8, 9, 16, 18, 19});
 
-  ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.001);
+  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
 }
 
 TEST_F(PoolingOpTest, MAX_k3x3s2x2) {

@@ -183,7 +183,7 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
   net.RunOp(DeviceType::NEON);
 
   // Check
-  Tensor expected = CreateTensor<float>({1, 1, 2, 3}, {11, 13, 14, 16, 18, 19});
+  auto expected = CreateTensor<float>({1, 1, 2, 3}, {11, 13, 14, 16, 18, 19});
 
-  ExpectTensorNear<float>(expected, *net.GetOutput("Output"), 0.001);
+  ExpectTensorNear<float>(*expected, *net.GetOutput("Output"), 0.001);
 }
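The only change in both hunks is that expected switches from a Tensor value to auto, with an added dereference in ExpectTensorNear, which suggests CreateTensor now hands back a pointer-like owner rather than a value. A minimal sketch of that ownership pattern, using a hypothetical Tensor and assuming a std::unique_ptr return (the real MACE signature may differ), is:

#include <memory>
#include <utility>
#include <vector>

// Hypothetical minimal Tensor used only to illustrate the ownership change.
struct Tensor {
  std::vector<int> shape;
  std::vector<float> data;
};

// Factory returning a smart pointer instead of a value; callers now write
// `auto expected = CreateTensor(...)` and dereference it as `*expected`.
std::unique_ptr<Tensor> CreateTensor(std::vector<int> shape,
                                     std::vector<float> data) {
  auto t = std::make_unique<Tensor>();
  t->shape = std::move(shape);
  t->data = std::move(data);
  return t;
}

int main() {
  auto expected = CreateTensor({1, 1, 2, 3}, {6, 8, 9, 16, 18, 19});
  const Tensor &ref = *expected;  // mirrors ExpectTensorNear(*expected, ...)
  (void)ref;
  return 0;
}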