Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
ff39c7a5
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ff39c7a5
编写于
9月 13, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add conv2d k1x1s1 NEON kernel
上级
b9492125
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
390 addition
and
56 deletion
+390
-56
mace/core/allocator.h
mace/core/allocator.h
+1
-1
mace/core/logging.h
mace/core/logging.h
+14
-3
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+14
-1
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+1
-0
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+6
-1
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+8
-8
mace/kernels/neon/conv_2d_neon.cc
mace/kernels/neon/conv_2d_neon.cc
+47
-31
mace/kernels/neon/conv_2d_neon_1x1.cc
mace/kernels/neon/conv_2d_neon_1x1.cc
+187
-0
mace/ops/BUILD
mace/ops/BUILD
+14
-6
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+37
-0
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+51
-0
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+10
-5
未找到文件。
mace/core/allocator.h
浏览文件 @
ff39c7a5
...
...
@@ -31,7 +31,7 @@ class Allocator {
template
<
typename
T
>
T
*
New
(
size_t
num_elements
)
{
if
(
num_elements
>
(
std
::
numeric_limits
<
size_t
>::
max
()
/
sizeof
(
T
)))
{
return
NULL
;
return
nullptr
;
}
void
*
p
=
New
(
sizeof
(
T
)
*
num_elements
);
T
*
typed_p
=
reinterpret_cast
<
T
*>
(
p
);
...
...
mace/core/logging.h
浏览文件 @
ff39c7a5
...
...
@@ -106,16 +106,27 @@ class LogMessageFatal : public LogMessage {
if (VLOG_IS_ON(lvl)) \
::mace::internal::LogMessage(__FILE__, __LINE__, mace::INFO)
// MACE_CHECK dies with a fatal error if condition is not true. It is *not*
// controlled by NDEBUG, so the check will be executed regardless of
// compilation mode. Therefore, it is safe to do things like:
// MACE_CHECK/MACE_ASSERT dies with a fatal error if condition is not true.
// MACE_ASSERT is controlled by NDEBUG ('-c opt' for bazel) while MACE_CHECK
// will be executed regardless of compilation mode.
// Therefore, it is safe to do things like:
// MACE_CHECK(fp->Write(x) == 4)
// MACE_CHECK(fp->Write(x) == 4, "Write failed")
// which are not correct for MACE_ASSERT.
#define MACE_CHECK(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Check failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#ifndef NDEBUG
#define MACE_ASSERT(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Assert failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#else
#define MACE_ASSERT(condition, ...) ((void)0)
#endif
template
<
typename
T
>
T
&&
CheckNotNull
(
const
char
*
file
,
int
line
,
const
char
*
exprtext
,
T
&&
t
)
{
if
(
t
==
nullptr
)
{
...
...
mace/core/testing/test_benchmark.cc
浏览文件 @
ff39c7a5
...
...
@@ -6,7 +6,9 @@
#include <cstdlib>
#include <algorithm>
#include <regex>
#include <vector>
#include "mace/core/logging.h"
#include "mace/core/testing/env_time.h"
#include "mace/core/testing/test_benchmark.h"
...
...
@@ -52,12 +54,23 @@ Benchmark* Benchmark::ArgPair(int x, int y) {
// Run all benchmarks
void
Benchmark
::
Run
()
{
Run
(
"all"
);
}
void
Benchmark
::
Run
(
const
char
*
pattern
)
{
if
(
!
all_benchmarks
)
return
;
if
(
std
::
string
(
pattern
)
==
"all"
)
{
pattern
=
".*"
;
}
std
::
regex
regex
(
pattern
);
// Compute name width.
int
width
=
10
;
char
name
[
100
];
std
::
smatch
match
;
for
(
auto
b
:
*
all_benchmarks
)
{
if
(
!
std
::
regex_match
(
b
->
name_
,
match
,
regex
))
continue
;
for
(
auto
arg
:
b
->
args_
)
{
strcpy
(
name
,
b
->
name_
.
c_str
());
if
(
arg
.
first
>=
0
)
{
...
...
@@ -74,7 +87,7 @@ void Benchmark::Run() {
printf
(
"%-*s %10s %10s
\n
"
,
width
,
"Benchmark"
,
"Time(ns)"
,
"Iterations"
);
printf
(
"%s
\n
"
,
string
(
width
+
22
,
'-'
).
c_str
());
for
(
auto
b
:
*
all_benchmarks
)
{
if
(
!
std
::
regex_match
(
b
->
name_
,
match
,
regex
))
continue
;
for
(
auto
arg
:
b
->
args_
)
{
strcpy
(
name
,
b
->
name_
.
c_str
());
if
(
arg
.
first
>=
0
)
{
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
ff39c7a5
...
...
@@ -28,6 +28,7 @@ class Benchmark {
Benchmark
*
ArgPair
(
int
x
,
int
y
);
static
void
Run
();
static
void
Run
(
const
char
*
pattern
);
private:
string
name_
;
...
...
mace/core/testing/test_benchmark_main.cc
浏览文件 @
ff39c7a5
...
...
@@ -9,7 +9,12 @@
int
main
(
int
argc
,
char
**
argv
)
{
std
::
cout
<<
"Running main() from test_main.cc
\n
"
;
mace
::
testing
::
Benchmark
::
Run
();
// TODO Use gflags
if
(
argc
==
2
)
{
mace
::
testing
::
Benchmark
::
Run
(
argv
[
1
]);
}
else
{
mace
::
testing
::
Benchmark
::
Run
(
"all"
);
}
return
0
;
}
mace/kernels/conv_2d.h
浏览文件 @
ff39c7a5
...
...
@@ -108,14 +108,14 @@ class Conv2dFunctor {
const
int
*
dilations_
;
// [dilation_h, dilation_w]
};
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
);
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/neon/conv_2d_neon.cc
浏览文件 @
ff39c7a5
...
...
@@ -9,39 +9,48 @@
namespace
mace
{
namespace
kernels
{
static
inline
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
static
inline
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
&
output_tensor
,
std
::
vector
<
index_t
>&
output_shape
)
{
Tensor
*
output_tensor
)
{
index_t
batch
=
input_shape
[
0
];
index_t
channels
=
input_shape
[
1
];
index_t
height
=
input_shape
[
2
];
index_t
width
=
input_shape
[
3
];
output_shape
[
0
]
=
batch
;
output_shape
[
1
]
=
channels
;
output_shape
[
2
]
=
paddings
[
0
]
+
height
;
output_shape
[
3
]
=
paddings
[
1
]
+
width
;
index_t
output_width
=
output_shape
[
3
];
int
padded_left
=
paddings
[
1
]
/
2
;
std
::
vector
<
index_t
>
output_shape
({
batch
,
channels
,
paddings
[
0
]
+
height
,
paddings
[
1
]
+
width
});
output_tensor
.
Resize
(
output_shape
);
float
*
output_ptr
=
output_tensor
.
mutable_data
<
float
>
();
memset
(
output_ptr
,
0
,
output_tensor
.
size
()
*
sizeof
(
float
));
output_ptr
+=
paddings
[
0
]
/
2
*
output_width
;
const
index_t
output_width
=
output_shape
[
3
];
const
int
padded_top
=
paddings
[
0
]
/
2
;
const
int
padded_left
=
paddings
[
1
]
/
2
;
output_tensor
->
Resize
(
output_shape
);
float
*
output_ptr
=
output_tensor
->
mutable_data
<
float
>
();
memset
(
output_ptr
,
0
,
output_tensor
->
size
()
*
sizeof
(
float
));
// Skip the padded top rows
output_ptr
+=
padded_top
*
output_width
;
for
(;
batch
>
0
;
--
batch
)
{
for
(;
channels
>
0
;
--
channels
)
{
for
(;
height
>
0
;
--
height
)
{
memcpy
(
output_ptr
+
padded_left
,
input
,
width
*
sizeof
(
float
));
memcpy
(
output_ptr
+
padded_left
,
input
,
width
*
sizeof
(
float
));
input
+=
width
;
output_ptr
+=
output_width
;
}
// Skip the padded bottom in this channel and top in the next channel
output_ptr
+=
paddings
[
0
]
*
output_width
;
}
}
}
extern
void
Conv2dNeonK1x1S1
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
...
...
@@ -57,9 +66,10 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // N
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dNeonFunction
selector
[
5
][
2
]
=
{
{
nullptr
,
Conv2dNeonK1x1S1
,
nullptr
},
{
...
...
@@ -80,10 +90,13 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // N
}
};
// not implement yet
if
(
paddings_
[
0
]
!=
paddings_
[
1
]
||
paddings_
[
0
]
>
5
||
strides_
[
0
]
!=
strides_
[
1
]
||
strides_
[
0
]
>
4
||
dilations_
[
0
]
!=
1
||
dilations_
[
1
]
!=
1
||
selector
[
paddings_
[
0
]
-
1
][
strides_
[
0
]
-
1
]
==
nullptr
)
{
index_t
kernel_h
=
filter_shape
[
2
];
index_t
kernel_w
=
filter_shape
[
3
];
if
(
kernel_h
!=
kernel_w
||
kernel_h
>
5
||
strides_
[
0
]
!=
strides_
[
1
]
||
strides_
[
0
]
>
2
||
dilations_
[
0
]
!=
1
||
dilations_
[
1
]
!=
1
||
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
]
==
nullptr
)
{
LOG
(
WARNING
)
<<
"NEON conv2d kernel not implementated, using slow vesion"
;
Conv2dFunctor
<
DeviceType
::
CPU
,
float
>
(
strides_
,
paddings_
,
dilations_
)(
input
,
input_shape
,
...
...
@@ -94,19 +107,22 @@ void Conv2dFunctor<DeviceType::NEON, float>::operator()(const float* input, // N
output_shape
);
}
// Keep this alive during kernel execution
Tensor
padded_input
;
std
::
vector
<
index_t
>
padded_input_shape
(
4
);
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
padded_input
,
padded_input_shape
);
auto
conv2d_neon_func
=
selector
[
paddings_
[
0
]
-
1
][
strides_
[
0
]
-
1
];
conv2d_neon_func
(
padded_input
.
data
<
float
>
(),
padded_input_shape
.
data
(),
filter
,
bias
,
output
,
output_shape
);
if
(
paddings_
[
0
]
>
0
||
paddings_
[
1
]
>
0
)
{
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
input
=
padded_input
.
data
<
float
>
();
input_shape
=
padded_input
.
shape
().
data
();
}
auto
conv2d_neon_func
=
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
];
conv2d_neon_func
(
input
,
input_shape
,
filter
,
bias
,
output
,
output_shape
);
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace mace
mace/kernels/neon/conv_2d_neon_1x1.cc
0 → 100644
浏览文件 @
ff39c7a5
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/conv_2d.h"
namespace
mace
{
namespace
kernels
{
void
Conv2dNeonK1x1S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
const
index_t
height
=
output_shape
[
2
];
const
index_t
width
=
output_shape
[
3
];
const
index_t
input_batch
=
input_shape
[
0
];
const
index_t
input_channels
=
input_shape
[
1
];
const
index_t
input_height
=
input_shape
[
2
];
const
index_t
input_width
=
input_shape
[
3
];
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
input_width
==
width
);
const
index_t
total_pixels
=
height
*
width
;
// Process 4 * 2 = 8 pixels for each innermost loop
// TODO Does 64 bit v.s. 32 bit index matters? need benchmark
const
index_t
total_loops
=
total_pixels
>>
3
;
const
index_t
loop_remaining
=
total_pixels
&
7
;
// benchmark omp collapsed(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
const
float
*
filter_ptr
=
filter
;
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
// TODO Will GCC opt these out?
float
*
channel_output_start
=
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
const
float
*
input_ptr
=
input
+
n
*
input_channels
*
input_height
*
input_width
;
// Fill with bias
float
*
output_ptr
=
channel_output_start
;
for
(
index_t
ptr
=
0
;
ptr
<
total_pixels
;
++
ptr
)
{
output_ptr
[
ptr
]
=
bias
[
c
];
// TODO can we avoid this?
}
index_t
inc
=
0
;
// Process 4 input channels in batch
for
(;
inc
+
3
<
input_channels
;
inc
+=
4
)
{
float
*
output_ptr
=
channel_output_start
;
// The begining of each input feature map channel
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
const
float
*
input_ptr1
=
input_ptr
+
total_pixels
;
const
float
*
input_ptr2
=
input_ptr1
+
total_pixels
;
const
float
*
input_ptr3
=
input_ptr2
+
total_pixels
;
// filter is in c_out, c_in, 1, 1 order
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
const
float
k0
=
filter_ptr
[
0
];
const
float
k1
=
filter_ptr
[
1
];
const
float
k2
=
filter_ptr
[
2
];
const
float
k3
=
filter_ptr
[
3
];
filter_ptr
+=
4
;
const
float32x4_t
vk0
=
vdupq_n_f32
(
k0
);
const
float32x4_t
vk1
=
vdupq_n_f32
(
k1
);
const
float32x4_t
vk2
=
vdupq_n_f32
(
k2
);
const
float32x4_t
vk3
=
vdupq_n_f32
(
k3
);
index_t
loop_itr
=
total_loops
;
for
(;
loop_itr
>
0
;
--
loop_itr
)
{
// Process 2 group of 4 floats
float32x4_t
out0
=
vld1q_f32
(
output_ptr
);
float32x4_t
out4
=
vld1q_f32
(
output_ptr
+
4
);
const
float32x4_t
in00
=
vld1q_f32
(
input_ptr
);
const
float32x4_t
in04
=
vld1q_f32
(
input_ptr
+
4
);
out0
=
vfmaq_f32
(
out0
,
in00
,
vk0
);
out4
=
vfmaq_f32
(
out4
,
in04
,
vk0
);
const
float32x4_t
in10
=
vld1q_f32
(
input_ptr1
);
const
float32x4_t
in14
=
vld1q_f32
(
input_ptr1
+
4
);
out0
=
vfmaq_f32
(
out0
,
in10
,
vk1
);
out4
=
vfmaq_f32
(
out4
,
in14
,
vk1
);
const
float32x4_t
in20
=
vld1q_f32
(
input_ptr2
);
const
float32x4_t
in24
=
vld1q_f32
(
input_ptr2
+
4
);
out0
=
vfmaq_f32
(
out0
,
in20
,
vk2
);
out4
=
vfmaq_f32
(
out4
,
in24
,
vk2
);
const
float32x4_t
in30
=
vld1q_f32
(
input_ptr3
);
const
float32x4_t
in34
=
vld1q_f32
(
input_ptr3
+
4
);
out0
=
vfmaq_f32
(
out0
,
in30
,
vk3
);
out4
=
vfmaq_f32
(
out4
,
in34
,
vk3
);
float
prev_output
=
output_ptr
[
0
];
// Save output
vst1q_f32
(
output_ptr
,
out0
);
vst1q_f32
(
output_ptr
+
4
,
out4
);
output_ptr
+=
8
;
input_ptr
+=
8
;
input_ptr1
+=
8
;
input_ptr2
+=
8
;
input_ptr3
+=
8
;
}
// Process the remaining pixels
index_t
remaining_pixels
=
loop_remaining
;
for
(;
remaining_pixels
>
0
;
--
remaining_pixels
)
{
const
float
mul
=
*
input_ptr
*
k0
;
const
float
mul1
=
*
input_ptr1
*
k1
;
const
float
mul2
=
*
input_ptr2
*
k2
;
const
float
mul3
=
*
input_ptr3
*
k3
;
float
prev_output
=
output_ptr
[
0
];
*
output_ptr
+=
mul
+
mul1
+
mul2
+
mul3
;
++
output_ptr
;
++
input_ptr
;
++
input_ptr1
;
++
input_ptr2
;
++
input_ptr3
;
}
// Skip these 4 feature maps
input_ptr
+=
3
*
total_pixels
;
}
// Process the remaining channels
for
(;
inc
<
input_channels
;
++
inc
)
{
float
*
output_ptr
=
channel_output_start
;
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
const
float
k0
=
filter_ptr
[
0
];
++
filter_ptr
;
const
float32x4_t
vk0
=
vdupq_n_f32
(
k0
);
index_t
loop_itr
=
total_loops
;
for
(;
loop_itr
>
0
;
--
loop_itr
)
{
float32x4_t
out0
=
vld1q_f32
(
output_ptr
);
float32x4_t
out4
=
vld1q_f32
(
output_ptr
+
4
);
const
float32x4_t
in0
=
vld1q_f32
(
input_ptr
);
const
float32x4_t
in4
=
vld1q_f32
(
input_ptr
+
4
);
out0
=
vfmaq_f32
(
out0
,
in0
,
vk0
);
out4
=
vfmaq_f32
(
out4
,
in4
,
vk0
);
// Save output
vst1q_f32
(
output_ptr
,
out0
);
vst1q_f32
(
output_ptr
+
4
,
out4
);
output_ptr
+=
8
;
input_ptr
+=
8
;
}
// Process the remaining pixels
index_t
remaining_pixels
=
loop_remaining
;
for
(;
remaining_pixels
>
0
;
--
remaining_pixels
)
{
const
float
mul
=
*
input_ptr
*
k0
;
*
output_ptr
+=
mul
;
++
output_ptr
;
++
input_ptr
;
}
}
}
}
};
}
// namespace kernels
}
// namespace mace
mace/ops/BUILD
浏览文件 @
ff39c7a5
...
...
@@ -25,7 +25,7 @@ cc_library(
name
=
"ops"
,
srcs
=
glob
(
[
"*.cc"
],
exclude
=
[
"*_test.cc"
],
exclude
=
[
"*_test.cc"
,
"*_benchmark.cc"
],
),
hdrs
=
glob
(
[
"*.h"
],
...
...
@@ -46,11 +46,6 @@ cc_test(
[
"*_test.cc"
],
),
copts
=
[
"-std=c++11"
],
linkopts
=
if_android
([
"-pie"
,
"-llog"
,
"-latomic"
,
]),
linkstatic
=
1
,
deps
=
[
":ops"
,
...
...
@@ -58,3 +53,16 @@ cc_test(
"@gtest//:gtest_main"
,
],
)
cc_test
(
name
=
"ops_benchmark"
,
srcs
=
glob
([
"*_benchmark.cc"
]),
deps
=
[
":ops"
,
"//mace/core:core"
,
"//mace/core:test_benchmark_main"
,
],
copts
=
[
'-std=c++11'
],
linkstatic
=
1
,
testonly
=
1
,
)
mace/ops/conv_2d_benchmark.cc
0 → 100644
浏览文件 @
ff39c7a5
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/conv_2d.h"
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
Conv2d
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel_h
,
int
kernel_w
,
int
stride
,
Padding
padding
,
int
output_channels
)
{
mace
::
testing
::
StopTiming
();
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
}
}
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \
} \
BENCHMARK(BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
1
,
1
,
1
,
VALID
,
128
,
float
);
}
// namespace mace
mace/ops/conv_2d_test.cc
浏览文件 @
ff39c7a5
...
...
@@ -136,4 +136,55 @@ TEST_F(Conv2dOpTest, Combined) {
ExpectTensorNear
<
float
>
(
expected
,
*
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
Conv2dOpTest
,
Conv1x1
)
{
// Construct graph
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
operator_def
());
// Add args
AddIntsArg
(
"strides"
,
{
1
,
1
});
AddIntArg
(
"padding"
,
Padding
::
VALID
);
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
5
,
3
,
10
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
5
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
});
AddInputFromArray
<
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Run
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
10
},
{
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
});
ExpectTensorNear
<
float
>
(
expected
,
*
GetOutput
(
"Output"
),
0.001
);
}
// TODO we need more tests
mace/ops/ops_test_util.h
浏览文件 @
ff39c7a5
...
...
@@ -9,8 +9,8 @@
#include "gtest/gtest.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/core/net.h"
#include "mace/core/tensor.h"
namespace
mace
{
...
...
@@ -29,7 +29,7 @@ class OpDefBuilder {
return
*
this
;
}
void
Finalize
(
OperatorDef
*
op_def
)
const
{
MACE_CHECK
(
op_def
!=
NULL
,
"input should not be null."
);
MACE_CHECK
(
op_def
!=
nullptr
,
"input should not be null."
);
*
op_def
=
op_def_
;
}
OperatorDef
op_def_
;
...
...
@@ -49,6 +49,7 @@ class OpsTestBase : public ::testing::Test {
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
float
*
input_data
=
input
->
mutable_data
<
float
>
();
// TODO check the dims
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
}
...
...
@@ -96,14 +97,18 @@ class OpsTestBase : public ::testing::Test {
OperatorDef
*
operator_def
()
{
return
&
op_def_
;
}
bool
RunOp
()
{
bool
RunOp
(
DeviceType
device
)
{
NetDef
net_def
;
net_def
.
add_op
()
->
CopyFrom
(
op_def_
);
VLOG
(
0
)
<<
net_def
.
DebugString
();
auto
net
=
CreateNet
(
net_def
,
&
ws_
,
DeviceType
::
CPU
);
auto
net
=
CreateNet
(
net_def
,
&
ws_
,
device
);
return
net
->
Run
();
}
bool
RunOp
()
{
return
RunOp
(
DeviceType
::
CPU
);
}
Tensor
*
GetOutput
(
const
char
*
output_name
)
{
return
ws_
.
GetTensor
(
output_name
);
}
...
...
@@ -209,6 +214,6 @@ void ExpectTensorNear(const Tensor& x, const Tensor& y, const double abs_err) {
Expector
<
T
>::
Near
(
x
,
y
,
abs_err
);
}
}
//
namespace mace
}
// namespace mace
#endif // MACE_OPS_TEST_UTIL_H_
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录