Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
578b382a
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
578b382a
编写于
9月 15, 2017
作者:
吴
吴承辉
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'style' into 'master'
Fix Google Style See merge request !43
上级
9c9af68e
8ae8f575
变更
66
显示空白变更内容
内联
并排
Showing
66 changed files
with
1096 additions
and
1361 deletions
+1096
-1361
mace/core/allocator.cc
mace/core/allocator.cc
+3
-7
mace/core/allocator.h
mace/core/allocator.h
+4
-6
mace/core/common.h
mace/core/common.h
+7
-7
mace/core/logging.cc
mace/core/logging.cc
+2
-3
mace/core/logging.h
mace/core/logging.h
+18
-21
mace/core/macros.h
mace/core/macros.h
+1
-2
mace/core/net.cc
mace/core/net.cc
+10
-17
mace/core/net.h
mace/core/net.h
+11
-18
mace/core/operator.cc
mace/core/operator.cc
+9
-20
mace/core/operator.h
mace/core/operator.h
+33
-50
mace/core/proto_utils.cc
mace/core/proto_utils.cc
+69
-92
mace/core/proto_utils.h
mace/core/proto_utils.h
+35
-58
mace/core/registry.h
mace/core/registry.h
+21
-22
mace/core/serializer.cc
mace/core/serializer.cc
+13
-16
mace/core/serializer.h
mace/core/serializer.h
+4
-4
mace/core/tensor.h
mace/core/tensor.h
+25
-30
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+3
-6
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+3
-3
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+0
-1
mace/core/types.h
mace/core/types.h
+15
-16
mace/core/workspace.cc
mace/core/workspace.cc
+9
-8
mace/core/workspace.h
mace/core/workspace.h
+3
-5
mace/examples/benchmark_example.cc
mace/examples/benchmark_example.cc
+2
-3
mace/kernels/addn.h
mace/kernels/addn.h
+7
-9
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+14
-25
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+84
-95
mace/kernels/conv_pool_2d_util.cc
mace/kernels/conv_pool_2d_util.cc
+17
-21
mace/kernels/conv_pool_2d_util.h
mace/kernels/conv_pool_2d_util.h
+11
-15
mace/kernels/neon/addn_neon.cc
mace/kernels/neon/addn_neon.cc
+6
-7
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+17
-21
mace/kernels/neon/conv_2d_neon.cc
mace/kernels/neon/conv_2d_neon.cc
+27
-55
mace/kernels/neon/conv_2d_neon_1x1.cc
mace/kernels/neon/conv_2d_neon_1x1.cc
+30
-31
mace/kernels/neon/conv_2d_neon_3x3.cc
mace/kernels/neon/conv_2d_neon_3x3.cc
+73
-66
mace/kernels/neon/conv_2d_neon_5x5.cc
mace/kernels/neon/conv_2d_neon_5x5.cc
+16
-16
mace/kernels/neon/max_pooling_neon_2x2.cc
mace/kernels/neon/max_pooling_neon_2x2.cc
+7
-12
mace/kernels/neon/max_pooling_neon_3x3.cc
mace/kernels/neon/max_pooling_neon_3x3.cc
+9
-14
mace/kernels/neon/pooling_neon.cc
mace/kernels/neon/pooling_neon.cc
+20
-34
mace/kernels/neon/relu_neon.cc
mace/kernels/neon/relu_neon.cc
+6
-7
mace/kernels/pooling.h
mace/kernels/pooling.h
+28
-36
mace/kernels/relu.h
mace/kernels/relu.h
+4
-4
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+28
-31
mace/ops/addn.cc
mace/ops/addn.cc
+2
-2
mace/ops/addn.h
mace/ops/addn.h
+4
-4
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+12
-15
mace/ops/addn_test.cc
mace/ops/addn_test.cc
+1
-1
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+2
-2
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+47
-42
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+21
-21
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+17
-18
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+2
-2
mace/ops/conv_2d.h
mace/ops/conv_2d.h
+12
-17
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+26
-22
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+74
-102
mace/ops/conv_pool_2d_base.h
mace/ops/conv_pool_2d_base.h
+8
-9
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+45
-40
mace/ops/pooling.cc
mace/ops/pooling.cc
+2
-3
mace/ops/pooling.h
mace/ops/pooling.h
+19
-26
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+19
-17
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+46
-71
mace/ops/relu.cc
mace/ops/relu.cc
+2
-2
mace/ops/relu.h
mace/ops/relu.h
+4
-4
mace/ops/relu_benchmark.cc
mace/ops/relu_benchmark.cc
+11
-13
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+1
-1
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+4
-3
mace/ops/resize_bilinear.h
mace/ops/resize_bilinear.h
+10
-9
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+1
-1
未找到文件。
mace/core/allocator.cc
浏览文件 @
578b382a
...
...
@@ -7,13 +7,9 @@
namespace
mace
{
static
std
::
unique_ptr
<
CPUAllocator
>
g_cpu_allocator
(
new
CPUAllocator
());
CPUAllocator
*
cpu_allocator
()
{
return
g_cpu_allocator
.
get
();
}
CPUAllocator
*
cpu_allocator
()
{
return
g_cpu_allocator
.
get
();
}
void
SetCPUAllocator
(
CPUAllocator
*
alloc
)
{
g_cpu_allocator
.
reset
(
alloc
);
}
void
SetCPUAllocator
(
CPUAllocator
*
alloc
)
{
g_cpu_allocator
.
reset
(
alloc
);
}
Allocator
*
GetDeviceAllocator
(
DeviceType
type
)
{
switch
(
type
)
{
...
...
mace/core/allocator.h
浏览文件 @
578b382a
...
...
@@ -39,7 +39,7 @@ class Allocator {
}
};
class
CPUAllocator
:
public
Allocator
{
class
CPUAllocator
:
public
Allocator
{
public:
~
CPUAllocator
()
override
{}
void
*
New
(
size_t
nbytes
)
override
{
...
...
@@ -55,9 +55,7 @@ class CPUAllocator: public Allocator {
return
data
;
}
void
Delete
(
void
*
data
)
override
{
free
(
data
);
}
void
Delete
(
void
*
data
)
override
{
free
(
data
);
}
void
CopyBytes
(
void
*
dst
,
const
void
*
src
,
size_t
size
)
override
{
memcpy
(
dst
,
src
,
size
);
...
...
mace/core/common.h
浏览文件 @
578b382a
...
...
@@ -5,12 +5,12 @@
#ifndef MACE_CORE_COMMON_H_
#define MACE_CORE_COMMON_H_
#include <
set
>
#include <
algorithm
>
#include <map>
#include <string>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <algorithm>
#include "mace/core/logging.h"
...
...
@@ -25,7 +25,7 @@ typedef int64_t index_t;
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:
\
private:
\
classname(const classname&) = delete; \
classname& operator=(const classname&) = delete
#endif
...
...
mace/core/logging.cc
浏览文件 @
578b382a
...
...
@@ -2,7 +2,6 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/logging.h"
#include <stdlib.h>
...
...
@@ -62,11 +61,11 @@ void LogMessage::GenerateLogMessage() {
#else
void
LogMessage
::
GenerateLogMessage
()
{
fprintf
(
stderr
,
"%c %s:%d] %s
\n
"
,
"IWEF"
[
severity_
],
fname_
,
line_
,
str
().
c_str
());
fprintf
(
stderr
,
"%c %s:%d] %s
\n
"
,
"IWEF"
[
severity_
],
fname_
,
line_
,
str
().
c_str
());
}
#endif
namespace
{
// Parse log level (int64_t) from environment variable (char*)
...
...
mace/core/logging.h
浏览文件 @
578b382a
...
...
@@ -5,8 +5,8 @@
#ifndef MACE_CORE_LOGGING_H_
#define MACE_CORE_LOGGING_H_
#include <sstream>
#include <limits>
#include <sstream>
#include <string>
#undef ERROR
...
...
@@ -30,8 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) {
}
template
<
typename
T
,
typename
...
Args
>
inline
void
MakeStringInternal
(
std
::
stringstream
&
ss
,
const
T
&
t
,
const
Args
&
...
args
)
{
inline
void
MakeStringInternal
(
std
::
stringstream
&
ss
,
const
T
&
t
,
const
Args
&
...
args
)
{
MakeStringInternal
(
ss
,
t
);
MakeStringInternal
(
ss
,
args
...);
}
...
...
@@ -48,9 +48,7 @@ template <>
inline
string
MakeString
(
const
string
&
str
)
{
return
str
;
}
inline
string
MakeString
(
const
char
*
c_str
)
{
return
string
(
c_str
);
}
inline
string
MakeString
(
const
char
*
c_str
)
{
return
string
(
c_str
);
}
class
LogMessage
:
public
std
::
basic_ostringstream
<
char
>
{
public:
...
...
@@ -85,8 +83,7 @@ class LogMessageFatal : public LogMessage {
::mace::internal::LogMessage(__FILE__, __LINE__, mace::WARNING)
#define _MACE_LOG_ERROR \
::mace::internal::LogMessage(__FILE__, __LINE__, mace::ERROR)
#define _MACE_LOG_FATAL \
::mace::internal::LogMessageFatal(__FILE__, __LINE__)
#define _MACE_LOG_FATAL ::mace::internal::LogMessageFatal(__FILE__, __LINE__)
#define _MACE_LOG_QFATAL _MACE_LOG_FATAL
...
...
@@ -96,10 +93,10 @@ class LogMessageFatal : public LogMessage {
// Turn VLOG off when under mobile devices for considerations of binary size.
#define VLOG_IS_ON(lvl) ((lvl) <= 0)
#else
// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log level
// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log
// level
// of VLOG
#define VLOG_IS_ON(lvl) \
((lvl) <= ::mace::internal::LogMessage::MinVLogLevel())
#define VLOG_IS_ON(lvl) ((lvl) <= ::mace::internal::LogMessage::MinVLogLevel())
#endif
#define VLOG(lvl) \
...
...
mace/core/macros.h
浏览文件 @
578b382a
...
...
@@ -17,5 +17,4 @@
#define MACE_PREDICT_TRUE(x) (x)
#endif
#endif //MACE_CORE_MACROS_H_
#endif // MACE_CORE_MACROS_H_
mace/core/net.cc
浏览文件 @
578b382a
...
...
@@ -6,22 +6,19 @@
namespace
mace
{
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
name_
(
net_def
->
name
())
{
}
:
name_
(
net_def
->
name
())
{}
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
NetBase
(
net_def
,
ws
,
type
)
{
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
NetBase
(
net_def
,
ws
,
type
)
{
VLOG
(
1
)
<<
"Constructing SimpleNet "
<<
net_def
->
name
();
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
VLOG
(
1
)
<<
"Creating operator "
<<
operator_def
.
name
()
<<
":"
<<
operator_def
.
type
();
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
OperatorDef
temp_def
(
operator_def
);
op
=
CreateOperator
(
temp_def
,
ws
,
type
);
operators_
.
emplace_back
(
std
::
move
(
op
));
...
...
@@ -40,20 +37,16 @@ bool SimpleNet::Run() {
return
true
;
}
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
{
std
::
shared_ptr
<
NetDef
>
tmp_net_def
(
new
NetDef
(
net_def
));
return
CreateNet
(
tmp_net_def
,
ws
,
type
);
}
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
{
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
{
unique_ptr
<
NetBase
>
net
(
new
SimpleNet
(
net_def
,
ws
,
type
));
return
net
;
}
}
// namespace mace
mace/core/net.h
浏览文件 @
578b382a
...
...
@@ -6,24 +6,21 @@
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
class
NetBase
{
public:
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
virtual
~
NetBase
()
noexcept
{}
virtual
bool
Run
()
=
0
;
const
string
&
Name
()
const
{
return
name_
;
}
const
string
&
Name
()
const
{
return
name_
;
}
protected:
string
name_
;
...
...
@@ -33,8 +30,7 @@ class NetBase {
class
SimpleNet
:
public
NetBase
{
public:
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
bool
Run
()
override
;
...
...
@@ -45,13 +41,10 @@ class SimpleNet : public NetBase {
DISABLE_COPY_AND_ASSIGN
(
SimpleNet
);
};
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
}
// namespace mace
...
...
mace/core/operator.cc
浏览文件 @
578b382a
...
...
@@ -11,33 +11,22 @@ std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
return
&
g_device_type_registry
;
}
MACE_DEFINE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
MACE_DEFINE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
CPU
,
CPUOperatorRegistry
);
MACE_DEFINE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
MACE_DEFINE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
NEON
,
NEONOperatorRegistry
);
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
)
{
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
)
{
OperatorRegistry
*
registry
=
gDeviceTypeRegistry
()
->
at
(
type
);
return
registry
->
Create
(
operator_def
.
type
(),
operator_def
,
ws
);
}
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
operator_ws_
(
ws
),
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{
}
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{}
}
// namespace mace
mace/core/operator.h
浏览文件 @
578b382a
...
...
@@ -5,12 +5,12 @@
#ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#include "mace/core/proto_utils.h"
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/tensor.h"
#include "mace/core/proto_utils.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
...
...
@@ -23,22 +23,21 @@ class OperatorBase {
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
HasArgument
(
*
operator_def_
,
name
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
T
GetSingleArgument
(
const
string
&
name
,
const
T
&
default_value
)
const
{
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
T
>
(
*
operator_def_
,
name
,
default_value
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
bool
HasSingleArgumentOfType
(
const
string
&
name
)
const
{
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
HasSingleArgumentOfType
<
OperatorDef
,
T
>
(
*
operator_def_
,
name
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
vector
<
T
>
GetRepeatedArgument
(
const
string
&
name
,
const
vector
<
T
>
&
default_value
=
{})
const
{
const
string
&
name
,
const
vector
<
T
>
&
default_value
=
{})
const
{
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
GetRepeatedArgument
<
OperatorDef
,
T
>
(
*
operator_def_
,
name
,
default_value
);
...
...
@@ -49,9 +48,7 @@ class OperatorBase {
return
inputs_
[
idx
];
}
inline
Tensor
*
Output
(
int
idx
)
{
return
outputs_
[
idx
];
}
inline
Tensor
*
Output
(
int
idx
)
{
return
outputs_
[
idx
];
}
inline
int
InputSize
()
{
return
inputs_
.
size
();
}
inline
int
OutputSize
()
{
return
outputs_
.
size
();
}
...
...
@@ -70,9 +67,7 @@ class OperatorBase {
operator_def_
=
operator_def
;
}
inline
bool
has_debug_def
()
const
{
return
operator_def_
!=
nullptr
;
}
inline
bool
has_debug_def
()
const
{
return
operator_def_
!=
nullptr
;
}
protected:
Workspace
*
operator_ws_
;
...
...
@@ -90,26 +85,22 @@ class Operator : public OperatorBase {
:
OperatorBase
(
operator_def
,
ws
)
{
for
(
const
string
&
input_str
:
operator_def
.
input
())
{
const
Tensor
*
tensor
=
ws
->
GetTensor
(
input_str
);
MACE_CHECK
(
tensor
!=
nullptr
,
"op "
,
operator_def
.
type
(),
": Encountered a non-existing input tensor: "
,
input_str
);
MACE_CHECK
(
tensor
!=
nullptr
,
"op "
,
operator_def
.
type
(),
": Encountered a non-existing input tensor: "
,
input_str
);
inputs_
.
push_back
(
tensor
);
}
for
(
const
string
&
output_str
:
operator_def
.
output
())
{
outputs_
.
push_back
(
MACE_CHECK_NOTNULL
(
ws
->
CreateTensor
(
output_str
,
DeviceContext
<
D
>::
allocator
(),
DataTypeToEnum
<
T
>::
v
())));
outputs_
.
push_back
(
MACE_CHECK_NOTNULL
(
ws
->
CreateTensor
(
output_str
,
DeviceContext
<
D
>::
allocator
(),
DataTypeToEnum
<
T
>::
v
())));
}
}
virtual
bool
Run
()
override
=
0
;
~
Operator
()
noexcept
override
{}
};
// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of the
// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of
// the
// operator's inputs and outputs, in order to avoid confusion. For example, for
// a fully convolution layer that has input, weight and bias, you can define its
// input tags as:
...
...
@@ -145,37 +136,29 @@ struct DeviceTypeRegisterer {
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(
\
DeviceType)(type, &registry_function);
\
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(
DeviceType)(
\
type, &registry_function);
\
}
MACE_DECLARE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_DECLARE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_DECLARE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
);
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
);
}
// namespace mace
#endif
//
MACE_CORE_OPERATOR_H
#endif
//
MACE_CORE_OPERATOR_H
mace/core/proto_utils.cc
浏览文件 @
578b382a
...
...
@@ -5,9 +5,9 @@
#include "mace/core/proto_utils.h"
#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <fstream>
#include <unistd.h>
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"
...
...
@@ -82,8 +82,7 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
return
proto
->
ParseFromCodedStream
(
&
coded_stream
);
}
void
WriteProtoToBinaryFile
(
const
MessageLite
&
/*proto*/
,
void
WriteProtoToBinaryFile
(
const
MessageLite
&
/*proto*/
,
const
char
*
/*filename*/
)
{
LOG
(
FATAL
)
<<
"Not implemented yet."
;
}
...
...
@@ -118,7 +117,7 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) {
}
bool
ReadProtoFromBinaryFile
(
const
char
*
filename
,
MessageLite
*
proto
)
{
#if defined
(_MSC_VER) // for MSC compiler binary flag needs to be specified
#if defined(_MSC_VER) // for MSC compiler binary flag needs to be specified
int
fd
=
open
(
filename
,
O_RDONLY
|
O_BINARY
);
#else
int
fd
=
open
(
filename
,
O_RDONLY
);
...
...
@@ -138,8 +137,8 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
void
WriteProtoToBinaryFile
(
const
MessageLite
&
proto
,
const
char
*
filename
)
{
int
fd
=
open
(
filename
,
O_WRONLY
|
O_CREAT
|
O_TRUNC
,
0644
);
MACE_CHECK
(
fd
!=
-
1
,
"File cannot be created: "
,
filename
,
" error number: "
,
errno
);
MACE_CHECK
(
fd
!=
-
1
,
"File cannot be created: "
,
filename
,
" error number: "
,
errno
);
std
::
unique_ptr
<
ZeroCopyOutputStream
>
raw_output
(
new
FileOutputStream
(
fd
));
std
::
unique_ptr
<
CodedOutputStream
>
coded_output
(
new
CodedOutputStream
(
raw_output
.
get
()));
...
...
@@ -151,18 +150,17 @@ void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
#endif // MACE_USE_LITE_PROTO
ArgumentHelper
::
ArgumentHelper
(
const
OperatorDef
&
def
)
{
for
(
auto
&
arg
:
def
.
arg
())
{
ArgumentHelper
::
ArgumentHelper
(
const
OperatorDef
&
def
)
{
for
(
auto
&
arg
:
def
.
arg
())
{
if
(
arg_map_
.
find
(
arg
.
name
())
!=
arg_map_
.
end
())
{
MACE_CHECK
(
arg
.
SerializeAsString
()
==
arg_map_
[
arg
.
name
()].
SerializeAsString
(),
"Found argument of the same name '"
,
arg
.
name
(),
"' but with different contents: "
,
ProtoDebugString
(
def
));
"Found argument of the same name '"
,
arg
.
name
(),
"' but with different contents: "
,
ProtoDebugString
(
def
));
LOG
(
WARNING
)
<<
"Duplicated argument name found in operator def: "
<<
ProtoDebugString
(
def
)
<<
", arg: "
<<
ProtoDebugString
(
arg
);
<<
ProtoDebugString
(
def
)
<<
", arg: "
<<
ProtoDebugString
(
arg
);
}
arg_map_
[
arg
.
name
()]
=
arg
;
...
...
@@ -171,8 +169,7 @@ ArgumentHelper::ArgumentHelper(const OperatorDef &def) {
ArgumentHelper
::
ArgumentHelper
(
const
NetDef
&
netdef
)
{
for
(
auto
&
arg
:
netdef
.
arg
())
{
MACE_CHECK
(
arg_map_
.
count
(
arg
.
name
())
==
0
,
MACE_CHECK
(
arg_map_
.
count
(
arg
.
name
())
==
0
,
"Duplicated argument name found in net def: "
,
ProtoDebugString
(
netdef
));
arg_map_
[
arg
.
name
()]
=
arg
;
...
...
@@ -192,31 +189,23 @@ bool SupportsLosslessConversion(const InputType& value) {
}
}
#define INSTANTIATE_GET_SINGLE_ARGUMENT(
\
T, fieldname, enforce_lossless_conversion)
\
#define INSTANTIATE_GET_SINGLE_ARGUMENT(
T, fieldname,
\
enforce_lossless_conversion)
\
template <> \
T ArgumentHelper::GetSingleArgument<T>(
\
const string& name, const T& default_value) const {
\
T ArgumentHelper::GetSingleArgument<T>(
const string& name,
\
const T& default_value) const {
\
if (arg_map_.count(name) == 0) { \
VLOG(1) << "Using default parameter value " << default_value \
<< " for parameter " << name; \
return default_value; \
} \
MACE_CHECK( \
arg_map_.at(name).has_##fieldname(), \
"Argument ", \
name, \
MACE_CHECK(arg_map_.at(name).has_##fieldname(), "Argument ", name, \
" does not have the right field: expected field " #fieldname); \
auto value = arg_map_.at(name).fieldname(); \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(value), T>(value); \
MACE_CHECK( \
supportsConversion, \
"Value", \
value, \
" of argument ", \
name, \
MACE_CHECK(supportsConversion, "Value", value, " of argument ", name, \
"cannot be represented correctly in a target type"); \
} \
return value; \
...
...
@@ -242,8 +231,8 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(size_t, i, true)
INSTANTIATE_GET_SINGLE_ARGUMENT
(
string
,
s
,
false
)
#undef INSTANTIATE_GET_SINGLE_ARGUMENT
#define INSTANTIATE_GET_REPEATED_ARGUMENT(
\
T, fieldname, enforce_lossless_conversion)
\
#define INSTANTIATE_GET_REPEATED_ARGUMENT(
T, fieldname,
\
enforce_lossless_conversion)
\
template <> \
vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string& name, const std::vector<T>& default_value) const { \
...
...
@@ -255,12 +244,7 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(string, s, false)
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(v), T>(v); \
MACE_CHECK( \
supportsConversion, \
"Value", \
v, \
" of argument ", \
name, \
MACE_CHECK(supportsConversion, "Value", v, " of argument ", name, \
"cannot be represented correctly in a target type"); \
} \
values.push_back(v); \
...
...
@@ -282,13 +266,13 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(string, strings, false)
#undef INSTANTIATE_GET_REPEATED_ARGUMENT
#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \
template <>
\
Argument MakeArgument(const string& name, const T& value) {
\
template <>
\
Argument MakeArgument(const string& name, const T& value) {
\
Argument arg; \
arg.set_name(name); \
arg.set_##fieldname(value); \
return arg; \
}
}
MACE_MAKE_SINGULAR_ARGUMENT
(
bool
,
i
)
MACE_MAKE_SINGULAR_ARGUMENT
(
float
,
f
)
...
...
@@ -306,15 +290,15 @@ Argument MakeArgument(const string& name, const MessageLite& value) {
}
#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \
template <>
\
Argument MakeArgument(const string& name, const vector<T>& value) {
\
template <>
\
Argument MakeArgument(const string& name, const vector<T>& value) {
\
Argument arg; \
arg.set_name(name); \
for (const auto& v : value) { \
arg.add_##fieldname(v); \
} \
return arg; \
}
}
MACE_MAKE_REPEATED_ARGUMENT
(
float
,
floats
)
MACE_MAKE_REPEATED_ARGUMENT
(
int
,
ints
)
...
...
@@ -328,30 +312,23 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
return
arg
;
}
}
MACE_CHECK
(
false
,
"Argument named "
,
name
,
"does not exist in operator "
,
MACE_CHECK
(
false
,
"Argument named "
,
name
,
"does not exist in operator "
,
ProtoDebugString
(
def
));
}
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
def_value
)
{
for
(
const
Argument
&
arg
:
def
.
arg
())
{
if
(
arg
.
name
()
==
name
)
{
MACE_CHECK
(
arg
.
has_i
(),
"Can't parse argument as bool: "
,
ProtoDebugString
(
arg
));
MACE_CHECK
(
arg
.
has_i
(),
"Can't parse argument as bool: "
,
ProtoDebugString
(
arg
));
return
arg
.
i
();
}
}
return
def_value
;
}
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
OperatorDef
*
def
)
{
for
(
int
i
=
0
;
i
<
def
->
arg_size
();
++
i
)
{
if
(
def
->
arg
(
i
).
name
()
==
name
)
{
...
...
mace/core/proto_utils.h
浏览文件 @
578b382a
...
...
@@ -12,15 +12,14 @@
#include "google/protobuf/message.h"
#endif // !MACE_USE_LITE_PROTO
#include "mace/proto/mace.pb.h"
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
using
std
::
string
;
using
::
google
::
protobuf
::
MessageLite
;
// Common interfaces that reads file contents into a string.
bool
ReadStringFromFile
(
const
char
*
filename
,
string
*
str
);
bool
WriteStringToFile
(
const
string
&
str
,
const
char
*
filename
);
...
...
@@ -46,8 +45,7 @@ inline string ProtoDebugString(const MessageLite& proto) {
// Text format MessageLite wrappers: these functions do nothing but just
// allowing things to compile. It will produce a runtime error if you are using
// MessageLite but still want text support.
inline
bool
ReadProtoFromTextFile
(
const
char
*
/*filename*/
,
inline
bool
ReadProtoFromTextFile
(
const
char
*
/*filename*/
,
MessageLite
*
/*proto*/
)
{
LOG
(
FATAL
)
<<
"If you are running lite version, you should not be "
<<
"calling any text-format protobuffers."
;
...
...
@@ -57,8 +55,7 @@ inline bool ReadProtoFromTextFile(const string filename, MessageLite* proto) {
return
ReadProtoFromTextFile
(
filename
.
c_str
(),
proto
);
}
inline
void
WriteProtoToTextFile
(
const
MessageLite
&
/*proto*/
,
inline
void
WriteProtoToTextFile
(
const
MessageLite
&
/*proto*/
,
const
char
*
/*filename*/
)
{
LOG
(
FATAL
)
<<
"If you are running lite version, you should not be "
<<
"calling any text-format protobuffers."
;
...
...
@@ -107,13 +104,10 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) {
#endif // MACE_USE_LITE_PROTO
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
class
IterableOutputs
=
std
::
initializer_list
<
string
>
,
class
IterableArgs
=
std
::
initializer_list
<
Argument
>>
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
const
IterableInputs
&
inputs
,
const
IterableOutputs
&
outputs
,
const
IterableArgs
&
args
)
{
...
...
@@ -134,19 +128,12 @@ OperatorDef CreateOperatorDef(
// A simplified version compared to the full CreateOperator, if you do not need
// to specify args.
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
class
IterableOutputs
=
std
::
initializer_list
<
string
>>
inline
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
inline
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
const
IterableInputs
&
inputs
,
const
IterableOutputs
&
outputs
)
{
return
CreateOperatorDef
(
type
,
name
,
inputs
,
outputs
,
return
CreateOperatorDef
(
type
,
name
,
inputs
,
outputs
,
std
::
vector
<
Argument
>
());
}
...
...
@@ -166,9 +153,7 @@ class ArgumentHelper {
}
template
<
typename
Def
,
typename
T
>
static
T
GetSingleArgument
(
const
Def
&
def
,
const
string
&
name
,
static
T
GetSingleArgument
(
const
Def
&
def
,
const
string
&
name
,
const
T
&
default_value
)
{
return
ArgumentHelper
(
def
).
GetSingleArgument
<
T
>
(
name
,
default_value
);
}
...
...
@@ -180,8 +165,7 @@ class ArgumentHelper {
template
<
typename
Def
,
typename
T
>
static
vector
<
T
>
GetRepeatedArgument
(
const
Def
&
def
,
const
string
&
name
,
const
Def
&
def
,
const
string
&
name
,
const
std
::
vector
<
T
>&
default_value
=
std
::
vector
<
T
>
())
{
return
ArgumentHelper
(
def
).
GetRepeatedArgument
<
T
>
(
name
,
default_value
);
}
...
...
@@ -192,8 +176,7 @@ class ArgumentHelper {
}
template
<
typename
Def
,
typename
MessageType
>
static
vector
<
MessageType
>
GetRepeatedMessageArgument
(
const
Def
&
def
,
static
vector
<
MessageType
>
GetRepeatedMessageArgument
(
const
Def
&
def
,
const
string
&
name
)
{
return
ArgumentHelper
(
def
).
GetRepeatedMessageArgument
<
MessageType
>
(
name
);
}
...
...
@@ -216,8 +199,7 @@ class ArgumentHelper {
MACE_CHECK
(
arg_map_
.
count
(
name
),
"Cannot find parameter named "
+
name
);
MessageType
message
;
if
(
arg_map_
.
at
(
name
).
has_s
())
{
MACE_CHECK
(
message
.
ParseFromString
(
arg_map_
.
at
(
name
).
s
()),
MACE_CHECK
(
message
.
ParseFromString
(
arg_map_
.
at
(
name
).
s
()),
"Faild to parse content from the string"
);
}
else
{
VLOG
(
1
)
<<
"Return empty message for parameter "
<<
name
;
...
...
@@ -230,8 +212,7 @@ class ArgumentHelper {
MACE_CHECK
(
arg_map_
.
count
(
name
),
"Cannot find parameter named "
+
name
);
vector
<
MessageType
>
messages
(
arg_map_
.
at
(
name
).
strings_size
());
for
(
int
i
=
0
;
i
<
messages
.
size
();
++
i
)
{
MACE_CHECK
(
messages
[
i
].
ParseFromString
(
arg_map_
.
at
(
name
).
strings
(
i
)),
MACE_CHECK
(
messages
[
i
].
ParseFromString
(
arg_map_
.
at
(
name
).
strings
(
i
)),
"Faild to parse content from the string"
);
}
return
messages
;
...
...
@@ -242,14 +223,10 @@ class ArgumentHelper {
};
const
Argument
&
GetArgument
(
const
OperatorDef
&
def
,
const
string
&
name
);
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
def_value
=
false
);
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
OperatorDef
*
def
);
template
<
typename
T
>
...
...
mace/core/registry.h
浏览文件 @
578b382a
...
...
@@ -12,7 +12,7 @@ namespace mace {
template
<
class
SrcType
,
class
ObjectType
,
class
...
Args
>
class
Registry
{
public:
typedef
std
::
function
<
std
::
unique_ptr
<
ObjectType
>
(
Args
...)
>
Creator
;
typedef
std
::
function
<
std
::
unique_ptr
<
ObjectType
>
(
Args
...)
>
Creator
;
Registry
()
:
registry_
()
{}
...
...
@@ -24,7 +24,7 @@ class Registry {
inline
bool
Has
(
const
SrcType
&
key
)
{
return
registry_
.
count
(
key
)
!=
0
;
}
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
{
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
{
if
(
registry_
.
count
(
key
)
==
0
)
{
VLOG
(
2
)
<<
"Key not registered: "
<<
key
;
return
nullptr
;
...
...
@@ -60,7 +60,7 @@ class Registerer {
}
template
<
class
DerivedType
>
static
unique_ptr
<
ObjectType
>
DefaultCreator
(
Args
...
args
)
{
static
unique_ptr
<
ObjectType
>
DefaultCreator
(
Args
...
args
)
{
return
std
::
unique_ptr
<
ObjectType
>
(
new
DerivedType
(
args
...));
}
};
...
...
@@ -86,12 +86,12 @@ class Registerer {
}
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(
\
RegistryName, std::string, ObjectType,
##__VA_ARGS__)
MACE_DECLARE_TYPED_REGISTRY(
RegistryName, std::string, ObjectType,
\
##__VA_ARGS__)
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(
\
RegistryName, std::string, ObjectType,
##__VA_ARGS__)
MACE_DEFINE_TYPED_REGISTRY(
RegistryName, std::string, ObjectType,
\
##__VA_ARGS__)
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \
namespace { \
...
...
@@ -101,8 +101,7 @@ class Registerer {
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
key, RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
...
...
mace/core/serializer.cc
浏览文件 @
578b382a
...
...
@@ -4,7 +4,6 @@
#include "mace/core/serializer.h"
namespace
mace
{
unique_ptr
<
TensorProto
>
Serializer
::
Serialize
(
const
Tensor
&
tensor
,
...
...
@@ -15,8 +14,8 @@ unique_ptr<TensorProto> Serializer::Serialize(const Tensor &tensor,
unique_ptr
<
Tensor
>
Serializer
::
Deserialize
(
const
TensorProto
&
proto
,
DeviceType
type
)
{
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
proto
.
data_type
()));
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
proto
.
data_type
()));
vector
<
index_t
>
dims
;
for
(
const
index_t
d
:
proto
.
dims
())
{
dims
.
push_back
(
d
);
...
...
@@ -25,8 +24,7 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
switch
(
proto
.
data_type
())
{
case
DT_FLOAT
:
tensor
->
Copy
<
float
>
(
proto
.
float_data
().
data
(),
proto
.
float_data
().
size
());
tensor
->
Copy
<
float
>
(
proto
.
float_data
().
data
(),
proto
.
float_data
().
size
());
break
;
case
DT_DOUBLE
:
tensor
->
Copy
<
double
>
(
proto
.
double_data
().
data
(),
...
...
@@ -65,8 +63,7 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
for
(
int
i
=
0
;
i
<
proto
.
string_data
().
size
();
++
i
)
{
content
[
i
]
=
proto
.
string_data
(
i
);
}
}
break
;
}
break
;
default:
MACE_NOT_IMPLEMENTED
;
break
;
...
...
mace/core/serializer.h
浏览文件 @
578b382a
...
...
@@ -5,9 +5,9 @@
#ifndef MACE_CORE_SERIALIZER_H_
#define MACE_CORE_SERIALIZER_H_
#include "mace/proto/mace.pb.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
...
...
mace/core/tensor.h
浏览文件 @
578b382a
...
...
@@ -5,11 +5,11 @@
#ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/allocator.h"
#include "mace/core/
types
.h"
#include "mace/core/
common
.h"
#include "mace/core/logging.h"
#include "mace/core/types.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
...
...
@@ -41,20 +41,17 @@ namespace mace {
break; \
}
#define CASES(TYPE_ENUM, STMTS) \
CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
, LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;)
class
Tensor
{
public:
Tensor
()
:
alloc_
(
cpu_allocator
()),
size_
(
0
),
dtype_
(
DT_FLOAT
),
data_
(
nullptr
)
{};
:
alloc_
(
cpu_allocator
()),
size_
(
0
),
dtype_
(
DT_FLOAT
),
data_
(
nullptr
){};
Tensor
(
Allocator
*
a
,
DataType
type
)
:
alloc_
(
a
),
size_
(
0
),
dtype_
(
type
),
data_
(
nullptr
)
{};
:
alloc_
(
a
),
size_
(
0
),
dtype_
(
type
),
data_
(
nullptr
){};
~
Tensor
()
{
if
(
alloc_
&&
data_
.
get
())
{
...
...
@@ -92,9 +89,8 @@ class Tensor {
if
(
data_
.
get
()
||
size_
==
0
)
{
return
data_
.
get
();
}
else
{
CASES
(
dtype_
,
data_
.
reset
(
alloc_
->
New
(
size_
*
sizeof
(
T
)),
[
this
](
void
*
ptr
)
{
alloc_
->
Delete
(
ptr
);
}));
CASES
(
dtype_
,
data_
.
reset
(
alloc_
->
New
(
size_
*
sizeof
(
T
)),
[
this
](
void
*
ptr
)
{
alloc_
->
Delete
(
ptr
);
}));
return
data_
.
get
();
}
}
...
...
@@ -116,13 +112,9 @@ class Tensor {
}
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
Resize
(
other
.
shape
());
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
Resize
(
other
.
shape
());
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
Resize
(
other
->
shape
());
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
Resize
(
other
->
shape
());
}
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
index_t
size
)
{
...
...
@@ -132,7 +124,8 @@ class Tensor {
template
<
typename
SrcType
,
typename
DstType
>
inline
void
CopyWithCast
(
const
SrcType
*
src
,
size_t
size
)
{
MACE_CHECK
(
static_cast
<
index_t
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
MACE_CHECK
(
static_cast
<
index_t
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
unique_ptr
<
DstType
[]
>
buffer
(
new
DstType
[
size
]);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
buffer
[
i
]
=
static_cast
<
DstType
>
(
src
[
i
]);
...
...
@@ -146,10 +139,11 @@ class Tensor {
inline
void
DebugPrint
()
{
std
::
stringstream
os
;
for
(
int
i
:
shape_
)
{
for
(
int
i
:
shape_
)
{
os
<<
i
<<
", "
;
}
LOG
(
INFO
)
<<
"Tensor shape: "
<<
os
.
str
()
<<
" type: "
<<
DataType_Name
(
dtype_
);
LOG
(
INFO
)
<<
"Tensor shape: "
<<
os
.
str
()
<<
" type: "
<<
DataType_Name
(
dtype_
);
os
.
str
(
""
);
os
.
clear
();
...
...
@@ -175,7 +169,8 @@ class Tensor {
private:
inline
int64_t
NumElements
()
const
{
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
}
Allocator
*
alloc_
;
...
...
@@ -189,4 +184,4 @@ class Tensor {
}
// namespace tensor
#endif
//
MACE_CORE_TENSOR_H_
#endif
//
MACE_CORE_TENSOR_H_
mace/core/testing/test_benchmark.cc
浏览文件 @
578b382a
...
...
@@ -51,11 +51,8 @@ Benchmark* Benchmark::ArgPair(int x, int y) {
return
this
;
}
// Run all benchmarks
void
Benchmark
::
Run
()
{
Run
(
"all"
);
}
void
Benchmark
::
Run
()
{
Run
(
"all"
);
}
void
Benchmark
::
Run
(
const
char
*
pattern
)
{
if
(
!
all_benchmarks
)
return
;
...
...
@@ -113,8 +110,8 @@ void Benchmark::Run(const char* pattern) {
(
items_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
}
}
}
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
578b382a
...
...
@@ -13,8 +13,8 @@
#define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c
#define BENCHMARK(n) \
static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT(
__benchmark_, n, __LINE__) =
\
(new ::mace::testing::Benchmark(#n, (n)))
static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT( \
__benchmark_, n, __LINE__) =
(new ::mace::testing::Benchmark(#n, (n)))
namespace
mace
{
namespace
testing
{
...
...
mace/core/testing/test_benchmark_main.cc
浏览文件 @
578b382a
...
...
@@ -17,4 +17,3 @@ int main(int argc, char** argv) {
}
return
0
;
}
mace/core/types.h
浏览文件 @
578b382a
...
...
@@ -18,7 +18,6 @@ struct DataTypeToEnum {
static_assert
(
IsValidDataType
<
T
>::
value
,
"Specified Data Type not supported"
);
};
// EnumToDataType<VALUE>::Type is the type for DataType constant VALUE, e.g.
// EnumToDataType<DT_FLOAT>::Type is float.
template
<
DataType
VALUE
>
...
...
mace/core/workspace.cc
浏览文件 @
578b382a
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/common.h"
#include "mace/core/workspace.h"
#include "mace/core/common.h"
#include "mace/core/serializer.h"
namespace
mace
{
...
...
@@ -16,8 +16,7 @@ vector<string> Workspace::Tensors() const {
return
names
;
}
Tensor
*
Workspace
::
CreateTensor
(
const
string
&
name
,
Allocator
*
alloc
,
Tensor
*
Workspace
::
CreateTensor
(
const
string
&
name
,
Allocator
*
alloc
,
DataType
type
)
{
if
(
HasTensor
(
name
))
{
VLOG
(
1
)
<<
"Tensor "
<<
name
<<
" already exists. Skipping."
;
...
...
@@ -46,13 +45,15 @@ const Tensor* Workspace::GetTensor(const string& name) const {
}
Tensor
*
Workspace
::
GetTensor
(
const
string
&
name
)
{
return
const_cast
<
Tensor
*>
(
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
return
const_cast
<
Tensor
*>
(
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
}
void
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
void
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
Serializer
serializer
;
for
(
auto
&
tensor_proto
:
net_def
.
tensors
())
{
tensor_map_
[
tensor_proto
.
name
()]
=
serializer
.
Deserialize
(
tensor_proto
,
type
);
for
(
auto
&
tensor_proto
:
net_def
.
tensors
())
{
tensor_map_
[
tensor_proto
.
name
()]
=
serializer
.
Deserialize
(
tensor_proto
,
type
);
}
}
...
...
mace/core/workspace.h
浏览文件 @
578b382a
...
...
@@ -5,7 +5,6 @@
#ifndef MACE_CORE_WORKSPACE_H_
#define MACE_CORE_WORKSPACE_H_
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
...
...
@@ -42,5 +41,4 @@ class Workspace {
}
// namespace mace
#endif // MACE_CORE_WORKSPACE_H_
mace/examples/benchmark_example.cc
浏览文件 @
578b382a
...
...
@@ -14,7 +14,7 @@ static void foo(int iters) {
float
*
out
=
new
float
[
N
];
while
(
iters
--
)
{
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
out
[
i
]
=
inp
[
i
]
*
2.0
;
}
}
...
...
@@ -24,7 +24,6 @@ static void foo(int iters) {
BENCHMARK
(
foo
);
static
void
bar
(
int
iters
,
int
n
)
{
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
ItemsProcessed
(
tot
);
...
...
@@ -34,7 +33,7 @@ static void bar(int iters, int n) {
float
*
out
=
new
float
[
n
];
while
(
iters
--
)
{
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
out
[
i
]
=
inp
[
i
]
*
2.0
;
}
}
...
...
mace/kernels/addn.h
浏览文件 @
578b382a
...
...
@@ -10,10 +10,9 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
AddNFunctor
{
void
operator
()(
const
vector
<
const
T
*>&
inputs
,
T
*
output
,
index_t
size
)
{
void
operator
()(
const
vector
<
const
T
*>&
inputs
,
T
*
output
,
index_t
size
)
{
memset
(
output
,
0
,
size
*
sizeof
(
T
));
int
n
=
inputs
.
size
();
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
@@ -25,9 +24,8 @@ struct AddNFunctor {
};
template
<
>
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>&
inputs
,
float
*
output
,
index_t
size
);
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>&
inputs
,
float
*
output
,
index_t
size
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/batch_norm.h
浏览文件 @
578b382a
...
...
@@ -11,26 +11,21 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
BatchNormFunctor
{
float
variance_epsilon_
;
BatchNormFunctor
(
const
float
variance_epsilon
)
:
variance_epsilon_
(
variance_epsilon
)
{}
:
variance_epsilon_
(
variance_epsilon
)
{}
void
operator
()(
const
T
*
input
,
const
T
*
scale
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
T
*
output
)
{
void
operator
()(
const
T
*
input
,
const
T
*
scale
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
T
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} }
// ( \offset - \frac { \scale * mean } {
// \sqrt{var+\variance_epsilon} }
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
...
...
@@ -53,16 +48,10 @@ struct BatchNormFunctor {
};
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
);
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
);
}
// namepsace kernels
}
// namespace mace
...
...
mace/kernels/conv_2d.h
浏览文件 @
578b382a
...
...
@@ -10,15 +10,11 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
Conv2dFunctor
{
public:
Conv2dFunctor
(
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
Conv2dFunctor
(
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
void
operator
()(
const
T
*
input
,
// NCHW
const
index_t
*
input_shape
,
...
...
@@ -64,31 +60,27 @@ class Conv2dFunctor {
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
index_t
offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
+
h
*
width
+
w
;
c
*
height
*
width
+
h
*
width
+
w
;
T
sum
=
0
;
const
T
*
filter_ptr
=
filter
+
c
*
kernel_size
;
for
(
int
inc
=
0
;
inc
<
input_channels
;
++
inc
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
inh
=
padded_h_start
+
h
*
stride_h
+
dilation_h
*
kh
;
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
<
0
||
inh
>=
input_height
||
inw
<
0
||
inw
>=
input_width
)
{
MACE_CHECK
(
inh
>=
padded_h_start
&&
inh
<
padded_h_stop
&&
inw
>=
padded_w_start
&&
inw
<
padded_w_stop
,
"Out of range read from input: "
,
inh
,
", "
,
inw
);
if
(
inh
<
0
||
inh
>=
input_height
||
inw
<
0
||
inw
>=
input_width
)
{
MACE_CHECK
(
inh
>=
padded_h_start
&&
inh
<
padded_h_stop
&&
inw
>=
padded_w_start
&&
inw
<
padded_w_stop
,
"Out of range read from input: "
,
inh
,
", "
,
inw
);
// else padding with 0:
// sum += 0;
}
else
{
index_t
input_offset
=
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
+
in
h
*
input_width
+
in
w
;
inc
*
input_height
*
input_width
+
inh
*
input_width
+
inw
;
sum
+=
input
[
input_offset
]
*
*
filter_ptr
;
}
++
filter_ptr
;
...
...
@@ -109,12 +101,9 @@ class Conv2dFunctor {
};
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
...
...
mace/kernels/conv_pool_2d_util.cc
浏览文件 @
578b382a
...
...
@@ -9,10 +9,8 @@ namespace kernels {
void
CalcPaddingAndOutputSize
(
const
index_t
*
input_shape
,
// NCHW
const
index_t
*
filter_shape
,
// OIHW
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
int
*
padding_size
)
{
MACE_CHECK
(
dilations
[
0
]
>
0
&&
dilations
[
1
]
>
0
,
"Invalid dilations, must >= 1"
);
...
...
@@ -43,14 +41,16 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
output_height
=
(
input_shape
[
2
]
-
k_extent_height
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_shape
[
3
]
-
k_extent_width
)
/
strides
[
1
]
+
1
;
break
;
case
SAME
:
output_height
=
(
input_shape
[
2
]
-
1
)
/
strides
[
0
]
+
1
;
case
SAME
:
output_height
=
(
input_shape
[
2
]
-
1
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_shape
[
3
]
-
1
)
/
strides
[
1
]
+
1
;
break
;
case
FULL
:
output_height
=
(
input_shape
[
2
]
+
k_extent_height
-
2
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_shape
[
3
]
+
k_extent_width
-
2
)
/
strides
[
1
]
+
1
;
break
;
default:
MACE_CHECK
(
false
,
"Unsupported padding type: "
,
padding
);
default:
MACE_CHECK
(
false
,
"Unsupported padding type: "
,
padding
);
}
// Note: TensorFlow may padded one more on the right/bottom side
...
...
@@ -58,10 +58,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
padding_size
[
0
]
=
(
output_height
-
1
)
*
strides
[
0
]
+
k_extent_height
-
input_shape
[
2
];
padding_size
[
1
]
=
(
output_width
-
1
)
*
strides
[
1
]
+
k_extent_width
-
input_shape
[
3
];
padding_size
[
0
]
=
(
output_height
-
1
)
*
strides
[
0
]
+
k_extent_height
-
input_shape
[
2
];
padding_size
[
1
]
=
(
output_width
-
1
)
*
strides
[
1
]
+
k_extent_width
-
input_shape
[
3
];
output_shape
[
0
]
=
input_shape
[
0
];
output_shape
[
1
]
=
output_channels
;
...
...
@@ -69,19 +69,15 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
output_shape
[
3
]
=
output_width
;
}
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
)
{
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
)
{
index_t
batch
=
input_shape
[
0
];
index_t
channels
=
input_shape
[
1
];
index_t
height
=
input_shape
[
2
];
index_t
width
=
input_shape
[
3
];
std
::
vector
<
index_t
>
output_shape
({
batch
,
channels
,
paddings
[
0
]
+
height
,
paddings
[
1
]
+
width
});
std
::
vector
<
index_t
>
output_shape
(
{
batch
,
channels
,
paddings
[
0
]
+
height
,
paddings
[
1
]
+
width
});
const
index_t
output_width
=
output_shape
[
3
];
const
int
padded_top
=
paddings
[
0
]
/
2
;
...
...
mace/kernels/conv_pool_2d_util.h
浏览文件 @
578b382a
...
...
@@ -19,16 +19,12 @@ namespace kernels {
void
CalcPaddingAndOutputSize
(
const
index_t
*
input_shape
,
// NCHW
const
index_t
*
filter_shape
,
// OIHW
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
int
*
padding_size
);
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
);
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/neon/addn_neon.cc
浏览文件 @
578b382a
...
...
@@ -2,16 +2,15 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/addn.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
template
<
>
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>&
inputs
,
float
*
output
,
index_t
size
)
{
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>
&
inputs
,
float
*
output
,
index_t
size
)
{
// TODO: neon mem copy
memset
(
output
,
0
,
size
*
sizeof
(
float
));
int
n
=
inputs
.
size
();
...
...
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
578b382a
...
...
@@ -2,26 +2,22 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/batch_norm.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
)
{
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} }
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon}
// }
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
...
...
@@ -36,8 +32,8 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
float32x4_t
new_scale_f
=
vdupq_n_f32
(
new_scale
);
float32x4_t
new_offset_f
=
vdupq_n_f32
(
new_offset
);
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
for
(
index_t
j
=
0
;
j
<
count
;
++
j
)
{
float32x4_t
input_f
=
vld1q_f32
(
input_sample_ptr
);
...
...
mace/kernels/neon/conv_2d_neon.cc
浏览文件 @
578b382a
...
...
@@ -20,17 +20,18 @@ extern void Conv2dNeonK5x5S1(const float *input, const index_t *input_shape,
const
float
*
filter
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
template
<
>
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
// NCHW
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
typedef
void
(
*
Conv2dNeonFunction
)(
const
float
*
input
,
// NCHW
typedef
void
(
*
Conv2dNeonFunction
)(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
...
...
@@ -38,44 +39,20 @@ void Conv2dFunctor<DeviceType::NEON,
const
index_t
*
output_shape
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dNeonFunction
selector
[
5
][
2
]
=
{
{
Conv2dNeonK1x1S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK3x3S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK5x5S1
,
nullptr
}
};
{
Conv2dNeonK1x1S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK3x3S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK5x5S1
,
nullptr
}};
// not implement yet
index_t
kernel_h
=
filter_shape
[
2
];
index_t
kernel_w
=
filter_shape
[
3
];
if
(
kernel_h
!=
kernel_w
||
kernel_h
>
5
||
strides_
[
0
]
!=
strides_
[
1
]
||
strides_
[
0
]
>
2
||
dilations_
[
0
]
!=
1
||
dilations_
[
1
]
!=
1
||
if
(
kernel_h
!=
kernel_w
||
kernel_h
>
5
||
strides_
[
0
]
!=
strides_
[
1
]
||
strides_
[
0
]
>
2
||
dilations_
[
0
]
!=
1
||
dilations_
[
1
]
!=
1
||
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
]
==
nullptr
)
{
LOG
(
WARNING
)
<<
"NEON conv2d kernel not implementated, using slow vesion"
;
Conv2dFunctor
<
DeviceType
::
CPU
,
float
>
(
strides_
,
paddings_
,
dilations_
)(
input
,
input_shape
,
filter
,
filter_shape
,
bias
,
output
,
output_shape
);
input
,
input_shape
,
filter
,
filter_shape
,
bias
,
output
,
output_shape
);
return
;
}
...
...
@@ -87,12 +64,7 @@ void Conv2dFunctor<DeviceType::NEON,
input_shape
=
padded_input
.
shape
().
data
();
}
auto
conv2d_neon_func
=
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
];
conv2d_neon_func
(
input
,
input_shape
,
filter
,
bias
,
output
,
output_shape
);
conv2d_neon_func
(
input
,
input_shape
,
filter
,
bias
,
output
,
output_shape
);
}
}
// namespace kernels
...
...
mace/kernels/neon/conv_2d_neon_1x1.cc
浏览文件 @
578b382a
...
...
@@ -24,8 +24,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
const
index_t
input_height
=
input_shape
[
2
];
const
index_t
input_width
=
input_shape
[
3
];
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
input_width
==
width
);
const
index_t
total_pixels
=
height
*
width
;
...
...
@@ -37,12 +36,13 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// benchmark omp collapsed(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
const
float
*
filter_ptr
=
filter
;
#pragma omp parallel for
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
// TODO Will GCC opt these out?
float
*
channel_output_start
=
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
const
float
*
input_ptr
=
input
+
n
*
input_channels
*
input_height
*
input_width
;
const
float
*
input_ptr
=
input
+
n
*
input_channels
*
input_height
*
input_width
;
// Fill with bias
float
*
output_ptr
=
channel_output_start
;
...
...
@@ -55,15 +55,14 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
for
(;
inc
+
3
<
input_channels
;
inc
+=
4
)
{
float
*
output_ptr
=
channel_output_start
;
// The begining of each input feature map channel
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
const
float
*
input_ptr1
=
input_ptr
+
total_pixels
;
const
float
*
input_ptr2
=
input_ptr1
+
total_pixels
;
const
float
*
input_ptr3
=
input_ptr2
+
total_pixels
;
// filter is in c_out, c_in, 1, 1 order
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
const
float
k0
=
filter_ptr
[
0
];
...
...
@@ -141,8 +140,8 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// Process the remaining channels
for
(;
inc
<
input_channels
;
++
inc
)
{
float
*
output_ptr
=
channel_output_start
;
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
...
...
mace/kernels/neon/conv_2d_neon_3x3.cc
浏览文件 @
578b382a
...
...
@@ -16,7 +16,6 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
int
batch
=
output_shape
[
0
];
int
channels
=
output_shape
[
1
];
int
height
=
output_shape
[
2
];
...
...
@@ -34,54 +33,58 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
float
*
output_ptr_base
=
output
+
b
*
channels
*
height
*
width
;
for
(
int
oc
=
0
;
oc
<
channels
;
++
oc
)
{
const
float
*
filter_ptr
=
filter
+
oc
*
input_channels
*
kernel_h
*
kernel_w
;
const
float
*
input_ptr
=
input
+
b
*
input_channels
*
input_height
*
input_width
;
const
float
*
filter_ptr
=
filter
+
oc
*
input_channels
*
kernel_h
*
kernel_w
;
const
float
*
input_ptr
=
input
+
b
*
input_channels
*
input_height
*
input_width
;
float
*
output_ptr
=
output_ptr_base
+
oc
*
height
*
width
;
std
::
fill
(
output_ptr
,
output_ptr
+
height
*
width
,
bias
[
oc
]);
for
(
int
ic
=
0
;
ic
<
input_channels
;
++
ic
)
{
float32x4_t
filter0
=
vld1q_f32
(
filter_ptr
);
float32x4_t
filter3
=
vld1q_f32
(
filter_ptr
+
3
);
float32x4_t
filter6
=
vld1q_f32
(
filter_ptr
+
6
);
float32x4_t
filter3
=
vld1q_f32
(
filter_ptr
+
3
);
float32x4_t
filter6
=
vld1q_f32
(
filter_ptr
+
6
);
const
float
*
row
[
kRegisterSize
]
=
{
input_ptr
,
input_ptr
+
input_width
,
input_ptr
+
2
*
input_width
,
input_ptr
+
3
*
input_width
};
const
float
*
row
[
kRegisterSize
]
=
{
input_ptr
,
input_ptr
+
input_width
,
input_ptr
+
2
*
input_width
,
input_ptr
+
3
*
input_width
};
float
*
output_ptr1
=
output_ptr
;
float
*
output_ptr2
=
output_ptr
+
width
;
for
(
int
h
=
0
;
h
<
height_count
;
h
+=
2
)
{
int
count
=
width
>>
2
;
int
remain_count
=
width
&
3
;
for
(;
count
>
0
;
--
count
)
{
float32x4_t
sum0
=
vdupq_n_f32
(
.0
f
);
float32x4_t
sum1
=
vdupq_n_f32
(
.0
f
);
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
//0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
//4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//2345
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
// 0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
// 4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
// 1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter0
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter0
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_2
,
filter0
,
2
);
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
//0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
//4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//2345
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
// 0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
// 4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
// 1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_0
,
filter3
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_1
,
filter3
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_2
,
filter3
,
2
);
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter6
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter6
,
1
);
...
...
@@ -96,10 +99,10 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum1
=
vfmaq_laneq_f32
(
sum1
,
row0_ext_1
,
filter3
,
1
);
sum1
=
vfmaq_laneq_f32
(
sum1
,
row0_ext_2
,
filter3
,
2
);
row1_ext_0
=
vld1q_f32
(
row
[
3
]);
//
0123
row1_latter
=
vld1q_f32
(
row
[
3
]
+
kRegisterSize
);
//
4567
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//
1234
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//
2345
row1_ext_0
=
vld1q_f32
(
row
[
3
]);
//
0123
row1_latter
=
vld1q_f32
(
row
[
3
]
+
kRegisterSize
);
//
4567
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//
1234
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//
2345
sum1
=
vfmaq_laneq_f32
(
sum1
,
row1_ext_0
,
filter6
,
0
);
sum1
=
vfmaq_laneq_f32
(
sum1
,
row1_ext_1
,
filter6
,
1
);
...
...
@@ -114,15 +117,15 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
output_ptr1
+=
kRegisterSize
;
output_ptr2
+=
kRegisterSize
;
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
row
[
i
]
+=
kRegisterSize
;
}
}
for
(;
remain_count
>
0
;
--
remain_count
)
{
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
row3
=
vld1q_f32
(
row
[
3
]);
//
0123
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
row3
=
vld1q_f32
(
row
[
3
]);
//
0123
float32x4_t
sum
=
vmulq_f32
(
row0
,
filter0
);
sum
=
vmlaq_f32
(
sum
,
row1
,
filter3
);
...
...
@@ -138,13 +141,13 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
++
output_ptr1
;
++
output_ptr2
;
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
row
[
i
]
+=
1
;
}
}
output_ptr1
+=
width
;
output_ptr2
+=
width
;
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
row
[
i
]
+=
2
+
input_width
;
}
}
...
...
@@ -152,30 +155,34 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
if
(
height
!=
height_count
)
{
int
count
=
width
>>
2
;
int
remain_count
=
width
&
3
;
for
(;
count
>
0
;
--
count
)
{
for
(;
count
>
0
;
--
count
)
{
float32x4_t
sum0
=
vdupq_n_f32
(
.0
f
);
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
//0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
//4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//2345
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
// 0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
// 4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
// 1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter0
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter0
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_2
,
filter0
,
2
);
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
//0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
//4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//2345
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
// 0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
// 4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
// 1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_0
,
filter3
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_1
,
filter3
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_2
,
filter3
,
2
);
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter6
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter6
,
1
);
...
...
@@ -185,14 +192,14 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
output_row0
=
vaddq_f32
(
output_row0
,
sum0
);
vst1q_f32
(
output_ptr1
,
output_row0
);
output_ptr1
+=
kRegisterSize
;
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
row
[
i
]
+=
kRegisterSize
;
}
}
for
(;
remain_count
>
0
;
--
remain_count
)
{
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
sum
=
vmulq_f32
(
row0
,
filter0
);
sum
=
vmlaq_f32
(
sum
,
row1
,
filter3
);
...
...
@@ -201,7 +208,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
*
output_ptr1
=
vaddvq_f32
(
sum
);
++
output_ptr1
;
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
row
[
i
]
+=
1
;
}
}
...
...
mace/kernels/neon/conv_2d_neon_5x5.cc
浏览文件 @
578b382a
...
...
@@ -30,17 +30,17 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
const
index_t
input_total_pixels_per_channel
=
input_height
*
input_width
;
const
index_t
output_total_pixels_per_channel
=
height
*
width
;
const
index_t
input_total_pixels_per_batch
=
input_total_pixels_per_channel
*
input_channels
;
const
index_t
output_total_pixels_per_batch
=
output_total_pixels_per_channel
*
channels
;
const
index_t
input_total_pixels_per_batch
=
input_total_pixels_per_channel
*
input_channels
;
const
index_t
output_total_pixels_per_batch
=
output_total_pixels_per_channel
*
channels
;
const
index_t
patch_size
=
input_channels
*
25
;
#pragma omp parallel for collapse(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
float
*
output_ptr
=
output
+
n
*
output_total_pixels_per_batch
+
c
*
output_total_pixels_per_channel
;
float
*
output_ptr
=
output
+
n
*
output_total_pixels_per_batch
+
c
*
output_total_pixels_per_channel
;
const
float
*
input_ptr
=
input
+
n
*
input_total_pixels_per_batch
;
// Fill with bias
...
...
@@ -246,8 +246,8 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
sum2
=
r5
[
4
]
*
k4
[
4
];
float32x2_t
_ss
=
vadd_f32
(
vget_low_f32
(
_sum
),
vget_high_f32
(
_sum
));
float32x2_t
_ss2
=
vadd_f32
(
vget_low_f32
(
_sum2
),
vget_high_f32
(
_sum2
));
float32x2_t
_ss2
=
vadd_f32
(
vget_low_f32
(
_sum2
),
vget_high_f32
(
_sum2
));
float32x2_t
_ss_ss2
=
vpadd_f32
(
_ss
,
_ss2
);
sum
+=
vget_lane_f32
(
_ss_ss2
,
0
);
...
...
mace/kernels/neon/max_pooling_neon_2x2.cc
浏览文件 @
578b382a
...
...
@@ -2,19 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include <arm_neon.h>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
...
...
@@ -86,8 +84,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
for
(
int
kw
=
0
;
kw
<
2
;
++
kw
)
{
int
inh
=
h
*
2
-
padding_top
+
kh
;
int
inw
=
w
*
2
-
padding_left
+
kw
;
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
max
=
std
::
max
(
max
,
input
[
input_offset
+
inh
*
in_width
+
inw
]);
}
}
...
...
@@ -104,10 +101,8 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
}
// assume the input has already been padded
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
...
...
mace/kernels/neon/max_pooling_neon_3x3.cc
浏览文件 @
578b382a
...
...
@@ -2,19 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include <arm_neon.h>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
...
...
@@ -112,8 +110,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
for
(
int
kw
=
0
;
kw
<
3
;
++
kw
)
{
int
inh
=
h
*
2
-
padding_top
+
kh
;
int
inw
=
w
*
2
-
padding_left
+
kw
;
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
max
=
std
::
max
(
max
,
input
[
input_offset
+
inh
*
in_width
+
inw
]);
}
}
...
...
@@ -130,10 +127,8 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
}
// assume the input has already been padded
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
...
...
mace/kernels/neon/pooling_neon.cc
浏览文件 @
578b382a
...
...
@@ -2,45 +2,36 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/pooling.h"
#include <arm_neon.h>
#include "mace/kernels/conv_pool_2d_util.h"
namespace
mace
{
namespace
kernels
{
extern
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
extern
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
extern
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
extern
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
#ifdef __COPY_MAKE_PADDING
extern
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
#endif
template
<
>
template
<
>
void
PoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
index_t
*
output_shape
)
{
if
(
kernels_
[
0
]
==
2
&&
kernels_
[
1
]
==
2
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
if
(
kernels_
[
0
]
==
2
&&
kernels_
[
1
]
==
2
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
...
...
@@ -50,9 +41,8 @@ void PoolingFunctor<DeviceType::NEON, float>::operator()(
#else
PoolingMaxNeonK2x2S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
#endif
}
else
if
(
kernels_
[
0
]
==
3
&&
kernels_
[
1
]
==
3
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
}
else
if
(
kernels_
[
0
]
==
3
&&
kernels_
[
1
]
==
3
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
...
...
@@ -65,11 +55,7 @@ void PoolingFunctor<DeviceType::NEON, float>::operator()(
}
else
{
// not implement yet
PoolingFunctor
<
DeviceType
::
CPU
,
float
>
(
pooling_type_
,
kernels_
,
strides_
,
paddings_
,
dilations_
)(
input
,
input_shape
,
output
,
output_shape
);
input
,
input_shape
,
output
,
output_shape
);
}
}
...
...
mace/kernels/neon/relu_neon.cc
浏览文件 @
578b382a
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/relu.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
...
...
@@ -36,6 +36,5 @@ void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
}
};
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/pooling.h
浏览文件 @
578b382a
...
...
@@ -17,23 +17,18 @@ enum PoolingType {
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
PoolingFunctor
{
public:
PoolingFunctor
(
const
PoolingType
pooling_type
,
const
int
*
kernels
,
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
PoolingFunctor
(
const
PoolingType
pooling_type
,
const
int
*
kernels
,
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
:
pooling_type_
(
pooling_type
),
kernels_
(
kernels
),
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
void
operator
()(
const
T
*
input
,
const
index_t
*
input_shape
,
T
*
output
,
void
operator
()(
const
T
*
input
,
const
index_t
*
input_shape
,
T
*
output
,
const
index_t
*
output_shape
)
{
index_t
batch
=
output_shape
[
0
];
index_t
channels
=
output_shape
[
1
];
...
...
@@ -60,32 +55,31 @@ class PoolingFunctor {
#pragma omp parallel for collapse(2)
for
(
int
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
out_offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
index_t
out_offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
index_t
in_offset
=
n
*
input_channels
*
input_height
*
input_width
+
c
*
input_height
*
input_width
;
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
T
sum_or_max
=
0
;
switch
(
pooling_type_
)
{
case
AVG
:
break
;
case
MAX
:
sum_or_max
=
std
::
numeric_limits
<
T
>::
lowest
();
case
AVG
:
break
;
case
MAX
:
sum_or_max
=
std
::
numeric_limits
<
T
>::
lowest
();
break
;
default:
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
}
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
inh
=
padded_h_start
+
h
*
stride_h
+
dilation_h
*
kh
;
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
>=
0
&&
inh
<
input_height
&&
inw
>=
0
&&
inw
<
input_width
)
{
index_t
input_offset
=
in_offset
+
inh
*
input_width
+
inw
;
if
(
inh
>=
0
&&
inh
<
input_height
&&
inw
>=
0
&&
inw
<
input_width
)
{
index_t
input_offset
=
in_offset
+
inh
*
input_width
+
inw
;
switch
(
pooling_type_
)
{
case
AVG
:
sum_or_max
+=
input
[
input_offset
];
case
AVG
:
sum_or_max
+=
input
[
input_offset
];
break
;
case
MAX
:
sum_or_max
=
std
::
max
(
sum_or_max
,
input
[
input_offset
]);
...
...
@@ -98,14 +92,14 @@ class PoolingFunctor {
}
}
switch
(
pooling_type_
)
{
case
AVG
:
output
[
out_offset
]
=
sum_or_max
/
(
kernel_h
*
kernel_w
);
case
AVG
:
output
[
out_offset
]
=
sum_or_max
/
(
kernel_h
*
kernel_w
);
break
;
case
MAX
:
output
[
out_offset
]
=
sum_or_max
;
case
MAX
:
output
[
out_offset
]
=
sum_or_max
;
break
;
default:
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
}
out_offset
+=
1
;
}
...
...
@@ -122,14 +116,12 @@ class PoolingFunctor {
const
int
*
dilations_
;
};
template
<
>
template
<
>
void
PoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
}
// namespace mace
#endif
//
MACE_KERNELS_POOLING_H
#endif
//
MACE_KERNELS_POOLING_H
mace/kernels/relu.h
浏览文件 @
578b382a
...
...
@@ -10,7 +10,7 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ReluFunctor
{
void
operator
()(
const
T
*
input
,
T
*
output
,
index_t
size
)
{
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
578b382a
...
...
@@ -48,14 +48,13 @@ inline float ComputeLerp(const float top_left, const float top_right,
return
top
+
(
bottom
-
top
)
*
y_lerp
;
}
template
<
typename
T
>
void
ResizeImage
(
const
T
*
images
,
const
index_t
batch_size
,
const
index_t
in_height
,
const
index_t
in_width
,
const
index_t
out_height
,
const
index_t
out_width
,
const
index_t
channels
,
template
<
typename
T
>
void
ResizeImage
(
const
T
*
images
,
const
index_t
batch_size
,
const
index_t
in_height
,
const
index_t
in_width
,
const
index_t
out_height
,
const
index_t
out_width
,
const
index_t
channels
,
const
std
::
vector
<
CachedInterpolation
>
&
xs_vec
,
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
float
*
output
)
{
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
float
*
output
)
{
const
index_t
in_channel_size
=
in_height
*
in_width
;
const
index_t
in_batch_num_values
=
channels
*
in_channel_size
;
const
index_t
out_channel_size
=
out_height
*
out_width
;
...
...
@@ -65,10 +64,10 @@ void ResizeImage(const T *images,
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
T
*
input_ptr
=
images
+
in_batch_num_values
*
b
+
in_channel_size
*
c
;
float
*
output_ptr
=
output
+
out_batch_num_values
*
b
+
out_channel_size
*
c
;
const
T
*
input_ptr
=
images
+
in_batch_num_values
*
b
+
in_channel_size
*
c
;
float
*
output_ptr
=
output
+
out_batch_num_values
*
b
+
out_channel_size
*
c
;
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
const
T
*
ys_input_lower_ptr
=
input_ptr
+
ys
[
y
].
lower
*
in_width
;
const
T
*
ys_input_upper_ptr
=
input_ptr
+
ys
[
y
].
upper
*
in_width
;
...
...
@@ -83,9 +82,8 @@ void ResizeImage(const T *images,
const
float
bottom_left
=
ys_input_upper_ptr
[
xs_lower
];
const
float
bottom_right
=
ys_input_upper_ptr
[
xs_upper
];
output_ptr
[
x
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
output_ptr
[
x
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
}
output_ptr
+=
out_width
;
}
...
...
@@ -94,16 +92,15 @@ void ResizeImage(const T *images,
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ResizeBilinearFunctor
{
bool
align_corners_
;
ResizeBilinearFunctor
(
bool
align_corners
)
:
align_corners_
(
align_corners
)
{}
ResizeBilinearFunctor
(
bool
align_corners
)
:
align_corners_
(
align_corners
)
{}
void
operator
()(
const
T
*
input
,
T
*
output
,
index_t
n
,
index_t
channels
,
index_t
in
_height
,
index_t
in_width
,
index_t
out_height
,
index_t
out_width
)
{
void
operator
()(
const
T
*
input
,
T
*
output
,
index_t
n
,
index_t
channels
,
index_t
in_height
,
index_t
in_width
,
index_t
out
_height
,
index_t
out_width
)
{
if
(
out_height
==
in_height
&&
out_width
==
in_width
)
{
std
::
copy
(
input
,
input
+
channels
*
in_height
*
in_width
,
output
);
return
;
...
...
@@ -111,8 +108,8 @@ struct ResizeBilinearFunctor {
float
height_scale
=
CalculateResizeScale
(
in_height
,
out_height
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
std
::
vector
<
CachedInterpolation
>
ys
(
out_height
+
1
);
std
::
vector
<
CachedInterpolation
>
xs
(
out_width
+
1
);
...
...
@@ -121,8 +118,8 @@ struct ResizeBilinearFunctor {
ComputeInterpolationWeights
(
out_height
,
in_height
,
height_scale
,
ys
.
data
());
ComputeInterpolationWeights
(
out_width
,
in_width
,
width_scale
,
xs
.
data
());
ResizeImage
(
input
,
n
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output
);
ResizeImage
(
input
,
n
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output
);
}
};
...
...
mace/ops/addn.cc
浏览文件 @
578b382a
mace/ops/addn.h
浏览文件 @
578b382a
...
...
@@ -10,10 +10,10 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
AddNOp
:
public
Operator
<
D
,
T
>
{
public:
AddNOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
AddNOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{}
bool
Run
()
override
{
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
578b382a
...
...
@@ -10,7 +10,6 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
AddNBenchmark
(
int
iters
,
int
n
,
int
size
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -18,8 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) {
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
op_def_builder
.
Output
(
"Output"
).
Finalize
(
net
.
operator_def
());
// Add input data
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
@@ -32,17 +30,16 @@ static void AddNBenchmark(int iters, int n, int size) {
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \
static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE( \
int iters) { \
static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot
* (sizeof(TYPE)));
\
mace::testing::BytesProcessed(tot
*(sizeof(TYPE)));
\
AddNBenchmark<DEVICE, TYPE>(iters, N, SIZE); \
} \
BENCHMARK(BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE)
...
...
mace/ops/addn_test.cc
浏览文件 @
578b382a
mace/ops/batch_norm.cc
浏览文件 @
578b382a
mace/ops/batch_norm.h
浏览文件 @
578b382a
...
...
@@ -10,12 +10,13 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
BatchNormOp
:
public
Operator
<
D
,
T
>
{
public:
BatchNormOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
BatchNormOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
),
functor_
(
OperatorBase
::
GetSingleArgument
<
float
>
(
"variance_epsilon"
,
1e-4
)){}
functor_
(
OperatorBase
::
GetSingleArgument
<
float
>
(
"variance_epsilon"
,
1e-4
))
{}
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
0
);
...
...
@@ -24,11 +25,16 @@ class BatchNormOp : public Operator<D, T> {
const
Tensor
*
mean
=
this
->
Input
(
3
);
const
Tensor
*
var
=
this
->
Input
(
4
);
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input must be 4-dimensional. "
,
input
->
dim_size
());
MACE_CHECK
(
scale
->
dim_size
()
==
1
,
"scale must be 1-dimensional. "
,
scale
->
dim_size
());
MACE_CHECK
(
offset
->
dim_size
()
==
1
,
"offset must be 1-dimensional. "
,
offset
->
dim_size
());
MACE_CHECK
(
mean
->
dim_size
()
==
1
,
"mean must be 1-dimensional. "
,
mean
->
dim_size
());
MACE_CHECK
(
var
->
dim_size
()
==
1
,
"var must be 1-dimensional. "
,
var
->
dim_size
());
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input must be 4-dimensional. "
,
input
->
dim_size
());
MACE_CHECK
(
scale
->
dim_size
()
==
1
,
"scale must be 1-dimensional. "
,
scale
->
dim_size
());
MACE_CHECK
(
offset
->
dim_size
()
==
1
,
"offset must be 1-dimensional. "
,
offset
->
dim_size
());
MACE_CHECK
(
mean
->
dim_size
()
==
1
,
"mean must be 1-dimensional. "
,
mean
->
dim_size
());
MACE_CHECK
(
var
->
dim_size
()
==
1
,
"var must be 1-dimensional. "
,
var
->
dim_size
());
Tensor
*
output
=
this
->
Output
(
0
);
output
->
ResizeLike
(
input
);
...
...
@@ -44,14 +50,13 @@ class BatchNormOp : public Operator<D, T> {
const
T
*
var_ptr
=
var
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
functor_
(
input_ptr
,
scale_ptr
,
offset_ptr
,
mean_ptr
,
var_ptr
,
n
,
channel
,
sample_size
,
output_ptr
);
functor_
(
input_ptr
,
scale_ptr
,
offset_ptr
,
mean_ptr
,
var_ptr
,
n
,
channel
,
sample_size
,
output_ptr
);
return
true
;
}
private:
kernels
::
BatchNormFunctor
<
D
,
T
>
functor_
;
};
}
// namespace mace
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
578b382a
...
...
@@ -8,8 +8,8 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
BatchNorm
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
static
void
BatchNorm
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -35,7 +35,7 @@ static void BatchNorm(int iters, int batch, int channels, int height, int width)
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
...
...
@@ -45,7 +45,7 @@ static void BatchNorm(int iters, int batch, int channels, int height, int width)
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot
* (sizeof(TYPE)));
\
mace::testing::BytesProcessed(tot
*(sizeof(TYPE)));
\
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
578b382a
...
...
@@ -33,8 +33,8 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
...
...
@@ -77,5 +77,4 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
mace/ops/conv_2d.cc
浏览文件 @
578b382a
mace/ops/conv_2d.h
浏览文件 @
578b382a
...
...
@@ -13,11 +13,11 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
Conv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
public:
Conv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
)
{};
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
)
{};
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
...
...
@@ -27,21 +27,16 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
int
>
paddings
(
2
);
kernels
::
CalcPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
this
->
dilations_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
kernels
::
CalcPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
this
->
dilations_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
output
->
Resize
(
output_shape
);
auto
conv2d
=
kernels
::
Conv2dFunctor
<
D
,
T
>
(
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
conv2d
(
input
->
data
<
T
>
(),
input
->
shape
().
data
(),
filter
->
data
<
T
>
(),
filter
->
shape
().
data
(),
bias
->
data
<
T
>
(),
output
->
mutable_data
<
T
>
(),
auto
conv2d
=
kernels
::
Conv2dFunctor
<
D
,
T
>
(
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
conv2d
(
input
->
data
<
T
>
(),
input
->
shape
().
data
(),
filter
->
data
<
T
>
(),
filter
->
shape
().
data
(),
bias
->
data
<
T
>
(),
output
->
mutable_data
<
T
>
(),
output
->
shape
().
data
());
return
true
;
...
...
mace/ops/conv_2d_benchmark.cc
浏览文件 @
578b382a
...
...
@@ -13,8 +13,8 @@ namespace mace {
template
<
DeviceType
D
,
typename
T
>
static
void
Conv2d
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel_h
,
int
kernel_w
,
int
stride
,
Padding
padding
,
int
output_channels
)
{
int
kernel_h
,
int
kernel_w
,
int
stride
,
Padding
padding
,
int
output_channels
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -32,7 +32,8 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
// Add input data
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Bias"
,
{
output_channels
});
// Warm-up
...
...
@@ -41,20 +42,23 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \
mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
OC); \
} \
BENCHMARK(BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE)
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
...
...
mace/ops/conv_2d_test.cc
浏览文件 @
578b382a
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/conv_2d.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
...
...
@@ -26,16 +26,12 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
1
},
{
0.1
f
});
...
...
@@ -64,16 +60,12 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
1
},
{
0.1
f
});
...
...
@@ -81,10 +73,9 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
1
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -105,36 +96,24 @@ TEST_F(Conv2dOpTest, Combined) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
5
,
5
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
5
,
5
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
,
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
,
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -155,32 +134,26 @@ TEST_F(Conv2dOpTest, Conv1x1) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
5
,
3
,
10
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
5
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
5
,
3
,
10
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
5
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
10
},
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
3
,
10
},
{
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
...
...
@@ -194,8 +167,7 @@ TEST_F(Conv2dOpTest, Conv1x1) {
// TODO we need more tests
TEST_F
(
Conv2dOpTest
,
ConvNxNS12
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
srand
(
time
(
NULL
));
...
...
@@ -206,7 +178,7 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
index_t
width
=
7
+
rand
()
%
100
;
index_t
output_channels
=
1
+
rand
()
%
50
;
// Construct graph
auto
&
net
=
test_net
();
auto
&
net
=
test_net
();
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
...
...
@@ -221,8 +193,8 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
// Add input data
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Bias"
,
{
output_channels
});
// run cpu
net
.
RunOp
();
...
...
mace/ops/conv_pool_2d_base.h
浏览文件 @
578b382a
...
...
@@ -10,15 +10,14 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
ConvPool2dOpBase
:
public
Operator
<
D
,
T
>
{
public:
ConvPool2dOpBase
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
strides_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"strides"
)),
padding_
(
static_cast
<
Padding
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
)))),
padding_
(
static_cast
<
Padding
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
)))),
dilations_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"dilations"
))
{}
protected:
...
...
mace/ops/ops_test_util.h
浏览文件 @
578b382a
...
...
@@ -43,31 +43,33 @@ class OpsTestNet {
public:
OpsTestNet
()
{}
template
<
typename
T
>
void
AddInputFromArray
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
template
<
typename
T
>
void
AddInputFromArray
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
T
*
input_data
=
input
->
mutable_data
<
T
>
();
MACE_CHECK
(
input
->
size
()
==
data
.
size
());
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
}
template
<
typename
T
>
void
AddRepeatedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
template
<
typename
T
>
void
AddRepeatedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
T
data
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
T
*
input_data
=
input
->
mutable_data
<
T
>
();
MACE_CHECK
(
input
->
size
()
==
data
.
size
());
std
::
fill
(
input_data
,
input_data
+
input
->
size
(),
data
);
}
template
<
typename
T
>
void
AddRandomInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
bool
positive
=
false
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
template
<
typename
T
>
void
AddRandomInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
bool
positive
=
false
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
float
*
input_data
=
input
->
mutable_data
<
T
>
();
...
...
@@ -76,12 +78,16 @@ class OpsTestNet {
std
::
normal_distribution
<
T
>
nd
(
0
,
1
);
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
return
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
);
});
[
&
gen
,
&
nd
,
positive
]
{
return
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
);
});
}
template
<
typename
T
>
void
AddFixedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
T
value
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
template
<
typename
T
>
void
AddFixedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
T
value
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
float
*
input_data
=
input
->
mutable_data
<
T
>
();
...
...
@@ -122,7 +128,8 @@ class OpsTestNet {
}
}
void
AddStringsArg
(
const
char
*
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
void
AddStringsArg
(
const
char
*
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
...
...
@@ -145,9 +152,7 @@ class OpsTestNet {
return
net_
->
Run
();
}
bool
RunOp
()
{
return
RunOp
(
DeviceType
::
CPU
);
}
bool
RunOp
()
{
return
RunOp
(
DeviceType
::
CPU
);
}
Tensor
*
GetOutput
(
const
char
*
output_name
)
{
return
ws_
.
GetTensor
(
output_name
);
...
...
@@ -177,8 +182,9 @@ class OpsTestBase : public ::testing::Test {
OpsTestNet
test_net_
;
};
template
<
typename
T
>
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
template
<
typename
T
>
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
()));
res
->
Resize
(
shape
);
T
*
input_data
=
res
->
mutable_data
<
T
>
();
...
...
@@ -209,40 +215,38 @@ inline std::string ShapeToString(const Tensor &x) {
return
std
::
string
(
stream
.
str
());
}
template
<
typename
T
>
template
<
typename
T
>
struct
is_floating_point_type
{
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
;
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
;
};
template
<
typename
T
>
template
<
typename
T
>
inline
void
ExpectEqual
(
const
T
&
a
,
const
T
&
b
)
{
EXPECT_EQ
(
a
,
b
);
}
template
<
>
template
<
>
inline
void
ExpectEqual
<
float
>
(
const
float
&
a
,
const
float
&
b
)
{
EXPECT_FLOAT_EQ
(
a
,
b
);
}
template
<
>
template
<
>
inline
void
ExpectEqual
<
double
>
(
const
double
&
a
,
const
double
&
b
)
{
EXPECT_DOUBLE_EQ
(
a
,
b
);
}
inline
void
AssertSameTypeDims
(
const
Tensor
&
x
,
const
Tensor
&
y
)
{
ASSERT_EQ
(
x
.
dtype
(),
y
.
dtype
());
ASSERT_TRUE
(
IsSameSize
(
x
,
y
))
<<
"x.shape ["
<<
ShapeToString
(
x
)
<<
"] vs "
ASSERT_TRUE
(
IsSameSize
(
x
,
y
))
<<
"x.shape ["
<<
ShapeToString
(
x
)
<<
"] vs "
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
}
template
<
typename
T
,
bool
is_fp
=
is_floating_point_type
<
T
>
::
value
>
template
<
typename
T
,
bool
is_fp
=
is_floating_point_type
<
T
>
::
value
>
struct
Expector
;
// Partial specialization for float and double.
template
<
typename
T
>
template
<
typename
T
>
struct
Expector
<
T
,
true
>
{
static
void
Equal
(
const
T
&
a
,
const
T
&
b
)
{
ExpectEqual
(
a
,
b
);
}
...
...
@@ -262,15 +266,16 @@ struct Expector<T, true> {
auto
a
=
x
.
data
<
T
>
();
auto
b
=
y
.
data
<
T
>
();
for
(
int
i
=
0
;
i
<
x
.
size
();
++
i
)
{
EXPECT_NEAR
(
a
[
i
],
b
[
i
],
abs_err
)
<<
"a = "
<<
a
<<
" b = "
<<
b
<<
" index = "
<<
i
;
EXPECT_NEAR
(
a
[
i
],
b
[
i
],
abs_err
)
<<
"a = "
<<
a
<<
" b = "
<<
b
<<
" index = "
<<
i
;
}
}
};
template
<
typename
T
>
template
<
typename
T
>
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
static_assert
(
is_floating_point_type
<
T
>::
value
,
"T is not a floating point type"
);
static_assert
(
is_floating_point_type
<
T
>::
value
,
"T is not a floating point type"
);
Expector
<
T
>::
Near
(
x
,
y
,
abs_err
);
}
...
...
mace/ops/pooling.cc
浏览文件 @
578b382a
...
...
@@ -2,7 +2,6 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/pooling.h"
namespace
mace
{
...
...
mace/ops/pooling.h
浏览文件 @
578b382a
...
...
@@ -11,17 +11,17 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
PoolingOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
public:
public:
PoolingOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
),
kernels_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"kernels"
)),
pooling_type_
(
static_cast
<
PoolingType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"pooling_type"
,
static_cast
<
int
>
(
AVG
))))
{};
pooling_type_
(
static_cast
<
PoolingType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"pooling_type"
,
static_cast
<
int
>
(
AVG
))))
{};
bool
Run
()
override
{
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
in_shape
=
input
->
shape
();
...
...
@@ -33,28 +33,21 @@ public:
filter_shape
[
1
]
=
in_shape
[
0
];
filter_shape
[
2
]
=
kernels_
[
0
];
filter_shape
[
3
]
=
kernels_
[
1
];
kernels
::
CalcPaddingAndOutputSize
(
in_shape
.
data
(),
filter_shape
.
data
(),
kernels
::
CalcPaddingAndOutputSize
(
in_shape
.
data
(),
filter_shape
.
data
(),
this
->
dilations_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
output
->
Resize
(
output_shape
);
auto
pooling_func
=
kernels
::
PoolingFunctor
<
D
,
T
>
(
pooling_type_
,
kernels_
.
data
(),
this
->
strides_
.
data
(),
paddings
.
data
(),
auto
pooling_func
=
kernels
::
PoolingFunctor
<
D
,
T
>
(
pooling_type_
,
kernels_
.
data
(),
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
pooling_func
(
input
->
data
<
float
>
(),
in_shape
.
data
(),
output
->
mutable_data
<
float
>
(),
output
->
shape
().
data
());
pooling_func
(
input
->
data
<
float
>
(),
in_shape
.
data
(),
output
->
mutable_data
<
float
>
(),
output
->
shape
().
data
());
return
true
;
};
protected:
protected:
std
::
vector
<
int
>
kernels_
;
PoolingType
pooling_type_
;
...
...
@@ -64,4 +57,4 @@ protected:
}
// namespace mace
#endif
//
MACE_OPS_POOLING_H_
#endif
//
MACE_OPS_POOLING_H_
mace/ops/pooling_benchmark.cc
浏览文件 @
578b382a
...
...
@@ -2,20 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/testing/test_benchmark.h"
#include "mace/core/operator.h"
#include "mace/kernels/pooling.h"
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
using
namespace
mace
::
kernels
;
template
<
DeviceType
D
>
static
void
Pooling
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel
,
int
stride
,
Padding
padding
,
template
<
DeviceType
D
>
static
void
Pooling
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel
,
int
stride
,
Padding
padding
,
PoolingType
pooling_type
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -46,14 +45,17 @@ static void Pooling(int iters, int batch, int channels, int height,
}
#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \
static void BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
static void \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(float)));\
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, PoolingType::PO); \
mace::testing::BytesProcessed(tot*(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
} \
BENCHMARK(BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
BENCHMARK( \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
#define BM_POOLING(N, C, H, W, K, S, PA, PO) \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); \
...
...
mace/ops/pooling_test.cc
浏览文件 @
578b382a
...
...
@@ -5,9 +5,9 @@
#include "gtest/gtest.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/kernels/pooling.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
...
...
@@ -29,27 +29,21 @@ TEST_F(PoolingOpTest, MAX_VALID) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
2
,
2
},
{
5
,
7
,
13
,
15
,
21
,
23
,
29
,
31
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
2
,
2
},
{
5
,
7
,
13
,
15
,
21
,
23
,
29
,
31
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
PoolingOpTest
,
AVG_VALID
)
{
// Construct graph
auto
&
net
=
test_net
();
...
...
@@ -66,22 +60,17 @@ TEST_F(PoolingOpTest, AVG_VALID) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
);
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
2
,
2
},
{
2.5
,
4.5
,
10.5
,
12.5
,
18.5
,
20.5
,
26.5
,
28.5
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
2
,
2
},
{
2.5
,
4.5
,
10.5
,
12.5
,
18.5
,
20.5
,
26.5
,
28.5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -103,16 +92,13 @@ TEST_F(PoolingOpTest, MAX_SAME) {
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
});
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
4
,
5
,
7
,
8
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
4
,
5
,
7
,
8
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -133,18 +119,15 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
10
,
11
,
14
,
15
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
10
,
11
,
14
,
15
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -165,18 +148,14 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -197,18 +176,14 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
mace/ops/relu.cc
浏览文件 @
578b382a
mace/ops/relu.h
浏览文件 @
578b382a
...
...
@@ -10,10 +10,10 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
ReluOp
:
public
Operator
<
D
,
T
>
{
public:
ReluOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
ReluOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{}
bool
Run
()
override
{
const
Tensor
*
input_tensor
=
this
->
inputs_
[
0
];
...
...
mace/ops/relu_benchmark.cc
浏览文件 @
578b382a
...
...
@@ -10,7 +10,6 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
ReluBenchmark
(
int
iters
,
int
size
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -28,17 +27,16 @@ static void ReluBenchmark(int iters, int size) {
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE( \
int iters) { \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot
* (sizeof(TYPE)));
\
mace::testing::BytesProcessed(tot
*(sizeof(TYPE)));
\
ReluBenchmark<DEVICE, TYPE>(iters, SIZE); \
} \
BENCHMARK(BM_RELU_##SIZE##_##TYPE##_##DEVICE)
...
...
mace/ops/relu_test.cc
浏览文件 @
578b382a
mace/ops/resize_bilinear.cc
浏览文件 @
578b382a
...
...
@@ -9,7 +9,8 @@ namespace mace {
REGISTER_CPU_OPERATOR
(
ResizeBilinear
,
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
ResizeBilinear
,
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_NEON_OPERATOR
(
ResizeBilinear
,
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
}
// namespace mace
mace/ops/resize_bilinear.h
浏览文件 @
578b382a
...
...
@@ -5,18 +5,18 @@
#ifndef MACE_RESIZE_BILINEAR_H
#define MACE_RESIZE_BILINEAR_H
#include "mace/core/operator.h"
#include "mace/kernels/resize_bilinear.h"
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
ResizeBilinearOp
:
public
Operator
<
D
,
T
>
{
public:
ResizeBilinearOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
ResizeBilinearOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
),
functor_
(
OperatorBase
::
GetSingleArgument
<
bool
>
(
"align_corners"
,
false
))
{}
functor_
(
OperatorBase
::
GetSingleArgument
<
bool
>
(
"align_corners"
,
false
))
{}
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
0
);
...
...
@@ -24,8 +24,8 @@ class ResizeBilinearOp : public Operator<D, T> {
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input must be 4-dimensional."
,
input
->
dim_size
());
MACE_CHECK
(
resize_dims
->
dim_size
()
==
1
,
"resize dim must be 2-dimensional."
,
resize_dims
->
dim_size
());
MACE_CHECK
(
resize_dims
->
dim_size
()
==
1
,
"resize dim must be 2-dimensional."
,
resize_dims
->
dim_size
());
Tensor
*
output
=
this
->
Output
(
0
);
...
...
@@ -35,7 +35,7 @@ class ResizeBilinearOp : public Operator<D, T> {
index_t
in_width
=
input
->
dim
(
3
);
index_t
out_height
=
resize_dims
->
data
<
index_t
>
()[
0
];
index_t
out_width
=
resize_dims
->
data
<
index_t
>
()[
1
];
vector
<
index_t
>
out_shape
{
n
,
channels
,
out_height
,
out_width
};
vector
<
index_t
>
out_shape
{
n
,
channels
,
out_height
,
out_width
};
output
->
Resize
(
out_shape
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
...
...
@@ -45,6 +45,7 @@ class ResizeBilinearOp : public Operator<D, T> {
out_height
,
out_width
);
return
true
;
}
private:
kernels
::
ResizeBilinearFunctor
<
D
,
T
>
functor_
;
};
...
...
mace/ops/resize_bilinear_test.cc
浏览文件 @
578b382a
...
...
@@ -2,9 +2,9 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/resize_bilinear.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/resize_bilinear.h"
using
namespace
mace
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录