Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
8ae8f575
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
8ae8f575
编写于
9月 15, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Fix google coding style
上级
9c9af68e
变更
66
隐藏空白更改
内联
并排
Showing
66 changed file
with
1096 addition
and
1361 deletion
+1096
-1361
mace/core/allocator.cc
mace/core/allocator.cc
+3
-7
mace/core/allocator.h
mace/core/allocator.h
+4
-6
mace/core/common.h
mace/core/common.h
+7
-7
mace/core/logging.cc
mace/core/logging.cc
+2
-3
mace/core/logging.h
mace/core/logging.h
+18
-21
mace/core/macros.h
mace/core/macros.h
+1
-2
mace/core/net.cc
mace/core/net.cc
+10
-17
mace/core/net.h
mace/core/net.h
+11
-18
mace/core/operator.cc
mace/core/operator.cc
+9
-20
mace/core/operator.h
mace/core/operator.h
+33
-50
mace/core/proto_utils.cc
mace/core/proto_utils.cc
+69
-92
mace/core/proto_utils.h
mace/core/proto_utils.h
+35
-58
mace/core/registry.h
mace/core/registry.h
+21
-22
mace/core/serializer.cc
mace/core/serializer.cc
+13
-16
mace/core/serializer.h
mace/core/serializer.h
+4
-4
mace/core/tensor.h
mace/core/tensor.h
+25
-30
mace/core/testing/test_benchmark.cc
mace/core/testing/test_benchmark.cc
+3
-6
mace/core/testing/test_benchmark.h
mace/core/testing/test_benchmark.h
+3
-3
mace/core/testing/test_benchmark_main.cc
mace/core/testing/test_benchmark_main.cc
+0
-1
mace/core/types.h
mace/core/types.h
+15
-16
mace/core/workspace.cc
mace/core/workspace.cc
+9
-8
mace/core/workspace.h
mace/core/workspace.h
+3
-5
mace/examples/benchmark_example.cc
mace/examples/benchmark_example.cc
+2
-3
mace/kernels/addn.h
mace/kernels/addn.h
+7
-9
mace/kernels/batch_norm.h
mace/kernels/batch_norm.h
+14
-25
mace/kernels/conv_2d.h
mace/kernels/conv_2d.h
+84
-95
mace/kernels/conv_pool_2d_util.cc
mace/kernels/conv_pool_2d_util.cc
+17
-21
mace/kernels/conv_pool_2d_util.h
mace/kernels/conv_pool_2d_util.h
+11
-15
mace/kernels/neon/addn_neon.cc
mace/kernels/neon/addn_neon.cc
+6
-7
mace/kernels/neon/batch_norm_neon.cc
mace/kernels/neon/batch_norm_neon.cc
+17
-21
mace/kernels/neon/conv_2d_neon.cc
mace/kernels/neon/conv_2d_neon.cc
+27
-55
mace/kernels/neon/conv_2d_neon_1x1.cc
mace/kernels/neon/conv_2d_neon_1x1.cc
+30
-31
mace/kernels/neon/conv_2d_neon_3x3.cc
mace/kernels/neon/conv_2d_neon_3x3.cc
+73
-66
mace/kernels/neon/conv_2d_neon_5x5.cc
mace/kernels/neon/conv_2d_neon_5x5.cc
+16
-16
mace/kernels/neon/max_pooling_neon_2x2.cc
mace/kernels/neon/max_pooling_neon_2x2.cc
+7
-12
mace/kernels/neon/max_pooling_neon_3x3.cc
mace/kernels/neon/max_pooling_neon_3x3.cc
+9
-14
mace/kernels/neon/pooling_neon.cc
mace/kernels/neon/pooling_neon.cc
+20
-34
mace/kernels/neon/relu_neon.cc
mace/kernels/neon/relu_neon.cc
+6
-7
mace/kernels/pooling.h
mace/kernels/pooling.h
+28
-36
mace/kernels/relu.h
mace/kernels/relu.h
+4
-4
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+28
-31
mace/ops/addn.cc
mace/ops/addn.cc
+2
-2
mace/ops/addn.h
mace/ops/addn.h
+4
-4
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+12
-15
mace/ops/addn_test.cc
mace/ops/addn_test.cc
+1
-1
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+2
-2
mace/ops/batch_norm.h
mace/ops/batch_norm.h
+47
-42
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+21
-21
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+17
-18
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+2
-2
mace/ops/conv_2d.h
mace/ops/conv_2d.h
+12
-17
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+26
-22
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+74
-102
mace/ops/conv_pool_2d_base.h
mace/ops/conv_pool_2d_base.h
+8
-9
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+45
-40
mace/ops/pooling.cc
mace/ops/pooling.cc
+2
-3
mace/ops/pooling.h
mace/ops/pooling.h
+19
-26
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+19
-17
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+46
-71
mace/ops/relu.cc
mace/ops/relu.cc
+2
-2
mace/ops/relu.h
mace/ops/relu.h
+4
-4
mace/ops/relu_benchmark.cc
mace/ops/relu_benchmark.cc
+11
-13
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+1
-1
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+4
-3
mace/ops/resize_bilinear.h
mace/ops/resize_bilinear.h
+10
-9
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+1
-1
未找到文件。
mace/core/allocator.cc
浏览文件 @
8ae8f575
...
...
@@ -7,13 +7,9 @@
namespace
mace
{
static
std
::
unique_ptr
<
CPUAllocator
>
g_cpu_allocator
(
new
CPUAllocator
());
CPUAllocator
*
cpu_allocator
()
{
return
g_cpu_allocator
.
get
();
}
CPUAllocator
*
cpu_allocator
()
{
return
g_cpu_allocator
.
get
();
}
void
SetCPUAllocator
(
CPUAllocator
*
alloc
)
{
g_cpu_allocator
.
reset
(
alloc
);
}
void
SetCPUAllocator
(
CPUAllocator
*
alloc
)
{
g_cpu_allocator
.
reset
(
alloc
);
}
Allocator
*
GetDeviceAllocator
(
DeviceType
type
)
{
switch
(
type
)
{
...
...
@@ -26,4 +22,4 @@ Allocator* GetDeviceAllocator(DeviceType type) {
return
nullptr
;
}
}
// namespace mace
}
// namespace mace
mace/core/allocator.h
浏览文件 @
8ae8f575
...
...
@@ -39,7 +39,7 @@ class Allocator {
}
};
class
CPUAllocator
:
public
Allocator
{
class
CPUAllocator
:
public
Allocator
{
public:
~
CPUAllocator
()
override
{}
void
*
New
(
size_t
nbytes
)
override
{
...
...
@@ -55,9 +55,7 @@ class CPUAllocator: public Allocator {
return
data
;
}
void
Delete
(
void
*
data
)
override
{
free
(
data
);
}
void
Delete
(
void
*
data
)
override
{
free
(
data
);
}
void
CopyBytes
(
void
*
dst
,
const
void
*
src
,
size_t
size
)
override
{
memcpy
(
dst
,
src
,
size
);
...
...
@@ -85,6 +83,6 @@ struct DeviceContext<DeviceType::NEON> {
Allocator
*
GetDeviceAllocator
(
DeviceType
type
);
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_ALLOCATOR_H_
#endif
// MACE_CORE_ALLOCATOR_H_
mace/core/common.h
浏览文件 @
8ae8f575
...
...
@@ -5,12 +5,12 @@
#ifndef MACE_CORE_COMMON_H_
#define MACE_CORE_COMMON_H_
#include <
set
>
#include <
algorithm
>
#include <map>
#include <string>
#include <memory>
#include <set>
#include <string>
#include <vector>
#include <algorithm>
#include "mace/core/logging.h"
...
...
@@ -24,9 +24,9 @@ typedef int64_t index_t;
// Disable the copy and assignment operator for a class.
#ifndef DISABLE_COPY_AND_ASSIGN
#define DISABLE_COPY_AND_ASSIGN(classname)
\
private:
\
classname(const classname&) = delete;
\
#define DISABLE_COPY_AND_ASSIGN(classname) \
private:
\
classname(const classname&) = delete; \
classname& operator=(const classname&) = delete
#endif
...
...
@@ -35,4 +35,4 @@ private: \
// TODO: need to fine tune this
#define kCostPerGroup 1024000000
#endif // MACE_CORE_COMMON_H_
#endif
// MACE_CORE_COMMON_H_
mace/core/logging.cc
浏览文件 @
8ae8f575
...
...
@@ -2,7 +2,6 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/logging.h"
#include <stdlib.h>
...
...
@@ -62,11 +61,11 @@ void LogMessage::GenerateLogMessage() {
#else
void
LogMessage
::
GenerateLogMessage
()
{
fprintf
(
stderr
,
"%c %s:%d] %s
\n
"
,
"IWEF"
[
severity_
],
fname_
,
line_
,
str
().
c_str
());
fprintf
(
stderr
,
"%c %s:%d] %s
\n
"
,
"IWEF"
[
severity_
],
fname_
,
line_
,
str
().
c_str
());
}
#endif
namespace
{
// Parse log level (int64_t) from environment variable (char*)
...
...
mace/core/logging.h
浏览文件 @
8ae8f575
...
...
@@ -5,8 +5,8 @@
#ifndef MACE_CORE_LOGGING_H_
#define MACE_CORE_LOGGING_H_
#include <sstream>
#include <limits>
#include <sstream>
#include <string>
#undef ERROR
...
...
@@ -30,8 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) {
}
template
<
typename
T
,
typename
...
Args
>
inline
void
MakeStringInternal
(
std
::
stringstream
&
ss
,
const
T
&
t
,
const
Args
&
...
args
)
{
inline
void
MakeStringInternal
(
std
::
stringstream
&
ss
,
const
T
&
t
,
const
Args
&
...
args
)
{
MakeStringInternal
(
ss
,
t
);
MakeStringInternal
(
ss
,
args
...);
}
...
...
@@ -48,9 +48,7 @@ template <>
inline
string
MakeString
(
const
string
&
str
)
{
return
str
;
}
inline
string
MakeString
(
const
char
*
c_str
)
{
return
string
(
c_str
);
}
inline
string
MakeString
(
const
char
*
c_str
)
{
return
string
(
c_str
);
}
class
LogMessage
:
public
std
::
basic_ostringstream
<
char
>
{
public:
...
...
@@ -85,8 +83,7 @@ class LogMessageFatal : public LogMessage {
::mace::internal::LogMessage(__FILE__, __LINE__, mace::WARNING)
#define _MACE_LOG_ERROR \
::mace::internal::LogMessage(__FILE__, __LINE__, mace::ERROR)
#define _MACE_LOG_FATAL \
::mace::internal::LogMessageFatal(__FILE__, __LINE__)
#define _MACE_LOG_FATAL ::mace::internal::LogMessageFatal(__FILE__, __LINE__)
#define _MACE_LOG_QFATAL _MACE_LOG_FATAL
...
...
@@ -96,10 +93,10 @@ class LogMessageFatal : public LogMessage {
// Turn VLOG off when under mobile devices for considerations of binary size.
#define VLOG_IS_ON(lvl) ((lvl) <= 0)
#else
// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log level
// Otherwise, Set MACE_CPP_MIN_VLOG_LEVEL environment to update minimum log
// level
// of VLOG
#define VLOG_IS_ON(lvl) \
((lvl) <= ::mace::internal::LogMessage::MinVLogLevel())
#define VLOG_IS_ON(lvl) ((lvl) <= ::mace::internal::LogMessage::MinVLogLevel())
#endif
#define VLOG(lvl) \
...
...
@@ -113,16 +110,16 @@ class LogMessageFatal : public LogMessage {
// MACE_CHECK(fp->Write(x) == 4)
// MACE_CHECK(fp->Write(x) == 4, "Write failed")
// which are not correct for MACE_ASSERT.
#define MACE_CHECK(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Check failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#define MACE_CHECK(condition, ...)
\
if (!(condition))
\
LOG(FATAL) << "Check failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#ifndef NDEBUG
#define MACE_ASSERT(condition, ...) \
if (!(condition)) \
LOG(FATAL) << "Assert failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#define MACE_ASSERT(condition, ...)
\
if (!(condition))
\
LOG(FATAL) << "Assert failed: " #condition " " \
<< ::mace::internal::MakeString(__VA_ARGS__)
#else
#define MACE_ASSERT(condition, ...) ((void)0)
#endif
...
...
@@ -135,9 +132,9 @@ T&& CheckNotNull(const char* file, int line, const char* exprtext, T&& t) {
return
std
::
forward
<
T
>
(
t
);
}
#define MACE_CHECK_NOTNULL(val)
\
#define MACE_CHECK_NOTNULL(val) \
::mace::internal::CheckNotNull(__FILE__, __LINE__, \
"'" #val "' Must be non NULL", (val))
"'" #val "' Must be non NULL", (val))
}
// namespace internal
}
// namespace mace
...
...
mace/core/macros.h
浏览文件 @
8ae8f575
...
...
@@ -17,5 +17,4 @@
#define MACE_PREDICT_TRUE(x) (x)
#endif
#endif //MACE_CORE_MACROS_H_
#endif // MACE_CORE_MACROS_H_
mace/core/net.cc
浏览文件 @
8ae8f575
...
...
@@ -6,22 +6,19 @@
namespace
mace
{
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
name_
(
net_def
->
name
())
{
}
:
name_
(
net_def
->
name
())
{}
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
NetBase
(
net_def
,
ws
,
type
)
{
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
NetBase
(
net_def
,
ws
,
type
)
{
VLOG
(
1
)
<<
"Constructing SimpleNet "
<<
net_def
->
name
();
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
VLOG
(
1
)
<<
"Creating operator "
<<
operator_def
.
name
()
<<
":"
<<
operator_def
.
type
();
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
OperatorDef
temp_def
(
operator_def
);
op
=
CreateOperator
(
temp_def
,
ws
,
type
);
operators_
.
emplace_back
(
std
::
move
(
op
));
...
...
@@ -40,20 +37,16 @@ bool SimpleNet::Run() {
return
true
;
}
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
{
std
::
shared_ptr
<
NetDef
>
tmp_net_def
(
new
NetDef
(
net_def
));
return
CreateNet
(
tmp_net_def
,
ws
,
type
);
}
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
{
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
)
{
unique_ptr
<
NetBase
>
net
(
new
SimpleNet
(
net_def
,
ws
,
type
));
return
net
;
}
}
// namespace mace
}
// namespace mace
mace/core/net.h
浏览文件 @
8ae8f575
...
...
@@ -6,35 +6,31 @@
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
class
NetBase
{
public:
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
virtual
~
NetBase
()
noexcept
{}
virtual
bool
Run
()
=
0
;
const
string
&
Name
()
const
{
return
name_
;
}
const
string
&
Name
()
const
{
return
name_
;
}
protected:
string
name_
;
DISABLE_COPY_AND_ASSIGN
(
NetBase
);
DISABLE_COPY_AND_ASSIGN
(
NetBase
);
};
class
SimpleNet
:
public
NetBase
{
public:
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
bool
Run
()
override
;
...
...
@@ -42,17 +38,14 @@ class SimpleNet : public NetBase {
protected:
vector
<
unique_ptr
<
OperatorBase
>
>
operators_
;
DISABLE_COPY_AND_ASSIGN
(
SimpleNet
);
DISABLE_COPY_AND_ASSIGN
(
SimpleNet
);
};
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>&
net_def
,
Workspace
*
ws
,
DeviceType
type
);
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_NET_H_
#endif
// MACE_CORE_NET_H_
mace/core/operator.cc
浏览文件 @
8ae8f575
...
...
@@ -11,33 +11,22 @@ std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
return
&
g_device_type_registry
;
}
MACE_DEFINE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_DEFINE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
CPU
,
CPUOperatorRegistry
);
MACE_DEFINE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_DEFINE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
NEON
,
NEONOperatorRegistry
);
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
)
{
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
)
{
OperatorRegistry
*
registry
=
gDeviceTypeRegistry
()
->
at
(
type
);
return
registry
->
Create
(
operator_def
.
type
(),
operator_def
,
ws
);
}
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
operator_ws_
(
ws
),
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{
}
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{}
}
// namespace mace
}
// namespace mace
mace/core/operator.h
浏览文件 @
8ae8f575
...
...
@@ -5,12 +5,12 @@
#ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#include "mace/core/proto_utils.h"
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/tensor.h"
#include "mace/core/proto_utils.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
...
...
@@ -23,22 +23,21 @@ class OperatorBase {
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
HasArgument
(
*
operator_def_
,
name
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
T
GetSingleArgument
(
const
string
&
name
,
const
T
&
default_value
)
const
{
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
T
>
(
*
operator_def_
,
name
,
default_value
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
bool
HasSingleArgumentOfType
(
const
string
&
name
)
const
{
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
HasSingleArgumentOfType
<
OperatorDef
,
T
>
(
*
operator_def_
,
name
);
}
template
<
typename
T
>
template
<
typename
T
>
inline
vector
<
T
>
GetRepeatedArgument
(
const
string
&
name
,
const
vector
<
T
>
&
default_value
=
{})
const
{
const
string
&
name
,
const
vector
<
T
>
&
default_value
=
{})
const
{
MACE_CHECK
(
operator_def_
,
"operator_def was null!"
);
return
ArgumentHelper
::
GetRepeatedArgument
<
OperatorDef
,
T
>
(
*
operator_def_
,
name
,
default_value
);
...
...
@@ -49,9 +48,7 @@ class OperatorBase {
return
inputs_
[
idx
];
}
inline
Tensor
*
Output
(
int
idx
)
{
return
outputs_
[
idx
];
}
inline
Tensor
*
Output
(
int
idx
)
{
return
outputs_
[
idx
];
}
inline
int
InputSize
()
{
return
inputs_
.
size
();
}
inline
int
OutputSize
()
{
return
outputs_
.
size
();
}
...
...
@@ -70,9 +67,7 @@ class OperatorBase {
operator_def_
=
operator_def
;
}
inline
bool
has_debug_def
()
const
{
return
operator_def_
!=
nullptr
;
}
inline
bool
has_debug_def
()
const
{
return
operator_def_
!=
nullptr
;
}
protected:
Workspace
*
operator_ws_
;
...
...
@@ -80,7 +75,7 @@ class OperatorBase {
vector
<
const
Tensor
*>
inputs_
;
vector
<
Tensor
*>
outputs_
;
DISABLE_COPY_AND_ASSIGN
(
OperatorBase
);
DISABLE_COPY_AND_ASSIGN
(
OperatorBase
);
};
template
<
DeviceType
D
,
class
T
>
...
...
@@ -90,26 +85,22 @@ class Operator : public OperatorBase {
:
OperatorBase
(
operator_def
,
ws
)
{
for
(
const
string
&
input_str
:
operator_def
.
input
())
{
const
Tensor
*
tensor
=
ws
->
GetTensor
(
input_str
);
MACE_CHECK
(
tensor
!=
nullptr
,
"op "
,
operator_def
.
type
(),
": Encountered a non-existing input tensor: "
,
input_str
);
MACE_CHECK
(
tensor
!=
nullptr
,
"op "
,
operator_def
.
type
(),
": Encountered a non-existing input tensor: "
,
input_str
);
inputs_
.
push_back
(
tensor
);
}
for
(
const
string
&
output_str
:
operator_def
.
output
())
{
outputs_
.
push_back
(
MACE_CHECK_NOTNULL
(
ws
->
CreateTensor
(
output_str
,
DeviceContext
<
D
>::
allocator
(),
DataTypeToEnum
<
T
>::
v
())));
outputs_
.
push_back
(
MACE_CHECK_NOTNULL
(
ws
->
CreateTensor
(
output_str
,
DeviceContext
<
D
>::
allocator
(),
DataTypeToEnum
<
T
>::
v
())));
}
}
virtual
bool
Run
()
override
=
0
;
~
Operator
()
noexcept
override
{}
};
// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of the
// OP_INPUT_TAGS and OP_OUTPUT_TAGS are optional features to name the indices of
// the
// operator's inputs and outputs, in order to avoid confusion. For example, for
// a fully convolution layer that has input, weight and bias, you can define its
// input tags as:
...
...
@@ -119,9 +110,9 @@ class Operator : public OperatorBase {
// you can now do
// auto& weight = Input(WEIGHT);
// to make it more clear.
#define OP_INPUT_TAGS(first_input, ...)
\
#define OP_INPUT_TAGS(first_input, ...) \
enum _InputTags { first_input = 0, __VA_ARGS__ }
#define OP_OUTPUT_TAGS(first_input, ...)
\
#define OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
...
...
@@ -135,7 +126,7 @@ struct DeviceTypeRegisterer {
if
(
gDeviceTypeRegistry
()
->
count
(
type
))
{
LOG
(
ERROR
)
<<
"Device type "
<<
type
<<
"registered twice. This should not happen. Did you have "
"duplicated numbers assigned to different devices?"
;
"duplicated numbers assigned to different devices?"
;
std
::
exit
(
1
);
}
// Calling the registry function to get the actual registry pointer.
...
...
@@ -143,39 +134,31 @@ struct DeviceTypeRegisterer {
}
};
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(
\
DeviceType)(type, ®istry_function);
\
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function)
\
namespace {
\
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(
DeviceType)(
\
type, ®istry_function);
\
}
MACE_DECLARE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_DECLARE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...)
\
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_DECLARE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...)
\
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
);
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
);
}
// namespace mace
}
// namespace mace
#endif
//
MACE_CORE_OPERATOR_H
#endif
//
MACE_CORE_OPERATOR_H
mace/core/proto_utils.cc
浏览文件 @
8ae8f575
...
...
@@ -5,9 +5,9 @@
#include "mace/core/proto_utils.h"
#include <fcntl.h>
#include <unistd.h>
#include <cerrno>
#include <fstream>
#include <unistd.h>
#include "google/protobuf/io/coded_stream.h"
#include "google/protobuf/io/zero_copy_stream_impl.h"
...
...
@@ -82,13 +82,12 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
return
proto
->
ParseFromCodedStream
(
&
coded_stream
);
}
void
WriteProtoToBinaryFile
(
const
MessageLite
&
/*proto*/
,
const
char
*
/*filename*/
)
{
void
WriteProtoToBinaryFile
(
const
MessageLite
&
/*proto*/
,
const
char
*
/*filename*/
)
{
LOG
(
FATAL
)
<<
"Not implemented yet."
;
}
#else // MACE_USE_LITE_PROTO
#else
// MACE_USE_LITE_PROTO
// Full protocol buffer.
...
...
@@ -118,7 +117,7 @@ void WriteProtoToTextFile(const Message& proto, const char* filename) {
}
bool
ReadProtoFromBinaryFile
(
const
char
*
filename
,
MessageLite
*
proto
)
{
#if defined
(_MSC_VER) // for MSC compiler binary flag needs to be specified
#if defined(_MSC_VER) // for MSC compiler binary flag needs to be specified
int
fd
=
open
(
filename
,
O_RDONLY
|
O_BINARY
);
#else
int
fd
=
open
(
filename
,
O_RDONLY
);
...
...
@@ -138,8 +137,8 @@ bool ReadProtoFromBinaryFile(const char* filename, MessageLite* proto) {
void
WriteProtoToBinaryFile
(
const
MessageLite
&
proto
,
const
char
*
filename
)
{
int
fd
=
open
(
filename
,
O_WRONLY
|
O_CREAT
|
O_TRUNC
,
0644
);
MACE_CHECK
(
fd
!=
-
1
,
"File cannot be created: "
,
filename
,
" error number: "
,
errno
);
MACE_CHECK
(
fd
!=
-
1
,
"File cannot be created: "
,
filename
,
" error number: "
,
errno
);
std
::
unique_ptr
<
ZeroCopyOutputStream
>
raw_output
(
new
FileOutputStream
(
fd
));
std
::
unique_ptr
<
CodedOutputStream
>
coded_output
(
new
CodedOutputStream
(
raw_output
.
get
()));
...
...
@@ -151,18 +150,17 @@ void WriteProtoToBinaryFile(const MessageLite& proto, const char* filename) {
#endif // MACE_USE_LITE_PROTO
ArgumentHelper
::
ArgumentHelper
(
const
OperatorDef
&
def
)
{
for
(
auto
&
arg
:
def
.
arg
())
{
ArgumentHelper
::
ArgumentHelper
(
const
OperatorDef
&
def
)
{
for
(
auto
&
arg
:
def
.
arg
())
{
if
(
arg_map_
.
find
(
arg
.
name
())
!=
arg_map_
.
end
())
{
MACE_CHECK
(
arg
.
SerializeAsString
()
==
arg_map_
[
arg
.
name
()].
SerializeAsString
(),
"Found argument of the same name '"
,
arg
.
name
(),
"' but with different contents: "
,
ProtoDebugString
(
def
));
"Found argument of the same name '"
,
arg
.
name
(),
"' but with different contents: "
,
ProtoDebugString
(
def
));
LOG
(
WARNING
)
<<
"Duplicated argument name found in operator def: "
<<
ProtoDebugString
(
def
)
<<
", arg: "
<<
ProtoDebugString
(
arg
);
<<
ProtoDebugString
(
def
)
<<
", arg: "
<<
ProtoDebugString
(
arg
);
}
arg_map_
[
arg
.
name
()]
=
arg
;
...
...
@@ -171,10 +169,9 @@ ArgumentHelper::ArgumentHelper(const OperatorDef &def) {
ArgumentHelper
::
ArgumentHelper
(
const
NetDef
&
netdef
)
{
for
(
auto
&
arg
:
netdef
.
arg
())
{
MACE_CHECK
(
arg_map_
.
count
(
arg
.
name
())
==
0
,
"Duplicated argument name found in net def: "
,
ProtoDebugString
(
netdef
));
MACE_CHECK
(
arg_map_
.
count
(
arg
.
name
())
==
0
,
"Duplicated argument name found in net def: "
,
ProtoDebugString
(
netdef
));
arg_map_
[
arg
.
name
()]
=
arg
;
}
}
...
...
@@ -192,32 +189,24 @@ bool SupportsLosslessConversion(const InputType& value) {
}
}
#define INSTANTIATE_GET_SINGLE_ARGUMENT(
\
T, fieldname, enforce_lossless_conversion)
\
#define INSTANTIATE_GET_SINGLE_ARGUMENT(
T, fieldname,
\
enforce_lossless_conversion)
\
template <> \
T ArgumentHelper::GetSingleArgument<T>(
\
const string& name, const T& default_value) const {
\
T ArgumentHelper::GetSingleArgument<T>(
const string& name,
\
const T& default_value) const {
\
if (arg_map_.count(name) == 0) { \
VLOG(1) << "Using default parameter value " << default_value \
<< " for parameter " << name; \
return default_value; \
} \
MACE_CHECK( \
arg_map_.at(name).has_##fieldname(), \
"Argument ", \
name, \
" does not have the right field: expected field " #fieldname); \
MACE_CHECK(arg_map_.at(name).has_##fieldname(), "Argument ", name, \
" does not have the right field: expected field " #fieldname); \
auto value = arg_map_.at(name).fieldname(); \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(value), T>(value); \
MACE_CHECK( \
supportsConversion, \
"Value", \
value, \
" of argument ", \
name, \
"cannot be represented correctly in a target type"); \
MACE_CHECK(supportsConversion, "Value", value, " of argument ", name, \
"cannot be represented correctly in a target type"); \
} \
return value; \
} \
...
...
@@ -242,30 +231,25 @@ INSTANTIATE_GET_SINGLE_ARGUMENT(size_t, i, true)
INSTANTIATE_GET_SINGLE_ARGUMENT
(
string
,
s
,
false
)
#undef INSTANTIATE_GET_SINGLE_ARGUMENT
#define INSTANTIATE_GET_REPEATED_ARGUMENT( \
T, fieldname, enforce_lossless_conversion) \
template <> \
vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string& name, const std::vector<T>& default_value) const { \
if (arg_map_.count(name) == 0) { \
return default_value; \
} \
vector<T> values; \
for (const auto& v : arg_map_.at(name).fieldname()) { \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(v), T>(v); \
MACE_CHECK( \
supportsConversion, \
"Value", \
v, \
" of argument ", \
name, \
"cannot be represented correctly in a target type"); \
} \
values.push_back(v); \
} \
return values; \
#define INSTANTIATE_GET_REPEATED_ARGUMENT(T, fieldname, \
enforce_lossless_conversion) \
template <> \
vector<T> ArgumentHelper::GetRepeatedArgument<T>( \
const string& name, const std::vector<T>& default_value) const { \
if (arg_map_.count(name) == 0) { \
return default_value; \
} \
vector<T> values; \
for (const auto& v : arg_map_.at(name).fieldname()) { \
if (enforce_lossless_conversion) { \
auto supportsConversion = \
SupportsLosslessConversion<decltype(v), T>(v); \
MACE_CHECK(supportsConversion, "Value", v, " of argument ", name, \
"cannot be represented correctly in a target type"); \
} \
values.push_back(v); \
} \
return values; \
}
INSTANTIATE_GET_REPEATED_ARGUMENT
(
float
,
floats
,
false
)
...
...
@@ -281,14 +265,14 @@ INSTANTIATE_GET_REPEATED_ARGUMENT(size_t, ints, true)
INSTANTIATE_GET_REPEATED_ARGUMENT
(
string
,
strings
,
false
)
#undef INSTANTIATE_GET_REPEATED_ARGUMENT
#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname)
\
template <>
\
Argument MakeArgument(const string& name, const T& value) {
\
Argument arg;
\
arg.set_name(name);
\
arg.set_##fieldname(value);
\
return arg;
\
}
#define MACE_MAKE_SINGULAR_ARGUMENT(T, fieldname) \
template <>
\
Argument MakeArgument(const string& name, const T& value) {
\
Argument arg;
\
arg.set_name(name);
\
arg.set_##fieldname(value);
\
return arg;
\
}
MACE_MAKE_SINGULAR_ARGUMENT
(
bool
,
i
)
MACE_MAKE_SINGULAR_ARGUMENT
(
float
,
f
)
...
...
@@ -305,16 +289,16 @@ Argument MakeArgument(const string& name, const MessageLite& value) {
return
arg
;
}
#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname)
\
template <>
\
Argument MakeArgument(const string& name, const vector<T>& value) {
\
Argument arg;
\
arg.set_name(name);
\
for (const auto& v : value) {
\
arg.add_##fieldname(v);
\
}
\
return arg;
\
}
#define MACE_MAKE_REPEATED_ARGUMENT(T, fieldname) \
template <>
\
Argument MakeArgument(const string& name, const vector<T>& value) {
\
Argument arg;
\
arg.set_name(name);
\
for (const auto& v : value) {
\
arg.add_##fieldname(v);
\
}
\
return arg;
\
}
MACE_MAKE_REPEATED_ARGUMENT
(
float
,
floats
)
MACE_MAKE_REPEATED_ARGUMENT
(
int
,
ints
)
...
...
@@ -328,31 +312,24 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
return
arg
;
}
}
MACE_CHECK
(
false
,
"Argument named "
,
name
,
"does not exist in operator "
,
ProtoDebugString
(
def
));
MACE_CHECK
(
false
,
"Argument named "
,
name
,
"does not exist in operator "
,
ProtoDebugString
(
def
));
}
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
def_value
)
{
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
def_value
)
{
for
(
const
Argument
&
arg
:
def
.
arg
())
{
if
(
arg
.
name
()
==
name
)
{
MACE_CHECK
(
arg
.
has_i
(),
"Can't parse argument as bool: "
,
ProtoDebugString
(
arg
));
MACE_CHECK
(
arg
.
has_i
(),
"Can't parse argument as bool: "
,
ProtoDebugString
(
arg
));
return
arg
.
i
();
}
}
return
def_value
;
}
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
OperatorDef
*
def
)
{
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
OperatorDef
*
def
)
{
for
(
int
i
=
0
;
i
<
def
->
arg_size
();
++
i
)
{
if
(
def
->
arg
(
i
).
name
()
==
name
)
{
return
def
->
mutable_arg
(
i
);
...
...
mace/core/proto_utils.h
浏览文件 @
8ae8f575
...
...
@@ -12,15 +12,14 @@
#include "google/protobuf/message.h"
#endif // !MACE_USE_LITE_PROTO
#include "mace/proto/mace.pb.h"
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
using
std
::
string
;
using
::
google
::
protobuf
::
MessageLite
;
// Common interfaces that reads file contents into a string.
bool
ReadStringFromFile
(
const
char
*
filename
,
string
*
str
);
bool
WriteStringToFile
(
const
string
&
str
,
const
char
*
filename
);
...
...
@@ -46,22 +45,20 @@ inline string ProtoDebugString(const MessageLite& proto) {
// Text format MessageLite wrappers: these functions do nothing but just
// allowing things to compile. It will produce a runtime error if you are using
// MessageLite but still want text support.
inline
bool
ReadProtoFromTextFile
(
const
char
*
/*filename*/
,
MessageLite
*
/*proto*/
)
{
inline
bool
ReadProtoFromTextFile
(
const
char
*
/*filename*/
,
MessageLite
*
/*proto*/
)
{
LOG
(
FATAL
)
<<
"If you are running lite version, you should not be "
<<
"calling any text-format protobuffers."
;
<<
"calling any text-format protobuffers."
;
return
false
;
// Just to suppress compiler warning.
}
inline
bool
ReadProtoFromTextFile
(
const
string
filename
,
MessageLite
*
proto
)
{
return
ReadProtoFromTextFile
(
filename
.
c_str
(),
proto
);
}
inline
void
WriteProtoToTextFile
(
const
MessageLite
&
/*proto*/
,
const
char
*
/*filename*/
)
{
inline
void
WriteProtoToTextFile
(
const
MessageLite
&
/*proto*/
,
const
char
*
/*filename*/
)
{
LOG
(
FATAL
)
<<
"If you are running lite version, you should not be "
<<
"calling any text-format protobuffers."
;
<<
"calling any text-format protobuffers."
;
}
inline
void
WriteProtoToTextFile
(
const
MessageLite
&
proto
,
const
string
&
filename
)
{
...
...
@@ -107,16 +104,13 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) {
#endif // MACE_USE_LITE_PROTO
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
class
IterableOutputs
=
std
::
initializer_list
<
string
>
,
class
IterableArgs
=
std
::
initializer_list
<
Argument
>>
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
const
IterableInputs
&
inputs
,
const
IterableOutputs
&
outputs
,
const
IterableArgs
&
args
)
{
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
class
IterableOutputs
=
std
::
initializer_list
<
string
>
,
class
IterableArgs
=
std
::
initializer_list
<
Argument
>>
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
const
IterableInputs
&
inputs
,
const
IterableOutputs
&
outputs
,
const
IterableArgs
&
args
)
{
OperatorDef
def
;
def
.
set_type
(
type
);
def
.
set_name
(
name
);
...
...
@@ -134,20 +128,13 @@ OperatorDef CreateOperatorDef(
// A simplified version compared to the full CreateOperator, if you do not need
// to specify args.
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
class
IterableOutputs
=
std
::
initializer_list
<
string
>>
inline
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
const
IterableInputs
&
inputs
,
const
IterableOutputs
&
outputs
)
{
return
CreateOperatorDef
(
type
,
name
,
inputs
,
outputs
,
std
::
vector
<
Argument
>
());
template
<
class
IterableInputs
=
std
::
initializer_list
<
string
>,
class
IterableOutputs
=
std
::
initializer_list
<
string
>>
inline
OperatorDef
CreateOperatorDef
(
const
string
&
type
,
const
string
&
name
,
const
IterableInputs
&
inputs
,
const
IterableOutputs
&
outputs
)
{
return
CreateOperatorDef
(
type
,
name
,
inputs
,
outputs
,
std
::
vector
<
Argument
>
());
}
/**
...
...
@@ -166,10 +153,8 @@ class ArgumentHelper {
}
template
<
typename
Def
,
typename
T
>
static
T
GetSingleArgument
(
const
Def
&
def
,
const
string
&
name
,
const
T
&
default_value
)
{
static
T
GetSingleArgument
(
const
Def
&
def
,
const
string
&
name
,
const
T
&
default_value
)
{
return
ArgumentHelper
(
def
).
GetSingleArgument
<
T
>
(
name
,
default_value
);
}
...
...
@@ -180,8 +165,7 @@ class ArgumentHelper {
template
<
typename
Def
,
typename
T
>
static
vector
<
T
>
GetRepeatedArgument
(
const
Def
&
def
,
const
string
&
name
,
const
Def
&
def
,
const
string
&
name
,
const
std
::
vector
<
T
>&
default_value
=
std
::
vector
<
T
>
())
{
return
ArgumentHelper
(
def
).
GetRepeatedArgument
<
T
>
(
name
,
default_value
);
}
...
...
@@ -192,9 +176,8 @@ class ArgumentHelper {
}
template
<
typename
Def
,
typename
MessageType
>
static
vector
<
MessageType
>
GetRepeatedMessageArgument
(
const
Def
&
def
,
const
string
&
name
)
{
static
vector
<
MessageType
>
GetRepeatedMessageArgument
(
const
Def
&
def
,
const
string
&
name
)
{
return
ArgumentHelper
(
def
).
GetRepeatedMessageArgument
<
MessageType
>
(
name
);
}
...
...
@@ -216,9 +199,8 @@ class ArgumentHelper {
MACE_CHECK
(
arg_map_
.
count
(
name
),
"Cannot find parameter named "
+
name
);
MessageType
message
;
if
(
arg_map_
.
at
(
name
).
has_s
())
{
MACE_CHECK
(
message
.
ParseFromString
(
arg_map_
.
at
(
name
).
s
()),
"Faild to parse content from the string"
);
MACE_CHECK
(
message
.
ParseFromString
(
arg_map_
.
at
(
name
).
s
()),
"Faild to parse content from the string"
);
}
else
{
VLOG
(
1
)
<<
"Return empty message for parameter "
<<
name
;
}
...
...
@@ -230,9 +212,8 @@ class ArgumentHelper {
MACE_CHECK
(
arg_map_
.
count
(
name
),
"Cannot find parameter named "
+
name
);
vector
<
MessageType
>
messages
(
arg_map_
.
at
(
name
).
strings_size
());
for
(
int
i
=
0
;
i
<
messages
.
size
();
++
i
)
{
MACE_CHECK
(
messages
[
i
].
ParseFromString
(
arg_map_
.
at
(
name
).
strings
(
i
)),
"Faild to parse content from the string"
);
MACE_CHECK
(
messages
[
i
].
ParseFromString
(
arg_map_
.
at
(
name
).
strings
(
i
)),
"Faild to parse content from the string"
);
}
return
messages
;
}
...
...
@@ -242,15 +223,11 @@ class ArgumentHelper {
};
const
Argument
&
GetArgument
(
const
OperatorDef
&
def
,
const
string
&
name
);
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
def_value
=
false
);
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
OperatorDef
*
def
);
bool
GetFlagArgument
(
const
OperatorDef
&
def
,
const
string
&
name
,
bool
def_value
=
false
);
Argument
*
GetMutableArgument
(
const
string
&
name
,
const
bool
create_if_missing
,
OperatorDef
*
def
);
template
<
typename
T
>
Argument
MakeArgument
(
const
string
&
name
,
const
T
&
value
);
...
...
mace/core/registry.h
浏览文件 @
8ae8f575
...
...
@@ -12,7 +12,7 @@ namespace mace {
template
<
class
SrcType
,
class
ObjectType
,
class
...
Args
>
class
Registry
{
public:
typedef
std
::
function
<
std
::
unique_ptr
<
ObjectType
>
(
Args
...)
>
Creator
;
typedef
std
::
function
<
std
::
unique_ptr
<
ObjectType
>
(
Args
...)
>
Creator
;
Registry
()
:
registry_
()
{}
...
...
@@ -24,7 +24,7 @@ class Registry {
inline
bool
Has
(
const
SrcType
&
key
)
{
return
registry_
.
count
(
key
)
!=
0
;
}
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
{
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
{
if
(
registry_
.
count
(
key
)
==
0
)
{
VLOG
(
2
)
<<
"Key not registered: "
<<
key
;
return
nullptr
;
...
...
@@ -60,7 +60,7 @@ class Registerer {
}
template
<
class
DerivedType
>
static
unique_ptr
<
ObjectType
>
DefaultCreator
(
Args
...
args
)
{
static
unique_ptr
<
ObjectType
>
DefaultCreator
(
Args
...
args
)
{
return
std
::
unique_ptr
<
ObjectType
>
(
new
DerivedType
(
args
...));
}
};
...
...
@@ -74,36 +74,35 @@ class Registerer {
#endif
#define MACE_DECLARE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName();
\
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__>
\
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName(); \
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName;
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName() {
\
static Registry<SrcType, ObjectType, ##__VA_ARGS__>* registry =
\
new Registry<SrcType, ObjectType, ##__VA_ARGS__>();
\
return registry;
\
Registry<SrcType, ObjectType, ##__VA_ARGS__>* RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__>* registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \
}
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(
\
RegistryName, std::string, ObjectType,
##__VA_ARGS__)
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...)
\
MACE_DECLARE_TYPED_REGISTRY(
RegistryName, std::string, ObjectType,
\
##__VA_ARGS__)
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(
\
RegistryName, std::string, ObjectType,
##__VA_ARGS__)
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...)
\
MACE_DEFINE_TYPED_REGISTRY(
RegistryName, std::string, ObjectType,
\
##__VA_ARGS__)
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \
namespace {
\
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, RegistryName(), __VA_ARGS__);
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace {
\
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
key, RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \
...
...
@@ -112,6 +111,6 @@ class Registerer {
#define MACE_REGISTER_CLASS(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, #key, __VA_ARGS__)
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_REGISTRY_H_
#endif
// MACE_CORE_REGISTRY_H_
mace/core/serializer.cc
浏览文件 @
8ae8f575
...
...
@@ -4,19 +4,18 @@
#include "mace/core/serializer.h"
namespace
mace
{
unique_ptr
<
TensorProto
>
Serializer
::
Serialize
(
const
Tensor
&
tensor
,
const
string
&
name
)
{
const
string
&
name
)
{
MACE_NOT_IMPLEMENTED
;
return
nullptr
;
}
unique_ptr
<
Tensor
>
Serializer
::
Deserialize
(
const
TensorProto
&
proto
,
DeviceType
type
)
{
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
proto
.
data_type
()));
unique_ptr
<
Tensor
>
tensor
(
new
Tensor
(
GetDeviceAllocator
(
type
),
proto
.
data_type
()));
vector
<
index_t
>
dims
;
for
(
const
index_t
d
:
proto
.
dims
())
{
dims
.
push_back
(
d
);
...
...
@@ -25,8 +24,7 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
switch
(
proto
.
data_type
())
{
case
DT_FLOAT
:
tensor
->
Copy
<
float
>
(
proto
.
float_data
().
data
(),
proto
.
float_data
().
size
());
tensor
->
Copy
<
float
>
(
proto
.
float_data
().
data
(),
proto
.
float_data
().
size
());
break
;
case
DT_DOUBLE
:
tensor
->
Copy
<
double
>
(
proto
.
double_data
().
data
(),
...
...
@@ -34,39 +32,38 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
break
;
case
DT_INT32
:
tensor
->
template
Copy
<
int32_t
>(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
case
DT_UINT8
:
tensor
->
CopyWithCast
<
int32_t
,
uint8_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
case
DT_INT16
:
tensor
->
CopyWithCast
<
int32_t
,
int16_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
case
DT_INT8
:
tensor
->
CopyWithCast
<
int32_t
,
int8_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
case
DT_INT64
:
tensor
->
Copy
<
int64_t
>
(
proto
.
int64_data
().
data
(),
proto
.
int64_data
().
size
());
proto
.
int64_data
().
size
());
break
;
case
DT_UINT16
:
tensor
->
CopyWithCast
<
int32_t
,
uint16_t
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
case
DT_BOOL
:
tensor
->
CopyWithCast
<
int32_t
,
bool
>
(
proto
.
int32_data
().
data
(),
proto
.
int32_data
().
size
());
proto
.
int32_data
().
size
());
break
;
case
DT_STRING
:
{
string
*
content
=
tensor
->
mutable_data
<
string
>
();
for
(
int
i
=
0
;
i
<
proto
.
string_data
().
size
();
++
i
)
{
content
[
i
]
=
proto
.
string_data
(
i
);
}
}
break
;
}
break
;
default:
MACE_NOT_IMPLEMENTED
;
break
;
...
...
@@ -75,4 +72,4 @@ unique_ptr<Tensor> Serializer::Deserialize(const TensorProto &proto,
return
tensor
;
}
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/core/serializer.h
浏览文件 @
8ae8f575
...
...
@@ -5,9 +5,9 @@
#ifndef MACE_CORE_SERIALIZER_H_
#define MACE_CORE_SERIALIZER_H_
#include "mace/proto/mace.pb.h"
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
...
...
@@ -20,9 +20,9 @@ class Serializer {
unique_ptr
<
Tensor
>
Deserialize
(
const
TensorProto
&
proto
,
DeviceType
type
);
DISABLE_COPY_AND_ASSIGN
(
Serializer
);
DISABLE_COPY_AND_ASSIGN
(
Serializer
);
};
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_SERIALIZER_H_
#endif
// MACE_CORE_SERIALIZER_H_
mace/core/tensor.h
浏览文件 @
8ae8f575
...
...
@@ -5,11 +5,11 @@
#ifndef MACE_CORE_TENSOR_H_
#define MACE_CORE_TENSOR_H_
#include "mace/core/common.h"
#include "mace/proto/mace.pb.h"
#include "mace/core/allocator.h"
#include "mace/core/
types
.h"
#include "mace/core/
common
.h"
#include "mace/core/logging.h"
#include "mace/core/types.h"
#include "mace/proto/mace.pb.h"
namespace
mace
{
...
...
@@ -25,13 +25,13 @@ namespace mace {
switch (TYPE_ENUM) { \
CASE(float, SINGLE_ARG(STMTS)) \
CASE(double, SINGLE_ARG(STMTS)) \
CASE(int32_t, SINGLE_ARG(STMTS))
\
CASE(uint8_t, SINGLE_ARG(STMTS))
\
CASE(uint16_t, SINGLE_ARG(STMTS))
\
CASE(int16_t, SINGLE_ARG(STMTS))
\
CASE(int8_t, SINGLE_ARG(STMTS))
\
CASE(int32_t, SINGLE_ARG(STMTS)) \
CASE(uint8_t, SINGLE_ARG(STMTS)) \
CASE(uint16_t, SINGLE_ARG(STMTS)) \
CASE(int16_t, SINGLE_ARG(STMTS)) \
CASE(int8_t, SINGLE_ARG(STMTS)) \
CASE(string, SINGLE_ARG(STMTS)) \
CASE(int64_t, SINGLE_ARG(STMTS))
\
CASE(int64_t, SINGLE_ARG(STMTS)) \
CASE(bool, SINGLE_ARG(STMTS)) \
case DT_INVALID: \
INVALID; \
...
...
@@ -41,20 +41,17 @@ namespace mace {
break; \
}
#define CASES(TYPE_ENUM, STMTS) \
CASES_WITH_DEFAULT(TYPE_ENUM, STMTS, LOG(FATAL) << "Type not set"; \
, LOG(FATAL) << "Unexpected type: " << TYPE_ENUM;)
class
Tensor
{
public:
Tensor
()
:
alloc_
(
cpu_allocator
()),
size_
(
0
),
dtype_
(
DT_FLOAT
),
data_
(
nullptr
)
{};
:
alloc_
(
cpu_allocator
()),
size_
(
0
),
dtype_
(
DT_FLOAT
),
data_
(
nullptr
){};
Tensor
(
Allocator
*
a
,
DataType
type
)
:
alloc_
(
a
),
size_
(
0
),
dtype_
(
type
),
data_
(
nullptr
)
{};
:
alloc_
(
a
),
size_
(
0
),
dtype_
(
type
),
data_
(
nullptr
){};
~
Tensor
()
{
if
(
alloc_
&&
data_
.
get
())
{
...
...
@@ -92,9 +89,8 @@ class Tensor {
if
(
data_
.
get
()
||
size_
==
0
)
{
return
data_
.
get
();
}
else
{
CASES
(
dtype_
,
data_
.
reset
(
alloc_
->
New
(
size_
*
sizeof
(
T
)),
[
this
](
void
*
ptr
)
{
alloc_
->
Delete
(
ptr
);
}));
CASES
(
dtype_
,
data_
.
reset
(
alloc_
->
New
(
size_
*
sizeof
(
T
)),
[
this
](
void
*
ptr
)
{
alloc_
->
Delete
(
ptr
);
}));
return
data_
.
get
();
}
}
...
...
@@ -116,13 +112,9 @@ class Tensor {
}
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
Resize
(
other
.
shape
());
}
inline
void
ResizeLike
(
const
Tensor
&
other
)
{
Resize
(
other
.
shape
());
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
Resize
(
other
->
shape
());
}
inline
void
ResizeLike
(
const
Tensor
*
other
)
{
Resize
(
other
->
shape
());
}
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
index_t
size
)
{
...
...
@@ -132,7 +124,8 @@ class Tensor {
template
<
typename
SrcType
,
typename
DstType
>
inline
void
CopyWithCast
(
const
SrcType
*
src
,
size_t
size
)
{
MACE_CHECK
(
static_cast
<
index_t
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
MACE_CHECK
(
static_cast
<
index_t
>
(
size
)
==
size_
,
"copy src and dst with different size."
);
unique_ptr
<
DstType
[]
>
buffer
(
new
DstType
[
size
]);
for
(
size_t
i
=
0
;
i
<
size
;
++
i
)
{
buffer
[
i
]
=
static_cast
<
DstType
>
(
src
[
i
]);
...
...
@@ -146,10 +139,11 @@ class Tensor {
inline
void
DebugPrint
()
{
std
::
stringstream
os
;
for
(
int
i
:
shape_
)
{
for
(
int
i
:
shape_
)
{
os
<<
i
<<
", "
;
}
LOG
(
INFO
)
<<
"Tensor shape: "
<<
os
.
str
()
<<
" type: "
<<
DataType_Name
(
dtype_
);
LOG
(
INFO
)
<<
"Tensor shape: "
<<
os
.
str
()
<<
" type: "
<<
DataType_Name
(
dtype_
);
os
.
str
(
""
);
os
.
clear
();
...
...
@@ -175,7 +169,8 @@ class Tensor {
private:
inline
int64_t
NumElements
()
const
{
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
return
std
::
accumulate
(
shape_
.
begin
(),
shape_
.
end
(),
1
,
std
::
multiplies
<
int64_t
>
());
}
Allocator
*
alloc_
;
...
...
@@ -184,9 +179,9 @@ class Tensor {
std
::
shared_ptr
<
void
>
data_
;
vector
<
index_t
>
shape_
;
DISABLE_COPY_AND_ASSIGN
(
Tensor
);
DISABLE_COPY_AND_ASSIGN
(
Tensor
);
};
}
// namespace tensor
}
// namespace tensor
#endif
//
MACE_CORE_TENSOR_H_
#endif
//
MACE_CORE_TENSOR_H_
mace/core/testing/test_benchmark.cc
浏览文件 @
8ae8f575
...
...
@@ -51,11 +51,8 @@ Benchmark* Benchmark::ArgPair(int x, int y) {
return
this
;
}
// Run all benchmarks
void
Benchmark
::
Run
()
{
Run
(
"all"
);
}
void
Benchmark
::
Run
()
{
Run
(
"all"
);
}
void
Benchmark
::
Run
(
const
char
*
pattern
)
{
if
(
!
all_benchmarks
)
return
;
...
...
@@ -113,8 +110,8 @@ void Benchmark::Run(const char* pattern) {
(
items_processed
*
1e-6
)
/
seconds
);
full_label
+=
buf
;
}
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
printf
(
"%-*s %10.0f %10d
\t
%s
\n
"
,
width
,
name
,
seconds
*
1e9
/
iters
,
iters
,
full_label
.
c_str
());
}
}
}
...
...
mace/core/testing/test_benchmark.h
浏览文件 @
8ae8f575
...
...
@@ -12,9 +12,9 @@
#include "mace/core/types.h"
#define MACE_BENCHMARK_CONCAT(a, b, c) a##b##c
#define BENCHMARK(n)
\
static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT(
__benchmark_, n, __LINE__) =
\
(new ::mace::testing::Benchmark(#n, (n)))
#define BENCHMARK(n) \
static ::mace::testing::Benchmark* MACE_BENCHMARK_CONCAT( \
__benchmark_, n, __LINE__) =
(new ::mace::testing::Benchmark(#n, (n)))
namespace
mace
{
namespace
testing
{
...
...
mace/core/testing/test_benchmark_main.cc
浏览文件 @
8ae8f575
...
...
@@ -17,4 +17,3 @@ int main(int argc, char** argv) {
}
return
0
;
}
mace/core/types.h
浏览文件 @
8ae8f575
...
...
@@ -18,26 +18,25 @@ struct DataTypeToEnum {
static_assert
(
IsValidDataType
<
T
>::
value
,
"Specified Data Type not supported"
);
};
// EnumToDataType<VALUE>::Type is the type for DataType constant VALUE, e.g.
// EnumToDataType<DT_FLOAT>::Type is float.
template
<
DataType
VALUE
>
struct
EnumToDataType
{};
// Specializations below
// Template specialization for both DataTypeToEnum and EnumToDataType.
#define MATCH_TYPE_AND_ENUM(TYPE, ENUM)
\
template <>
\
struct DataTypeToEnum<TYPE> {
\
static DataType v() { return ENUM; }
\
static constexpr DataType value = ENUM;
\
};
\
template <>
\
struct IsValidDataType<TYPE> {
\
static constexpr bool value = true;
\
};
\
template <>
\
struct EnumToDataType<ENUM> {
\
typedef TYPE Type;
\
#define MATCH_TYPE_AND_ENUM(TYPE, ENUM) \
template <> \
struct DataTypeToEnum<TYPE> { \
static DataType v() { return ENUM; } \
static constexpr DataType value = ENUM; \
}; \
template <> \
struct IsValidDataType<TYPE> { \
static constexpr bool value = true; \
}; \
template <> \
struct EnumToDataType<ENUM> { \
typedef TYPE Type; \
}
MATCH_TYPE_AND_ENUM
(
float
,
DT_FLOAT
);
...
...
@@ -53,6 +52,6 @@ MATCH_TYPE_AND_ENUM(bool, DT_BOOL);
static
const
int32_t
kint32_tmax
=
((
int32_t
)
0x7FFFFFFF
);
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_TYPES_H_
#endif
// MACE_CORE_TYPES_H_
mace/core/workspace.cc
浏览文件 @
8ae8f575
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/common.h"
#include "mace/core/workspace.h"
#include "mace/core/common.h"
#include "mace/core/serializer.h"
namespace
mace
{
...
...
@@ -16,8 +16,7 @@ vector<string> Workspace::Tensors() const {
return
names
;
}
Tensor
*
Workspace
::
CreateTensor
(
const
string
&
name
,
Allocator
*
alloc
,
Tensor
*
Workspace
::
CreateTensor
(
const
string
&
name
,
Allocator
*
alloc
,
DataType
type
)
{
if
(
HasTensor
(
name
))
{
VLOG
(
1
)
<<
"Tensor "
<<
name
<<
" already exists. Skipping."
;
...
...
@@ -46,14 +45,16 @@ const Tensor* Workspace::GetTensor(const string& name) const {
}
Tensor
*
Workspace
::
GetTensor
(
const
string
&
name
)
{
return
const_cast
<
Tensor
*>
(
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
return
const_cast
<
Tensor
*>
(
static_cast
<
const
Workspace
*>
(
this
)
->
GetTensor
(
name
));
}
void
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
void
Workspace
::
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
)
{
Serializer
serializer
;
for
(
auto
&
tensor_proto
:
net_def
.
tensors
())
{
tensor_map_
[
tensor_proto
.
name
()]
=
serializer
.
Deserialize
(
tensor_proto
,
type
);
for
(
auto
&
tensor_proto
:
net_def
.
tensors
())
{
tensor_map_
[
tensor_proto
.
name
()]
=
serializer
.
Deserialize
(
tensor_proto
,
type
);
}
}
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/core/workspace.h
浏览文件 @
8ae8f575
...
...
@@ -5,7 +5,6 @@
#ifndef MACE_CORE_WORKSPACE_H_
#define MACE_CORE_WORKSPACE_H_
#include "mace/core/common.h"
#include "mace/core/tensor.h"
#include "mace/proto/mace.pb.h"
...
...
@@ -37,10 +36,9 @@ class Workspace {
private:
TensorMap
tensor_map_
;
DISABLE_COPY_AND_ASSIGN
(
Workspace
);
DISABLE_COPY_AND_ASSIGN
(
Workspace
);
};
}
// namespace mace
}
// namespace mace
#endif // MACE_CORE_WORKSPACE_H_
#endif
// MACE_CORE_WORKSPACE_H_
mace/examples/benchmark_example.cc
浏览文件 @
8ae8f575
...
...
@@ -14,7 +14,7 @@ static void foo(int iters) {
float
*
out
=
new
float
[
N
];
while
(
iters
--
)
{
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
for
(
int
i
=
0
;
i
<
N
;
i
++
)
{
out
[
i
]
=
inp
[
i
]
*
2.0
;
}
}
...
...
@@ -24,7 +24,6 @@ static void foo(int iters) {
BENCHMARK
(
foo
);
static
void
bar
(
int
iters
,
int
n
)
{
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
n
;
mace
::
testing
::
ItemsProcessed
(
tot
);
...
...
@@ -34,7 +33,7 @@ static void bar(int iters, int n) {
float
*
out
=
new
float
[
n
];
while
(
iters
--
)
{
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
for
(
int
i
=
0
;
i
<
n
;
i
++
)
{
out
[
i
]
=
inp
[
i
]
*
2.0
;
}
}
...
...
mace/kernels/addn.h
浏览文件 @
8ae8f575
...
...
@@ -10,10 +10,9 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
AddNFunctor
{
void
operator
()(
const
vector
<
const
T
*>&
inputs
,
T
*
output
,
index_t
size
)
{
void
operator
()(
const
vector
<
const
T
*>&
inputs
,
T
*
output
,
index_t
size
)
{
memset
(
output
,
0
,
size
*
sizeof
(
T
));
int
n
=
inputs
.
size
();
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
@@ -25,11 +24,10 @@ struct AddNFunctor {
};
template
<
>
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>&
inputs
,
float
*
output
,
index_t
size
);
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>&
inputs
,
float
*
output
,
index_t
size
);
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_ADDN_H_
\ No newline at end of file
#endif // MACE_KERNELS_ADDN_H_
\ No newline at end of file
mace/kernels/batch_norm.h
浏览文件 @
8ae8f575
...
...
@@ -11,26 +11,21 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
BatchNormFunctor
{
float
variance_epsilon_
;
BatchNormFunctor
(
const
float
variance_epsilon
)
:
variance_epsilon_
(
variance_epsilon
)
{}
:
variance_epsilon_
(
variance_epsilon
)
{}
void
operator
()(
const
T
*
input
,
const
T
*
scale
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
T
*
output
)
{
void
operator
()(
const
T
*
input
,
const
T
*
scale
,
const
T
*
offset
,
const
T
*
mean
,
const
T
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
T
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} }
// ( \offset - \frac { \scale * mean } {
// \sqrt{var+\variance_epsilon} }
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
...
...
@@ -53,18 +48,12 @@ struct BatchNormFunctor {
};
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
);
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
);
}
// namepsace kernels
}
// namespace mace
}
// namepsace kernels
}
// namespace mace
#endif // MACE_KERNELS_BATCH_NORM_H_
#endif // MACE_KERNELS_BATCH_NORM_H_
mace/kernels/conv_2d.h
浏览文件 @
8ae8f575
...
...
@@ -10,114 +10,103 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
Conv2dFunctor
{
public:
Conv2dFunctor
(
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
void
operator
()(
const
T
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
T
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
T
*
bias
,
// c_out
T
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
MACE_CHECK_NOTNULL
(
output
);
index_t
batch
=
output_shape
[
0
];
index_t
channels
=
output_shape
[
1
];
index_t
height
=
output_shape
[
2
];
index_t
width
=
output_shape
[
3
];
index_t
input_batch
=
input_shape
[
0
];
index_t
input_channels
=
input_shape
[
1
];
index_t
input_height
=
input_shape
[
2
];
index_t
input_width
=
input_shape
[
3
];
index_t
kernel_h
=
filter_shape
[
2
];
index_t
kernel_w
=
filter_shape
[
3
];
int
stride_h
=
strides_
[
0
];
int
stride_w
=
strides_
[
1
];
int
dilation_h
=
dilations_
[
0
];
int
dilation_w
=
dilations_
[
1
];
MACE_CHECK
(
batch
==
input_batch
,
"Input/Output batch size mismatch"
);
// The left-upper most offset of the padded input
int
padded_h_start
=
0
-
paddings_
[
0
]
/
2
;
int
padded_w_start
=
0
-
paddings_
[
1
]
/
2
;
index_t
padded_h_stop
=
input_height
+
paddings_
[
0
]
-
paddings_
[
0
]
/
2
;
index_t
padded_w_stop
=
input_width
+
paddings_
[
1
]
-
paddings_
[
1
]
/
2
;
index_t
kernel_size
=
input_channels
*
kernel_h
*
kernel_w
;
public:
Conv2dFunctor
(
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
:
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
void
operator
()(
const
T
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
T
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
T
*
bias
,
// c_out
T
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
MACE_CHECK_NOTNULL
(
output
);
index_t
batch
=
output_shape
[
0
];
index_t
channels
=
output_shape
[
1
];
index_t
height
=
output_shape
[
2
];
index_t
width
=
output_shape
[
3
];
index_t
input_batch
=
input_shape
[
0
];
index_t
input_channels
=
input_shape
[
1
];
index_t
input_height
=
input_shape
[
2
];
index_t
input_width
=
input_shape
[
3
];
index_t
kernel_h
=
filter_shape
[
2
];
index_t
kernel_w
=
filter_shape
[
3
];
int
stride_h
=
strides_
[
0
];
int
stride_w
=
strides_
[
1
];
int
dilation_h
=
dilations_
[
0
];
int
dilation_w
=
dilations_
[
1
];
MACE_CHECK
(
batch
==
input_batch
,
"Input/Output batch size mismatch"
);
// The left-upper most offset of the padded input
int
padded_h_start
=
0
-
paddings_
[
0
]
/
2
;
int
padded_w_start
=
0
-
paddings_
[
1
]
/
2
;
index_t
padded_h_stop
=
input_height
+
paddings_
[
0
]
-
paddings_
[
0
]
/
2
;
index_t
padded_w_stop
=
input_width
+
paddings_
[
1
]
-
paddings_
[
1
]
/
2
;
index_t
kernel_size
=
input_channels
*
kernel_h
*
kernel_w
;
#pragma omp parallel for collapse(2)
for
(
int
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
index_t
offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
+
h
*
width
+
w
;
T
sum
=
0
;
const
T
*
filter_ptr
=
filter
+
c
*
kernel_size
;
for
(
int
inc
=
0
;
inc
<
input_channels
;
++
inc
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
inh
=
padded_h_start
+
h
*
stride_h
+
dilation_h
*
kh
;
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
<
0
||
inh
>=
input_height
||
inw
<
0
||
inw
>=
input_width
)
{
MACE_CHECK
(
inh
>=
padded_h_start
&&
inh
<
padded_h_stop
&&
inw
>=
padded_w_start
&&
inw
<
padded_w_stop
,
"Out of range read from input: "
,
inh
,
", "
,
inw
);
// else padding with 0:
// sum += 0;
}
else
{
index_t
input_offset
=
for
(
int
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
index_t
offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
+
h
*
width
+
w
;
T
sum
=
0
;
const
T
*
filter_ptr
=
filter
+
c
*
kernel_size
;
for
(
int
inc
=
0
;
inc
<
input_channels
;
++
inc
)
{
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
inh
=
padded_h_start
+
h
*
stride_h
+
dilation_h
*
kh
;
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
<
0
||
inh
>=
input_height
||
inw
<
0
||
inw
>=
input_width
)
{
MACE_CHECK
(
inh
>=
padded_h_start
&&
inh
<
padded_h_stop
&&
inw
>=
padded_w_start
&&
inw
<
padded_w_stop
,
"Out of range read from input: "
,
inh
,
", "
,
inw
);
// else padding with 0:
// sum += 0;
}
else
{
index_t
input_offset
=
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
+
inh
*
input_width
+
inw
;
sum
+=
input
[
input_offset
]
*
*
filter_ptr
;
}
++
filter_ptr
;
inc
*
input_height
*
input_width
+
inh
*
input_width
+
inw
;
sum
+=
input
[
input_offset
]
*
*
filter_ptr
;
}
++
filter_ptr
;
}
output
[
offset
]
=
sum
+
bias
[
c
];
}
output
[
offset
]
=
sum
+
bias
[
c
];
}
}
}
}
}
}
private:
const
int
*
strides_
;
// [stride_h, stride_w]
const
int
*
paddings_
;
// [padding_h, padding_w]
const
int
*
dilations_
;
// [dilation_h, dilation_w]
private:
const
int
*
strides_
;
// [stride_h, stride_w]
const
int
*
paddings_
;
// [padding_h, padding_w]
const
int
*
dilations_
;
// [dilation_h, dilation_w]
};
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_CONV_2D_H_
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
const
float
*
filter
,
const
index_t
*
filter_shape
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_CONV_2D_H_
mace/kernels/conv_pool_2d_util.cc
浏览文件 @
8ae8f575
...
...
@@ -7,12 +7,10 @@
namespace
mace
{
namespace
kernels
{
void
CalcPaddingAndOutputSize
(
const
index_t
*
input_shape
,
// NCHW
void
CalcPaddingAndOutputSize
(
const
index_t
*
input_shape
,
// NCHW
const
index_t
*
filter_shape
,
// OIHW
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
int
*
padding_size
)
{
MACE_CHECK
(
dilations
[
0
]
>
0
&&
dilations
[
1
]
>
0
,
"Invalid dilations, must >= 1"
);
...
...
@@ -43,14 +41,16 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
output_height
=
(
input_shape
[
2
]
-
k_extent_height
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_shape
[
3
]
-
k_extent_width
)
/
strides
[
1
]
+
1
;
break
;
case
SAME
:
output_height
=
(
input_shape
[
2
]
-
1
)
/
strides
[
0
]
+
1
;
case
SAME
:
output_height
=
(
input_shape
[
2
]
-
1
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_shape
[
3
]
-
1
)
/
strides
[
1
]
+
1
;
break
;
case
FULL
:
output_height
=
(
input_shape
[
2
]
+
k_extent_height
-
2
)
/
strides
[
0
]
+
1
;
output_width
=
(
input_shape
[
3
]
+
k_extent_width
-
2
)
/
strides
[
1
]
+
1
;
break
;
default:
MACE_CHECK
(
false
,
"Unsupported padding type: "
,
padding
);
default:
MACE_CHECK
(
false
,
"Unsupported padding type: "
,
padding
);
}
// Note: TensorFlow may padded one more on the right/bottom side
...
...
@@ -58,10 +58,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
// utilize the more centered features. We need to benchmark
// based on the model accuracy.
padding_size
[
0
]
=
(
output_height
-
1
)
*
strides
[
0
]
+
k_extent_height
-
input_shape
[
2
];
padding_size
[
1
]
=
(
output_width
-
1
)
*
strides
[
1
]
+
k_extent_width
-
input_shape
[
3
];
padding_size
[
0
]
=
(
output_height
-
1
)
*
strides
[
0
]
+
k_extent_height
-
input_shape
[
2
];
padding_size
[
1
]
=
(
output_width
-
1
)
*
strides
[
1
]
+
k_extent_width
-
input_shape
[
3
];
output_shape
[
0
]
=
input_shape
[
0
];
output_shape
[
1
]
=
output_channels
;
...
...
@@ -69,19 +69,15 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
output_shape
[
3
]
=
output_width
;
}
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
)
{
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
)
{
index_t
batch
=
input_shape
[
0
];
index_t
channels
=
input_shape
[
1
];
index_t
height
=
input_shape
[
2
];
index_t
width
=
input_shape
[
3
];
std
::
vector
<
index_t
>
output_shape
({
batch
,
channels
,
paddings
[
0
]
+
height
,
paddings
[
1
]
+
width
});
std
::
vector
<
index_t
>
output_shape
(
{
batch
,
channels
,
paddings
[
0
]
+
height
,
paddings
[
1
]
+
width
});
const
index_t
output_width
=
output_shape
[
3
];
const
int
padded_top
=
paddings
[
0
]
/
2
;
...
...
@@ -105,5 +101,5 @@ void ConstructInputWithPadding(const float *input,
}
}
}
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
mace/kernels/conv_pool_2d_util.h
浏览文件 @
8ae8f575
...
...
@@ -10,26 +10,22 @@
namespace
mace
{
enum
Padding
{
VALID
=
0
,
// No padding
SAME
=
1
,
// Pads with half the filter size (rounded down) on both sides
FULL
=
2
,
// Pads with one less than the filter size on both sides
VALID
=
0
,
// No padding
SAME
=
1
,
// Pads with half the filter size (rounded down) on both sides
FULL
=
2
,
// Pads with one less than the filter size on both sides
};
namespace
kernels
{
void
CalcPaddingAndOutputSize
(
const
index_t
*
input_shape
,
// NCHW
void
CalcPaddingAndOutputSize
(
const
index_t
*
input_shape
,
// NCHW
const
index_t
*
filter_shape
,
// OIHW
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
const
int
*
dilations
,
const
int
*
strides
,
Padding
padding
,
index_t
*
output_shape
,
int
*
padding_size
);
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
);
}
// namespace kernels
}
// namespace mace
void
ConstructInputWithPadding
(
const
float
*
input
,
const
index_t
*
input_shape
,
const
int
*
paddings
,
Tensor
*
output_tensor
);
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_CONV_POOL_2D_UTIL_H_
#endif
// MACE_KERNELS_CONV_POOL_2D_UTIL_H_
mace/kernels/neon/addn_neon.cc
浏览文件 @
8ae8f575
...
...
@@ -2,16 +2,15 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/addn.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
template
<
>
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>&
inputs
,
float
*
output
,
index_t
size
)
{
void
AddNFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
vector
<
const
float
*>
&
inputs
,
float
*
output
,
index_t
size
)
{
// TODO: neon mem copy
memset
(
output
,
0
,
size
*
sizeof
(
float
));
int
n
=
inputs
.
size
();
...
...
@@ -22,7 +21,7 @@ void AddNFunctor<DeviceType::NEON, float>::operator()(const vector<const float*>
}
int64_t
element_per_group
=
size
/
groups
;
#pragma omp parallel for num_threads(1) // no significant performance improve
#pragma omp parallel for num_threads(1)
// no significant performance improve
for
(
int64_t
i
=
0
;
i
<
size
;
i
+=
element_per_group
)
{
int64_t
count
=
std
::
min
(
element_per_group
,
size
-
i
);
int
nn
=
count
>>
2
;
...
...
@@ -48,5 +47,5 @@ void AddNFunctor<DeviceType::NEON, float>::operator()(const vector<const float*>
}
};
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/neon/batch_norm_neon.cc
浏览文件 @
8ae8f575
...
...
@@ -2,29 +2,25 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/batch_norm.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
template
<
>
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon} }
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
void
BatchNormFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
float
*
scale
,
const
float
*
offset
,
const
float
*
mean
,
const
float
*
var
,
const
index_t
n
,
const
index_t
channel
,
const
index_t
sample_size
,
float
*
output
)
{
// Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
// The calculation formula for inference is
// Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
// ( \offset - \frac { \scale * mean } { \sqrt{var+\variance_epsilon}
// }
// new_scale = \frac{ \scale } { \sqrt{var+\variance_epsilon} }
// new_offset = \offset - mean * common_val;
// Y = new_scale * X + new_offset;
float
new_scale
,
new_offset
;
index_t
count
=
sample_size
>>
2
;
index_t
remain_count
=
sample_size
-
(
count
<<
2
);
...
...
@@ -36,8 +32,8 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
float32x4_t
new_scale_f
=
vdupq_n_f32
(
new_scale
);
float32x4_t
new_offset_f
=
vdupq_n_f32
(
new_offset
);
for
(
index_t
i
=
0
;
i
<
n
;
++
i
)
{
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
const
float
*
input_sample_ptr
=
input
+
pos
;
float
*
output_sample_ptr
=
output
+
pos
;
for
(
index_t
j
=
0
;
j
<
count
;
++
j
)
{
float32x4_t
input_f
=
vld1q_f32
(
input_sample_ptr
);
...
...
@@ -57,5 +53,5 @@ void BatchNormFunctor<DeviceType::NEON, float>::operator()(const float* input,
}
};
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/neon/conv_2d_neon.cc
浏览文件 @
8ae8f575
...
...
@@ -20,62 +20,39 @@ extern void Conv2dNeonK5x5S1(const float *input, const index_t *input_shape,
const
float
*
filter
,
const
float
*
bias
,
float
*
output
,
const
index_t
*
output_shape
);
template
<
>
template
<
>
void
Conv2dFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
typedef
void
(
*
Conv2dNeonFunction
)(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
);
float
>::
operator
()(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
index_t
*
filter_shape
,
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
typedef
void
(
*
Conv2dNeonFunction
)(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
);
// Selection matrix: kernel_size x stride_size
static
const
Conv2dNeonFunction
selector
[
5
][
2
]
=
{
{
Conv2dNeonK1x1S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK3x3S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK5x5S1
,
nullptr
}
};
{
Conv2dNeonK1x1S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK3x3S1
,
nullptr
},
{
nullptr
,
nullptr
},
{
Conv2dNeonK5x5S1
,
nullptr
}};
// not implement yet
index_t
kernel_h
=
filter_shape
[
2
];
index_t
kernel_w
=
filter_shape
[
3
];
if
(
kernel_h
!=
kernel_w
||
kernel_h
>
5
||
strides_
[
0
]
!=
strides_
[
1
]
||
strides_
[
0
]
>
2
||
dilations_
[
0
]
!=
1
||
dilations_
[
1
]
!=
1
||
if
(
kernel_h
!=
kernel_w
||
kernel_h
>
5
||
strides_
[
0
]
!=
strides_
[
1
]
||
strides_
[
0
]
>
2
||
dilations_
[
0
]
!=
1
||
dilations_
[
1
]
!=
1
||
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
]
==
nullptr
)
{
LOG
(
WARNING
)
<<
"NEON conv2d kernel not implementated, using slow vesion"
;
Conv2dFunctor
<
DeviceType
::
CPU
,
float
>
(
strides_
,
paddings_
,
dilations_
)(
input
,
input_shape
,
filter
,
filter_shape
,
bias
,
output
,
output_shape
);
input
,
input_shape
,
filter
,
filter_shape
,
bias
,
output
,
output_shape
);
return
;
}
...
...
@@ -87,13 +64,8 @@ void Conv2dFunctor<DeviceType::NEON,
input_shape
=
padded_input
.
shape
().
data
();
}
auto
conv2d_neon_func
=
selector
[
kernel_h
-
1
][
strides_
[
0
]
-
1
];
conv2d_neon_func
(
input
,
input_shape
,
filter
,
bias
,
output
,
output_shape
);
conv2d_neon_func
(
input
,
input_shape
,
filter
,
bias
,
output
,
output_shape
);
}
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
mace/kernels/neon/conv_2d_neon_1x1.cc
浏览文件 @
8ae8f575
...
...
@@ -8,25 +8,24 @@
namespace
mace
{
namespace
kernels
{
void
Conv2dNeonK1x1S1
(
const
float
*
input
,
// NCHW
void
Conv2dNeonK1x1S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
const
index_t
height
=
output_shape
[
2
];
const
index_t
width
=
output_shape
[
3
];
const
index_t
height
=
output_shape
[
2
];
const
index_t
width
=
output_shape
[
3
];
const
index_t
input_batch
=
input_shape
[
0
];
const
index_t
input_batch
=
input_shape
[
0
];
const
index_t
input_channels
=
input_shape
[
1
];
const
index_t
input_height
=
input_shape
[
2
];
const
index_t
input_width
=
input_shape
[
3
];
const
index_t
input_height
=
input_shape
[
2
];
const
index_t
input_width
=
input_shape
[
3
];
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
input_width
==
width
);
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
input_width
==
width
);
const
index_t
total_pixels
=
height
*
width
;
// Process 4 * 2 = 8 pixels for each innermost loop
...
...
@@ -37,17 +36,18 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// benchmark omp collapsed(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
const
float
*
filter_ptr
=
filter
;
#pragma omp parallel for
#pragma omp parallel for
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
// TODO Will GCC opt these out?
float
*
channel_output_start
=
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
const
float
*
input_ptr
=
input
+
n
*
input_channels
*
input_height
*
input_width
;
output
+
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
const
float
*
input_ptr
=
input
+
n
*
input_channels
*
input_height
*
input_width
;
// Fill with bias
float
*
output_ptr
=
channel_output_start
;
for
(
index_t
ptr
=
0
;
ptr
<
total_pixels
;
++
ptr
)
{
output_ptr
[
ptr
]
=
bias
[
c
];
// TODO can we avoid this?
output_ptr
[
ptr
]
=
bias
[
c
];
// TODO can we avoid this?
}
index_t
inc
=
0
;
...
...
@@ -55,15 +55,14 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
for
(;
inc
+
3
<
input_channels
;
inc
+=
4
)
{
float
*
output_ptr
=
channel_output_start
;
// The begining of each input feature map channel
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
const
float
*
input_ptr1
=
input_ptr
+
total_pixels
;
const
float
*
input_ptr1
=
input_ptr
+
total_pixels
;
const
float
*
input_ptr2
=
input_ptr1
+
total_pixels
;
const
float
*
input_ptr3
=
input_ptr2
+
total_pixels
;
// filter is in c_out, c_in, 1, 1 order
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
const
float
k0
=
filter_ptr
[
0
];
...
...
@@ -113,7 +112,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
vst1q_f32
(
output_ptr
+
4
,
out4
);
output_ptr
+=
8
;
input_ptr
+=
8
;
input_ptr
+=
8
;
input_ptr1
+=
8
;
input_ptr2
+=
8
;
input_ptr3
+=
8
;
...
...
@@ -121,7 +120,7 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// Process the remaining pixels
index_t
remaining_pixels
=
loop_remaining
;
for
(;
remaining_pixels
>
0
;
--
remaining_pixels
)
{
const
float
mul
=
*
input_ptr
*
k0
;
const
float
mul
=
*
input_ptr
*
k0
;
const
float
mul1
=
*
input_ptr1
*
k1
;
const
float
mul2
=
*
input_ptr2
*
k2
;
const
float
mul3
=
*
input_ptr3
*
k3
;
...
...
@@ -141,9 +140,9 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
// Process the remaining channels
for
(;
inc
<
input_channels
;
++
inc
)
{
float
*
output_ptr
=
channel_output_start
;
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
MACE_ASSERT
(
input_ptr
==
input
+
n
*
input_channels
*
input_height
*
input_width
+
inc
*
input_height
*
input_width
);
MACE_ASSERT
(
filter_ptr
==
filter
+
c
*
input_channels
+
inc
);
const
float
k0
=
filter_ptr
[
0
];
...
...
@@ -166,13 +165,13 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
vst1q_f32
(
output_ptr
+
4
,
out4
);
output_ptr
+=
8
;
input_ptr
+=
8
;
input_ptr
+=
8
;
}
// Process the remaining pixels
index_t
remaining_pixels
=
loop_remaining
;
for
(;
remaining_pixels
>
0
;
--
remaining_pixels
)
{
const
float
mul
=
*
input_ptr
*
k0
;
*
output_ptr
+=
mul
;
++
output_ptr
;
...
...
@@ -183,5 +182,5 @@ void Conv2dNeonK1x1S1(const float* input, // NCHW
}
};
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
mace/kernels/neon/conv_2d_neon_3x3.cc
浏览文件 @
8ae8f575
...
...
@@ -10,78 +10,81 @@ namespace kernels {
static
const
int
kRegisterSize
=
4
;
void
Conv2dNeonK3x3S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
int
batch
=
output_shape
[
0
];
void
Conv2dNeonK3x3S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
int
batch
=
output_shape
[
0
];
int
channels
=
output_shape
[
1
];
int
height
=
output_shape
[
2
];
int
width
=
output_shape
[
3
];
int
height
=
output_shape
[
2
];
int
width
=
output_shape
[
3
];
int
input_batch
=
input_shape
[
0
];
int
input_batch
=
input_shape
[
0
];
int
input_channels
=
input_shape
[
1
];
int
input_height
=
input_shape
[
2
];
int
input_width
=
input_shape
[
3
];
int
input_height
=
input_shape
[
2
];
int
input_width
=
input_shape
[
3
];
int
kernel_h
=
3
;
int
kernel_w
=
3
;
int
kernel_w
=
3
;
int
height_count
=
(
height
>>
1
)
<<
1
;
for
(
int
b
=
0
;
b
<
batch
;
++
b
)
{
float
*
output_ptr_base
=
output
+
b
*
channels
*
height
*
width
;
for
(
int
oc
=
0
;
oc
<
channels
;
++
oc
)
{
const
float
*
filter_ptr
=
filter
+
oc
*
input_channels
*
kernel_h
*
kernel_w
;
const
float
*
input_ptr
=
input
+
b
*
input_channels
*
input_height
*
input_width
;
const
float
*
filter_ptr
=
filter
+
oc
*
input_channels
*
kernel_h
*
kernel_w
;
const
float
*
input_ptr
=
input
+
b
*
input_channels
*
input_height
*
input_width
;
float
*
output_ptr
=
output_ptr_base
+
oc
*
height
*
width
;
std
::
fill
(
output_ptr
,
output_ptr
+
height
*
width
,
bias
[
oc
]);
for
(
int
ic
=
0
;
ic
<
input_channels
;
++
ic
)
{
float32x4_t
filter0
=
vld1q_f32
(
filter_ptr
);
float32x4_t
filter3
=
vld1q_f32
(
filter_ptr
+
3
);
float32x4_t
filter6
=
vld1q_f32
(
filter_ptr
+
6
);
float32x4_t
filter3
=
vld1q_f32
(
filter_ptr
+
3
);
float32x4_t
filter6
=
vld1q_f32
(
filter_ptr
+
6
);
const
float
*
row
[
kRegisterSize
]
=
{
input_ptr
,
input_ptr
+
input_width
,
input_ptr
+
2
*
input_width
,
input_ptr
+
3
*
input_width
};
const
float
*
row
[
kRegisterSize
]
=
{
input_ptr
,
input_ptr
+
input_width
,
input_ptr
+
2
*
input_width
,
input_ptr
+
3
*
input_width
};
float
*
output_ptr1
=
output_ptr
;
float
*
output_ptr2
=
output_ptr
+
width
;
for
(
int
h
=
0
;
h
<
height_count
;
h
+=
2
)
{
int
count
=
width
>>
2
;
int
remain_count
=
width
&
3
;
for
(;
count
>
0
;
--
count
)
{
float32x4_t
sum0
=
vdupq_n_f32
(
.0
f
);
float32x4_t
sum1
=
vdupq_n_f32
(
.0
f
);
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
//0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
//4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//2345
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
// 0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
// 4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
// 1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter0
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter0
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_2
,
filter0
,
2
);
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
//0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
//4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//2345
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
// 0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
// 4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
// 1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_0
,
filter3
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_1
,
filter3
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_2
,
filter3
,
2
);
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter6
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter6
,
1
);
...
...
@@ -96,10 +99,10 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
sum1
=
vfmaq_laneq_f32
(
sum1
,
row0_ext_1
,
filter3
,
1
);
sum1
=
vfmaq_laneq_f32
(
sum1
,
row0_ext_2
,
filter3
,
2
);
row1_ext_0
=
vld1q_f32
(
row
[
3
]);
//
0123
row1_latter
=
vld1q_f32
(
row
[
3
]
+
kRegisterSize
);
//
4567
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//
1234
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//
2345
row1_ext_0
=
vld1q_f32
(
row
[
3
]);
//
0123
row1_latter
=
vld1q_f32
(
row
[
3
]
+
kRegisterSize
);
//
4567
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//
1234
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//
2345
sum1
=
vfmaq_laneq_f32
(
sum1
,
row1_ext_0
,
filter6
,
0
);
sum1
=
vfmaq_laneq_f32
(
sum1
,
row1_ext_1
,
filter6
,
1
);
...
...
@@ -114,15 +117,15 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
output_ptr1
+=
kRegisterSize
;
output_ptr2
+=
kRegisterSize
;
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
row
[
i
]
+=
kRegisterSize
;
}
}
for
(;
remain_count
>
0
;
--
remain_count
)
{
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
row3
=
vld1q_f32
(
row
[
3
]);
//
0123
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
row3
=
vld1q_f32
(
row
[
3
]);
//
0123
float32x4_t
sum
=
vmulq_f32
(
row0
,
filter0
);
sum
=
vmlaq_f32
(
sum
,
row1
,
filter3
);
...
...
@@ -138,13 +141,13 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
++
output_ptr1
;
++
output_ptr2
;
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
row
[
i
]
+=
1
;
}
}
output_ptr1
+=
width
;
output_ptr2
+=
width
;
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
for
(
int
i
=
0
;
i
<
kRegisterSize
;
++
i
)
{
row
[
i
]
+=
2
+
input_width
;
}
}
...
...
@@ -152,30 +155,34 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
if
(
height
!=
height_count
)
{
int
count
=
width
>>
2
;
int
remain_count
=
width
&
3
;
for
(;
count
>
0
;
--
count
)
{
for
(;
count
>
0
;
--
count
)
{
float32x4_t
sum0
=
vdupq_n_f32
(
.0
f
);
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
//0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
//4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//2345
float32x4_t
row0_ext_0
=
vld1q_f32
(
row
[
0
]);
// 0123
float32x4_t
row0_latter
=
vld1q_f32
(
row
[
0
]
+
kRegisterSize
);
// 4567
float32x4_t
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
// 1234
float32x4_t
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter0
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter0
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_2
,
filter0
,
2
);
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
//0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
//4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
//1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
//2345
float32x4_t
row1_ext_0
=
vld1q_f32
(
row
[
1
]);
// 0123
float32x4_t
row1_latter
=
vld1q_f32
(
row
[
1
]
+
kRegisterSize
);
// 4567
float32x4_t
row1_ext_1
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
1
);
// 1234
float32x4_t
row1_ext_2
=
vextq_f32
(
row1_ext_0
,
row1_latter
,
2
);
// 2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_0
,
filter3
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_1
,
filter3
,
1
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row1_ext_2
,
filter3
,
2
);
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
row0_ext_0
=
vld1q_f32
(
row
[
2
]);
//
0123
row0_latter
=
vld1q_f32
(
row
[
2
]
+
kRegisterSize
);
//
4567
row0_ext_1
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
1
);
//
1234
row0_ext_2
=
vextq_f32
(
row0_ext_0
,
row0_latter
,
2
);
//
2345
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_0
,
filter6
,
0
);
sum0
=
vfmaq_laneq_f32
(
sum0
,
row0_ext_1
,
filter6
,
1
);
...
...
@@ -185,14 +192,14 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
output_row0
=
vaddq_f32
(
output_row0
,
sum0
);
vst1q_f32
(
output_ptr1
,
output_row0
);
output_ptr1
+=
kRegisterSize
;
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
row
[
i
]
+=
kRegisterSize
;
}
}
for
(;
remain_count
>
0
;
--
remain_count
)
{
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
row0
=
vld1q_f32
(
row
[
0
]);
//
0123
float32x4_t
row1
=
vld1q_f32
(
row
[
1
]);
//
0123
float32x4_t
row2
=
vld1q_f32
(
row
[
2
]);
//
0123
float32x4_t
sum
=
vmulq_f32
(
row0
,
filter0
);
sum
=
vmlaq_f32
(
sum
,
row1
,
filter3
);
...
...
@@ -201,7 +208,7 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
*
output_ptr1
=
vaddvq_f32
(
sum
);
++
output_ptr1
;
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
for
(
int
i
=
0
;
i
<
3
;
++
i
)
{
row
[
i
]
+=
1
;
}
}
...
...
@@ -213,5 +220,5 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
}
}
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
mace/kernels/neon/conv_2d_neon_5x5.cc
浏览文件 @
8ae8f575
...
...
@@ -10,11 +10,11 @@
namespace
mace
{
namespace
kernels
{
void
Conv2dNeonK5x5S1
(
const
float
*
input
,
// NCHW
void
Conv2dNeonK5x5S1
(
const
float
*
input
,
// NCHW
const
index_t
*
input_shape
,
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
float
*
filter
,
// c_out, c_in, kernel_h, kernel_w
const
float
*
bias
,
// c_out
float
*
output
,
// NCHW
const
index_t
*
output_shape
)
{
const
index_t
batch
=
output_shape
[
0
];
const
index_t
channels
=
output_shape
[
1
];
...
...
@@ -30,17 +30,17 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
const
index_t
input_total_pixels_per_channel
=
input_height
*
input_width
;
const
index_t
output_total_pixels_per_channel
=
height
*
width
;
const
index_t
input_total_pixels_per_batch
=
input_total_pixels_per_channel
*
input_channels
;
const
index_t
output_total_pixels_per_batch
=
output_total_pixels_per_channel
*
channels
;
const
index_t
input_total_pixels_per_batch
=
input_total_pixels_per_channel
*
input_channels
;
const
index_t
output_total_pixels_per_batch
=
output_total_pixels_per_channel
*
channels
;
const
index_t
patch_size
=
input_channels
*
25
;
#pragma omp parallel for collapse(2)
for
(
index_t
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
float
*
output_ptr
=
output
+
n
*
output_total_pixels_per_batch
+
c
*
output_total_pixels_per_channel
;
float
*
output_ptr
=
output
+
n
*
output_total_pixels_per_batch
+
c
*
output_total_pixels_per_channel
;
const
float
*
input_ptr
=
input
+
n
*
input_total_pixels_per_batch
;
// Fill with bias
...
...
@@ -53,7 +53,7 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
float
*
outptr2
=
outptr
+
width
;
const
float
*
inptr
=
input_ptr
+
inc
*
input_total_pixels_per_channel
;
const
float
*
filter_ptr
=
filter
+
c
*
patch_size
+
inc
*
25
;
const
float
*
filter_ptr
=
filter
+
c
*
patch_size
+
inc
*
25
;
const
float
*
r0
=
inptr
;
const
float
*
r1
=
inptr
+
input_width
;
...
...
@@ -246,8 +246,8 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
sum2
=
r5
[
4
]
*
k4
[
4
];
float32x2_t
_ss
=
vadd_f32
(
vget_low_f32
(
_sum
),
vget_high_f32
(
_sum
));
float32x2_t
_ss2
=
vadd_f32
(
vget_low_f32
(
_sum2
),
vget_high_f32
(
_sum2
));
float32x2_t
_ss2
=
vadd_f32
(
vget_low_f32
(
_sum2
),
vget_high_f32
(
_sum2
));
float32x2_t
_ss_ss2
=
vpadd_f32
(
_ss
,
_ss2
);
sum
+=
vget_lane_f32
(
_ss_ss2
,
0
);
...
...
@@ -414,7 +414,7 @@ void Conv2dNeonK5x5S1(const float* input, // NCHW
}
}
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_NEON_CONV_2D_NEON_5X5_H_
#endif
// MACE_KERNELS_NEON_CONV_2D_NEON_5X5_H_
mace/kernels/neon/max_pooling_neon_2x2.cc
浏览文件 @
8ae8f575
...
...
@@ -2,19 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include <arm_neon.h>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
...
...
@@ -44,7 +42,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
int
w
=
0
;
int
num_vectors
=
0
;
if
(
!
((
h
==
0
&&
padding_top
>
0
)
||
(
h
==
out_height
-
1
&&
padding_bottom
>
0
)))
{
(
h
==
out_height
-
1
&&
padding_bottom
>
0
)))
{
r0
=
input
+
input_offset
+
(
h
*
2
-
padding_top
)
*
in_width
;
r1
=
r0
+
in_width
;
if
(
padding_left
>
0
)
{
...
...
@@ -86,8 +84,7 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
for
(
int
kw
=
0
;
kw
<
2
;
++
kw
)
{
int
inh
=
h
*
2
-
padding_top
+
kh
;
int
inw
=
w
*
2
-
padding_left
+
kw
;
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
max
=
std
::
max
(
max
,
input
[
input_offset
+
inh
*
in_width
+
inw
]);
}
}
...
...
@@ -104,10 +101,8 @@ void PoolingMaxNeonK2x2S2x2(const float *input,
}
// assume the input has already been padded
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
...
...
mace/kernels/neon/max_pooling_neon_3x3.cc
浏览文件 @
8ae8f575
...
...
@@ -2,19 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include <float.h>
#include <limits>
#include <arm_neon.h>
#include "mace/core/common.h"
namespace
mace
{
namespace
kernels
{
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
...
...
@@ -44,7 +42,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
int
num_vectors
=
0
;
const
float
*
r0
,
*
r1
,
*
r2
;
if
(
!
((
h
==
0
&&
padding_top
>
0
)
||
(
h
==
out_height
-
1
&&
padding_bottom
>
0
)))
{
(
h
==
out_height
-
1
&&
padding_bottom
>
0
)))
{
r0
=
input
+
input_offset
+
(
h
*
2
-
padding_top
)
*
in_width
;
r1
=
r0
+
in_width
;
r2
=
r1
+
in_width
;
...
...
@@ -112,8 +110,7 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
for
(
int
kw
=
0
;
kw
<
3
;
++
kw
)
{
int
inh
=
h
*
2
-
padding_top
+
kh
;
int
inw
=
w
*
2
-
padding_left
+
kw
;
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
if
(
inh
>=
0
&&
inh
<
in_height
&&
inw
>=
0
&&
inw
<
in_width
)
{
max
=
std
::
max
(
max
,
input
[
input_offset
+
inh
*
in_width
+
inw
]);
}
}
...
...
@@ -130,10 +127,8 @@ void PoolingMaxNeonK3x3S2x2(const float *input,
}
// assume the input has already been padded
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
)
{
index_t
batch
=
in_shape
[
0
];
index_t
channels
=
in_shape
[
1
];
index_t
in_height
=
in_shape
[
2
];
...
...
@@ -218,5 +213,5 @@ void PoolingMaxNeonK3x3S2x2Padded(const float *input,
}
}
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
mace/kernels/neon/pooling_neon.cc
浏览文件 @
8ae8f575
...
...
@@ -2,45 +2,36 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/pooling.h"
#include <arm_neon.h>
#include "mace/kernels/conv_pool_2d_util.h"
namespace
mace
{
namespace
kernels
{
extern
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
extern
void
PoolingMaxNeonK2x2S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
extern
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
extern
void
PoolingMaxNeonK3x3S2x2
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
,
const
int
*
paddings
);
#ifdef __COPY_MAKE_PADDING
extern
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK2x2S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
extern
void
PoolingMaxNeonK3x3S2x2Padded
(
const
float
*
input
,
const
index_t
*
in_shape
,
float
*
output
,
const
index_t
*
out_shape
);
#endif
template
<
>
template
<
>
void
PoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
index_t
*
output_shape
)
{
if
(
kernels_
[
0
]
==
2
&&
kernels_
[
1
]
==
2
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
if
(
kernels_
[
0
]
==
2
&&
kernels_
[
1
]
==
2
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
...
...
@@ -50,9 +41,8 @@ void PoolingFunctor<DeviceType::NEON, float>::operator()(
#else
PoolingMaxNeonK2x2S2x2
(
input
,
input_shape
,
output
,
output_shape
,
paddings_
);
#endif
}
else
if
(
kernels_
[
0
]
==
3
&&
kernels_
[
1
]
==
3
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
}
else
if
(
kernels_
[
0
]
==
3
&&
kernels_
[
1
]
==
3
&&
strides_
[
0
]
==
2
&&
strides_
[
1
]
==
2
&&
pooling_type_
==
MAX
)
{
#ifdef __COPY_MAKE_PADDING
Tensor
padded_input
;
ConstructInputWithPadding
(
input
,
input_shape
,
paddings_
,
&
padded_input
);
...
...
@@ -65,13 +55,9 @@ void PoolingFunctor<DeviceType::NEON, float>::operator()(
}
else
{
// not implement yet
PoolingFunctor
<
DeviceType
::
CPU
,
float
>
(
pooling_type_
,
kernels_
,
strides_
,
paddings_
,
dilations_
)(
input
,
input_shape
,
output
,
output_shape
);
input
,
input_shape
,
output
,
output_shape
);
}
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/neon/relu_neon.cc
浏览文件 @
8ae8f575
...
...
@@ -2,17 +2,17 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <arm_neon.h>
#include "mace/kernels/relu.h"
#include <arm_neon.h>
namespace
mace
{
namespace
kernels
{
template
<
>
void
ReluFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
float
*
output
,
index_t
size
)
{
#pragma omp parallel for num_threads(1) // no significant performance improve
float
*
output
,
index_t
size
)
{
#pragma omp parallel for num_threads(1)
// no significant performance improve
for
(
int64_t
i
=
0
;
i
<
size
;
i
+=
kCostPerGroup
)
{
int64_t
count
=
std
::
min
(
static_cast
<
int64_t
>
(
kCostPerGroup
),
size
-
i
);
int
nn
=
count
>>
2
;
...
...
@@ -36,6 +36,5 @@ void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
}
};
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
mace/kernels/pooling.h
浏览文件 @
8ae8f575
...
...
@@ -11,29 +11,24 @@
namespace
mace
{
enum
PoolingType
{
AVG
=
1
,
// avg_pool
MAX
=
2
,
// max_pool
AVG
=
1
,
// avg_pool
MAX
=
2
,
// max_pool
};
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
PoolingFunctor
{
public:
PoolingFunctor
(
const
PoolingType
pooling_type
,
const
int
*
kernels
,
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
PoolingFunctor
(
const
PoolingType
pooling_type
,
const
int
*
kernels
,
const
int
*
strides
,
const
int
*
paddings
,
const
int
*
dilations
)
:
pooling_type_
(
pooling_type
),
kernels_
(
kernels
),
strides_
(
strides
),
paddings_
(
paddings
),
dilations_
(
dilations
)
{}
void
operator
()(
const
T
*
input
,
const
index_t
*
input_shape
,
T
*
output
,
void
operator
()(
const
T
*
input
,
const
index_t
*
input_shape
,
T
*
output
,
const
index_t
*
output_shape
)
{
index_t
batch
=
output_shape
[
0
];
index_t
channels
=
output_shape
[
1
];
...
...
@@ -60,32 +55,31 @@ class PoolingFunctor {
#pragma omp parallel for collapse(2)
for
(
int
n
=
0
;
n
<
batch
;
++
n
)
{
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
index_t
out_offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
index_t
out_offset
=
n
*
channels
*
height
*
width
+
c
*
height
*
width
;
index_t
in_offset
=
n
*
input_channels
*
input_height
*
input_width
+
c
*
input_height
*
input_width
;
c
*
input_height
*
input_width
;
for
(
int
h
=
0
;
h
<
height
;
++
h
)
{
for
(
int
w
=
0
;
w
<
width
;
++
w
)
{
T
sum_or_max
=
0
;
switch
(
pooling_type_
)
{
case
AVG
:
break
;
case
MAX
:
sum_or_max
=
std
::
numeric_limits
<
T
>::
lowest
();
case
AVG
:
break
;
case
MAX
:
sum_or_max
=
std
::
numeric_limits
<
T
>::
lowest
();
break
;
default:
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
}
for
(
int
kh
=
0
;
kh
<
kernel_h
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
kernel_w
;
++
kw
)
{
int
inh
=
padded_h_start
+
h
*
stride_h
+
dilation_h
*
kh
;
int
inw
=
padded_w_start
+
w
*
stride_w
+
dilation_w
*
kw
;
if
(
inh
>=
0
&&
inh
<
input_height
&&
inw
>=
0
&&
inw
<
input_width
)
{
index_t
input_offset
=
in_offset
+
inh
*
input_width
+
inw
;
if
(
inh
>=
0
&&
inh
<
input_height
&&
inw
>=
0
&&
inw
<
input_width
)
{
index_t
input_offset
=
in_offset
+
inh
*
input_width
+
inw
;
switch
(
pooling_type_
)
{
case
AVG
:
sum_or_max
+=
input
[
input_offset
];
case
AVG
:
sum_or_max
+=
input
[
input_offset
];
break
;
case
MAX
:
sum_or_max
=
std
::
max
(
sum_or_max
,
input
[
input_offset
]);
...
...
@@ -98,14 +92,14 @@ class PoolingFunctor {
}
}
switch
(
pooling_type_
)
{
case
AVG
:
output
[
out_offset
]
=
sum_or_max
/
(
kernel_h
*
kernel_w
);
case
AVG
:
output
[
out_offset
]
=
sum_or_max
/
(
kernel_h
*
kernel_w
);
break
;
case
MAX
:
output
[
out_offset
]
=
sum_or_max
;
case
MAX
:
output
[
out_offset
]
=
sum_or_max
;
break
;
default:
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
MACE_CHECK
(
false
,
"Unsupported pooling type: "
,
pooling_type_
);
}
out_offset
+=
1
;
}
...
...
@@ -122,14 +116,12 @@ class PoolingFunctor {
const
int
*
dilations_
;
};
template
<
>
template
<
>
void
PoolingFunctor
<
DeviceType
::
NEON
,
float
>::
operator
()(
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
float
*
input
,
const
index_t
*
input_shape
,
float
*
output
,
const
index_t
*
output_shape
);
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
#endif
//
MACE_KERNELS_POOLING_H
#endif
//
MACE_KERNELS_POOLING_H
mace/kernels/relu.h
浏览文件 @
8ae8f575
...
...
@@ -10,7 +10,7 @@
namespace
mace
{
namespace
kernels
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ReluFunctor
{
void
operator
()(
const
T
*
input
,
T
*
output
,
index_t
size
)
{
for
(
index_t
i
=
0
;
i
<
size
;
++
i
)
{
...
...
@@ -24,7 +24,7 @@ void ReluFunctor<DeviceType::NEON, float>::operator()(const float *input,
float
*
output
,
index_t
size
);
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
#endif // MACE_KERNELS_RELU_H_
\ No newline at end of file
mace/kernels/resize_bilinear.h
浏览文件 @
8ae8f575
...
...
@@ -22,8 +22,8 @@ struct CachedInterpolation {
inline
float
CalculateResizeScale
(
index_t
in_size
,
index_t
out_size
,
bool
align_corners
)
{
return
(
align_corners
&&
out_size
>
1
)
?
(
in_size
-
1
)
/
static_cast
<
float
>
(
out_size
-
1
)
:
in_size
/
static_cast
<
float
>
(
out_size
);
?
(
in_size
-
1
)
/
static_cast
<
float
>
(
out_size
-
1
)
:
in_size
/
static_cast
<
float
>
(
out_size
);
}
inline
void
ComputeInterpolationWeights
(
const
index_t
out_size
,
...
...
@@ -41,21 +41,20 @@ inline void ComputeInterpolationWeights(const index_t out_size,
}
inline
float
ComputeLerp
(
const
float
top_left
,
const
float
top_right
,
const
float
bottom_left
,
const
float
bottom_right
,
const
float
x_lerp
,
const
float
y_lerp
)
{
const
float
bottom_left
,
const
float
bottom_right
,
const
float
x_lerp
,
const
float
y_lerp
)
{
const
float
top
=
top_left
+
(
top_right
-
top_left
)
*
x_lerp
;
const
float
bottom
=
bottom_left
+
(
bottom_right
-
bottom_left
)
*
x_lerp
;
return
top
+
(
bottom
-
top
)
*
y_lerp
;
}
template
<
typename
T
>
void
ResizeImage
(
const
T
*
images
,
const
index_t
batch_size
,
const
index_t
in_height
,
const
index_t
in_width
,
const
index_t
out_height
,
const
index_t
out_width
,
const
index_t
channels
,
template
<
typename
T
>
void
ResizeImage
(
const
T
*
images
,
const
index_t
batch_size
,
const
index_t
in_height
,
const
index_t
in_width
,
const
index_t
out_height
,
const
index_t
out_width
,
const
index_t
channels
,
const
std
::
vector
<
CachedInterpolation
>
&
xs_vec
,
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
float
*
output
)
{
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
float
*
output
)
{
const
index_t
in_channel_size
=
in_height
*
in_width
;
const
index_t
in_batch_num_values
=
channels
*
in_channel_size
;
const
index_t
out_channel_size
=
out_height
*
out_width
;
...
...
@@ -65,10 +64,10 @@ void ResizeImage(const T *images,
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
T
*
input_ptr
=
images
+
in_batch_num_values
*
b
+
in_channel_size
*
c
;
float
*
output_ptr
=
output
+
out_batch_num_values
*
b
+
out_channel_size
*
c
;
const
T
*
input_ptr
=
images
+
in_batch_num_values
*
b
+
in_channel_size
*
c
;
float
*
output_ptr
=
output
+
out_batch_num_values
*
b
+
out_channel_size
*
c
;
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
const
T
*
ys_input_lower_ptr
=
input_ptr
+
ys
[
y
].
lower
*
in_width
;
const
T
*
ys_input_upper_ptr
=
input_ptr
+
ys
[
y
].
upper
*
in_width
;
...
...
@@ -83,9 +82,8 @@ void ResizeImage(const T *images,
const
float
bottom_left
=
ys_input_upper_ptr
[
xs_lower
];
const
float
bottom_right
=
ys_input_upper_ptr
[
xs_upper
];
output_ptr
[
x
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
output_ptr
[
x
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
}
output_ptr
+=
out_width
;
}
...
...
@@ -94,16 +92,15 @@ void ResizeImage(const T *images,
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ResizeBilinearFunctor
{
bool
align_corners_
;
ResizeBilinearFunctor
(
bool
align_corners
)
:
align_corners_
(
align_corners
)
{}
ResizeBilinearFunctor
(
bool
align_corners
)
:
align_corners_
(
align_corners
)
{}
void
operator
()(
const
T
*
input
,
T
*
output
,
index_t
n
,
index_t
channels
,
index_t
in
_height
,
index_t
in_width
,
index_t
out_height
,
index_t
out_width
)
{
void
operator
()(
const
T
*
input
,
T
*
output
,
index_t
n
,
index_t
channels
,
index_t
in_height
,
index_t
in_width
,
index_t
out
_height
,
index_t
out_width
)
{
if
(
out_height
==
in_height
&&
out_width
==
in_width
)
{
std
::
copy
(
input
,
input
+
channels
*
in_height
*
in_width
,
output
);
return
;
...
...
@@ -111,8 +108,8 @@ struct ResizeBilinearFunctor {
float
height_scale
=
CalculateResizeScale
(
in_height
,
out_height
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
std
::
vector
<
CachedInterpolation
>
ys
(
out_height
+
1
);
std
::
vector
<
CachedInterpolation
>
xs
(
out_width
+
1
);
...
...
@@ -121,12 +118,12 @@ struct ResizeBilinearFunctor {
ComputeInterpolationWeights
(
out_height
,
in_height
,
height_scale
,
ys
.
data
());
ComputeInterpolationWeights
(
out_width
,
in_width
,
width_scale
,
xs
.
data
());
ResizeImage
(
input
,
n
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output
);
ResizeImage
(
input
,
n
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output
);
}
};
}
// namespace kernels
}
// namespace mace
}
// namespace kernels
}
// namespace mace
#endif // MACE_KERNELS_RESIZE_BILINEAR_H_
#endif
// MACE_KERNELS_RESIZE_BILINEAR_H_
mace/ops/addn.cc
浏览文件 @
8ae8f575
...
...
@@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(AddN, AddNOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
AddN
,
AddNOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
#endif
// __ARM_NEON
}
// namespace mace
}
// namespace mace
mace/ops/addn.h
浏览文件 @
8ae8f575
...
...
@@ -10,10 +10,10 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
AddNOp
:
public
Operator
<
D
,
T
>
{
public:
AddNOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
AddNOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{}
bool
Run
()
override
{
...
...
@@ -36,6 +36,6 @@ class AddNOp : public Operator<D, T> {
kernels
::
AddNFunctor
<
D
,
T
>
functor_
;
};
}
// namespace mace
}
// namespace mace
#endif // MACE_OPS_ADDN_H_
#endif
// MACE_OPS_ADDN_H_
mace/ops/addn_benchmark.cc
浏览文件 @
8ae8f575
...
...
@@ -10,7 +10,6 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
AddNBenchmark
(
int
iters
,
int
n
,
int
size
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -18,8 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) {
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
op_def_builder
.
Output
(
"Output"
).
Finalize
(
net
.
operator_def
());
// Add input data
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
@@ -32,27 +30,26 @@ static void AddNBenchmark(int iters, int n, int size) {
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \
static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
AddNBenchmark<DEVICE, TYPE>(iters, N, SIZE); \
} \
#define BM_ADDN_MACRO(N, SIZE, TYPE, DEVICE) \
static void BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \
AddNBenchmark<DEVICE, TYPE>(iters, N, SIZE); \
} \
BENCHMARK(BM_ADDN_##N##_##SIZE##_##TYPE##_##DEVICE)
#define BM_ADDN(N, SIZE, TYPE)
\
BM_ADDN_MACRO(N, SIZE, TYPE, CPU);
\
#define BM_ADDN(N, SIZE, TYPE) \
BM_ADDN_MACRO(N, SIZE, TYPE, CPU); \
BM_ADDN_MACRO(N, SIZE, TYPE, NEON);
BM_ADDN
(
10
,
1000
,
float
);
BM_ADDN
(
10
,
10000
,
float
);
BM_ADDN
(
100
,
1000
,
float
);
BM_ADDN
(
100
,
10000
,
float
);
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/ops/addn_test.cc
浏览文件 @
8ae8f575
...
...
@@ -36,4 +36,4 @@ TEST_F(AddnOpTest, AddnOp) {
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
}
// namespace mace
}
// namespace mace
mace/ops/batch_norm.cc
浏览文件 @
8ae8f575
...
...
@@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(BatchNorm, BatchNormOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
BatchNorm
,
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
#endif
// __ARM_NEON
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/ops/batch_norm.h
浏览文件 @
8ae8f575
...
...
@@ -10,50 +10,55 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
BatchNormOp
:
public
Operator
<
D
,
T
>
{
public:
BatchNormOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
),
functor_
(
OperatorBase
::
GetSingleArgument
<
float
>
(
"variance_epsilon"
,
1e-4
)){}
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
0
);
const
Tensor
*
scale
=
this
->
Input
(
1
);
const
Tensor
*
offset
=
this
->
Input
(
2
);
const
Tensor
*
mean
=
this
->
Input
(
3
);
const
Tensor
*
var
=
this
->
Input
(
4
);
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input must be 4-dimensional. "
,
input
->
dim_size
());
MACE_CHECK
(
scale
->
dim_size
()
==
1
,
"scale must be 1-dimensional. "
,
scale
->
dim_size
());
MACE_CHECK
(
offset
->
dim_size
()
==
1
,
"offset must be 1-dimensional. "
,
offset
->
dim_size
());
MACE_CHECK
(
mean
->
dim_size
()
==
1
,
"mean must be 1-dimensional. "
,
mean
->
dim_size
());
MACE_CHECK
(
var
->
dim_size
()
==
1
,
"var must be 1-dimensional. "
,
var
->
dim_size
());
Tensor
*
output
=
this
->
Output
(
0
);
output
->
ResizeLike
(
input
);
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
functor_
(
input_ptr
,
scale_ptr
,
offset_ptr
,
mean_ptr
,
var_ptr
,
n
,
channel
,
sample_size
,
output_ptr
);
return
true
;
}
private:
kernels
::
BatchNormFunctor
<
D
,
T
>
functor_
;
public:
BatchNormOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
),
functor_
(
OperatorBase
::
GetSingleArgument
<
float
>
(
"variance_epsilon"
,
1e-4
))
{}
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
0
);
const
Tensor
*
scale
=
this
->
Input
(
1
);
const
Tensor
*
offset
=
this
->
Input
(
2
);
const
Tensor
*
mean
=
this
->
Input
(
3
);
const
Tensor
*
var
=
this
->
Input
(
4
);
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input must be 4-dimensional. "
,
input
->
dim_size
());
MACE_CHECK
(
scale
->
dim_size
()
==
1
,
"scale must be 1-dimensional. "
,
scale
->
dim_size
());
MACE_CHECK
(
offset
->
dim_size
()
==
1
,
"offset must be 1-dimensional. "
,
offset
->
dim_size
());
MACE_CHECK
(
mean
->
dim_size
()
==
1
,
"mean must be 1-dimensional. "
,
mean
->
dim_size
());
MACE_CHECK
(
var
->
dim_size
()
==
1
,
"var must be 1-dimensional. "
,
var
->
dim_size
());
Tensor
*
output
=
this
->
Output
(
0
);
output
->
ResizeLike
(
input
);
const
index_t
n
=
input
->
dim
(
0
);
const
index_t
channel
=
input
->
dim
(
1
);
const
index_t
sample_size
=
input
->
dim
(
2
)
*
input
->
dim
(
3
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
const
T
*
scale_ptr
=
scale
->
data
<
T
>
();
const
T
*
offset_ptr
=
offset
->
data
<
T
>
();
const
T
*
mean_ptr
=
mean
->
data
<
T
>
();
const
T
*
var_ptr
=
var
->
data
<
T
>
();
T
*
output_ptr
=
output
->
mutable_data
<
T
>
();
functor_
(
input_ptr
,
scale_ptr
,
offset_ptr
,
mean_ptr
,
var_ptr
,
n
,
channel
,
sample_size
,
output_ptr
);
return
true
;
}
private:
kernels
::
BatchNormFunctor
<
D
,
T
>
functor_
;
};
}
// namespace mace
}
// namespace mace
#endif // MACE_BATCH_NORM_H_
#endif
// MACE_BATCH_NORM_H_
mace/ops/batch_norm_benchmark.cc
浏览文件 @
8ae8f575
...
...
@@ -8,19 +8,19 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
BatchNorm
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
static
void
BatchNorm
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
@@ -35,23 +35,23 @@ static void BatchNorm(int iters, int batch, int channels, int height, int width)
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE)
\
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE(
\
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;
\
mace::testing::ItemsProcessed(tot);
\
mace::testing::BytesProcessed(tot
* (sizeof(TYPE)));
\
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W);
\
}
\
#define BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot
*(sizeof(TYPE)));
\
BatchNorm<DEVICE, TYPE>(iters, N, C, H, W); \
} \
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BATCH_NORM(N, C, H, W, TYPE)
\
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU);
\
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);
BM_BATCH_NORM
(
1
,
1
,
512
,
512
,
float
);
...
...
@@ -65,4 +65,4 @@ BM_BATCH_NORM(1, 128, 256, 256, float);
BM_BATCH_NORM
(
1
,
128
,
512
,
512
,
float
);
BM_BATCH_NORM
(
32
,
1
,
256
,
256
,
float
);
BM_BATCH_NORM
(
32
,
3
,
256
,
256
,
float
);
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/ops/batch_norm_test.cc
浏览文件 @
8ae8f575
...
...
@@ -13,17 +13,17 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
float
>
(
"Mean"
,
{
1
},
{
10
});
...
...
@@ -33,8 +33,8 @@ TEST_F(BatchNormOpTest, SimpleCPU) {
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
6
,
2
},
{
-
3.86
,
-
3.86
,
-
1.51
,
-
1.51
,
0.83
,
0.83
,
3.17
,
3.17
,
5.51
,
5.51
,
7.86
,
7.86
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
...
...
@@ -51,13 +51,13 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Offset"
)
.
Input
(
"Mean"
)
.
Input
(
"Var"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add input data
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
@@ -77,5 +77,4 @@ TEST_F(BatchNormOpTest, SimpleNeon) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
mace/ops/conv_2d.cc
浏览文件 @
8ae8f575
...
...
@@ -11,6 +11,6 @@ REGISTER_CPU_OPERATOR(Conv2d, Conv2dOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
Conv2d
,
Conv2dOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
#endif
// __ARM_NEON
}
// namespace mace
}
// namespace mace
mace/ops/conv_2d.h
浏览文件 @
8ae8f575
...
...
@@ -13,11 +13,11 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
class
Conv2dOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
public:
Conv2dOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
)
{};
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
)
{};
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
...
...
@@ -27,21 +27,16 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
std
::
vector
<
index_t
>
output_shape
(
4
);
std
::
vector
<
int
>
paddings
(
2
);
kernels
::
CalcPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
this
->
dilations_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
kernels
::
CalcPaddingAndOutputSize
(
input
->
shape
().
data
(),
filter
->
shape
().
data
(),
this
->
dilations_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
output
->
Resize
(
output_shape
);
auto
conv2d
=
kernels
::
Conv2dFunctor
<
D
,
T
>
(
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
conv2d
(
input
->
data
<
T
>
(),
input
->
shape
().
data
(),
filter
->
data
<
T
>
(),
filter
->
shape
().
data
(),
bias
->
data
<
T
>
(),
output
->
mutable_data
<
T
>
(),
auto
conv2d
=
kernels
::
Conv2dFunctor
<
D
,
T
>
(
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
conv2d
(
input
->
data
<
T
>
(),
input
->
shape
().
data
(),
filter
->
data
<
T
>
(),
filter
->
shape
().
data
(),
bias
->
data
<
T
>
(),
output
->
mutable_data
<
T
>
(),
output
->
shape
().
data
());
return
true
;
...
...
@@ -52,6 +47,6 @@ class Conv2dOp : public ConvPool2dOpBase<D, T> {
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace mace
}
// namespace mace
#endif // MACE_OPS_CONV_2D_H_
#endif
// MACE_OPS_CONV_2D_H_
mace/ops/conv_2d_benchmark.cc
浏览文件 @
8ae8f575
...
...
@@ -13,17 +13,17 @@ namespace mace {
template
<
DeviceType
D
,
typename
T
>
static
void
Conv2d
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel_h
,
int
kernel_w
,
int
stride
,
Padding
padding
,
int
output_channels
)
{
int
kernel_h
,
int
kernel_w
,
int
stride
,
Padding
padding
,
int
output_channels
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
});
...
...
@@ -32,7 +32,8 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
// Add input data
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Bias"
,
{
output_channels
});
// Warm-up
...
...
@@ -41,27 +42,30 @@ static void Conv2d(int iters, int batch, int channels, int height, int width,
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, OC); \
} \
BENCHMARK(BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
OC); \
} \
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE)
\
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU);
\
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
1
,
1
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
1
,
1
,
1
,
VALID
,
128
,
float
);
// Test bad alignments
BM_CONV_2D
(
1
,
64
,
33
,
31
,
1
,
1
,
1
,
VALID
,
128
,
float
);
// Test bad alignments
BM_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
33
,
31
,
3
,
3
,
1
,
VALID
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
SAME
,
128
,
float
);
...
...
@@ -71,4 +75,4 @@ BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
BM_CONV_2D
(
1
,
64
,
32
,
32
,
5
,
5
,
1
,
SAME
,
128
,
float
);
BM_CONV_2D
(
1
,
64
,
32
,
31
,
5
,
5
,
1
,
SAME
,
128
,
float
);
}
// namespace mace
}
// namespace mace
mace/ops/conv_2d_test.cc
浏览文件 @
8ae8f575
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/ops/conv_2d.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
...
...
@@ -14,11 +14,11 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
...
...
@@ -26,17 +26,13 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
1
},
{
0.1
f
});
// Run
...
...
@@ -52,11 +48,11 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
...
...
@@ -64,27 +60,22 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
3
,
3
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
1
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
1
},
{
0.1
f
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
1
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -93,11 +84,11 @@ TEST_F(Conv2dOpTest, Combined) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
...
...
@@ -105,36 +96,24 @@ TEST_F(Conv2dOpTest, Combined) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
5
,
5
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
5
,
5
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
2
,
3
,
3
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
,
0.5
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
,
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
3
,
3
},
{
8.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
18.1
f
,
12.1
f
,
8.1
f
,
12.1
f
,
8.1
f
,
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -143,11 +122,11 @@ TEST_F(Conv2dOpTest, Conv1x1) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
...
...
@@ -155,38 +134,32 @@ TEST_F(Conv2dOpTest, Conv1x1) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
5
,
3
,
10
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
5
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
5
,
3
,
10
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
float
>
(
"Filter"
,
{
2
,
5
,
1
,
1
},
{
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
,
2.0
f
});
net
.
AddInputFromArray
<
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
3
,
10
},
{
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
3
,
10
},
{
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
5.1
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
,
10.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -194,8 +167,7 @@ TEST_F(Conv2dOpTest, Conv1x1) {
// TODO we need more tests
TEST_F
(
Conv2dOpTest
,
ConvNxNS12
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
Padding
type
)
{
srand
(
time
(
NULL
));
...
...
@@ -206,7 +178,7 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
index_t
width
=
7
+
rand
()
%
100
;
index_t
output_channels
=
1
+
rand
()
%
50
;
// Construct graph
auto
&
net
=
test_net
();
auto
&
net
=
test_net
();
OpDefBuilder
(
"Conv2d"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Filter"
)
...
...
@@ -221,8 +193,8 @@ TEST_F(Conv2dOpTest, ConvNxNS12) {
// Add input data
net
.
AddRandomInput
<
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Filter"
,
{
output_channels
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
float
>
(
"Bias"
,
{
output_channels
});
// run cpu
net
.
RunOp
();
...
...
mace/ops/conv_pool_2d_base.h
浏览文件 @
8ae8f575
...
...
@@ -10,16 +10,15 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
ConvPool2dOpBase
:
public
Operator
<
D
,
T
>
{
public:
ConvPool2dOpBase
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
strides_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"strides"
)),
padding_
(
static_cast
<
Padding
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
)))),
dilations_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"dilations"
))
{}
:
Operator
<
D
,
T
>
(
op_def
,
ws
),
strides_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"strides"
)),
padding_
(
static_cast
<
Padding
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"padding"
,
static_cast
<
int
>
(
SAME
)))),
dilations_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"dilations"
))
{}
protected:
std
::
vector
<
int
>
strides_
;
...
...
@@ -27,6 +26,6 @@ class ConvPool2dOpBase : public Operator<D, T> {
std
::
vector
<
int
>
dilations_
;
};
}
// namespace mace
}
// namespace mace
#endif // MACE_OPS_CONV_POOL_2D_BASE_H_
#endif
// MACE_OPS_CONV_POOL_2D_BASE_H_
mace/ops/ops_test_util.h
浏览文件 @
8ae8f575
...
...
@@ -43,31 +43,33 @@ class OpsTestNet {
public:
OpsTestNet
()
{}
template
<
typename
T
>
void
AddInputFromArray
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
template
<
typename
T
>
void
AddInputFromArray
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
T
*
input_data
=
input
->
mutable_data
<
T
>
();
MACE_CHECK
(
input
->
size
()
==
data
.
size
());
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
}
template
<
typename
T
>
void
AddRepeatedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
T
data
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
template
<
typename
T
>
void
AddRepeatedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
T
data
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
T
*
input_data
=
input
->
mutable_data
<
T
>
();
MACE_CHECK
(
input
->
size
()
==
data
.
size
());
std
::
fill
(
input_data
,
input_data
+
input
->
size
(),
data
);
}
template
<
typename
T
>
void
AddRandomInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
bool
positive
=
false
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
template
<
typename
T
>
void
AddRandomInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
bool
positive
=
false
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
float
*
input_data
=
input
->
mutable_data
<
T
>
();
...
...
@@ -76,12 +78,16 @@ class OpsTestNet {
std
::
normal_distribution
<
T
>
nd
(
0
,
1
);
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
return
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
);
});
[
&
gen
,
&
nd
,
positive
]
{
return
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
);
});
}
template
<
typename
T
>
void
AddFixedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
T
value
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
template
<
typename
T
>
void
AddFixedInput
(
const
char
*
name
,
const
std
::
vector
<
index_t
>
&
shape
,
T
value
)
{
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
());
input
->
Resize
(
shape
);
float
*
input_data
=
input
->
mutable_data
<
T
>
();
...
...
@@ -122,7 +128,8 @@ class OpsTestNet {
}
}
void
AddStringsArg
(
const
char
*
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
void
AddStringsArg
(
const
char
*
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
...
...
@@ -145,9 +152,7 @@ class OpsTestNet {
return
net_
->
Run
();
}
bool
RunOp
()
{
return
RunOp
(
DeviceType
::
CPU
);
}
bool
RunOp
()
{
return
RunOp
(
DeviceType
::
CPU
);
}
Tensor
*
GetOutput
(
const
char
*
output_name
)
{
return
ws_
.
GetTensor
(
output_name
);
...
...
@@ -177,8 +182,9 @@ class OpsTestBase : public ::testing::Test {
OpsTestNet
test_net_
;
};
template
<
typename
T
>
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
template
<
typename
T
>
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
cpu_allocator
(),
DataTypeToEnum
<
T
>::
v
()));
res
->
Resize
(
shape
);
T
*
input_data
=
res
->
mutable_data
<
T
>
();
...
...
@@ -209,40 +215,38 @@ inline std::string ShapeToString(const Tensor &x) {
return
std
::
string
(
stream
.
str
());
}
template
<
typename
T
>
template
<
typename
T
>
struct
is_floating_point_type
{
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
;
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
;
};
template
<
typename
T
>
template
<
typename
T
>
inline
void
ExpectEqual
(
const
T
&
a
,
const
T
&
b
)
{
EXPECT_EQ
(
a
,
b
);
}
template
<
>
template
<
>
inline
void
ExpectEqual
<
float
>
(
const
float
&
a
,
const
float
&
b
)
{
EXPECT_FLOAT_EQ
(
a
,
b
);
}
template
<
>
template
<
>
inline
void
ExpectEqual
<
double
>
(
const
double
&
a
,
const
double
&
b
)
{
EXPECT_DOUBLE_EQ
(
a
,
b
);
}
inline
void
AssertSameTypeDims
(
const
Tensor
&
x
,
const
Tensor
&
y
)
{
ASSERT_EQ
(
x
.
dtype
(),
y
.
dtype
());
ASSERT_TRUE
(
IsSameSize
(
x
,
y
))
<<
"x.shape ["
<<
ShapeToString
(
x
)
<<
"] vs "
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
ASSERT_TRUE
(
IsSameSize
(
x
,
y
))
<<
"x.shape ["
<<
ShapeToString
(
x
)
<<
"] vs "
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
}
template
<
typename
T
,
bool
is_fp
=
is_floating_point_type
<
T
>
::
value
>
template
<
typename
T
,
bool
is_fp
=
is_floating_point_type
<
T
>
::
value
>
struct
Expector
;
// Partial specialization for float and double.
template
<
typename
T
>
template
<
typename
T
>
struct
Expector
<
T
,
true
>
{
static
void
Equal
(
const
T
&
a
,
const
T
&
b
)
{
ExpectEqual
(
a
,
b
);
}
...
...
@@ -262,18 +266,19 @@ struct Expector<T, true> {
auto
a
=
x
.
data
<
T
>
();
auto
b
=
y
.
data
<
T
>
();
for
(
int
i
=
0
;
i
<
x
.
size
();
++
i
)
{
EXPECT_NEAR
(
a
[
i
],
b
[
i
],
abs_err
)
<<
"a = "
<<
a
<<
" b = "
<<
b
<<
" index = "
<<
i
;
EXPECT_NEAR
(
a
[
i
],
b
[
i
],
abs_err
)
<<
"a = "
<<
a
<<
" b = "
<<
b
<<
" index = "
<<
i
;
}
}
};
template
<
typename
T
>
template
<
typename
T
>
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
static_assert
(
is_floating_point_type
<
T
>::
value
,
"T is not a floating point type"
);
static_assert
(
is_floating_point_type
<
T
>::
value
,
"T is not a floating point type"
);
Expector
<
T
>::
Near
(
x
,
y
,
abs_err
);
}
}
// namespace mace
}
// namespace mace
#endif // MACE_OPS_TEST_UTIL_H_
#endif
// MACE_OPS_TEST_UTIL_H_
mace/ops/pooling.cc
浏览文件 @
8ae8f575
...
...
@@ -2,7 +2,6 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/pooling.h"
namespace
mace
{
...
...
@@ -11,6 +10,6 @@ REGISTER_CPU_OPERATOR(Pooling, PoolingOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
Pooling
,
PoolingOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
#endif
// __ARM_NEON
}
// namespace mace
}
// namespace mace
mace/ops/pooling.h
浏览文件 @
8ae8f575
...
...
@@ -11,17 +11,17 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
PoolingOp
:
public
ConvPool2dOpBase
<
D
,
T
>
{
public:
public:
PoolingOp
(
const
OperatorDef
&
op_def
,
Workspace
*
ws
)
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
),
kernels_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"kernels"
)),
pooling_type_
(
static_cast
<
PoolingType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"pooling_type"
,
static_cast
<
int
>
(
AVG
))))
{};
:
ConvPool2dOpBase
<
D
,
T
>
(
op_def
,
ws
),
kernels_
(
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"kernels"
)),
pooling_type_
(
static_cast
<
PoolingType
>
(
OperatorBase
::
GetSingleArgument
<
int
>
(
"pooling_type"
,
static_cast
<
int
>
(
AVG
))))
{};
bool
Run
()
override
{
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
INPUT
);
Tensor
*
output
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
in_shape
=
input
->
shape
();
...
...
@@ -33,28 +33,21 @@ public:
filter_shape
[
1
]
=
in_shape
[
0
];
filter_shape
[
2
]
=
kernels_
[
0
];
filter_shape
[
3
]
=
kernels_
[
1
];
kernels
::
CalcPaddingAndOutputSize
(
in_shape
.
data
(),
filter_shape
.
data
(),
kernels
::
CalcPaddingAndOutputSize
(
in_shape
.
data
(),
filter_shape
.
data
(),
this
->
dilations_
.
data
(),
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
this
->
strides_
.
data
(),
this
->
padding_
,
output_shape
.
data
(),
paddings
.
data
());
output
->
Resize
(
output_shape
);
auto
pooling_func
=
kernels
::
PoolingFunctor
<
D
,
T
>
(
pooling_type_
,
kernels_
.
data
(),
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
pooling_func
(
input
->
data
<
float
>
(),
in_shape
.
data
(),
output
->
mutable_data
<
float
>
(),
output
->
shape
().
data
());
auto
pooling_func
=
kernels
::
PoolingFunctor
<
D
,
T
>
(
pooling_type_
,
kernels_
.
data
(),
this
->
strides_
.
data
(),
paddings
.
data
(),
this
->
dilations_
.
data
());
pooling_func
(
input
->
data
<
float
>
(),
in_shape
.
data
(),
output
->
mutable_data
<
float
>
(),
output
->
shape
().
data
());
return
true
;
};
protected:
protected:
std
::
vector
<
int
>
kernels_
;
PoolingType
pooling_type_
;
...
...
@@ -62,6 +55,6 @@ protected:
OP_OUTPUT_TAGS
(
OUTPUT
);
};
}
// namespace mace
}
// namespace mace
#endif
//
MACE_OPS_POOLING_H_
#endif
//
MACE_OPS_POOLING_H_
mace/ops/pooling_benchmark.cc
浏览文件 @
8ae8f575
...
...
@@ -2,20 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/testing/test_benchmark.h"
#include "mace/core/operator.h"
#include "mace/kernels/pooling.h"
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/kernels/conv_pool_2d_util.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
using
namespace
mace
::
kernels
;
template
<
DeviceType
D
>
static
void
Pooling
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel
,
int
stride
,
Padding
padding
,
template
<
DeviceType
D
>
static
void
Pooling
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
,
int
kernel
,
int
stride
,
Padding
padding
,
PoolingType
pooling_type
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -45,18 +44,21 @@ static void Pooling(int iters, int batch, int channels, int height,
}
}
#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \
static void BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(float)));\
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, PoolingType::PO); \
} \
BENCHMARK(BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
#define BM_POOLING_MACRO(N, C, H, W, KE, STRIDE, PA, PO, DEVICE) \
static void \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(float))); \
Pooling<DEVICE>(iters, N, C, H, W, KE, STRIDE, Padding::PA, \
PoolingType::PO); \
} \
BENCHMARK( \
BM_POOLING_##N##_##C##_##H##_##W##_K##KE##S##STRIDE##_##PA##_##PO##_##DEVICE)
#define BM_POOLING(N, C, H, W, K, S, PA, PO)
\
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU);
\
#define BM_POOLING(N, C, H, W, K, S, PA, PO) \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, CPU); \
BM_POOLING_MACRO(N, C, H, W, K, S, PA, PO, NEON);
BM_POOLING
(
1
,
3
,
129
,
129
,
2
,
2
,
SAME
,
MAX
);
...
...
mace/ops/pooling_test.cc
浏览文件 @
8ae8f575
...
...
@@ -5,9 +5,9 @@
#include "gtest/gtest.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/kernels/pooling.h"
#include "mace/ops/conv_pool_2d_base.h"
#include "mace/ops/ops_test_util.h"
using
namespace
mace
;
...
...
@@ -17,9 +17,9 @@ TEST_F(PoolingOpTest, MAX_VALID) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
...
...
@@ -29,34 +29,28 @@ TEST_F(PoolingOpTest, MAX_VALID) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
2
,
2
},
{
5
,
7
,
13
,
15
,
21
,
23
,
29
,
31
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
2
,
2
,
2
},
{
5
,
7
,
13
,
15
,
21
,
23
,
29
,
31
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
PoolingOpTest
,
AVG_VALID
)
{
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
...
...
@@ -66,22 +60,17 @@ TEST_F(PoolingOpTest, AVG_VALID) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
);
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
2
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
2
,
2
},
{
2.5
,
4.5
,
10.5
,
12.5
,
18.5
,
20.5
,
26.5
,
28.5
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
2
,
2
,
2
},
{
2.5
,
4.5
,
10.5
,
12.5
,
18.5
,
20.5
,
26.5
,
28.5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -90,9 +79,9 @@ TEST_F(PoolingOpTest, MAX_SAME) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
...
...
@@ -103,16 +92,13 @@ TEST_F(PoolingOpTest, MAX_SAME) {
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
3
,
3
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
});
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
4
,
5
,
7
,
8
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
4
,
5
,
7
,
8
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -121,9 +107,9 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
...
...
@@ -133,18 +119,15 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
4
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
10
,
11
,
14
,
15
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
2
},
{
10
,
11
,
14
,
15
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -153,9 +136,9 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
...
...
@@ -165,18 +148,14 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
6
,
8
,
9
,
16
,
18
,
19
});
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -185,9 +164,9 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
// Construct graph
auto
&
net
=
test_net
();
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
...
...
@@ -197,18 +176,14 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
net
.
AddInputFromArray
<
float
>
(
"Input"
,
{
1
,
1
,
4
,
5
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
});
// Run
net
.
RunOp
(
DeviceType
::
NEON
);
// Check
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
Tensor
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
11
,
13
,
14
,
16
,
18
,
19
});
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
mace/ops/relu.cc
浏览文件 @
8ae8f575
...
...
@@ -10,6 +10,6 @@ REGISTER_CPU_OPERATOR(Relu, ReluOp<DeviceType::CPU, float>);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
Relu
,
ReluOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
#endif
// __ARM_NEON
}
// namespace mace
}
// namespace mace
mace/ops/relu.h
浏览文件 @
8ae8f575
...
...
@@ -10,10 +10,10 @@
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
ReluOp
:
public
Operator
<
D
,
T
>
{
public:
ReluOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
ReluOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
)
{}
bool
Run
()
override
{
const
Tensor
*
input_tensor
=
this
->
inputs_
[
0
];
...
...
@@ -31,6 +31,6 @@ class ReluOp : public Operator<D, T> {
kernels
::
ReluFunctor
<
D
,
T
>
functor_
;
};
}
// namespace mace
}
// namespace mace
#endif // MACE_OPS_RELU_H_
#endif
// MACE_OPS_RELU_H_
mace/ops/relu_benchmark.cc
浏览文件 @
8ae8f575
...
...
@@ -10,7 +10,6 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
ReluBenchmark
(
int
iters
,
int
size
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -28,26 +27,25 @@ static void ReluBenchmark(int iters, int size) {
}
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
while
(
iters
--
)
{
net
.
RunOp
(
D
);
}
}
#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot * (sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, SIZE); \
} \
#define BM_RELU_MACRO(SIZE, TYPE, DEVICE) \
static void BM_RELU_##SIZE##_##TYPE##_##DEVICE(int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * SIZE; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot*(sizeof(TYPE))); \
ReluBenchmark<DEVICE, TYPE>(iters, SIZE); \
} \
BENCHMARK(BM_RELU_##SIZE##_##TYPE##_##DEVICE)
#define BM_RELU(SIZE, TYPE)
\
BM_RELU_MACRO(SIZE, TYPE, CPU);
\
#define BM_RELU(SIZE, TYPE) \
BM_RELU_MACRO(SIZE, TYPE, CPU); \
BM_RELU_MACRO(SIZE, TYPE, NEON);
BM_RELU
(
1000
,
float
);
BM_RELU
(
100000
,
float
);
BM_RELU
(
10000000
,
float
);
}
// namespace mace
\ No newline at end of file
}
// namespace mace
\ No newline at end of file
mace/ops/relu_test.cc
浏览文件 @
8ae8f575
...
...
@@ -32,4 +32,4 @@ TEST_F(ReluOpTest, ReluOp) {
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
}
// namespace mace
}
// namespace mace
mace/ops/resize_bilinear.cc
浏览文件 @
8ae8f575
...
...
@@ -9,7 +9,8 @@ namespace mace {
REGISTER_CPU_OPERATOR
(
ResizeBilinear
,
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
#if __ARM_NEON
REGISTER_NEON_OPERATOR
(
ResizeBilinear
,
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
REGISTER_NEON_OPERATOR
(
ResizeBilinear
,
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
#endif // __ARM_NEON
}
// namespace mace
}
// namespace mace
mace/ops/resize_bilinear.h
浏览文件 @
8ae8f575
...
...
@@ -5,18 +5,18 @@
#ifndef MACE_RESIZE_BILINEAR_H
#define MACE_RESIZE_BILINEAR_H
#include "mace/core/operator.h"
#include "mace/kernels/resize_bilinear.h"
namespace
mace
{
template
<
DeviceType
D
,
class
T
>
template
<
DeviceType
D
,
class
T
>
class
ResizeBilinearOp
:
public
Operator
<
D
,
T
>
{
public:
ResizeBilinearOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
ResizeBilinearOp
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
Operator
<
D
,
T
>
(
operator_def
,
ws
),
functor_
(
OperatorBase
::
GetSingleArgument
<
bool
>
(
"align_corners"
,
false
))
{}
functor_
(
OperatorBase
::
GetSingleArgument
<
bool
>
(
"align_corners"
,
false
))
{}
bool
Run
()
override
{
const
Tensor
*
input
=
this
->
Input
(
0
);
...
...
@@ -24,8 +24,8 @@ class ResizeBilinearOp : public Operator<D, T> {
MACE_CHECK
(
input
->
dim_size
()
==
4
,
"input must be 4-dimensional."
,
input
->
dim_size
());
MACE_CHECK
(
resize_dims
->
dim_size
()
==
1
,
"resize dim must be 2-dimensional."
,
resize_dims
->
dim_size
());
MACE_CHECK
(
resize_dims
->
dim_size
()
==
1
,
"resize dim must be 2-dimensional."
,
resize_dims
->
dim_size
());
Tensor
*
output
=
this
->
Output
(
0
);
...
...
@@ -35,7 +35,7 @@ class ResizeBilinearOp : public Operator<D, T> {
index_t
in_width
=
input
->
dim
(
3
);
index_t
out_height
=
resize_dims
->
data
<
index_t
>
()[
0
];
index_t
out_width
=
resize_dims
->
data
<
index_t
>
()[
1
];
vector
<
index_t
>
out_shape
{
n
,
channels
,
out_height
,
out_width
};
vector
<
index_t
>
out_shape
{
n
,
channels
,
out_height
,
out_width
};
output
->
Resize
(
out_shape
);
const
T
*
input_ptr
=
input
->
data
<
T
>
();
...
...
@@ -45,10 +45,11 @@ class ResizeBilinearOp : public Operator<D, T> {
out_height
,
out_width
);
return
true
;
}
private:
kernels
::
ResizeBilinearFunctor
<
D
,
T
>
functor_
;
};
}
// namespace mace
}
// namespace mace
#endif // MACE_RESIZE_BILINEAR_H
#endif
// MACE_RESIZE_BILINEAR_H
mace/ops/resize_bilinear_test.cc
浏览文件 @
8ae8f575
...
...
@@ -2,9 +2,9 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/ops/resize_bilinear.h"
#include "mace/core/operator.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/resize_bilinear.h"
using
namespace
mace
;
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录