冰之2023 / Mace (forked from Xiaomi / Mace)
Commit 291a5ee6
Authored Sep 15, 2017 by Liangliang He

set clang-format to BinPackParameters=false

Parent: 578b382a
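BinPackParameters is a clang-format option, set here to false in the repository's .clang-format file: when a function declaration's parameters do not all fit on one line, clang-format puts one parameter per line instead of packing as many as possible onto each line. That re-wrapping is what every hunk below applies mechanically. A minimal before/after sketch of the effect, using a made-up declaration (Foo is hypothetical, not code from this commit):

// Before, with the default BinPackParameters: true -- parameters are packed:
void Foo(const Tensor* input, const Tensor* filter, const Tensor* bias,
         Tensor* output);

// After, with BinPackParameters: false -- once the declaration no longer fits
// on a single line, each parameter is placed on its own line:
void Foo(const Tensor* input,
         const Tensor* filter,
         const Tensor* bias,
         Tensor* output);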
Showing 25 changed files with 221 additions and 96 deletions (+221 -96)
mace/core/logging.h  +2  -1
mace/core/net.cc  +8  -4
mace/core/net.h  +8  -4
mace/core/operator.cc  +8  -3
mace/core/operator.h  +8  -3
mace/core/proto_utils.cc  +4  -2
mace/core/proto_utils.h  +12  -6
mace/core/registry.h  +2  -1
mace/core/workspace.cc  +2  -1
mace/kernels/batch_norm.h  +18  -6
mace/kernels/conv_2d.h  +6  -2
mace/kernels/conv_pool_2d_util.cc  +8  -4
mace/kernels/conv_pool_2d_util.h  +8  -4
mace/kernels/neon/batch_norm_neon.cc  +9  -3
mace/kernels/neon/conv_2d_neon.cc  +18  -9
mace/kernels/neon/conv_2d_neon_3x3.cc  +8  -4
mace/kernels/neon/max_pooling_neon_2x2.cc  +8  -4
mace/kernels/neon/max_pooling_neon_3x3.cc  +8  -4
mace/kernels/neon/pooling_neon.cc  +15  -7
mace/kernels/pooling.h  +11  -4
mace/kernels/resize_bilinear.h  +23  -10
mace/ops/batch_norm_benchmark.cc  +2  -2
mace/ops/conv_2d_benchmark.cc  +9  -2
mace/ops/ops_test_util.h  +8  -4
mace/ops/pooling_benchmark.cc  +8  -2

mace/core/logging.h
@@ -30,7 +30,8 @@ inline void MakeStringInternal(std::stringstream& ss, const T& t) {
 }
 
 template <typename T, typename... Args>
-inline void MakeStringInternal(std::stringstream& ss, const T& t,
+inline void MakeStringInternal(std::stringstream& ss,
+                               const T& t,
                                const Args&... args) {
   MakeStringInternal(ss, t);
   MakeStringInternal(ss, args...);

mace/core/net.cc
@@ -6,12 +6,14 @@
 namespace mace {
 
-NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+NetBase::NetBase(const std::shared_ptr<const NetDef>& net_def,
+                 Workspace* ws,
                  DeviceType type)
     : name_(net_def->name()) {}
 
 SimpleNet::SimpleNet(const std::shared_ptr<const NetDef>& net_def,
-                     Workspace* ws, DeviceType type)
+                     Workspace* ws,
+                     DeviceType type)
     : NetBase(net_def, ws, type) {
   VLOG(1) << "Constructing SimpleNet " << net_def->name();
   for (int idx = 0; idx < net_def->op_size(); ++idx) {
@@ -37,14 +39,16 @@ bool SimpleNet::Run() {
   return true;
 }
 
-unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
+unique_ptr<NetBase> CreateNet(const NetDef& net_def,
+                              Workspace* ws,
                               DeviceType type) {
   std::shared_ptr<NetDef> tmp_net_def(new NetDef(net_def));
   return CreateNet(tmp_net_def, ws, type);
 }
 
 unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
-                              Workspace* ws, DeviceType type) {
+                              Workspace* ws,
+                              DeviceType type) {
   unique_ptr<NetBase> net(new SimpleNet(net_def, ws, type));
   return net;
 }

mace/core/net.h
@@ -14,7 +14,8 @@ namespace mace {
 class NetBase {
  public:
-  NetBase(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+  NetBase(const std::shared_ptr<const NetDef>& net_def,
+          Workspace* ws,
           DeviceType type);
   virtual ~NetBase() noexcept {}
@@ -30,7 +31,8 @@ class NetBase {
 class SimpleNet : public NetBase {
  public:
-  SimpleNet(const std::shared_ptr<const NetDef>& net_def, Workspace* ws,
+  SimpleNet(const std::shared_ptr<const NetDef>& net_def,
+            Workspace* ws,
             DeviceType type);
 
   bool Run() override;
@@ -41,10 +43,12 @@ class SimpleNet : public NetBase {
   DISABLE_COPY_AND_ASSIGN(SimpleNet);
 };
 
-unique_ptr<NetBase> CreateNet(const NetDef& net_def, Workspace* ws,
+unique_ptr<NetBase> CreateNet(const NetDef& net_def,
+                              Workspace* ws,
                               DeviceType type);
 unique_ptr<NetBase> CreateNet(const std::shared_ptr<const NetDef>& net_def,
-                              Workspace* ws, DeviceType type);
+                              Workspace* ws,
+                              DeviceType type);
 
 }  // namespace mace

mace/core/operator.cc
@@ -11,16 +11,21 @@ std::map<int32_t, OperatorRegistry*>* gDeviceTypeRegistry() {
   return &g_device_type_registry;
 }
 
-MACE_DEFINE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef&,
+MACE_DEFINE_REGISTRY(CPUOperatorRegistry,
+                     OperatorBase,
+                     const OperatorDef&,
                      Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::CPU, CPUOperatorRegistry);
 
-MACE_DEFINE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef&,
+MACE_DEFINE_REGISTRY(NEONOperatorRegistry,
+                     OperatorBase,
+                     const OperatorDef&,
                      Workspace*);
 MACE_REGISTER_DEVICE_TYPE(DeviceType::NEON, NEONOperatorRegistry);
 
 unique_ptr<OperatorBase> CreateOperator(const OperatorDef& operator_def,
-                                        Workspace* ws, DeviceType type) {
+                                        Workspace* ws,
+                                        DeviceType type) {
   OperatorRegistry* registry = gDeviceTypeRegistry()->at(type);
   return registry->Create(operator_def.type(), operator_def, ws);
 }

mace/core/operator.h
@@ -140,7 +140,9 @@ struct DeviceTypeRegisterer {
       type, &registry_function); \
   }
 
-MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
+MACE_DECLARE_REGISTRY(CPUOperatorRegistry,
+                      OperatorBase,
+                      const OperatorDef &,
                       Workspace *);
 
 #define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
@@ -148,7 +150,9 @@ MACE_DECLARE_REGISTRY(CPUOperatorRegistry, OperatorBase, const OperatorDef &,
 #define REGISTER_CPU_OPERATOR(name, ...) \
   MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
 
-MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
+MACE_DECLARE_REGISTRY(NEONOperatorRegistry,
+                      OperatorBase,
+                      const OperatorDef &,
                       Workspace *);
 
 #define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
@@ -157,7 +161,8 @@ MACE_DECLARE_REGISTRY(NEONOperatorRegistry, OperatorBase, const OperatorDef &,
   MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
 
 unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
-                                        Workspace *ws, DeviceType type);
+                                        Workspace *ws,
+                                        DeviceType type);
 
 }  // namespace mace

mace/core/proto_utils.cc
@@ -316,7 +316,8 @@ const Argument& GetArgument(const OperatorDef& def, const string& name) {
              ProtoDebugString(def));
 }
 
-bool GetFlagArgument(const OperatorDef& def, const string& name,
+bool GetFlagArgument(const OperatorDef& def,
+                     const string& name,
                      bool def_value) {
   for (const Argument& arg : def.arg()) {
     if (arg.name() == name) {
@@ -328,7 +329,8 @@ bool GetFlagArgument(const OperatorDef& def, const string& name,
   return def_value;
 }
 
-Argument* GetMutableArgument(const string& name, const bool create_if_missing,
+Argument* GetMutableArgument(const string& name,
+                             const bool create_if_missing,
                              OperatorDef* def) {
   for (int i = 0; i < def->arg_size(); ++i) {
     if (def->arg(i).name() == name) {

mace/core/proto_utils.h
@@ -107,7 +107,8 @@ inline bool ReadProtoFromFile(const string& filename, Message* proto) {
 template <class IterableInputs = std::initializer_list<string>,
           class IterableOutputs = std::initializer_list<string>,
           class IterableArgs = std::initializer_list<Argument>>
-OperatorDef CreateOperatorDef(const string& type, const string& name,
+OperatorDef CreateOperatorDef(const string& type,
+                              const string& name,
                               const IterableInputs& inputs,
                               const IterableOutputs& outputs,
                               const IterableArgs& args) {
@@ -130,7 +131,8 @@ OperatorDef CreateOperatorDef(const string& type, const string& name,
 // to specify args.
 template <class IterableInputs = std::initializer_list<string>,
           class IterableOutputs = std::initializer_list<string>>
-inline OperatorDef CreateOperatorDef(const string& type, const string& name,
+inline OperatorDef CreateOperatorDef(const string& type,
+                                     const string& name,
                                      const IterableInputs& inputs,
                                      const IterableOutputs& outputs) {
   return CreateOperatorDef(type, name, inputs, outputs,
@@ -153,7 +155,8 @@ class ArgumentHelper {
   }
 
   template <typename Def, typename T>
-  static T GetSingleArgument(const Def& def, const string& name,
+  static T GetSingleArgument(const Def& def,
+                             const string& name,
                              const T& default_value) {
     return ArgumentHelper(def).GetSingleArgument<T>(name, default_value);
   }
@@ -165,7 +168,8 @@ class ArgumentHelper {
   template <typename Def, typename T>
   static vector<T> GetRepeatedArgument(
-      const Def& def, const string& name,
+      const Def& def,
+      const string& name,
       const std::vector<T>& default_value = std::vector<T>()) {
     return ArgumentHelper(def).GetRepeatedArgument<T>(name, default_value);
   }
@@ -223,10 +227,12 @@ class ArgumentHelper {
 };
 
 const Argument& GetArgument(const OperatorDef& def, const string& name);
-bool GetFlagArgument(const OperatorDef& def, const string& name,
+bool GetFlagArgument(const OperatorDef& def,
+                     const string& name,
                      bool def_value = false);
-Argument* GetMutableArgument(const string& name, const bool create_if_missing,
+Argument* GetMutableArgument(const string& name,
+                             const bool create_if_missing,
                              OperatorDef* def);
 
 template <typename T>

mace/core/registry.h
@@ -101,7 +101,8 @@ class Registerer {
 #define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...)                    \
   namespace {                                                                \
   static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
-      key, RegistryName(),                                                   \
+      key,                                                                   \
+      RegistryName(),                                                        \
       Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);                \
   }

mace/core/workspace.cc
@@ -16,7 +16,8 @@ vector<string> Workspace::Tensors() const {
   return names;
 }
 
-Tensor* Workspace::CreateTensor(const string& name, Allocator* alloc,
+Tensor* Workspace::CreateTensor(const string& name,
+                                Allocator* alloc,
                                 DataType type) {
   if (HasTensor(name)) {
     VLOG(1) << "Tensor " << name << " already exists. Skipping.";

mace/kernels/batch_norm.h
@@ -18,9 +18,15 @@ struct BatchNormFunctor {
   BatchNormFunctor(const float variance_epsilon)
       : variance_epsilon_(variance_epsilon) {}
 
-  void operator()(const T* input, const T* scale, const T* offset,
-                  const T* mean, const T* var, const index_t n,
-                  const index_t channel, const index_t sample_size, T* output) {
+  void operator()(const T* input,
+                  const T* scale,
+                  const T* offset,
+                  const T* mean,
+                  const T* var,
+                  const index_t n,
+                  const index_t channel,
+                  const index_t sample_size,
+                  T* output) {
     // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
     // The calculation formula for inference is
     // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +
@@ -49,9 +55,15 @@ struct BatchNormFunctor {
 template <>
 void BatchNormFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const float* scale, const float* offset,
-    const float* mean, const float* var, const index_t n,
-    const index_t channel, const index_t sample_size, float* output);
+    const float* input,
+    const float* scale,
+    const float* offset,
+    const float* mean,
+    const float* var,
+    const index_t n,
+    const index_t channel,
+    const index_t sample_size,
+    float* output);
 
 }  // namepsace kernels
 }  // namespace mace

mace/kernels/conv_2d.h
@@ -102,8 +102,12 @@ class Conv2dFunctor {
 template <>
 void Conv2dFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const index_t* input_shape, const float* filter,
-    const index_t* filter_shape, const float* bias, float* output,
+    const float* input,
+    const index_t* input_shape,
+    const float* filter,
+    const index_t* filter_shape,
+    const float* bias,
+    float* output,
     const index_t* output_shape);
 
 }  // namespace kernels

mace/kernels/conv_pool_2d_util.cc
@@ -9,8 +9,10 @@ namespace kernels {
 void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
                               const index_t *filter_shape,  // OIHW
-                              const int *dilations, const int *strides,
-                              Padding padding, index_t *output_shape,
+                              const int *dilations,
+                              const int *strides,
+                              Padding padding,
+                              index_t *output_shape,
                               int *padding_size) {
   MACE_CHECK(dilations[0] > 0 && dilations[1] > 0,
              "Invalid dilations, must >= 1");
@@ -69,8 +71,10 @@ void CalcPaddingAndOutputSize(const index_t *input_shape, // NCHW
   output_shape[3] = output_width;
 }
 
-void ConstructInputWithPadding(const float *input, const index_t *input_shape,
-                               const int *paddings, Tensor *output_tensor) {
+void ConstructInputWithPadding(const float *input,
+                               const index_t *input_shape,
+                               const int *paddings,
+                               Tensor *output_tensor) {
   index_t batch = input_shape[0];
   index_t channels = input_shape[1];
   index_t height = input_shape[2];

mace/kernels/conv_pool_2d_util.h
@@ -19,12 +19,16 @@ namespace kernels {
 void CalcPaddingAndOutputSize(const index_t *input_shape,   // NCHW
                               const index_t *filter_shape,  // OIHW
-                              const int *dilations, const int *strides,
-                              Padding padding, index_t *output_shape,
+                              const int *dilations,
+                              const int *strides,
+                              Padding padding,
+                              index_t *output_shape,
                               int *padding_size);
 
-void ConstructInputWithPadding(const float *input, const index_t *input_shape,
-                               const int *paddings, Tensor *output_tensor);
+void ConstructInputWithPadding(const float *input,
+                               const index_t *input_shape,
+                               const int *paddings,
+                               Tensor *output_tensor);
 
 }  // namespace kernels
 }  // namespace mace

mace/kernels/neon/batch_norm_neon.cc
@@ -10,9 +10,15 @@ namespace kernels {
 template <>
 void BatchNormFunctor<DeviceType::NEON, float>::operator()(
-    const float* input, const float* scale, const float* offset,
-    const float* mean, const float* var, const index_t n,
-    const index_t channel, const index_t sample_size, float* output) {
+    const float* input,
+    const float* scale,
+    const float* offset,
+    const float* mean,
+    const float* var,
+    const index_t n,
+    const index_t channel,
+    const index_t sample_size,
+    float* output) {
   // Batch normalization in the paper https://arxiv.org/abs/1502.03167 .
   // The calculation formula for inference is
   // Y = \frac{ \scale } { \sqrt{var+\variance_epsilon} } * X +

mace/kernels/neon/conv_2d_neon.cc
@@ -8,17 +8,26 @@
 namespace mace {
 namespace kernels {
 
-extern void Conv2dNeonK1x1S1(const float* input, const index_t* input_shape,
-                             const float* filter, const float* bias,
-                             float* output, const index_t* output_shape);
+extern void Conv2dNeonK1x1S1(const float* input,
+                             const index_t* input_shape,
+                             const float* filter,
+                             const float* bias,
+                             float* output,
+                             const index_t* output_shape);
 
-extern void Conv2dNeonK3x3S1(const float* input, const index_t* input_shape,
-                             const float* filter, const float* bias,
-                             float* output, const index_t* output_shape);
+extern void Conv2dNeonK3x3S1(const float* input,
+                             const index_t* input_shape,
+                             const float* filter,
+                             const float* bias,
+                             float* output,
+                             const index_t* output_shape);
 
-extern void Conv2dNeonK5x5S1(const float* input, const index_t* input_shape,
-                             const float* filter, const float* bias,
-                             float* output, const index_t* output_shape);
+extern void Conv2dNeonK5x5S1(const float* input,
+                             const index_t* input_shape,
+                             const float* filter,
+                             const float* bias,
+                             float* output,
+                             const index_t* output_shape);
 
 template <>

mace/kernels/neon/conv_2d_neon_3x3.cc
@@ -60,7 +60,8 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
         float32x4_t sum0 = vdupq_n_f32(.0f);
         float32x4_t sum1 = vdupq_n_f32(.0f);
 
         float32x4_t row0_ext_0 = vld1q_f32(row[0]);  // 0123
-        float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize);  // 4567
+        float32x4_t row0_latter =
+            vld1q_f32(row[0] + kRegisterSize);  // 4567
         float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1);  // 1234
         float32x4_t row0_ext_2 =
@@ -71,7 +72,8 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
         sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
 
         float32x4_t row1_ext_0 = vld1q_f32(row[1]);  // 0123
-        float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize);  // 4567
+        float32x4_t row1_latter =
+            vld1q_f32(row[1] + kRegisterSize);  // 4567
         float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1);  // 1234
         float32x4_t row1_ext_2 =
@@ -158,7 +160,8 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
       for (; count > 0; --count) {
         float32x4_t sum0 = vdupq_n_f32(.0f);
 
         float32x4_t row0_ext_0 = vld1q_f32(row[0]);  // 0123
-        float32x4_t row0_latter = vld1q_f32(row[0] + kRegisterSize);  // 4567
+        float32x4_t row0_latter =
+            vld1q_f32(row[0] + kRegisterSize);  // 4567
         float32x4_t row0_ext_1 = vextq_f32(row0_ext_0, row0_latter, 1);  // 1234
         float32x4_t row0_ext_2 =
@@ -169,7 +172,8 @@ void Conv2dNeonK3x3S1(const float* input, // NCHW
         sum0 = vfmaq_laneq_f32(sum0, row0_ext_2, filter0, 2);
 
         float32x4_t row1_ext_0 = vld1q_f32(row[1]);  // 0123
-        float32x4_t row1_latter = vld1q_f32(row[1] + kRegisterSize);  // 4567
+        float32x4_t row1_latter =
+            vld1q_f32(row[1] + kRegisterSize);  // 4567
         float32x4_t row1_ext_1 = vextq_f32(row1_ext_0, row1_latter, 1);  // 1234
         float32x4_t row1_ext_2 =

mace/kernels/neon/max_pooling_neon_2x2.cc
@@ -11,8 +11,10 @@
 namespace mace {
 namespace kernels {
 
-void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
-                            float *output, const index_t *out_shape,
+void PoolingMaxNeonK2x2S2x2(const float *input,
+                            const index_t *in_shape,
+                            float *output,
+                            const index_t *out_shape,
                             const int *paddings) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
@@ -101,8 +103,10 @@ void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
 }
 
 // assume the input has already been padded
-void PoolingMaxNeonK2x2S2x2Padded(const float *input, const index_t *in_shape,
-                                  float *output, const index_t *out_shape) {
+void PoolingMaxNeonK2x2S2x2Padded(const float *input,
+                                  const index_t *in_shape,
+                                  float *output,
+                                  const index_t *out_shape) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
   index_t in_height = in_shape[2];

mace/kernels/neon/max_pooling_neon_3x3.cc
@@ -11,8 +11,10 @@
 namespace mace {
 namespace kernels {
 
-void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
-                            float *output, const index_t *out_shape,
+void PoolingMaxNeonK3x3S2x2(const float *input,
+                            const index_t *in_shape,
+                            float *output,
+                            const index_t *out_shape,
                             const int *paddings) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
@@ -127,8 +129,10 @@ void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
 }
 
 // assume the input has already been padded
-void PoolingMaxNeonK3x3S2x2Padded(const float *input, const index_t *in_shape,
-                                  float *output, const index_t *out_shape) {
+void PoolingMaxNeonK3x3S2x2Padded(const float *input,
+                                  const index_t *in_shape,
+                                  float *output,
+                                  const index_t *out_shape) {
   index_t batch = in_shape[0];
   index_t channels = in_shape[1];
   index_t in_height = in_shape[2];

mace/kernels/neon/pooling_neon.cc
@@ -9,26 +9,34 @@
 namespace mace {
 namespace kernels {
 
-extern void PoolingMaxNeonK2x2S2x2(const float *input, const index_t *in_shape,
-                                   float *output, const index_t *out_shape,
+extern void PoolingMaxNeonK2x2S2x2(const float *input,
+                                   const index_t *in_shape,
+                                   float *output,
+                                   const index_t *out_shape,
                                    const int *paddings);
 
-extern void PoolingMaxNeonK3x3S2x2(const float *input, const index_t *in_shape,
-                                   float *output, const index_t *out_shape,
+extern void PoolingMaxNeonK3x3S2x2(const float *input,
+                                   const index_t *in_shape,
+                                   float *output,
+                                   const index_t *out_shape,
                                    const int *paddings);
 
 #ifdef __COPY_MAKE_PADDING
 extern void PoolingMaxNeonK2x2S2x2Padded(const float *input,
-                                         const index_t *in_shape, float *output,
+                                         const index_t *in_shape,
+                                         float *output,
                                          const index_t *out_shape);
 extern void PoolingMaxNeonK3x3S2x2Padded(const float *input,
-                                         const index_t *in_shape, float *output,
+                                         const index_t *in_shape,
+                                         float *output,
                                          const index_t *out_shape);
 #endif
 
 template <>
 void PoolingFunctor<DeviceType::NEON, float>::operator()(
-    const float *input, const index_t *input_shape, float *output,
+    const float *input,
+    const index_t *input_shape,
+    float *output,
     const index_t *output_shape) {
   if (kernels_[0] == 2 && kernels_[1] == 2 && strides_[0] == 2 &&
       strides_[1] == 2 && pooling_type_ == MAX) {

mace/kernels/pooling.h
@@ -20,15 +20,20 @@ namespace kernels {
 template <DeviceType D, typename T>
 class PoolingFunctor {
  public:
-  PoolingFunctor(const PoolingType pooling_type, const int *kernels,
-                 const int *strides, const int *paddings, const int *dilations)
+  PoolingFunctor(const PoolingType pooling_type,
+                 const int *kernels,
+                 const int *strides,
+                 const int *paddings,
+                 const int *dilations)
       : pooling_type_(pooling_type),
         kernels_(kernels),
        strides_(strides),
        paddings_(paddings),
        dilations_(dilations) {}
 
-  void operator()(const T *input, const index_t *input_shape, T *output,
+  void operator()(const T *input,
+                  const index_t *input_shape,
+                  T *output,
                   const index_t *output_shape) {
     index_t batch = output_shape[0];
     index_t channels = output_shape[1];
@@ -118,7 +123,9 @@ class PoolingFunctor {
 template <>
 void PoolingFunctor<DeviceType::NEON, float>::operator()(
-    const float *input, const index_t *input_shape, float *output,
+    const float *input,
+    const index_t *input_shape,
+    float *output,
     const index_t *output_shape);
 
 }  // namespace kernels

mace/kernels/resize_bilinear.h
@@ -19,7 +19,8 @@ struct CachedInterpolation {
   float lerp;
 };
 
-inline float CalculateResizeScale(index_t in_size, index_t out_size,
+inline float CalculateResizeScale(index_t in_size,
+                                  index_t out_size,
                                   bool align_corners) {
   return (align_corners && out_size > 1)
              ? (in_size - 1) / static_cast<float>(out_size - 1)
@@ -40,21 +41,28 @@ inline void ComputeInterpolationWeights(const index_t out_size,
   }
 }
 
-inline float ComputeLerp(const float top_left, const float top_right,
-                         const float bottom_left, const float bottom_right,
-                         const float x_lerp, const float y_lerp) {
+inline float ComputeLerp(const float top_left,
+                         const float top_right,
+                         const float bottom_left,
+                         const float bottom_right,
+                         const float x_lerp,
+                         const float y_lerp) {
   const float top = top_left + (top_right - top_left) * x_lerp;
   const float bottom = bottom_left + (bottom_right - bottom_left) * x_lerp;
   return top + (bottom - top) * y_lerp;
 }
 
 template <typename T>
-void ResizeImage(const T* images, const index_t batch_size,
-                 const index_t in_height, const index_t in_width,
-                 const index_t out_height, const index_t out_width,
+void ResizeImage(const T* images,
+                 const index_t batch_size,
+                 const index_t in_height,
+                 const index_t in_width,
+                 const index_t out_height,
+                 const index_t out_width,
                  const index_t channels,
                  const std::vector<CachedInterpolation>& xs_vec,
-                 const std::vector<CachedInterpolation>& ys, float* output) {
+                 const std::vector<CachedInterpolation>& ys,
+                 float* output) {
   const index_t in_channel_size = in_height * in_width;
   const index_t in_batch_num_values = channels * in_channel_size;
   const index_t out_channel_size = out_height * out_width;
@@ -98,8 +106,13 @@ struct ResizeBilinearFunctor {
   ResizeBilinearFunctor(bool align_corners) : align_corners_(align_corners) {}
 
-  void operator()(const T* input, T* output, index_t n, index_t channels,
-                  index_t in_height, index_t in_width, index_t out_height,
+  void operator()(const T* input,
+                  T* output,
+                  index_t n,
+                  index_t channels,
+                  index_t in_height,
+                  index_t in_width,
+                  index_t out_height,
                   index_t out_width) {
     if (out_height == in_height && out_width == in_width) {
       std::copy(input, input + channels * in_height * in_width, output);

mace/ops/batch_norm_benchmark.cc
@@ -8,8 +8,8 @@
 namespace mace {
 
 template <DeviceType D, typename T>
-static void BatchNorm(int iters, int batch, int channels, int height,
-                      int width) {
+static void BatchNorm(
+    int iters, int batch, int channels, int height, int width) {
   mace::testing::StopTiming();
 
   OpsTestNet net;

mace/ops/conv_2d_benchmark.cc
@@ -12,8 +12,15 @@
 namespace mace {
 
 template <DeviceType D, typename T>
-static void Conv2d(int iters, int batch, int channels, int height, int width,
-                   int kernel_h, int kernel_w, int stride, Padding padding,
+static void Conv2d(int iters,
+                   int batch,
+                   int channels,
+                   int height,
+                   int width,
+                   int kernel_h,
+                   int kernel_w,
+                   int stride,
+                   Padding padding,
                    int output_channels) {
   mace::testing::StopTiming();

mace/ops/ops_test_util.h
@@ -44,7 +44,8 @@ class OpsTestNet {
   OpsTestNet() {}
 
   template <typename T>
-  void AddInputFromArray(const char* name, const std::vector<index_t>& shape,
+  void AddInputFromArray(const char* name,
+                         const std::vector<index_t>& shape,
                          const std::vector<T>& data) {
     Tensor* input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -55,7 +56,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddRepeatedInput(const char* name, const std::vector<index_t>& shape,
+  void AddRepeatedInput(const char* name,
+                        const std::vector<index_t>& shape,
                         const T data) {
     Tensor* input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -66,7 +68,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddRandomInput(const char* name, const std::vector<index_t>& shape,
+  void AddRandomInput(const char* name,
+                      const std::vector<index_t>& shape,
                       bool positive = false) {
     Tensor* input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());
@@ -84,7 +87,8 @@ class OpsTestNet {
   }
 
   template <typename T>
-  void AddFixedInput(const char* name, const std::vector<index_t>& shape,
+  void AddFixedInput(const char* name,
+                     const std::vector<index_t>& shape,
                      T value) {
     Tensor* input =
         ws_.CreateTensor(name, cpu_allocator(), DataTypeToEnum<T>::v());

mace/ops/pooling_benchmark.cc
@@ -12,8 +12,14 @@ using namespace mace;
 using namespace mace::kernels;
 
 template <DeviceType D>
-static void Pooling(int iters, int batch, int channels, int height, int width,
-                    int kernel, int stride, Padding padding,
+static void Pooling(int iters,
+                    int batch,
+                    int channels,
+                    int height,
+                    int width,
+                    int kernel,
+                    int stride,
+                    Padding padding,
                     PoolingType pooling_type) {
   mace::testing::StopTiming();