Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
baf2dcd1
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
baf2dcd1
编写于
12月 28, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Resolve operator and allocator registering static variable issue
上级
faadb474
变更
57
隐藏空白更改
内联
并排
Showing
57 changed file
with
1090 addition
and
951 deletion
+1090
-951
mace/core/BUILD
mace/core/BUILD
+6
-17
mace/core/allocator.cc
mace/core/allocator.cc
+2
-0
mace/core/mace.cc
mace/core/mace.cc
+6
-3
mace/core/net.cc
mace/core/net.cc
+24
-19
mace/core/net.h
mace/core/net.h
+18
-10
mace/core/operator.cc
mace/core/operator.cc
+63
-45
mace/core/operator.h
mace/core/operator.h
+24
-58
mace/core/public/mace.h
mace/core/public/mace.h
+4
-1
mace/core/registry.h
mace/core/registry.h
+16
-21
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+0
-2
mace/core/tensor.h
mace/core/tensor.h
+2
-1
mace/examples/BUILD
mace/examples/BUILD
+0
-1
mace/kernels/BUILD
mace/kernels/BUILD
+0
-1
mace/ops/addn.cc
mace/ops/addn.cc
+22
-16
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+6
-6
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+22
-16
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+13
-11
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+75
-45
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+12
-8
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+11
-9
mace/ops/bias_add.cc
mace/ops/bias_add.cc
+26
-20
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+11
-11
mace/ops/bias_add_test.cc
mace/ops/bias_add_test.cc
+27
-20
mace/ops/buffer_to_image.cc
mace/ops/buffer_to_image.cc
+12
-8
mace/ops/channel_shuffle.cc
mace/ops/channel_shuffle.cc
+7
-4
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+2
-1
mace/ops/channel_shuffle_test.cc
mace/ops/channel_shuffle_test.cc
+0
-1
mace/ops/concat.cc
mace/ops/concat.cc
+23
-16
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+6
-3
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+18
-26
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+29
-22
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+31
-27
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+80
-61
mace/ops/core_test.cc
mace/ops/core_test.cc
+11
-8
mace/ops/depthwise_conv2d.cc
mace/ops/depthwise_conv2d.cc
+17
-12
mace/ops/depthwise_conv2d_test.cc
mace/ops/depthwise_conv2d_test.cc
+6
-8
mace/ops/depthwise_conv_2d_benchmark.cc
mace/ops/depthwise_conv_2d_benchmark.cc
+15
-16
mace/ops/fused_conv_2d.cc
mace/ops/fused_conv_2d.cc
+25
-20
mace/ops/fused_conv_2d_test.cc
mace/ops/fused_conv_2d_test.cc
+75
-50
mace/ops/global_avg_pooling.cc
mace/ops/global_avg_pooling.cc
+12
-8
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+2
-1
mace/ops/image_to_buffer.cc
mace/ops/image_to_buffer.cc
+12
-8
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+26
-19
mace/ops/pooling.cc
mace/ops/pooling.cc
+27
-20
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+2
-1
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+46
-33
mace/ops/relu.cc
mace/ops/relu.cc
+22
-16
mace/ops/relu_benchmark.cc
mace/ops/relu_benchmark.cc
+5
-4
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+36
-49
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+22
-16
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+16
-15
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+14
-12
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+12
-8
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+15
-12
mace/ops/space_to_batch_test.cc
mace/ops/space_to_batch_test.cc
+66
-96
mace/python/tools/model.template
mace/python/tools/model.template
+6
-6
tools/bazel-adb-run.sh
tools/bazel-adb-run.sh
+2
-3
未找到文件。
mace/core/BUILD
浏览文件 @
baf2dcd1
...
...
@@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_profiling_enabled"
,
"if_embed_binary_program"
)
cc_library
(
name
=
"
opencl_runtim
e"
,
name
=
"
cor
e"
,
srcs
=
glob
([
"*.cc"
,
"runtime/opencl/*.cc"
,
]),
hdrs
=
glob
([
"*.h"
,
"public/*.h"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/*.h"
,
]),
copts
=
[
"-std=c++11"
,
"-D_GLIBCXX_USE_C99_MATH_TR1"
]
+
if_profiling_enabled
([
"-DMACE_OPENCL_PROFILING"
])
+
if_embed_binary_program
([
"-DMACE_EMBED_BINARY_PROGRAM"
]),
linkopts
=
[
"-ldl"
]
,
linkopts
=
if_android
([
"-pie"
,
"-ldl"
])
,
deps
=
[
"
:core
"
,
"
//mace/utils:utils_hdrs
"
,
"//mace/utils:logging"
,
"//mace/utils:tuner"
,
"@opencl_headers//:opencl20_headers"
,
],
alwayslink
=
1
,
)
cc_library
(
name
=
"core"
,
srcs
=
glob
([
"*.cc"
]),
hdrs
=
glob
([
"*.h"
,
"public/*.h"
]),
copts
=
[
"-std=c++11"
,
"-D_GLIBCXX_USE_C99_MATH_TR1"
],
linkopts
=
if_android
([
"-pie"
]),
deps
=
[
"//mace/utils:utils_hdrs"
,
"//mace/utils:logging"
,
],
)
cc_library
(
...
...
mace/core/allocator.cc
浏览文件 @
baf2dcd1
...
...
@@ -3,6 +3,7 @@
//
#include "mace/core/allocator.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
namespace
mace
{
...
...
@@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
CPU
,
new
CPUAllocator
());
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
NEON
,
new
CPUAllocator
());
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
OPENCL
,
new
OpenCLAllocator
());
}
// namespace mace
mace/core/mace.cc
浏览文件 @
baf2dcd1
...
...
@@ -5,6 +5,7 @@
#include "mace/core/public/mace.h"
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/logging.h"
...
...
@@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const {
// Mace Engine
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
:
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
)
{
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
)
{
ws_
->
LoadModelTensor
(
*
net_def
,
device_type
);
// Init model
auto
net
=
CreateNet
(
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
if
(
!
net
->
Run
())
{
LOG
(
FATAL
)
<<
"Net init run failed"
;
}
ws_
->
CreateTensor
(
"mace_input_node:0"
,
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
net_
=
std
::
move
(
CreateNet
(
*
net_def
,
ws_
.
get
(),
device_type
));
net_
=
std
::
move
(
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
));
}
MaceEngine
::~
MaceEngine
()
=
default
;
bool
MaceEngine
::
Run
(
const
float
*
input
,
...
...
mace/core/net.cc
浏览文件 @
baf2dcd1
...
...
@@ -3,22 +3,24 @@
//
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/utils/utils.h"
namespace
mace
{
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
)
:
name_
(
net_def
->
name
())
{}
:
op_registry_
(
op_registry
),
name_
(
net_def
->
name
())
{}
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
:
NetBase
(
net_def
,
ws
,
type
),
device_type_
(
type
){
:
NetBase
(
op_registry
,
net_def
,
ws
,
type
),
device_type_
(
type
)
{
VLOG
(
1
)
<<
"Constructing SimpleNet "
<<
net_def
->
name
();
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
...
...
@@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
<<
operator_def
.
type
();
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
OperatorDef
temp_def
(
operator_def
);
op
=
CreateOperator
(
temp_def
,
ws
,
type
,
mode
);
op
=
op_registry
->
CreateOperator
(
temp_def
,
ws
,
type
,
mode
);
if
(
op
)
{
operators_
.
emplace_back
(
std
::
move
(
op
));
}
...
...
@@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
}
if
(
run_metadata
!=
nullptr
)
{
OperatorStats
op_stats
=
{
op
->
debug_def
().
name
(),
op
->
debug_def
().
type
(),
call_stats
};
OperatorStats
op_stats
=
{
op
->
debug_def
().
name
(),
op
->
debug_def
().
type
(),
call_stats
};
run_metadata
->
op_stats
.
emplace_back
(
op_stats
);
}
...
...
@@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
return
true
;
}
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
std
::
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
std
::
shared_ptr
<
NetDef
>
tmp_net_def
(
new
NetDef
(
net_def
));
return
CreateNet
(
tmp_net_def
,
ws
,
type
,
mode
);
return
CreateNet
(
op_registry
,
tmp_net_def
,
ws
,
type
,
mode
);
}
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
unique_ptr
<
NetBase
>
net
(
new
SimpleNet
(
net_def
,
ws
,
type
,
mode
));
std
::
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
unique_ptr
<
NetBase
>
net
(
new
SimpleNet
(
op_registry
,
net_def
,
ws
,
type
,
mode
));
return
net
;
}
...
...
mace/core/net.h
浏览文件 @
baf2dcd1
...
...
@@ -6,6 +6,7 @@
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/core/operator.h"
#include "mace/core/public/mace.h"
namespace
mace
{
...
...
@@ -16,7 +17,8 @@ class Workspace;
class
NetBase
{
public:
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
NetBase
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
);
virtual
~
NetBase
()
noexcept
{}
...
...
@@ -27,13 +29,15 @@ class NetBase {
protected:
string
name_
;
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry_
;
DISABLE_COPY_AND_ASSIGN
(
NetBase
);
};
class
SimpleNet
:
public
NetBase
{
public:
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
SimpleNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
...
...
@@ -47,14 +51,18 @@ class SimpleNet : public NetBase {
DISABLE_COPY_AND_ASSIGN
(
SimpleNet
);
};
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
std
::
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
NetDef
&
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
std
::
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
}
// namespace mace
...
...
mace/core/operator.cc
浏览文件 @
baf2dcd1
...
...
@@ -2,12 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <sstream>
#include "mace/core/operator.h"
namespace
mace
{
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
operator_ws_
(
ws
),
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{}
OpKeyBuilder
::
OpKeyBuilder
(
const
char
*
op_name
)
:
op_name_
(
op_name
)
{}
OpKeyBuilder
::
OpKeyBuilder
(
const
char
*
op_name
)
:
op_name_
(
op_name
)
{}
OpKeyBuilder
&
OpKeyBuilder
::
Device
(
DeviceType
device
)
{
device_type_
=
device
;
}
OpKeyBuilder
&
OpKeyBuilder
::
TypeConstraint
(
const
char
*
attr_name
,
const
DataType
allowed
)
{
...
...
@@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
const
std
::
string
OpKeyBuilder
::
Build
()
{
static
const
std
::
vector
<
std
::
string
>
type_order
=
{
"T"
};
std
::
string
key
=
op_name_
;
std
::
stringstream
ss
;
ss
<<
op_name_
;
ss
<<
device_type_
;
for
(
auto
type
:
type_order
)
{
key
+=
type
+
"_"
+
DataTypeToString
(
type_constraint_
[
type
]);
ss
<<
type
<<
"_"
<<
DataTypeToString
(
type_constraint_
[
type
]);
}
return
key
;
}
std
::
map
<
int32_t
,
OperatorRegistry
*>
*
gDeviceTypeRegistry
()
{
static
std
::
map
<
int32_t
,
OperatorRegistry
*>
g_device_type_registry
;
return
&
g_device_type_registry
;
return
ss
.
str
();
}
MACE_DEFINE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
CPU
,
CPUOperatorRegistry
);
MACE_DEFINE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
NEON
,
NEONOperatorRegistry
);
MACE_DEFINE_REGISTRY
(
OPENCLOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
OPENCL
,
OPENCLOperatorRegistry
);
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
OperatorRegistry
*
registry
=
gDeviceTypeRegistry
()
->
at
(
type
);
const
int
dtype
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
));
const
int
op_mode_i
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"mode"
,
static_cast
<
int
>
(
NetMode
::
NORMAL
));
std
::
unique_ptr
<
OperatorBase
>
OperatorRegistry
::
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
const
{
const
int
dtype
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
));
const
int
op_mode_i
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"mode"
,
static_cast
<
int
>
(
NetMode
::
NORMAL
));
const
NetMode
op_mode
=
static_cast
<
NetMode
>
(
op_mode_i
);
if
(
op_mode
==
mode
)
{
return
registry
->
Create
(
OpKeyBuilder
(
operator_def
.
type
().
data
())
.
TypeConstraint
(
"T"
,
static_cast
<
DataType
>
(
dtype
))
.
Build
(),
operator_def
,
ws
);
return
registry_
.
Create
(
OpKeyBuilder
(
operator_def
.
type
().
data
())
.
Device
(
type
)
.
TypeConstraint
(
"T"
,
static_cast
<
DataType
>
(
dtype
))
.
Build
(),
operator_def
,
ws
);
}
else
{
return
nullptr
;
}
}
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
operator_ws_
(
ws
),
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{}
extern
void
Register_AddN
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BatchNorm
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BatchToSpaceND
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BiasAdd
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BufferToImage
(
OperatorRegistry
*
op_registry
);
extern
void
Register_ChannelShuffle
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Concat
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Conv2D
(
OperatorRegistry
*
op_registry
);
extern
void
Register_DepthwiseConv2d
(
OperatorRegistry
*
op_registry
);
extern
void
Register_FusedConv2D
(
OperatorRegistry
*
op_registry
);
extern
void
Register_GlobalAvgPooling
(
OperatorRegistry
*
op_registry
);
extern
void
Register_ImageToBuffer
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Pooling
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Relu
(
OperatorRegistry
*
op_registry
);
extern
void
Register_ResizeBilinear
(
OperatorRegistry
*
op_registry
);
extern
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
);
OperatorRegistry
::
OperatorRegistry
()
{
Register_AddN
(
this
);
Register_BatchNorm
(
this
);
Register_BatchToSpaceND
(
this
);
Register_BiasAdd
(
this
);
Register_BufferToImage
(
this
);
Register_ChannelShuffle
(
this
);
Register_Concat
(
this
);
Register_Conv2D
(
this
);
Register_DepthwiseConv2d
(
this
);
Register_FusedConv2D
(
this
);
Register_GlobalAvgPooling
(
this
);
Register_ImageToBuffer
(
this
);
Register_Pooling
(
this
);
Register_Relu
(
this
);
Register_ResizeBilinear
(
this
);
Register_SpaceToBatchND
(
this
);
}
}
// namespace mace
mace/core/operator.h
浏览文件 @
baf2dcd1
...
...
@@ -5,13 +5,13 @@
#ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#include "mace/core/common.h"
#include "mace/core/arg_helper.h"
#include "mace/core/common.h"
#include "mace/core/future.h"
#include "mace/core/public/mace.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/public/mace.h"
namespace
mace
{
...
...
@@ -102,7 +102,7 @@ class Operator : public OperatorBase {
}
}
}
virtual
bool
Run
(
StatsFuture
*
future
)
override
=
0
;
virtual
bool
Run
(
StatsFuture
*
future
)
override
=
0
;
~
Operator
()
noexcept
override
{}
};
...
...
@@ -122,29 +122,12 @@ class Operator : public OperatorBase {
#define OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
OperatorRegistry
;
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
*
(
*
RegistryFunction
)();
std
::
map
<
int32_t
,
OperatorRegistry
*>
*
gDeviceTypeRegistry
();
struct
DeviceTypeRegisterer
{
explicit
DeviceTypeRegisterer
(
int32_t
type
,
RegistryFunction
func
)
{
if
(
gDeviceTypeRegistry
()
->
count
(
type
))
{
LOG
(
ERROR
)
<<
"Device type "
<<
type
<<
"registered twice. This should not happen. Did you have "
"duplicated numbers assigned to different devices?"
;
std
::
exit
(
1
);
}
// Calling the registry function to get the actual registry pointer.
gDeviceTypeRegistry
()
->
emplace
(
type
,
func
());
}
};
class
OpKeyBuilder
{
public:
explicit
OpKeyBuilder
(
const
char
*
op_name
);
OpKeyBuilder
&
Device
(
DeviceType
device
);
OpKeyBuilder
&
TypeConstraint
(
const
char
*
attr_name
,
const
DataType
allowed
);
template
<
typename
T
>
...
...
@@ -154,6 +137,7 @@ class OpKeyBuilder {
private:
std
::
string
op_name_
;
DeviceType
device_type_
;
std
::
map
<
std
::
string
,
DataType
>
type_constraint_
;
};
...
...
@@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
return
this
->
TypeConstraint
(
attr_name
,
DataTypeToEnum
<
T
>::
value
);
}
class
OperatorRegistry
{
public:
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
RegistryType
;
OperatorRegistry
();
~
OperatorRegistry
()
=
default
;
RegistryType
*
registry
()
{
return
&
registry_
;
};
std
::
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
const
;
private:
RegistryType
registry_
;
DISABLE_COPY_AND_ASSIGN
(
OperatorRegistry
);
};
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \
type, ®istry_function); \
}
MACE_DECLARE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY
(
OPENCLOperatorRegistry
,
MACE_DECLARE_REGISTRY
(
OpRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_OPENCL_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(OPENCLOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_OPENCL_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(OPENCLOperatorRegistry, name, __VA_ARGS__)
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
);
#define REGISTER_OPERATOR(op_registry, name, ...) \
MACE_REGISTER_CLASS(OpRegistry, op_registry->registry(), name, __VA_ARGS__)
}
// namespace mace
...
...
mace/core/public/mace.h
浏览文件 @
baf2dcd1
...
...
@@ -302,10 +302,12 @@ class NetDef {
class
Workspace
;
class
NetBase
;
class
OperatorRegistry
;
class
MaceEngine
{
public:
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
);
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
);
~
MaceEngine
();
bool
Run
(
const
float
*
input
,
const
std
::
vector
<
int64_t
>
&
input_shape
,
...
...
@@ -314,6 +316,7 @@ class MaceEngine {
MaceEngine
&
operator
=
(
const
MaceEngine
&
)
=
delete
;
private:
std
::
shared_ptr
<
OperatorRegistry
>
op_registry_
;
DeviceType
device_type_
;
std
::
unique_ptr
<
Workspace
>
ws_
;
std
::
unique_ptr
<
NetBase
>
net_
;
...
...
mace/core/registry.h
浏览文件 @
baf2dcd1
...
...
@@ -17,24 +17,27 @@ class Registry {
Registry
()
:
registry_
()
{}
void
Register
(
const
SrcType
&
key
,
Creator
creator
)
{
VLOG
(
2
)
<<
"Registering: "
<<
key
;
std
::
lock_guard
<
std
::
mutex
>
lock
(
register_mutex_
);
MACE_CHECK
(
registry_
.
count
(
key
)
==
0
,
"Key already registered."
);
registry_
[
key
]
=
creator
;
}
inline
bool
Has
(
const
SrcType
&
key
)
{
return
registry_
.
count
(
key
)
!=
0
;
}
inline
bool
Has
(
const
SrcType
&
key
)
const
{
return
registry_
.
count
(
key
)
!=
0
;
}
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
{
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
const
{
if
(
registry_
.
count
(
key
)
==
0
)
{
LOG
(
FATAL
)
<<
"Key not registered: "
<<
key
;
}
return
registry_
[
key
]
(
args
...);
return
registry_
.
at
(
key
)
(
args
...);
}
/**
* Returns the keys currently registered as a vector.
*/
vector
<
SrcType
>
Keys
()
{
vector
<
SrcType
>
Keys
()
const
{
vector
<
SrcType
>
keys
;
for
(
const
auto
&
it
:
registry_
)
{
keys
.
push_back
(
it
.
first
);
...
...
@@ -77,39 +80,31 @@ class Registerer {
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName;
/*
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__> *RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__> *registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \
}
*/
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
/*
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
*/
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, RegistryName(), __VA_ARGS__);
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CREATOR(RegistryName, key, __VA_ARGS__)
#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \
Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(l_##RegistryName)( \
key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);
#define MACE_REGISTER_CLASS(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, key, __VA_ARGS__)
#define MACE_REGISTER_CLASS(RegistryName,
registry,
key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName,
registry,
key, __VA_ARGS__)
}
// namespace mace
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
baf2dcd1
...
...
@@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) {
bool
OpenCLAllocator
::
OnHost
()
{
return
false
;
}
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
OPENCL
,
new
OpenCLAllocator
());
}
// namespace mace
mace/core/tensor.h
浏览文件 @
baf2dcd1
...
...
@@ -105,7 +105,8 @@ class Tensor {
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
inline
index_t
dim
(
unsigned
int
index
)
const
{
MACE_CHECK
(
index
<
shape_
.
size
(),
"Exceeding ndim limit"
);
MACE_CHECK
(
index
<
shape_
.
size
(),
"Dim out of range: "
,
index
,
" >= "
,
shape_
.
size
());
return
shape_
[
index
];
}
...
...
mace/examples/BUILD
浏览文件 @
baf2dcd1
...
...
@@ -11,7 +11,6 @@ cc_binary(
deps
=
[
"//mace/core"
,
"//mace/ops"
,
"//mace/core:opencl_runtime"
,
],
)
...
...
mace/kernels/BUILD
浏览文件 @
baf2dcd1
...
...
@@ -26,7 +26,6 @@ cc_library(
linkopts
=
if_android
([
"-lm"
]),
deps
=
[
"//mace/core"
,
"//mace/core:opencl_runtime"
,
"//mace/utils:utils_hdrs"
,
],
)
...
...
mace/ops/addn.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,26 +6,32 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_AddN
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/addn_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
OpsTestNet
net
;
// Add input data
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
net
.
AddRandomInput
<
D
,
float
>
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
{
n
,
h
,
w
,
c
});
net
.
AddRandomInput
<
D
,
float
>
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
{
n
,
h
,
w
,
c
});
}
if
(
D
==
DeviceType
::
OPENCL
)
{
...
...
@@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
op_def_builder
.
Input
(
internal
::
MakeString
(
"InputImage"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
op_def_builder
(
"AddN"
,
"AddNBM"
);
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"Output"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
// Warm-up
...
...
mace/ops/batch_norm.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,26 +6,32 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_BatchNorm
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/batch_norm_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -23,11 +23,16 @@ static void BatchNorm(
net
.
AddRandomInput
<
D
,
T
>
(
"Var"
,
{
channels
},
true
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
...
...
@@ -37,8 +42,7 @@ static void BatchNorm(
.
AddFloatArg
(
"epsilon"
,
1e-3
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
}
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
...
...
@@ -50,7 +54,6 @@ static void BatchNorm(
.
Finalize
(
net
.
NewOperatorDef
());
}
// tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
D
);
...
...
@@ -79,9 +82,8 @@ static void BatchNorm(
} \
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);\
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM
(
1
,
1
,
512
,
512
,
float
);
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -15,18 +15,23 @@ void Simple() {
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -41,7 +46,8 @@ void Simple() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
...
...
@@ -64,9 +70,7 @@ void Simple() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
();
}
/*
TEST_F(BatchNormOpTest, SimpleNEON) {
...
...
@@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) {
}
*/
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
/*
TEST_F(BatchNormOpTest, SimpleRandomNeon) {
...
...
@@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
...
...
@@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
...
...
@@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
...
@@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
...
...
@@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
...
@@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
...
...
@@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
...
@@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
...
...
@@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
...
@@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
}
mace/ops/batch_to_space.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,13 +6,17 @@
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchToSpaceND"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchToSpaceND"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
half
>
);
void
Register_BatchToSpaceND
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -14,7 +14,8 @@ static void BMBatchToSpace(
OpsTestNet
net
;
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -36,16 +37,17 @@ static void BMBatchToSpace(
}
#define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \
static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
static void \
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE)
\
#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \
BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL);
BM_BATCH_TO_SPACE
(
128
,
8
,
8
,
128
,
2
,
float
);
...
...
mace/ops/bias_add.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,28 +6,34 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"BiasAdd"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_BiasAdd
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
CPU
,
float
>
);
/*
#if __ARM_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BiasAdd")
.TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
*/
/*
#if __ARM_NEON
REGISTER_OPERATOR(op_registry,OpKeyBuilder("BiasAdd")
.Device(DeviceType::NEON)
.TypeConstraint<float>("T")
.Build(),
BiasAddOp<DeviceType::NEON, float>);
#endif // __ARM_NEON
*/
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BiasAdd"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BiasAdd"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/bias_add_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -9,8 +9,7 @@
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
static
void
BiasAdd
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
static
void
BiasAdd
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
...
...
@@ -20,15 +19,16 @@ static void BiasAdd(
net
.
AddRandomInput
<
D
,
T
>
(
"Bias"
,
{
channels
},
true
);
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddBM"
)
.
Input
(
"InputImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
}
else
{
OpDefBuilder
(
"BiasAdd"
,
"BiasAddBM"
)
.
Input
(
"Input"
)
.
Input
(
"Bias"
)
...
...
@@ -51,12 +51,12 @@ static void BiasAdd(
#define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;
\
mace::testing::ItemsProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
}
\
} \
BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BIAS_ADD(N, C, H, W, TYPE) \
...
...
mace/ops/bias_add_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -15,12 +15,14 @@ void BiasAddSimple() {
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
0.5
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -31,7 +33,8 @@ void BiasAddSimple() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"Input"
)
...
...
@@ -43,16 +46,14 @@ void BiasAddSimple() {
}
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
6
,
2
,
1
},
{
5.5
,
5.5
,
7.5
,
7.5
,
9.5
,
9.5
,
11.5
,
11.5
,
13.5
,
13.5
,
15.5
,
15.5
});
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
6
,
2
,
1
}
,
{
5.5
,
5.5
,
7.5
,
7.5
,
9.5
,
9.5
,
11.5
,
11.5
,
13.5
,
13.5
,
15.5
,
15.5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleCPU
)
{
BiasAddSimple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleCPU
)
{
BiasAddSimple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleOPENCL
)
{
BiasAddSimple
<
DeviceType
::
OPENCL
>
();
...
...
@@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
channels
},
true
);
// run cpu
...
...
@@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
...
...
@@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
channels
},
true
);
// run cpu
...
...
@@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
mace/ops/buffer_to_image.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,14 +6,18 @@
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BufferToImage"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BufferToImageOp
<
DeviceType
::
OPENCL
,
float
>
);
void
Register_BufferToImage
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BufferToImage"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BufferToImageOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BufferToImage"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BufferToImageOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BufferToImage"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BufferToImageOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/channel_shuffle.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,9 +6,12 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"ChannelShuffle"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ChannelShuffleOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_ChannelShuffle
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ChannelShuffle"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ChannelShuffleOp
<
DeviceType
::
CPU
,
float
>
);
}
}
// namespace mace
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -23,7 +23,8 @@ static void ChannelShuffle(
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
mace/ops/channel_shuffle_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
.
AddIntArg
(
"group"
,
4
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
8
,
1
,
2
},
...
...
mace/ops/concat.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,21 +6,28 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
half
>
);
void
Register_Concat
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
half
>
);
}
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
mace/ops/concat_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters,
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input0"
,
shape0
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input1"
,
shape1
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input0"
,
"InputImage0"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input1"
,
"InputImage1"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input0"
,
"InputImage0"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input1"
,
"InputImage1"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Concat"
,
"ConcatBM"
)
.
Input
(
"InputImage0"
)
.
Input
(
"InputImage1"
)
...
...
@@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters,
net
.
RunOp
(
DeviceType
::
OPENCL
);
}
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
mace
::
testing
::
ItemsProcessed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
...
...
mace/ops/concat_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) {
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
builder
=
builder
.
Input
((
"Input"
+
ToString
(
i
)).
c_str
());
}
builder
.
AddIntArg
(
"axis"
,
axis
).
Output
(
"Output"
).
Finalize
(
net
.
NewOperatorDef
());
builder
.
AddIntArg
(
"axis"
,
axis
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
index_t
>
shape_data
;
GenerateRandomIntTypeData
<
index_t
>
({
dim
},
shape_data
,
1
,
dim
);
...
...
@@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) {
concat_axis_size
+=
input_shapes
[
i
][
axis
];
GenerateRandomRealTypeData
(
input_shapes
[
i
],
inputs
[
i
]);
input_ptrs
[
i
]
=
inputs
[
i
].
data
();
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
(
"Input"
+
ToString
(
i
)).
c_str
(),
input_shapes
[
i
],
inputs
[
i
]);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
(
"Input"
+
ToString
(
i
)).
c_str
(),
input_shapes
[
i
],
inputs
[
i
]);
}
// Run
...
...
@@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) {
}
}
template
<
typename
T
>
template
<
typename
T
>
void
OpenclRandomTest
(
const
std
::
vector
<
std
::
vector
<
index_t
>>
&
shapes
,
const
int
axis
)
{
srand
(
time
(
nullptr
));
...
...
@@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const
std
::
string
input_name
=
(
"Input"
+
ToString
(
i
)).
c_str
();
const
std
::
string
image_name
=
(
"InputImage"
+
ToString
(
i
)).
c_str
();
concat_axis_size
+=
shapes
[
i
][
axis
];
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
input_name
,
shapes
[
i
]);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
input_name
,
image_name
,
kernels
::
BufferType
::
IN_OUT
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
input_name
,
shapes
[
i
]);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
input_name
,
image_name
,
kernels
::
BufferType
::
IN_OUT
);
}
auto
builder
=
OpDefBuilder
(
"Concat"
,
"ConcatTest"
);
...
...
@@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
// Run
net
.
RunOp
(
DeviceType
::
OPENCL
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
auto
output
=
net
.
GetOutput
(
"Output"
);
...
...
@@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
while
(
output_ptr
!=
(
output
->
data
<
float
>
()
+
output
->
size
()))
{
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
index_t
num_elements
=
std
::
accumulate
(
shapes
[
i
].
begin
()
+
axis
,
shapes
[
i
].
end
(),
1
,
std
::
multiplies
<
index_t
>
());
std
::
accumulate
(
shapes
[
i
].
begin
()
+
axis
,
shapes
[
i
].
end
(),
1
,
std
::
multiplies
<
index_t
>
());
const
std
::
string
input_name
=
(
"Input"
+
ToString
(
i
)).
c_str
();
const
Tensor
*
input_tensor
=
net
.
GetTensor
(
input_name
.
data
());
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
const
float
*
input_ptr
=
input_tensor
->
data
<
float
>
()
+
k
*
num_elements
;
for
(
int
j
=
0
;
j
<
num_elements
;
++
j
)
{
EXPECT_NEAR
(
*
(
input_ptr
+
j
),
*
output_ptr
++
,
1e-2
)
<<
"With index: "
<<
i
<<
", "
<<
j
;
EXPECT_NEAR
(
*
(
input_ptr
+
j
),
*
output_ptr
++
,
1e-2
)
<<
"With index: "
<<
i
<<
", "
<<
j
;
}
}
k
++
;
...
...
@@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
}
TEST_F
(
ConcatOpTest
,
OPENCLAligned
)
{
OpenclRandomTest
<
float
>
({
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}
},
3
);
OpenclRandomTest
<
float
>
({{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}},
3
);
}
TEST_F
(
ConcatOpTest
,
OPENCLHalfAligned
)
{
OpenclRandomTest
<
half
>
({
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}
},
3
);
OpenclRandomTest
<
half
>
({{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}},
3
);
}
TEST_F
(
ConcatOpTest
,
OPENCLUnAligned
)
{
OpenclRandomTest
<
float
>
({
{
3
,
32
,
32
,
13
},
{
3
,
32
,
32
,
17
}
},
3
);
OpenclRandomTest
<
float
>
({{
3
,
32
,
32
,
13
},
{
3
,
32
,
32
,
17
}},
3
);
}
mace/ops/conv_2d.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,31 +6,38 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
half
>
);
void
Register_Conv2D
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
half
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/conv_2d_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -33,9 +33,12 @@ static void Conv2d(int iters,
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -89,7 +92,7 @@ static void Conv2d(int iters,
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE)
\
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
// ICNet
...
...
@@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
BM_CONV_2D
(
1
,
512
,
64
,
64
,
1
,
1
,
1
,
SAME
,
256
,
half
);
// Test RGB <-> YUV
//BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
//
BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
//
BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
//
//BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments
//BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
//BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
//BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad
// alignments
// BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
// BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
// BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
// BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
// BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
// BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
// BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
}
// namespace mace
mace/ops/conv_2d_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -10,7 +10,7 @@ using namespace mace;
class
Conv2dOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestSimple3x3VALID
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -42,10 +42,9 @@ void TestSimple3x3VALID() {
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
1
},
{
18.1
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestSimple3x3SAME
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) {
}
#endif
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3VALID
()
{
OpsTestNet
net
;
// Add input data
...
...
@@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3SAME
()
{
OpsTestNet
net
;
...
...
@@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestSimple3x3WithoutBias
()
{
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
...
@@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
#ifdef __ARM_NEON
TEST_F
(
Conv2dOpTest
,
NEONWithouBias
)
{
TestSimple3x3WithoutBias
<
DeviceType
::
NEON
>
();
}
#endif
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3WithoutBias
()
{
OpsTestNet
net
;
...
...
@@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
...
...
@@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
TestCombined3x3
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -335,17 +344,13 @@ static void TestCombined3x3() {
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
#ifdef __ARM_NEON
TEST_F
(
Conv2dOpTest
,
NEONCombined
)
{
TestCombined3x3
<
DeviceType
::
NEON
>
();
}
TEST_F
(
Conv2dOpTest
,
NEONCombined
)
{
TestCombined3x3
<
DeviceType
::
NEON
>
();
}
#endif
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestNHWCCombined3x3
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() {
// Add input data
net
.
AddInputFromArray
<
D
,
T
>
(
"Input"
,
{
1
,
5
,
5
,
2
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Filter"
,
{
3
,
3
,
2
,
2
},
{
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
...
...
@@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() {
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
...
...
@@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() {
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
}
// Check
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
3
,
3
,
2
},
{
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
18.1
f
,
9.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
});
{
1
,
3
,
3
,
2
},
{
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
18.1
f
,
9.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
});
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
TEST_F
(
Conv2dOpTest
,
CPUStride2
)
{
...
...
@@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) {
TestNHWCCombined3x3
<
DeviceType
::
OPENCL
,
float
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestConv1x1
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -435,9 +441,12 @@ void TestConv1x1() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -451,7 +460,8 @@ void TestConv1x1() {
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
...
...
@@ -479,15 +489,11 @@ void TestConv1x1() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
Conv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
Conv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
...
@@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
...
...
@@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
({
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
GenerateRandomRealTypeData
(
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on cpu
...
...
@@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
};
...
...
mace/ops/core_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -7,7 +7,6 @@
namespace
mace
{
TEST
(
CoreTest
,
INIT_MODE
)
{
std
::
vector
<
OperatorDef
>
op_defs
;
Workspace
ws
;
...
...
@@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) {
.
Output
(
"B2IOutput"
)
.
AddIntArg
(
"buffer_type"
,
kernels
::
BufferType
::
FILTER
)
.
AddIntArg
(
"mode"
,
static_cast
<
int
>
(
NetMode
::
INIT
))
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
Tensor
*
input
=
ws
.
CreateTensor
(
"Input"
,
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
float
>::
v
());
ws
.
CreateTensor
(
"Input"
,
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
float
>::
v
());
input
->
Resize
({
1
,
3
,
3
,
3
});
{
Tensor
::
MappingGuard
input_mapper
(
input
);
...
...
@@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) {
.
Input
(
"B2IOutput"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"buffer_type"
,
kernels
::
BufferType
::
FILTER
)
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
NetDef
net_def
;
for
(
auto
&
op_def
:
op_defs
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def
);
}
auto
net
=
CreateNet
(
net_def
,
&
ws
,
DeviceType
::
OPENCL
,
NetMode
::
INIT
);
std
::
shared_ptr
<
OperatorRegistry
>
op_registry
(
new
OperatorRegistry
());
auto
net
=
CreateNet
(
op_registry
,
net_def
,
&
ws
,
DeviceType
::
OPENCL
,
NetMode
::
INIT
);
net
->
Run
();
EXPECT_TRUE
(
ws
.
GetTensor
(
"B2IOutput"
)
!=
nullptr
);
EXPECT_TRUE
(
ws
.
GetTensor
(
"Output"
)
==
nullptr
);
net
=
CreateNet
(
net_def
,
&
ws
,
DeviceType
::
OPENCL
);
net
=
CreateNet
(
op_registry
,
net_def
,
&
ws
,
DeviceType
::
OPENCL
);
net
->
Run
();
EXPECT_TRUE
(
ws
.
GetTensor
(
"Output"
)
!=
nullptr
);
ExpectTensorNear
<
float
>
(
*
ws
.
GetTensor
(
"Input"
),
*
ws
.
GetTensor
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
ws
.
GetTensor
(
"Input"
),
*
ws
.
GetTensor
(
"Output"
),
1e-5
);
}
}
// namespace mace
}
// namespace mace
mace/ops/depthwise_conv2d.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,21 +6,26 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_DepthwiseConv2d
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
}
}
// namespace mace
mace/ops/depthwise_conv2d_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -26,7 +26,7 @@ void SimpleValidTest() {
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
2
,
2
,
3
},
{
1
,
3
,
5
,
7
,
9
,
11
,
2
,
4
,
6
,
8
,
10
,
12
});
{
1
,
3
,
5
,
7
,
9
,
11
,
2
,
4
,
6
,
8
,
10
,
12
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
2
,
2
,
2
,
2
},
{
1.0
f
,
5.0
f
,
9.0
f
,
13.0
f
,
2.0
f
,
6.0
f
,
10.0
f
,
14.0
f
,
3.0
f
,
7.0
f
,
11.0
f
,
...
...
@@ -41,12 +41,9 @@ void SimpleValidTest() {
{
196.1
f
,
252.1
f
,
216.2
f
,
280.2
f
,
272.3
f
,
344.3
f
,
296.4
f
,
376.4
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
DepthwiseConv2dOpTest
,
SimpleCPU
)
{
SimpleValidTest
<
DeviceType
::
CPU
>
();
}
TEST_F
(
DepthwiseConv2dOpTest
,
SimpleCPU
)
{
SimpleValidTest
<
DeviceType
::
CPU
>
();
}
template
<
DeviceType
D
>
void
TestNxNS12
(
const
index_t
height
,
const
index_t
width
)
{
...
...
@@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
multiplier
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
multiplier
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
multiplier
*
input_channels
});
// Run on device
net
.
RunOp
(
D
);
...
...
@@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) {
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
);
}
}
}
#if __ARM_NEON
...
...
mace/ops/depthwise_conv_2d_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters,
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
*
channels
});
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
*
channels
});
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
@@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters,
net
.
Sync
();
}
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE,
\
DEVICE)
\
static void
\
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \
DEVICE) \
static void \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) {
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;
\
mace::testing::ItemsProcessed(tot);
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE,
\
mace::Padding::P, OC);
\
}
\
BENCHMARK(
\
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, OC); \
} \
BENCHMARK( \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);\
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
BM_DEPTHWISE_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
VALID
,
2
,
float
);
...
...
mace/ops/fused_conv_2d.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,25 +6,30 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
CPU
,
half
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
void
Register_FusedConv2D
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
CPU
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/fused_conv_2d_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -9,7 +9,7 @@ using namespace mace;
class
FusedConv2dOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3VALID
()
{
OpsTestNet
net
;
// Add input data
...
...
@@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
-
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
...
...
@@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3SAME
()
{
OpsTestNet
net
;
...
...
@@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
-
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
...
...
@@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
...
...
@@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() {
}
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
3
,
3
,
1
},
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
});
{
1
,
3
,
3
,
1
},
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
});
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
...
...
@@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3WithoutBias
()
{
OpsTestNet
net
;
...
...
@@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"Input"
)
...
...
@@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestConv1x1
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -207,9 +217,12 @@ void TestConv1x1() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -223,7 +236,8 @@ void TestConv1x1() {
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"Input"
)
...
...
@@ -251,15 +265,11 @@ void TestConv1x1() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
FusedConv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
FusedConv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
FusedConv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
FusedConv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
...
@@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
...
...
@@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
});
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
...
@@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
({
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
GenerateRandomRealTypeData
(
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on cpu
...
...
@@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.2
);
};
...
...
@@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
});
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestGeneralConvNxNS12
(
const
std
::
vector
<
index_t
>
&
image_shape
,
const
std
::
vector
<
index_t
>
&
filter_shape
)
{
testing
::
internal
::
LogToStderr
();
...
...
@@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
// Run on device
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
...
...
@@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
}
TEST_F
(
FusedConv2dOpTest
,
OPENCL7X7ConvNxNS12
)
{
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
},
{
7
,
7
,
3
,
64
});
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
},
{
7
,
7
,
3
,
64
});
}
TEST_F
(
FusedConv2dOpTest
,
OPENCL15X1ConvNxNS12
)
{
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
40
,
40
},
{
15
,
1
,
32
,
64
});
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
40
,
40
},
{
15
,
1
,
32
,
64
});
}
template
<
DeviceType
D
,
typename
T
>
...
...
mace/ops/global_avg_pooling.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,16 +6,20 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"GlobalAvgPooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GlobalAvgPoolingOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_GlobalAvgPooling
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"GlobalAvgPooling"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GlobalAvgPoolingOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"GlobalAvgPooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GlobalAvgPoolingOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"GlobalAvgPooling"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GlobalAvgPoolingOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
}
}
// namespace mace
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -22,7 +22,8 @@ static void GlobalAvgPooling(
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
mace/ops/image_to_buffer.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,14 +6,18 @@
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ImageToBuffer"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ImageToBufferOp
<
DeviceType
::
OPENCL
,
float
>
);
void
Register_ImageToBuffer
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ImageToBuffer"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ImageToBufferOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ImageToBuffer"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ImageToBufferOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ImageToBuffer"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ImageToBufferOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/ops_test_util.h
浏览文件 @
baf2dcd1
...
...
@@ -10,9 +10,9 @@
#include "gtest/gtest.h"
#include "mace/core/common.h"
#include "mace/core/net.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
...
...
@@ -56,7 +56,8 @@ class OpDefBuilder {
return
*
this
;
}
OpDefBuilder
AddIntsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
int
>
&
values
)
{
OpDefBuilder
AddIntsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
int
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
...
...
@@ -65,7 +66,8 @@ class OpDefBuilder {
return
*
this
;
}
OpDefBuilder
AddFloatsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
float
>
&
values
)
{
OpDefBuilder
AddFloatsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
float
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
...
...
@@ -75,7 +77,7 @@ class OpDefBuilder {
}
OpDefBuilder
AddStringsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
const
std
::
vector
<
const
char
*>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
...
...
@@ -94,7 +96,7 @@ class OpDefBuilder {
class
OpsTestNet
{
public:
OpsTestNet
()
{}
OpsTestNet
()
:
op_registry_
(
new
OperatorRegistry
())
{};
template
<
DeviceType
D
,
typename
T
>
void
AddInputFromArray
(
const
std
::
string
&
name
,
...
...
@@ -135,10 +137,11 @@ class OpsTestNet {
std
::
mt19937
gen
(
rd
());
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
return
half_float
::
half_cast
<
half
>
(
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
));
});
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
return
half_float
::
half_cast
<
half
>
(
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
));
});
}
else
{
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
...
...
@@ -160,7 +163,7 @@ class OpsTestNet {
for
(
auto
&
op_def_
:
op_defs_
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def_
);
}
net_
=
CreateNet
(
net_def
,
&
ws_
,
device
);
net_
=
CreateNet
(
op_registry_
,
net_def
,
&
ws_
,
device
);
device_
=
device
;
return
net_
->
Run
();
}
...
...
@@ -182,6 +185,7 @@ class OpsTestNet {
}
public:
std
::
shared_ptr
<
OperatorRegistry
>
op_registry_
;
Workspace
ws_
;
std
::
vector
<
OperatorDef
>
op_defs_
;
std
::
unique_ptr
<
NetBase
>
net_
;
...
...
@@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
res
.
resize
(
size
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
half_float
::
half_cast
<
half
>
(
nd
(
gen
));
});
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
half_float
::
half_cast
<
half
>
(
nd
(
gen
));
});
}
else
{
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
nd
(
gen
);
});
}
...
...
@@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
template
<
typename
T
>
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
CPU
),
DataTypeToEnum
<
T
>::
v
()));
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
CPU
),
DataTypeToEnum
<
T
>::
v
()));
res
->
Resize
(
shape
);
T
*
input_data
=
res
->
mutable_data
<
T
>
();
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
...
...
@@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) {
template
<
typename
T
>
struct
is_floating_point_type
{
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
||
std
::
is_same
<
T
,
half
>::
value
;
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
||
std
::
is_same
<
T
,
half
>::
value
;
};
template
<
typename
T
>
...
...
@@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) {
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
}
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
,
bool
is_fp
=
is_floating_point_type
<
EXP_TYPE
>
::
value
>
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
,
bool
is_fp
=
is_floating_point_type
<
EXP_TYPE
>
::
value
>
struct
Expector
;
// Partial specialization for float and double.
...
...
@@ -343,7 +351,6 @@ struct Expector<EXP_TYPE, RES_TYPE, true> {
}
}
}
};
template
<
typename
T
>
...
...
@@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
>
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
static_assert
(
is_floating_point_type
<
EXP_TYPE
>::
value
&&
is_floating_point_type
<
RES_TYPE
>::
value
,
static_assert
(
is_floating_point_type
<
EXP_TYPE
>::
value
&&
is_floating_point_type
<
RES_TYPE
>::
value
,
"T is not a floating point type"
);
Expector
<
EXP_TYPE
,
RES_TYPE
>::
Near
(
x
,
y
,
abs_err
);
}
...
...
mace/ops/pooling.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,29 +6,36 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
CPU
,
half
>
);
void
Register_Pooling
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
CPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
CPU
,
half
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/pooling_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -35,7 +35,8 @@ static void Pooling(int iters,
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
mace/ops/pooling_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) {
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
4
,
4
,
2
},
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
8
,
24
,
9
,
25
,
10
,
26
,
11
,
27
,
12
,
28
,
13
,
29
,
14
,
30
,
15
,
31
});
// Run
...
...
@@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
PoolingOpTest
,
MAX_SAME
)
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
SimpleMaxPooling3S2
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() {
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
9
,
1
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() {
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
// Run
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
...
...
@@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
TEST_F
(
PoolingOpTest
,
CPUSimpleMaxPooling3S2
)
{
SimpleMaxPooling3S2
<
CPU
>
();
}
TEST_F
(
PoolingOpTest
,
CPUSimpleMaxPooling3S2
)
{
SimpleMaxPooling3S2
<
CPU
>
();
}
TEST_F
(
PoolingOpTest
,
OPENCLSimpleMaxPooling3S2
)
{
SimpleMaxPooling3S2
<
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
MaxPooling3S2
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
int
>
strides
,
Padding
padding
)
{
...
...
@@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
}
// TODO(chenghui) : there is a bug.
//TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
//
TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
// AlignedMaxPooling3S2<NEON>(Padding::VALID);
// AlignedMaxPooling3S2<NEON>(Padding::SAME);
//}
...
...
@@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
4
,
4
,
2
},
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
8
,
24
,
9
,
25
,
10
,
26
,
11
,
27
,
12
,
28
,
13
,
29
,
14
,
30
,
15
,
31
});
// Run
...
...
@@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
SimpleAvgPoolingTest
()
{
// Construct graph
OpsTestNet
net
;
...
...
@@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() {
"Input"
,
{
1
,
2
,
8
,
1
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() {
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
4
,
1
},
{
4.5
,
6.5
,
8.5
,
10.5
});
...
...
@@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) {
SimpleAvgPoolingTest
<
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
AvgPoolingTest
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
int
>
&
kernels
,
const
std
::
vector
<
int
>
&
strides
,
Padding
padding
)
{
const
std
::
vector
<
int
>
&
kernels
,
const
std
::
vector
<
int
>
&
strides
,
Padding
padding
)
{
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
...
...
@@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
,
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.01
);
}
TEST_F
(
PoolingOpTest
,
OPENCLAlignedAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
}
TEST_F
(
PoolingOpTest
,
OPENCLHalfAlignedAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
}
TEST_F
(
PoolingOpTest
,
OPENCLAlignedLargeKernelAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
}
TEST_F
(
PoolingOpTest
,
OPENCLHalfAlignedLargeKernelAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
}
TEST_F
(
PoolingOpTest
,
OPENCLUnAlignedAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
SAME
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
SAME
);
}
TEST_F
(
PoolingOpTest
,
OPENCLUnAlignedLargeKernelAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
SAME
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
SAME
);
}
mace/ops/relu.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,26 +6,32 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_Relu
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/relu_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -19,7 +19,8 @@ static void ReluBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
.
Input
(
"InputImage"
)
...
...
@@ -54,9 +55,9 @@ static void ReluBenchmark(
} \
BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\
#define BM_RELU(N, C, H, W, TYPE)
\
BM_RELU_MACRO(N, C, H, W, TYPE, CPU);
\
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);
\
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
...
...
mace/ops/relu_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -14,13 +14,13 @@ void TestSimple() {
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -31,7 +31,8 @@ void TestSimple() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
...
...
@@ -42,38 +43,30 @@ void TestSimple() {
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
2
,
2
,
2
,
2
},
{
0
,
7
,
0
,
6
,
0
,
5
,
0
,
4
,
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
auto
expected
=
CreateTensor
<
float
>
(
{
2
,
2
,
2
,
2
},
{
0
,
7
,
0
,
6
,
0
,
5
,
0
,
4
,
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
ReluOpTest
,
CPUSimple
)
{
TestSimple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
ReluOpTest
,
CPUSimple
)
{
TestSimple
<
DeviceType
::
CPU
>
();
}
#if __ARM_NEON
TEST_F
(
ReluOpTest
,
NEONSimple
)
{
TestSimple
<
DeviceType
::
NEON
>
();
}
TEST_F
(
ReluOpTest
,
NEONSimple
)
{
TestSimple
<
DeviceType
::
NEON
>
();
}
#endif
TEST_F
(
ReluOpTest
,
OPENCLSimple
)
{
TestSimple
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
ReluOpTest
,
OPENCLSimple
)
{
TestSimple
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
>
void
TestUnalignedSimple
()
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
2
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
2
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -84,7 +77,8 @@ void TestUnalignedSimple() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
...
...
@@ -95,8 +89,7 @@ void TestUnalignedSimple() {
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
2
,
1
},
{
0
,
7
,
0
,
6
,
0
,
5
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
2
,
1
},
{
0
,
7
,
0
,
6
,
0
,
5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
...
...
@@ -120,13 +113,13 @@ void TestSimpleReluX() {
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -138,7 +131,8 @@ void TestSimpleReluX() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
...
...
@@ -150,38 +144,31 @@ void TestSimpleReluX() {
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
2
,
2
,
2
,
2
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
,
4
,
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
auto
expected
=
CreateTensor
<
float
>
(
{
2
,
2
,
2
,
2
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
,
4
,
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
TEST_F
(
ReluOpTest
,
CPUSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
CPU
>
();
}
TEST_F
(
ReluOpTest
,
CPUSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
CPU
>
();
}
#if __ARM_NEON
TEST_F
(
ReluOpTest
,
NEONSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
NEON
>
();
}
TEST_F
(
ReluOpTest
,
NEONSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
NEON
>
();
}
#endif
TEST_F
(
ReluOpTest
,
OPENCLSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
OPENCL
>
();
}
TEST_F
(
ReluOpTest
,
OPENCLSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
>
void
TestUnalignedSimpleReluX
()
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
7
,
1
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
7
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
...
...
@@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() {
net
.
RunOp
(
D
);
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
...
...
@@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() {
net
.
RunOp
(
D
);
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
7
,
1
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
7
,
1
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
...
...
mace/ops/resize_bilinear.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,26 +6,32 @@
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
void
Register_ResizeBilinear
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Device
(
DeviceType
::
NEON
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters,
net
.
AddInputFromArray
<
D
,
index_t
>
(
"OutSize"
,
{
2
},
{
output_height
,
output_width
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"InputImage"
)
.
Input
(
"OutSize"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"InputImage"
)
.
Input
(
"OutSize"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
// Warm-up
...
...
@@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters,
BENCHMARK( \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE)
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE)
\
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU);
\
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL);
// SNPE 835 GPU: 6870us
...
...
mace/ops/resize_bilinear_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -80,29 +80,31 @@ void TestRandomResizeBilinear() {
{
batch
,
in_height
,
in_width
,
channels
});
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run on CPU
net
.
RunOp
(
DeviceType
::
CPU
);
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
// TODO support NEON
}
...
...
mace/ops/space_to_batch.cc
浏览文件 @
baf2dcd1
...
...
@@ -6,13 +6,17 @@
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"SpaceToBatchND"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"SpaceToBatchND"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
half
>
);
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
baf2dcd1
...
...
@@ -15,7 +15,8 @@ static void BMSpaceToBatch(
OpsTestNet
net
;
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -36,17 +37,19 @@ static void BMSpaceToBatch(
net
.
Sync
();
}
#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \
static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \
static void \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
} \
BENCHMARK( \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL);
BM_SPACE_TO_BATCH
(
128
,
16
,
16
,
128
,
2
,
float
);
...
...
mace/ops/space_to_batch_test.cc
浏览文件 @
baf2dcd1
...
...
@@ -2,23 +2,23 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include <fstream>
#include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h"
#include <fstream>
using
namespace
mace
;
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
RunSpaceToBatch
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
padding_data
,
const
Tensor
*
expected
)
{
OpsTestNet
net
;
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-8
);
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
RunBatchToSpace
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
...
...
@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
const
Tensor
*
expected
)
{
OpsTestNet
net
;
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
...
...
@@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-8
);
}
template
<
typename
T
>
template
<
typename
T
>
void
TestBidirectionalTransform
(
const
std
::
vector
<
index_t
>
&
space_shape
,
const
std
::
vector
<
float
>
&
space_data
,
const
std
::
vector
<
int
>
&
block_data
,
const
std
::
vector
<
int
>
&
padding_data
,
const
std
::
vector
<
index_t
>
&
batch_shape
,
const
std
::
vector
<
float
>
&
batch_data
)
{
auto
space_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
T
>::
v
()));
auto
space_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
T
>::
v
()));
space_tensor
->
Resize
(
space_shape
);
{
Tensor
::
MappingGuard
space_mapper
(
space_tensor
.
get
());
T
*
space_ptr
=
space_tensor
->
mutable_data
<
T
>
();
MACE_CHECK
(
static_cast
<
size_t
>
(
space_tensor
->
size
())
==
space_data
.
size
())
<<
"Space tensor size:"
<<
space_tensor
->
size
()
<<
", space data size:"
<<
space_data
.
size
();
<<
"Space tensor size:"
<<
space_tensor
->
size
()
<<
", space data size:"
<<
space_data
.
size
();
memcpy
(
space_ptr
,
space_data
.
data
(),
space_data
.
size
()
*
sizeof
(
T
));
}
auto
batch_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
T
>::
v
()));
auto
batch_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
T
>::
v
()));
batch_tensor
->
Resize
(
batch_shape
);
{
Tensor
::
MappingGuard
batch_mapper
(
batch_tensor
.
get
());
...
...
@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
memcpy
(
batch_ptr
,
batch_data
.
data
(),
batch_data
.
size
()
*
sizeof
(
T
));
}
RunSpaceToBatch
<
DeviceType
::
OPENCL
>
(
space_shape
,
space_data
,
block_data
,
padding_data
,
batch_tensor
.
get
());
RunSpaceToBatch
<
DeviceType
::
OPENCL
>
(
space_shape
,
space_data
,
block_data
,
padding_data
,
batch_tensor
.
get
());
RunBatchToSpace
<
DeviceType
::
OPENCL
>
(
batch_shape
,
batch_data
,
block_data
,
padding_data
,
space_tensor
.
get
());
RunBatchToSpace
<
DeviceType
::
OPENCL
>
(
batch_shape
,
batch_data
,
block_data
,
padding_data
,
space_tensor
.
get
());
}
TEST
(
SpaceToBatchTest
,
SmallData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
1
},
{
1
,
2
,
3
,
4
}
);
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
1
},
{
1
,
2
,
3
,
4
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithOnePadding
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
3
,
3
},
{
1
,
0
,
1
,
0
},
{
9
,
1
,
1
,
1
},
{
0
,
0
,
0
,
0
,
1
,
2
,
0
,
3
,
4
}
);
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
3
,
3
},
{
1
,
0
,
1
,
0
},
{
9
,
1
,
1
,
1
},
{
0
,
0
,
0
,
0
,
1
,
2
,
0
,
3
,
4
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithTwoPadding
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
1
,
1
,
1
,
1
},
{
4
,
2
,
2
,
1
},
{
0
,
0
,
0
,
4
,
0
,
0
,
3
,
0
,
0
,
2
,
0
,
0
,
1
,
0
,
0
,
0
}
);
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
1
,
1
,
1
,
1
},
{
4
,
2
,
2
,
1
},
{
0
,
0
,
0
,
4
,
0
,
0
,
3
,
0
,
0
,
2
,
0
,
0
,
1
,
0
,
0
,
0
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithLargeImage
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
10
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
5
,
1
},
{
1
,
3
,
5
,
7
,
9
,
2
,
4
,
6
,
8
,
10
,
11
,
13
,
15
,
17
,
19
,
12
,
14
,
16
,
18
,
20
}
);
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
10
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
5
,
1
},
{
1
,
3
,
5
,
7
,
9
,
2
,
4
,
6
,
8
,
10
,
11
,
13
,
15
,
17
,
19
,
12
,
14
,
16
,
18
,
20
});
}
TEST
(
SpaceToBatchTest
,
MultiChannelData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
}
);
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
});
}
TEST
(
SpaceToBatchTest
,
LargerMultiChannelData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
4
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
2
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
}
);
TestBidirectionalTransform
<
float
>
(
{
1
,
4
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
2
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchData
)
{
TestBidirectionalTransform
<
float
>
({
2
,
2
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
1
},
{
1
,
3
,
2
,
4
,
5
,
7
,
6
,
8
,
9
,
11
,
10
,
12
,
13
,
15
,
14
,
16
}
);
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
1
},
{
1
,
3
,
2
,
4
,
5
,
7
,
6
,
8
,
9
,
11
,
10
,
12
,
13
,
15
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchAndChannelData
)
{
TestBidirectionalTransform
<
float
>
({
2
,
2
,
4
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
2
},
{
1
,
2
,
5
,
6
,
3
,
4
,
7
,
8
,
9
,
10
,
13
,
14
,
11
,
12
,
15
,
16
,
17
,
18
,
21
,
22
,
19
,
20
,
23
,
24
,
25
,
26
,
29
,
30
,
27
,
28
,
31
,
32
}
);
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
2
},
{
1
,
2
,
5
,
6
,
3
,
4
,
7
,
8
,
9
,
10
,
13
,
14
,
11
,
12
,
15
,
16
,
17
,
18
,
21
,
22
,
19
,
20
,
23
,
24
,
25
,
26
,
29
,
30
,
27
,
28
,
31
,
32
});
}
//TEST(SpaceTobatchTest, CompareTF) {
//
TEST(SpaceTobatchTest, CompareTF) {
//
// const std::string space_file = "/data/local/tmp/test/input";
// const std::string batch_file = "/data/local/tmp/test/output";
// const std::vector<index_t> space_shape = {1, 256, 256, 32};
// const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies<int>());
// const int space_size = std::accumulate(space_shape.begin(),
// space_shape.end(), 1, std::multiplies<int>());
// const std::vector<index_t> batch_shape = {4, 130, 130, 32};
// const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies<int>());
// const int batch_size = std::accumulate(batch_shape.begin(),
// batch_shape.end(), 1, std::multiplies<int>());
//
// auto space_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// auto space_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v()));
// space_tensor->Resize(space_shape);
// std::vector<float> space_data(space_size, 0.0);
...
...
@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// VLOG(0) << "open space file failed";
// }
//
// auto batch_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// auto batch_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v()));
// std::vector<float> batch_data(batch_size, 0.0);
// batch_tensor->Resize(batch_shape);
...
...
@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// }
// Tensor::MappingGuard batch_mapper(batch_tensor.get());
// float *batch_ptr = batch_tensor->mutable_data<float>();
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) == batch_data.size());
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) ==
// batch_data.size());
// memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float));
// }
//
...
...
@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// {2, 2, 2, 2},
// space_tensor.get());
//}
mace/python/tools/model.template
浏览文件 @
baf2dcd1
...
...
@@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector<mace::ConstTensor> &tensors) {
#include "mace/core/public/mace.h"
namespace {
static
void UpdateOp(mace::OperatorDef &op,
const std::string &name,
const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs,
const std::vector<mace::DataType> &output_types) {
void UpdateOp(mace::OperatorDef &op,
const std::string &name,
const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs,
const std::vector<mace::DataType> &output_types) {
op.set_name(name);
op.set_type(type);
op.set_input(inputs);
...
...
tools/bazel-adb-run.sh
浏览文件 @
baf2dcd1
...
...
@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//}
BAZEL_BIN_PATH
=
bazel-bin/
$BAZEL_BIN_PATH
BIN_NAME
=
`
echo
$BAZEL_TARGET
|
cut
-d
:
-f2
`
ANDROID_ABI
=
armeabi-v7a
ANDROID_ABI
=
arm64-v8a
STRIP
=
""
ANDROID_ABI
=
armeabi-v7a
STRIP
=
"--strip always"
VLOG_LEVEL
=
0
PROFILINE
=
"--define profiling=true"
...
...
@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--crosstool_top
=
//external:android/crosstool
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--cpu
=
$ANDROID_ABI
\
--define
neon
=
tru
e
--define
neon
=
fals
e
if
[
$?
-ne
0
]
;
then
exit
1
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录