Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
baf2dcd1
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
baf2dcd1
编写于
12月 28, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Resolve operator and allocator registering static variable issue
上级
faadb474
变更
57
隐藏空白更改
内联
并排
Showing
57 changed file
with
1090 addition
and
951 deletion
+1090
-951
mace/core/BUILD
mace/core/BUILD
+6
-17
mace/core/allocator.cc
mace/core/allocator.cc
+2
-0
mace/core/mace.cc
mace/core/mace.cc
+6
-3
mace/core/net.cc
mace/core/net.cc
+24
-19
mace/core/net.h
mace/core/net.h
+18
-10
mace/core/operator.cc
mace/core/operator.cc
+63
-45
mace/core/operator.h
mace/core/operator.h
+24
-58
mace/core/public/mace.h
mace/core/public/mace.h
+4
-1
mace/core/registry.h
mace/core/registry.h
+16
-21
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+0
-2
mace/core/tensor.h
mace/core/tensor.h
+2
-1
mace/examples/BUILD
mace/examples/BUILD
+0
-1
mace/kernels/BUILD
mace/kernels/BUILD
+0
-1
mace/ops/addn.cc
mace/ops/addn.cc
+22
-16
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+6
-6
mace/ops/batch_norm.cc
mace/ops/batch_norm.cc
+22
-16
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+13
-11
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+75
-45
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+12
-8
mace/ops/batch_to_space_benchmark.cc
mace/ops/batch_to_space_benchmark.cc
+11
-9
mace/ops/bias_add.cc
mace/ops/bias_add.cc
+26
-20
mace/ops/bias_add_benchmark.cc
mace/ops/bias_add_benchmark.cc
+11
-11
mace/ops/bias_add_test.cc
mace/ops/bias_add_test.cc
+27
-20
mace/ops/buffer_to_image.cc
mace/ops/buffer_to_image.cc
+12
-8
mace/ops/channel_shuffle.cc
mace/ops/channel_shuffle.cc
+7
-4
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+2
-1
mace/ops/channel_shuffle_test.cc
mace/ops/channel_shuffle_test.cc
+0
-1
mace/ops/concat.cc
mace/ops/concat.cc
+23
-16
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+6
-3
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+18
-26
mace/ops/conv_2d.cc
mace/ops/conv_2d.cc
+29
-22
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+31
-27
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+80
-61
mace/ops/core_test.cc
mace/ops/core_test.cc
+11
-8
mace/ops/depthwise_conv2d.cc
mace/ops/depthwise_conv2d.cc
+17
-12
mace/ops/depthwise_conv2d_test.cc
mace/ops/depthwise_conv2d_test.cc
+6
-8
mace/ops/depthwise_conv_2d_benchmark.cc
mace/ops/depthwise_conv_2d_benchmark.cc
+15
-16
mace/ops/fused_conv_2d.cc
mace/ops/fused_conv_2d.cc
+25
-20
mace/ops/fused_conv_2d_test.cc
mace/ops/fused_conv_2d_test.cc
+75
-50
mace/ops/global_avg_pooling.cc
mace/ops/global_avg_pooling.cc
+12
-8
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+2
-1
mace/ops/image_to_buffer.cc
mace/ops/image_to_buffer.cc
+12
-8
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+26
-19
mace/ops/pooling.cc
mace/ops/pooling.cc
+27
-20
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+2
-1
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+46
-33
mace/ops/relu.cc
mace/ops/relu.cc
+22
-16
mace/ops/relu_benchmark.cc
mace/ops/relu_benchmark.cc
+5
-4
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+36
-49
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+22
-16
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+16
-15
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+14
-12
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+12
-8
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+15
-12
mace/ops/space_to_batch_test.cc
mace/ops/space_to_batch_test.cc
+66
-96
mace/python/tools/model.template
mace/python/tools/model.template
+6
-6
tools/bazel-adb-run.sh
tools/bazel-adb-run.sh
+2
-3
未找到文件。
mace/core/BUILD
浏览文件 @
baf2dcd1
...
@@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0
...
@@ -10,38 +10,27 @@ licenses(["notice"]) # Apache 2.0
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_profiling_enabled"
,
"if_embed_binary_program"
)
load
(
"//mace:mace.bzl"
,
"if_android"
,
"if_profiling_enabled"
,
"if_embed_binary_program"
)
cc_library
(
cc_library
(
name
=
"
opencl_runtim
e"
,
name
=
"
cor
e"
,
srcs
=
glob
([
srcs
=
glob
([
"*.cc"
,
"runtime/opencl/*.cc"
,
"runtime/opencl/*.cc"
,
]),
]),
hdrs
=
glob
([
hdrs
=
glob
([
"*.h"
,
"public/*.h"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/*.h"
,
"runtime/opencl/*.h"
,
]),
]),
copts
=
[
"-std=c++11"
,
"-D_GLIBCXX_USE_C99_MATH_TR1"
]
+
copts
=
[
"-std=c++11"
,
"-D_GLIBCXX_USE_C99_MATH_TR1"
]
+
if_profiling_enabled
([
"-DMACE_OPENCL_PROFILING"
])
+
if_profiling_enabled
([
"-DMACE_OPENCL_PROFILING"
])
+
if_embed_binary_program
([
"-DMACE_EMBED_BINARY_PROGRAM"
]),
if_embed_binary_program
([
"-DMACE_EMBED_BINARY_PROGRAM"
]),
linkopts
=
[
"-ldl"
]
,
linkopts
=
if_android
([
"-pie"
,
"-ldl"
])
,
deps
=
[
deps
=
[
"
:core
"
,
"
//mace/utils:utils_hdrs
"
,
"//mace/utils:logging"
,
"//mace/utils:logging"
,
"//mace/utils:tuner"
,
"//mace/utils:tuner"
,
"@opencl_headers//:opencl20_headers"
,
"@opencl_headers//:opencl20_headers"
,
],
],
alwayslink
=
1
,
)
cc_library
(
name
=
"core"
,
srcs
=
glob
([
"*.cc"
]),
hdrs
=
glob
([
"*.h"
,
"public/*.h"
]),
copts
=
[
"-std=c++11"
,
"-D_GLIBCXX_USE_C99_MATH_TR1"
],
linkopts
=
if_android
([
"-pie"
]),
deps
=
[
"//mace/utils:utils_hdrs"
,
"//mace/utils:logging"
,
],
)
)
cc_library
(
cc_library
(
...
...
mace/core/allocator.cc
浏览文件 @
baf2dcd1
...
@@ -3,6 +3,7 @@
...
@@ -3,6 +3,7 @@
//
//
#include "mace/core/allocator.h"
#include "mace/core/allocator.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
namespace
mace
{
namespace
mace
{
...
@@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) {
...
@@ -22,5 +23,6 @@ Allocator *GetDeviceAllocator(DeviceType type) {
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
CPU
,
new
CPUAllocator
());
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
CPU
,
new
CPUAllocator
());
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
NEON
,
new
CPUAllocator
());
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
NEON
,
new
CPUAllocator
());
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
OPENCL
,
new
OpenCLAllocator
());
}
// namespace mace
}
// namespace mace
mace/core/mace.cc
浏览文件 @
baf2dcd1
...
@@ -5,6 +5,7 @@
...
@@ -5,6 +5,7 @@
#include "mace/core/public/mace.h"
#include "mace/core/public/mace.h"
#include "mace/core/types.h"
#include "mace/core/types.h"
#include "mace/core/net.h"
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/core/workspace.h"
#include "mace/utils/logging.h"
#include "mace/utils/logging.h"
...
@@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const {
...
@@ -481,17 +482,19 @@ const OperatorDef &NetDef::op(const int idx) const {
// Mace Engine
// Mace Engine
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
:
MaceEngine
::
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
)
:
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
)
{
op_registry_
(
new
OperatorRegistry
()),
device_type_
(
device_type
),
ws_
(
new
Workspace
()),
net_
(
nullptr
)
{
ws_
->
LoadModelTensor
(
*
net_def
,
device_type
);
ws_
->
LoadModelTensor
(
*
net_def
,
device_type
);
// Init model
// Init model
auto
net
=
CreateNet
(
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
auto
net
=
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
,
NetMode
::
INIT
);
if
(
!
net
->
Run
())
{
if
(
!
net
->
Run
())
{
LOG
(
FATAL
)
<<
"Net init run failed"
;
LOG
(
FATAL
)
<<
"Net init run failed"
;
}
}
ws_
->
CreateTensor
(
"mace_input_node:0"
,
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
ws_
->
CreateTensor
(
"mace_input_node:0"
,
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
net_
=
std
::
move
(
CreateNet
(
*
net_def
,
ws_
.
get
(),
device_type
));
net_
=
std
::
move
(
CreateNet
(
op_registry_
,
*
net_def
,
ws_
.
get
(),
device_type
));
}
}
MaceEngine
::~
MaceEngine
()
=
default
;
MaceEngine
::~
MaceEngine
()
=
default
;
bool
MaceEngine
::
Run
(
const
float
*
input
,
bool
MaceEngine
::
Run
(
const
float
*
input
,
...
...
mace/core/net.cc
浏览文件 @
baf2dcd1
...
@@ -3,22 +3,24 @@
...
@@ -3,22 +3,24 @@
//
//
#include "mace/core/net.h"
#include "mace/core/net.h"
#include "mace/core/operator.h"
#include "mace/core/workspace.h"
#include "mace/core/workspace.h"
#include "mace/utils/utils.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
mace
{
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
NetBase
::
NetBase
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
Workspace
*
ws
,
DeviceType
type
)
DeviceType
type
)
:
name_
(
net_def
->
name
())
{}
:
op_registry_
(
op_registry
),
name_
(
net_def
->
name
())
{}
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
SimpleNet
::
SimpleNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
Workspace
*
ws
,
DeviceType
type
,
DeviceType
type
,
const
NetMode
mode
)
const
NetMode
mode
)
:
NetBase
(
net_def
,
ws
,
type
),
device_type_
(
type
){
:
NetBase
(
op_registry
,
net_def
,
ws
,
type
),
device_type_
(
type
)
{
VLOG
(
1
)
<<
"Constructing SimpleNet "
<<
net_def
->
name
();
VLOG
(
1
)
<<
"Constructing SimpleNet "
<<
net_def
->
name
();
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
...
@@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
...
@@ -26,7 +28,7 @@ SimpleNet::SimpleNet(const std::shared_ptr<const NetDef> &net_def,
<<
operator_def
.
type
();
<<
operator_def
.
type
();
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
std
::
unique_ptr
<
OperatorBase
>
op
{
nullptr
};
OperatorDef
temp_def
(
operator_def
);
OperatorDef
temp_def
(
operator_def
);
op
=
CreateOperator
(
temp_def
,
ws
,
type
,
mode
);
op
=
op_registry
->
CreateOperator
(
temp_def
,
ws
,
type
,
mode
);
if
(
op
)
{
if
(
op
)
{
operators_
.
emplace_back
(
std
::
move
(
op
));
operators_
.
emplace_back
(
std
::
move
(
op
));
}
}
...
@@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
...
@@ -62,9 +64,8 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
}
}
if
(
run_metadata
!=
nullptr
)
{
if
(
run_metadata
!=
nullptr
)
{
OperatorStats
op_stats
=
{
op
->
debug_def
().
name
(),
OperatorStats
op_stats
=
{
op
->
debug_def
().
name
(),
op
->
debug_def
().
type
(),
op
->
debug_def
().
type
(),
call_stats
};
call_stats
};
run_metadata
->
op_stats
.
emplace_back
(
op_stats
);
run_metadata
->
op_stats
.
emplace_back
(
op_stats
);
}
}
...
@@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
...
@@ -80,19 +81,23 @@ bool SimpleNet::Run(RunMetadata *run_metadata) {
return
true
;
return
true
;
}
}
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
std
::
unique_ptr
<
NetBase
>
CreateNet
(
Workspace
*
ws
,
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
DeviceType
type
,
const
NetDef
&
net_def
,
const
NetMode
mode
)
{
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
std
::
shared_ptr
<
NetDef
>
tmp_net_def
(
new
NetDef
(
net_def
));
std
::
shared_ptr
<
NetDef
>
tmp_net_def
(
new
NetDef
(
net_def
));
return
CreateNet
(
tmp_net_def
,
ws
,
type
,
mode
);
return
CreateNet
(
op_registry
,
tmp_net_def
,
ws
,
type
,
mode
);
}
}
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
std
::
unique_ptr
<
NetBase
>
CreateNet
(
Workspace
*
ws
,
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
DeviceType
type
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
const
NetMode
mode
)
{
Workspace
*
ws
,
unique_ptr
<
NetBase
>
net
(
new
SimpleNet
(
net_def
,
ws
,
type
,
mode
));
DeviceType
type
,
const
NetMode
mode
)
{
unique_ptr
<
NetBase
>
net
(
new
SimpleNet
(
op_registry
,
net_def
,
ws
,
type
,
mode
));
return
net
;
return
net
;
}
}
...
...
mace/core/net.h
浏览文件 @
baf2dcd1
...
@@ -6,6 +6,7 @@
...
@@ -6,6 +6,7 @@
#define MACE_CORE_NET_H_
#define MACE_CORE_NET_H_
#include "mace/core/common.h"
#include "mace/core/common.h"
#include "mace/core/operator.h"
#include "mace/core/public/mace.h"
#include "mace/core/public/mace.h"
namespace
mace
{
namespace
mace
{
...
@@ -16,7 +17,8 @@ class Workspace;
...
@@ -16,7 +17,8 @@ class Workspace;
class
NetBase
{
class
NetBase
{
public:
public:
NetBase
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
NetBase
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
Workspace
*
ws
,
DeviceType
type
);
DeviceType
type
);
virtual
~
NetBase
()
noexcept
{}
virtual
~
NetBase
()
noexcept
{}
...
@@ -27,13 +29,15 @@ class NetBase {
...
@@ -27,13 +29,15 @@ class NetBase {
protected:
protected:
string
name_
;
string
name_
;
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry_
;
DISABLE_COPY_AND_ASSIGN
(
NetBase
);
DISABLE_COPY_AND_ASSIGN
(
NetBase
);
};
};
class
SimpleNet
:
public
NetBase
{
class
SimpleNet
:
public
NetBase
{
public:
public:
SimpleNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
SimpleNet
(
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
Workspace
*
ws
,
DeviceType
type
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
const
NetMode
mode
=
NetMode
::
NORMAL
);
...
@@ -47,14 +51,18 @@ class SimpleNet : public NetBase {
...
@@ -47,14 +51,18 @@ class SimpleNet : public NetBase {
DISABLE_COPY_AND_ASSIGN
(
SimpleNet
);
DISABLE_COPY_AND_ASSIGN
(
SimpleNet
);
};
};
unique_ptr
<
NetBase
>
CreateNet
(
const
NetDef
&
net_def
,
std
::
unique_ptr
<
NetBase
>
CreateNet
(
Workspace
*
ws
,
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
DeviceType
type
,
const
NetDef
&
net_def
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
Workspace
*
ws
,
unique_ptr
<
NetBase
>
CreateNet
(
const
std
::
shared_ptr
<
const
NetDef
>
&
net_def
,
DeviceType
type
,
Workspace
*
ws
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
DeviceType
type
,
std
::
unique_ptr
<
NetBase
>
CreateNet
(
const
NetMode
mode
=
NetMode
::
NORMAL
);
const
std
::
shared_ptr
<
const
OperatorRegistry
>
op_registry
,
const
std
::
shared_ptr
<
const
NetDef
>
net_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
=
NetMode
::
NORMAL
);
}
// namespace mace
}
// namespace mace
...
...
mace/core/operator.cc
浏览文件 @
baf2dcd1
...
@@ -2,12 +2,19 @@
...
@@ -2,12 +2,19 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
// Copyright (c) 2017 XiaoMi All rights reserved.
//
//
#include <sstream>
#include "mace/core/operator.h"
#include "mace/core/operator.h"
namespace
mace
{
namespace
mace
{
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
:
operator_ws_
(
ws
),
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{}
OpKeyBuilder
::
OpKeyBuilder
(
const
char
*
op_name
)
:
op_name_
(
op_name
)
{}
OpKeyBuilder
::
OpKeyBuilder
(
const
char
*
op_name
)
:
op_name_
(
op_name
)
{}
// Records the device type that the operator key is being built for.
//
// Returns a reference to this builder so that calls can be chained, e.g.
//   OpKeyBuilder("AddN").Device(DeviceType::CPU).TypeConstraint<float>("T").Build()
OpKeyBuilder &OpKeyBuilder::Device(DeviceType device) {
  device_type_ = device;
  // The function is declared to return a reference; flowing off the end
  // without a return statement is undefined behavior and would break every
  // chained call made on the result.
  return *this;
}
OpKeyBuilder
&
OpKeyBuilder
::
TypeConstraint
(
const
char
*
attr_name
,
OpKeyBuilder
&
OpKeyBuilder
::
TypeConstraint
(
const
char
*
attr_name
,
const
DataType
allowed
)
{
const
DataType
allowed
)
{
...
@@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
...
@@ -17,61 +24,72 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name,
const
std
::
string
OpKeyBuilder
::
Build
()
{
const
std
::
string
OpKeyBuilder
::
Build
()
{
static
const
std
::
vector
<
std
::
string
>
type_order
=
{
"T"
};
static
const
std
::
vector
<
std
::
string
>
type_order
=
{
"T"
};
std
::
string
key
=
op_name_
;
std
::
stringstream
ss
;
ss
<<
op_name_
;
ss
<<
device_type_
;
for
(
auto
type
:
type_order
)
{
for
(
auto
type
:
type_order
)
{
key
+=
type
+
"_"
+
DataTypeToString
(
type_constraint_
[
type
]);
ss
<<
type
<<
"_"
<<
DataTypeToString
(
type_constraint_
[
type
]);
}
}
return
key
;
}
std
::
map
<
int32_t
,
OperatorRegistry
*>
*
gDeviceTypeRegistry
()
{
return
ss
.
str
();
static
std
::
map
<
int32_t
,
OperatorRegistry
*>
g_device_type_registry
;
return
&
g_device_type_registry
;
}
}
MACE_DEFINE_REGISTRY
(
CPUOperatorRegistry
,
std
::
unique_ptr
<
OperatorBase
>
OperatorRegistry
::
CreateOperator
(
OperatorBase
,
const
OperatorDef
&
operator_def
,
const
OperatorDef
&
,
Workspace
*
ws
,
Workspace
*
);
DeviceType
type
,
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
CPU
,
CPUOperatorRegistry
);
const
NetMode
mode
)
const
{
const
int
dtype
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
MACE_DEFINE_REGISTRY
(
NEONOperatorRegistry
,
operator_def
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
));
OperatorBase
,
const
int
op_mode_i
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
const
OperatorDef
&
,
operator_def
,
"mode"
,
static_cast
<
int
>
(
NetMode
::
NORMAL
));
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
NEON
,
NEONOperatorRegistry
);
MACE_DEFINE_REGISTRY
(
OPENCLOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
MACE_REGISTER_DEVICE_TYPE
(
DeviceType
::
OPENCL
,
OPENCLOperatorRegistry
);
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
)
{
OperatorRegistry
*
registry
=
gDeviceTypeRegistry
()
->
at
(
type
);
const
int
dtype
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
));
const
int
op_mode_i
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"mode"
,
static_cast
<
int
>
(
NetMode
::
NORMAL
));
const
NetMode
op_mode
=
static_cast
<
NetMode
>
(
op_mode_i
);
const
NetMode
op_mode
=
static_cast
<
NetMode
>
(
op_mode_i
);
if
(
op_mode
==
mode
)
{
if
(
op_mode
==
mode
)
{
return
registry
->
Create
(
OpKeyBuilder
(
operator_def
.
type
().
data
())
return
registry_
.
Create
(
.
TypeConstraint
(
"T"
,
static_cast
<
DataType
>
(
dtype
))
OpKeyBuilder
(
operator_def
.
type
().
data
())
.
Build
(),
.
Device
(
type
)
operator_def
,
.
TypeConstraint
(
"T"
,
static_cast
<
DataType
>
(
dtype
))
ws
);
.
Build
(),
operator_def
,
ws
);
}
else
{
}
else
{
return
nullptr
;
return
nullptr
;
}
}
}
}
OperatorBase
::
OperatorBase
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
)
extern
void
Register_AddN
(
OperatorRegistry
*
op_registry
);
:
operator_ws_
(
ws
),
extern
void
Register_BatchNorm
(
OperatorRegistry
*
op_registry
);
operator_def_
(
std
::
make_shared
<
OperatorDef
>
(
operator_def
))
{}
extern
void
Register_BatchToSpaceND
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BiasAdd
(
OperatorRegistry
*
op_registry
);
extern
void
Register_BufferToImage
(
OperatorRegistry
*
op_registry
);
extern
void
Register_ChannelShuffle
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Concat
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Conv2D
(
OperatorRegistry
*
op_registry
);
extern
void
Register_DepthwiseConv2d
(
OperatorRegistry
*
op_registry
);
extern
void
Register_FusedConv2D
(
OperatorRegistry
*
op_registry
);
extern
void
Register_GlobalAvgPooling
(
OperatorRegistry
*
op_registry
);
extern
void
Register_ImageToBuffer
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Pooling
(
OperatorRegistry
*
op_registry
);
extern
void
Register_Relu
(
OperatorRegistry
*
op_registry
);
extern
void
Register_ResizeBilinear
(
OperatorRegistry
*
op_registry
);
extern
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
);
// Populates a freshly constructed registry with the built-in operators by
// invoking each operator's Register_* hook on this instance, in the order
// listed below.
OperatorRegistry::OperatorRegistry() {
  typedef void (*RegisterHook)(OperatorRegistry *);

  // Order matches the original explicit call sequence.
  const RegisterHook hooks[] = {
      Register_AddN,
      Register_BatchNorm,
      Register_BatchToSpaceND,
      Register_BiasAdd,
      Register_BufferToImage,
      Register_ChannelShuffle,
      Register_Concat,
      Register_Conv2D,
      Register_DepthwiseConv2d,
      Register_FusedConv2D,
      Register_GlobalAvgPooling,
      Register_ImageToBuffer,
      Register_Pooling,
      Register_Relu,
      Register_ResizeBilinear,
      Register_SpaceToBatchND,
  };

  for (RegisterHook hook : hooks) {
    hook(this);
  }
}
}
// namespace mace
}
// namespace mace
mace/core/operator.h
浏览文件 @
baf2dcd1
...
@@ -5,13 +5,13 @@
...
@@ -5,13 +5,13 @@
#ifndef MACE_CORE_OPERATOR_H
#ifndef MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#define MACE_CORE_OPERATOR_H
#include "mace/core/common.h"
#include "mace/core/arg_helper.h"
#include "mace/core/arg_helper.h"
#include "mace/core/common.h"
#include "mace/core/future.h"
#include "mace/core/future.h"
#include "mace/core/public/mace.h"
#include "mace/core/registry.h"
#include "mace/core/registry.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/workspace.h"
#include "mace/core/public/mace.h"
namespace
mace
{
namespace
mace
{
...
@@ -102,7 +102,7 @@ class Operator : public OperatorBase {
...
@@ -102,7 +102,7 @@ class Operator : public OperatorBase {
}
}
}
}
}
}
virtual
bool
Run
(
StatsFuture
*
future
)
override
=
0
;
virtual
bool
Run
(
StatsFuture
*
future
)
override
=
0
;
~
Operator
()
noexcept
override
{}
~
Operator
()
noexcept
override
{}
};
};
...
@@ -122,29 +122,12 @@ class Operator : public OperatorBase {
...
@@ -122,29 +122,12 @@ class Operator : public OperatorBase {
#define OP_OUTPUT_TAGS(first_input, ...) \
#define OP_OUTPUT_TAGS(first_input, ...) \
enum _OutputTags { first_input = 0, __VA_ARGS__ }
enum _OutputTags { first_input = 0, __VA_ARGS__ }
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
OperatorRegistry
;
typedef
Registry
<
std
::
string
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*>
*
(
*
RegistryFunction
)();
std
::
map
<
int32_t
,
OperatorRegistry
*>
*
gDeviceTypeRegistry
();
struct
DeviceTypeRegisterer
{
explicit
DeviceTypeRegisterer
(
int32_t
type
,
RegistryFunction
func
)
{
if
(
gDeviceTypeRegistry
()
->
count
(
type
))
{
LOG
(
ERROR
)
<<
"Device type "
<<
type
<<
"registered twice. This should not happen. Did you have "
"duplicated numbers assigned to different devices?"
;
std
::
exit
(
1
);
}
// Calling the registry function to get the actual registry pointer.
gDeviceTypeRegistry
()
->
emplace
(
type
,
func
());
}
};
class
OpKeyBuilder
{
class
OpKeyBuilder
{
public:
public:
explicit
OpKeyBuilder
(
const
char
*
op_name
);
explicit
OpKeyBuilder
(
const
char
*
op_name
);
OpKeyBuilder
&
Device
(
DeviceType
device
);
OpKeyBuilder
&
TypeConstraint
(
const
char
*
attr_name
,
const
DataType
allowed
);
OpKeyBuilder
&
TypeConstraint
(
const
char
*
attr_name
,
const
DataType
allowed
);
template
<
typename
T
>
template
<
typename
T
>
...
@@ -154,6 +137,7 @@ class OpKeyBuilder {
...
@@ -154,6 +137,7 @@ class OpKeyBuilder {
private:
private:
std
::
string
op_name_
;
std
::
string
op_name_
;
DeviceType
device_type_
;
std
::
map
<
std
::
string
,
DataType
>
type_constraint_
;
std
::
map
<
std
::
string
,
DataType
>
type_constraint_
;
};
};
...
@@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
...
@@ -162,48 +146,30 @@ OpKeyBuilder &OpKeyBuilder::TypeConstraint(const char *attr_name) {
return
this
->
TypeConstraint
(
attr_name
,
DataTypeToEnum
<
T
>::
value
);
return
this
->
TypeConstraint
(
attr_name
,
DataTypeToEnum
<
T
>::
value
);
}
}
// Holds a string-keyed table of operator creators and instantiates operators
// from it. Instances are non-copyable.
class OperatorRegistry {
 public:
  // Creator table: maps an operator key string to a factory taking
  // (const OperatorDef &, Workspace *).
  using RegistryType =
      Registry<std::string, OperatorBase, const OperatorDef &, Workspace *>;

  OperatorRegistry();
  ~OperatorRegistry() = default;

  // Gives mutable access to the underlying creator table; this is what the
  // REGISTER_OPERATOR macro passes on when registering an operator class.
  RegistryType *registry() { return &registry_; }

  // Creates the operator described by operator_def for the given device
  // type and net mode. Defined out of line.
  std::unique_ptr<OperatorBase> CreateOperator(const OperatorDef &operator_def,
                                               Workspace *ws,
                                               DeviceType type,
                                               const NetMode mode) const;

 private:
  RegistryType registry_;

  DISABLE_COPY_AND_ASSIGN(OperatorRegistry);
};
#define MACE_REGISTER_DEVICE_TYPE(type, registry_function) \
MACE_DECLARE_REGISTRY
(
OpRegistry
,
namespace { \
static DeviceTypeRegisterer MACE_ANONYMOUS_VARIABLE(DeviceType)( \
type, &registry_function); \
}
MACE_DECLARE_REGISTRY
(
CPUOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_CPU_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(CPUOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_CPU_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(CPUOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY
(
NEONOperatorRegistry
,
OperatorBase
,
const
OperatorDef
&
,
Workspace
*
);
#define REGISTER_NEON_OPERATOR_CREATOR(key, ...) \
MACE_REGISTER_CREATOR(NEONOperatorRegistry, key, __VA_ARGS__)
#define REGISTER_NEON_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(NEONOperatorRegistry, name, __VA_ARGS__)
MACE_DECLARE_REGISTRY
(
OPENCLOperatorRegistry
,
OperatorBase
,
OperatorBase
,
const
OperatorDef
&
,
const
OperatorDef
&
,
Workspace
*
);
Workspace
*
);
#define REGISTER_OPENCL_OPERATOR_CREATOR(key, ...) \
#define REGISTER_OPERATOR(op_registry, name, ...) \
MACE_REGISTER_CREATOR(OPENCLOperatorRegistry, key, __VA_ARGS__)
MACE_REGISTER_CLASS(OpRegistry, op_registry->registry(), name, __VA_ARGS__)
#define REGISTER_OPENCL_OPERATOR(name, ...) \
MACE_REGISTER_CLASS(OPENCLOperatorRegistry, name, __VA_ARGS__)
unique_ptr
<
OperatorBase
>
CreateOperator
(
const
OperatorDef
&
operator_def
,
Workspace
*
ws
,
DeviceType
type
,
const
NetMode
mode
);
}
// namespace mace
}
// namespace mace
...
...
mace/core/public/mace.h
浏览文件 @
baf2dcd1
...
@@ -302,10 +302,12 @@ class NetDef {
...
@@ -302,10 +302,12 @@ class NetDef {
class
Workspace
;
class
Workspace
;
class
NetBase
;
class
NetBase
;
class
OperatorRegistry
;
class
MaceEngine
{
class
MaceEngine
{
public:
public:
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
);
explicit
MaceEngine
(
const
NetDef
*
net_def
,
DeviceType
device_type
);
~
MaceEngine
();
~
MaceEngine
();
bool
Run
(
const
float
*
input
,
bool
Run
(
const
float
*
input
,
const
std
::
vector
<
int64_t
>
&
input_shape
,
const
std
::
vector
<
int64_t
>
&
input_shape
,
...
@@ -314,6 +316,7 @@ class MaceEngine {
...
@@ -314,6 +316,7 @@ class MaceEngine {
MaceEngine
&
operator
=
(
const
MaceEngine
&
)
=
delete
;
MaceEngine
&
operator
=
(
const
MaceEngine
&
)
=
delete
;
private:
private:
std
::
shared_ptr
<
OperatorRegistry
>
op_registry_
;
DeviceType
device_type_
;
DeviceType
device_type_
;
std
::
unique_ptr
<
Workspace
>
ws_
;
std
::
unique_ptr
<
Workspace
>
ws_
;
std
::
unique_ptr
<
NetBase
>
net_
;
std
::
unique_ptr
<
NetBase
>
net_
;
...
...
mace/core/registry.h
浏览文件 @
baf2dcd1
...
@@ -17,24 +17,27 @@ class Registry {
...
@@ -17,24 +17,27 @@ class Registry {
Registry
()
:
registry_
()
{}
Registry
()
:
registry_
()
{}
void
Register
(
const
SrcType
&
key
,
Creator
creator
)
{
void
Register
(
const
SrcType
&
key
,
Creator
creator
)
{
VLOG
(
2
)
<<
"Registering: "
<<
key
;
std
::
lock_guard
<
std
::
mutex
>
lock
(
register_mutex_
);
std
::
lock_guard
<
std
::
mutex
>
lock
(
register_mutex_
);
MACE_CHECK
(
registry_
.
count
(
key
)
==
0
,
"Key already registered."
);
MACE_CHECK
(
registry_
.
count
(
key
)
==
0
,
"Key already registered."
);
registry_
[
key
]
=
creator
;
registry_
[
key
]
=
creator
;
}
}
inline
bool
Has
(
const
SrcType
&
key
)
{
return
registry_
.
count
(
key
)
!=
0
;
}
inline
bool
Has
(
const
SrcType
&
key
)
const
{
return
registry_
.
count
(
key
)
!=
0
;
}
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
{
unique_ptr
<
ObjectType
>
Create
(
const
SrcType
&
key
,
Args
...
args
)
const
{
if
(
registry_
.
count
(
key
)
==
0
)
{
if
(
registry_
.
count
(
key
)
==
0
)
{
LOG
(
FATAL
)
<<
"Key not registered: "
<<
key
;
LOG
(
FATAL
)
<<
"Key not registered: "
<<
key
;
}
}
return
registry_
[
key
]
(
args
...);
return
registry_
.
at
(
key
)
(
args
...);
}
}
/**
/**
* Returns the keys currently registered as a vector.
* Returns the keys currently registered as a vector.
*/
*/
vector
<
SrcType
>
Keys
()
{
vector
<
SrcType
>
Keys
()
const
{
vector
<
SrcType
>
keys
;
vector
<
SrcType
>
keys
;
for
(
const
auto
&
it
:
registry_
)
{
for
(
const
auto
&
it
:
registry_
)
{
keys
.
push_back
(
it
.
first
);
keys
.
push_back
(
it
.
first
);
...
@@ -77,39 +80,31 @@ class Registerer {
...
@@ -77,39 +80,31 @@ class Registerer {
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
typedef Registerer<SrcType, ObjectType, ##__VA_ARGS__> \
Registerer##RegistryName;
Registerer##RegistryName;
/*
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
#define MACE_DEFINE_TYPED_REGISTRY(RegistryName, SrcType, ObjectType, ...) \
Registry<SrcType, ObjectType, ##__VA_ARGS__> *RegistryName() { \
Registry<SrcType, ObjectType, ##__VA_ARGS__> *RegistryName() { \
static Registry<SrcType, ObjectType, ##__VA_ARGS__> *registry = \
static Registry<SrcType, ObjectType, ##__VA_ARGS__> *registry = \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
new Registry<SrcType, ObjectType, ##__VA_ARGS__>(); \
return registry; \
return registry; \
}
}
*/
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
#define MACE_DECLARE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
MACE_DECLARE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
##__VA_ARGS__)
/*
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
#define MACE_DEFINE_REGISTRY(RegistryName, ObjectType, ...) \
MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
MACE_DEFINE_TYPED_REGISTRY(RegistryName, std::string, ObjectType, \
##__VA_ARGS__)
##__VA_ARGS__)
*/
#define MACE_REGISTER_TYPED_CREATOR(RegistryName, key, ...) \
#define MACE_REGISTER_TYPED_CLASS(RegistryName, registry, key, ...) \
namespace { \
Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(l_##RegistryName)( \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, registry, Registerer##RegistryName::DefaultCreator<__VA_ARGS__>);
key, RegistryName(), __VA_ARGS__);
#define MACE_REGISTER_TYPED_CLASS(RegistryName, key, ...) \
namespace { \
static Registerer##RegistryName MACE_ANONYMOUS_VARIABLE(g_##RegistryName)( \
key, \
RegistryName(), \
Registerer##RegistryName::DefaultCreator<__VA_ARGS__>); \
}
#define MACE_REGISTER_CREATOR(RegistryName, key, ...) \
MACE_REGISTER_TYPED_CREATOR(RegistryName, key, __VA_ARGS__)
#define MACE_REGISTER_CLASS(RegistryName, key, ...) \
#define MACE_REGISTER_CLASS(RegistryName,
registry,
key, ...) \
MACE_REGISTER_TYPED_CLASS(RegistryName, key, __VA_ARGS__)
MACE_REGISTER_TYPED_CLASS(RegistryName,
registry,
key, __VA_ARGS__)
}
// namespace mace
}
// namespace mace
...
...
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
baf2dcd1
...
@@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) {
...
@@ -127,6 +127,4 @@ void OpenCLAllocator::Unmap(void *buffer, void *mapped_ptr) {
bool
OpenCLAllocator
::
OnHost
()
{
return
false
;
}
bool
OpenCLAllocator
::
OnHost
()
{
return
false
;
}
MACE_REGISTER_ALLOCATOR
(
DeviceType
::
OPENCL
,
new
OpenCLAllocator
());
}
// namespace mace
}
// namespace mace
mace/core/tensor.h
浏览文件 @
baf2dcd1
...
@@ -105,7 +105,8 @@ class Tensor {
...
@@ -105,7 +105,8 @@ class Tensor {
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
inline
index_t
dim_size
()
const
{
return
shape_
.
size
();
}
inline
index_t
dim
(
unsigned
int
index
)
const
{
inline
index_t
dim
(
unsigned
int
index
)
const
{
MACE_CHECK
(
index
<
shape_
.
size
(),
"Exceeding ndim limit"
);
MACE_CHECK
(
index
<
shape_
.
size
(),
"Dim out of range: "
,
index
,
" >= "
,
shape_
.
size
());
return
shape_
[
index
];
return
shape_
[
index
];
}
}
...
...
mace/examples/BUILD
浏览文件 @
baf2dcd1
...
@@ -11,7 +11,6 @@ cc_binary(
...
@@ -11,7 +11,6 @@ cc_binary(
deps
=
[
deps
=
[
"//mace/core"
,
"//mace/core"
,
"//mace/ops"
,
"//mace/ops"
,
"//mace/core:opencl_runtime"
,
],
],
)
)
...
...
mace/kernels/BUILD
浏览文件 @
baf2dcd1
...
@@ -26,7 +26,6 @@ cc_library(
...
@@ -26,7 +26,6 @@ cc_library(
linkopts
=
if_android
([
"-lm"
]),
linkopts
=
if_android
([
"-lm"
]),
deps
=
[
deps
=
[
"//mace/core"
,
"//mace/core"
,
"//mace/core:opencl_runtime"
,
"//mace/utils:utils_hdrs"
,
"//mace/utils:utils_hdrs"
,
],
],
)
)
...
...
mace/ops/addn.cc
浏览文件 @
baf2dcd1
...
@@ -6,26 +6,32 @@
...
@@ -6,26 +6,32 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
void
Register_AddN
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
AddNOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
AddNOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
AddNOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
AddNOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
AddNOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"AddN"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"AddN"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
AddNOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
AddNOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/addn_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
...
@@ -15,8 +15,8 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
net
.
AddRandomInput
<
D
,
float
>
(
net
.
AddRandomInput
<
D
,
float
>
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
internal
::
MakeString
(
"Input"
,
i
).
c_str
(),
{
n
,
h
,
w
,
c
});
{
n
,
h
,
w
,
c
});
}
}
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
...
@@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
...
@@ -30,16 +30,16 @@ static void AddNBenchmark(int iters, int inputs, int n, int h, int w, int c) {
op_def_builder
.
Input
(
internal
::
MakeString
(
"InputImage"
,
i
).
c_str
());
op_def_builder
.
Input
(
internal
::
MakeString
(
"InputImage"
,
i
).
c_str
());
}
}
op_def_builder
.
Output
(
"OutputImage"
)
op_def_builder
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
}
else
{
OpDefBuilder
op_def_builder
(
"AddN"
,
"AddNBM"
);
OpDefBuilder
op_def_builder
(
"AddN"
,
"AddNBM"
);
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
inputs
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
}
}
op_def_builder
.
Output
(
"Output"
)
op_def_builder
.
Output
(
"Output"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
}
// Warm-up
// Warm-up
...
...
mace/ops/batch_norm.cc
浏览文件 @
baf2dcd1
...
@@ -6,26 +6,32 @@
...
@@ -6,26 +6,32 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
void
Register_BatchNorm
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
BatchNormOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchNormOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
BatchNormOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchNorm"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchNorm"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
BatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
BatchNormOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/batch_norm_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -23,11 +23,16 @@ static void BatchNorm(
...
@@ -23,11 +23,16 @@ static void BatchNorm(
net
.
AddRandomInput
<
D
,
T
>
(
"Var"
,
{
channels
},
true
);
net
.
AddRandomInput
<
D
,
T
>
(
"Var"
,
{
channels
},
true
);
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"ScaleImage"
)
.
Input
(
"ScaleImage"
)
...
@@ -37,8 +42,7 @@ static void BatchNorm(
...
@@ -37,8 +42,7 @@ static void BatchNorm(
.
AddFloatArg
(
"epsilon"
,
1e-3
)
.
AddFloatArg
(
"epsilon"
,
1e-3
)
.
Output
(
"Output"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
}
else
{
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormBM"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"Scale"
)
.
Input
(
"Scale"
)
...
@@ -50,7 +54,6 @@ static void BatchNorm(
...
@@ -50,7 +54,6 @@ static void BatchNorm(
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
}
// tuning
// tuning
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
setenv
(
"MACE_TUNING"
,
"1"
,
1
);
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
...
@@ -79,9 +82,8 @@ static void BatchNorm(
...
@@ -79,9 +82,8 @@ static void BatchNorm(
} \
} \
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_BATCH_NORM_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
#define BM_BATCH_NORM(N, C, H, W, TYPE) \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, CPU); \
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, NEON);\
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM_MACRO(N, C, H, W, TYPE, OPENCL);
BM_BATCH_NORM
(
1
,
1
,
512
,
512
,
float
);
BM_BATCH_NORM
(
1
,
1
,
512
,
512
,
float
);
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
baf2dcd1
...
@@ -15,18 +15,23 @@ void Simple() {
...
@@ -15,18 +15,23 @@ void Simple() {
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Scale"
,
{
1
},
{
4.0
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Offset"
,
{
1
},
{
2.0
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Mean"
,
{
1
},
{
10
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Var"
,
{
1
},
{
11.67
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -41,7 +46,8 @@ void Simple() {
...
@@ -41,7 +46,8 @@ void Simple() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -64,9 +70,7 @@ void Simple() {
...
@@ -64,9 +70,7 @@ void Simple() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
}
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
TEST_F
(
BatchNormOpTest
,
SimpleCPU
)
{
Simple
<
DeviceType
::
CPU
>
();
}
Simple
<
DeviceType
::
CPU
>
();
}
/*
/*
TEST_F(BatchNormOpTest, SimpleNEON) {
TEST_F(BatchNormOpTest, SimpleNEON) {
...
@@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) {
...
@@ -74,9 +78,7 @@ TEST_F(BatchNormOpTest, SimpleNEON) {
}
}
*/
*/
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
TEST_F
(
BatchNormOpTest
,
SimpleOPENCL
)
{
Simple
<
DeviceType
::
OPENCL
>
();
}
Simple
<
DeviceType
::
OPENCL
>
();
}
/*
/*
TEST_F(BatchNormOpTest, SimpleRandomNeon) {
TEST_F(BatchNormOpTest, SimpleRandomNeon) {
...
@@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
...
@@ -100,7 +102,8 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.Finalize(net.NewOperatorDef());
.Finalize(net.NewOperatorDef());
// Add input data
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
...
@@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
...
@@ -141,7 +144,8 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.Finalize(net.NewOperatorDef());
.Finalize(net.NewOperatorDef());
// Add input data
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height,
width});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Scale", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Offset", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
net.AddRandomInput<DeviceType::CPU, float>("Mean", {channels});
...
@@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
...
@@ -184,7 +188,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
@@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
...
@@ -198,11 +203,16 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
...
@@ -223,7 +233,8 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
...
@@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
...
@@ -249,7 +260,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
@@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
...
@@ -263,11 +275,16 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
...
@@ -289,7 +306,8 @@ TEST_F(BatchNormOpTest, SimpleRandomHalfOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
}
...
@@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
...
@@ -315,7 +333,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
@@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
...
@@ -328,13 +347,17 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
Tensor
expected
;
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Scale"
,
"ScaleImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
...
@@ -355,7 +378,8 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
...
@@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
...
@@ -381,7 +405,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Scale"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Offset"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Mean"
,
{
channels
});
...
@@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
...
@@ -394,13 +419,17 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
Tensor
expected
;
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Scale"
,
"ScaleImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Offset"
,
"OffsetImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Mean"
,
"MeanImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
DeviceType
::
OPENCL
,
half
>
(
net
,
"Var"
,
"VarImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
OpDefBuilder
(
"BatchNorm"
,
"BatchNormTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
...
@@ -422,7 +451,8 @@ TEST_F(BatchNormOpTest, ComplexRandomHalfOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
}
}
}
}
mace/ops/batch_to_space.cc
浏览文件 @
baf2dcd1
...
@@ -6,13 +6,17 @@
...
@@ -6,13 +6,17 @@
namespace
mace
{
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchToSpaceND"
)
void
Register_BatchToSpaceND
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/batch_to_space_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -14,7 +14,8 @@ static void BMBatchToSpace(
...
@@ -14,7 +14,8 @@ static void BMBatchToSpace(
OpsTestNet
net
;
OpsTestNet
net
;
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
...
@@ -36,16 +37,17 @@ static void BMBatchToSpace(
...
@@ -36,16 +37,17 @@ static void BMBatchToSpace(
}
}
#define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \
#define BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, DEVICE) \
static void BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
static void \
int iters) { \
BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE( \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int iters) { \
mace::testing::ItemsProcessed(tot); \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::ItemsProcessed(tot); \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \
BMBatchToSpace<DEVICE, TYPE>(iters, N, C, H, W, ARG); \
} \
BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
BENCHMARK(BM_BATCH_TO_SPACE_##N##_##H##_##W##_##C##_##ARG##_##TYPE##_##DEVICE)
#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE)
\
#define BM_BATCH_TO_SPACE(N, H, W, C, ARG, TYPE) \
BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL);
BM_BATCH_TO_SPACE_MACRO(N, H, W, C, ARG, TYPE, OPENCL);
BM_BATCH_TO_SPACE
(
128
,
8
,
8
,
128
,
2
,
float
);
BM_BATCH_TO_SPACE
(
128
,
8
,
8
,
128
,
2
,
float
);
...
...
mace/ops/bias_add.cc
浏览文件 @
baf2dcd1
...
@@ -6,28 +6,34 @@
...
@@ -6,28 +6,34 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"BiasAdd"
)
void
Register_BiasAdd
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
BiasAddOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BiasAddOp
<
DeviceType
::
CPU
,
float
>
);
/*
/*
#if __ARM_NEON
#if __ARM_NEON
REGISTER_NEON_OPERATOR(OpKeyBuilder("BiasAdd")
REGISTER_OPERATOR(op_registry,OpKeyBuilder("BiasAdd")
.TypeConstraint<float>("T")
.Device(DeviceType::NEON)
.Build(),
.TypeConstraint<float>("T")
BiasAddOp<DeviceType::NEON, float>);
.Build(),
#endif // __ARM_NEON
BiasAddOp<DeviceType::NEON, float>);
*/
#endif // __ARM_NEON
*/
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BiasAdd"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
BiasAddOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
BiasAddOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BiasAdd"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BiasAdd"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
BiasAddOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
BiasAddOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/bias_add_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -9,8 +9,7 @@
...
@@ -9,8 +9,7 @@
namespace
mace
{
namespace
mace
{
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
BiasAdd
(
static
void
BiasAdd
(
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
int
iters
,
int
batch
,
int
channels
,
int
height
,
int
width
)
{
mace
::
testing
::
StopTiming
();
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -20,15 +19,16 @@ static void BiasAdd(
...
@@ -20,15 +19,16 @@ static void BiasAdd(
net
.
AddRandomInput
<
D
,
T
>
(
"Bias"
,
{
channels
},
true
);
net
.
AddRandomInput
<
D
,
T
>
(
"Bias"
,
{
channels
},
true
);
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddBM"
)
OpDefBuilder
(
"BiasAdd"
,
"BiasAddBM"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"BiasImage"
)
.
Input
(
"BiasImage"
)
.
Output
(
"Output"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
}
else
{
else
{
OpDefBuilder
(
"BiasAdd"
,
"BiasAddBM"
)
OpDefBuilder
(
"BiasAdd"
,
"BiasAddBM"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"Bias"
)
.
Input
(
"Bias"
)
...
@@ -51,12 +51,12 @@ static void BiasAdd(
...
@@ -51,12 +51,12 @@ static void BiasAdd(
#define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \
#define BM_BIAS_ADD_MACRO(N, C, H, W, TYPE, DEVICE) \
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
static void BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE( \
int iters) {
\
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot);
\
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
BiasAdd<DEVICE, TYPE>(iters, N, C, H, W); \
}
\
} \
BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_BIAS_ADD_##N##_##C##_##H##_##W##_##TYPE##_##DEVICE)
#define BM_BIAS_ADD(N, C, H, W, TYPE) \
#define BM_BIAS_ADD(N, C, H, W, TYPE) \
...
...
mace/ops/bias_add_test.cc
浏览文件 @
baf2dcd1
...
@@ -15,12 +15,14 @@ void BiasAddSimple() {
...
@@ -15,12 +15,14 @@ void BiasAddSimple() {
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
6
,
2
,
1
},
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
{
5
,
5
,
7
,
7
,
9
,
9
,
11
,
11
,
13
,
13
,
15
,
15
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
0.5
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
1
},
{
0.5
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -31,7 +33,8 @@ void BiasAddSimple() {
...
@@ -31,7 +33,8 @@ void BiasAddSimple() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -43,16 +46,14 @@ void BiasAddSimple() {
...
@@ -43,16 +46,14 @@ void BiasAddSimple() {
}
}
// Check
// Check
auto
expected
=
auto
expected
=
CreateTensor
<
float
>
(
CreateTensor
<
float
>
({
1
,
6
,
2
,
1
},
{
5.5
,
5.5
,
7.5
,
7.5
,
9.5
,
9.5
,
11.5
,
{
1
,
6
,
2
,
1
}
,
11.5
,
13.5
,
13.5
,
15.5
,
15.5
});
{
5.5
,
5.5
,
7.5
,
7.5
,
9.5
,
9.5
,
11.5
,
11.5
,
13.5
,
13.5
,
15.5
,
15.5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-2
);
}
}
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleCPU
)
{
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleCPU
)
{
BiasAddSimple
<
DeviceType
::
CPU
>
();
}
BiasAddSimple
<
DeviceType
::
CPU
>
();
}
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleOPENCL
)
{
TEST_F
(
BiasAddOpTest
,
BiasAddSimpleOPENCL
)
{
BiasAddSimple
<
DeviceType
::
OPENCL
>
();
BiasAddSimple
<
DeviceType
::
OPENCL
>
();
...
@@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
...
@@ -76,7 +77,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
channels
},
true
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
channels
},
true
);
// run cpu
// run cpu
...
@@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
...
@@ -87,8 +89,10 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
...
@@ -100,7 +104,8 @@ TEST_F(BiasAddOpTest, SimpleRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
...
@@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
...
@@ -122,7 +127,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
channels
},
true
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Bias"
,
{
channels
},
true
);
// run cpu
// run cpu
...
@@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
...
@@ -132,10 +138,11 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
Tensor
expected
;
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// Run on opencl
// Run on opencl
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
OpDefBuilder
(
"BiasAdd"
,
"BiasAddTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
...
@@ -147,8 +154,8 @@ TEST_F(BiasAddOpTest, ComplexRandomOPENCL) {
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
Sync
();
net
.
Sync
();
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
1e-2
);
}
}
}
}
mace/ops/buffer_to_image.cc
浏览文件 @
baf2dcd1
...
@@ -6,14 +6,18 @@
...
@@ -6,14 +6,18 @@
namespace
mace
{
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BufferToImage"
)
void
Register_BufferToImage
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BufferToImage"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
BufferToImageOp
<
DeviceType
::
OPENCL
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BufferToImageOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"BufferToImage"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BufferToImage"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
BufferToImageOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
BufferToImageOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/channel_shuffle.cc
浏览文件 @
baf2dcd1
...
@@ -6,9 +6,12 @@
...
@@ -6,9 +6,12 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"ChannelShuffle"
)
void
Register_ChannelShuffle
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ChannelShuffle"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
ChannelShuffleOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ChannelShuffleOp
<
DeviceType
::
CPU
,
float
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -23,7 +23,8 @@ static void ChannelShuffle(
...
@@ -23,7 +23,8 @@ static void ChannelShuffle(
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
mace/ops/channel_shuffle_test.cc
浏览文件 @
baf2dcd1
...
@@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
...
@@ -17,7 +17,6 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
.
AddIntArg
(
"group"
,
4
)
.
AddIntArg
(
"group"
,
4
)
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
8
,
1
,
2
},
"Input"
,
{
1
,
8
,
1
,
2
},
...
...
mace/ops/concat.cc
浏览文件 @
baf2dcd1
...
@@ -6,21 +6,28 @@
...
@@ -6,21 +6,28 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
void
Register_Concat
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
ConcatOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
ConcatOp
<
DeviceType
::
CPU
,
float
>
);
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
ConcatOp
<
DeviceType
::
CPU
,
half
>
);
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
half
>
);
}
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Concat"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
}
// namespace mace
mace/ops/concat_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters,
...
@@ -60,8 +60,10 @@ static void OpenclConcatHelper(int iters,
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input0"
,
shape0
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input0"
,
shape0
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input1"
,
shape1
);
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input1"
,
shape1
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input0"
,
"InputImage0"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input0"
,
"InputImage0"
,
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input1"
,
"InputImage1"
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
"Input1"
,
"InputImage1"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Concat"
,
"ConcatBM"
)
OpDefBuilder
(
"Concat"
,
"ConcatBM"
)
.
Input
(
"InputImage0"
)
.
Input
(
"InputImage0"
)
.
Input
(
"InputImage1"
)
.
Input
(
"InputImage1"
)
...
@@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters,
...
@@ -75,7 +77,8 @@ static void OpenclConcatHelper(int iters,
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
}
}
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
const
int64_t
tot
=
static_cast
<
int64_t
>
(
iters
)
*
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
(
net
.
GetTensor
(
"Input0"
)
->
size
()
+
net
.
GetTensor
(
"Input1"
)
->
size
());
mace
::
testing
::
ItemsProcessed
(
tot
);
mace
::
testing
::
ItemsProcessed
(
tot
);
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
testing
::
BytesProcessed
(
tot
*
sizeof
(
T
));
...
...
mace/ops/concat_test.cc
浏览文件 @
baf2dcd1
...
@@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) {
...
@@ -97,7 +97,9 @@ TEST_F(ConcatOpTest, CPURandom) {
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
builder
=
builder
.
Input
((
"Input"
+
ToString
(
i
)).
c_str
());
builder
=
builder
.
Input
((
"Input"
+
ToString
(
i
)).
c_str
());
}
}
builder
.
AddIntArg
(
"axis"
,
axis
).
Output
(
"Output"
).
Finalize
(
net
.
NewOperatorDef
());
builder
.
AddIntArg
(
"axis"
,
axis
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
index_t
>
shape_data
;
std
::
vector
<
index_t
>
shape_data
;
GenerateRandomIntTypeData
<
index_t
>
({
dim
},
shape_data
,
1
,
dim
);
GenerateRandomIntTypeData
<
index_t
>
({
dim
},
shape_data
,
1
,
dim
);
...
@@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) {
...
@@ -110,8 +112,8 @@ TEST_F(ConcatOpTest, CPURandom) {
concat_axis_size
+=
input_shapes
[
i
][
axis
];
concat_axis_size
+=
input_shapes
[
i
][
axis
];
GenerateRandomRealTypeData
(
input_shapes
[
i
],
inputs
[
i
]);
GenerateRandomRealTypeData
(
input_shapes
[
i
],
inputs
[
i
]);
input_ptrs
[
i
]
=
inputs
[
i
].
data
();
input_ptrs
[
i
]
=
inputs
[
i
].
data
();
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
(
"Input"
+
ToString
(
i
)).
c_str
(),
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
input_shapes
[
i
],
inputs
[
i
]);
(
"Input"
+
ToString
(
i
)).
c_str
(),
input_shapes
[
i
],
inputs
[
i
]);
}
}
// Run
// Run
...
@@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) {
...
@@ -137,7 +139,7 @@ TEST_F(ConcatOpTest, CPURandom) {
}
}
}
}
template
<
typename
T
>
template
<
typename
T
>
void
OpenclRandomTest
(
const
std
::
vector
<
std
::
vector
<
index_t
>>
&
shapes
,
void
OpenclRandomTest
(
const
std
::
vector
<
std
::
vector
<
index_t
>>
&
shapes
,
const
int
axis
)
{
const
int
axis
)
{
srand
(
time
(
nullptr
));
srand
(
time
(
nullptr
));
...
@@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
...
@@ -149,9 +151,9 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
const
std
::
string
input_name
=
(
"Input"
+
ToString
(
i
)).
c_str
();
const
std
::
string
input_name
=
(
"Input"
+
ToString
(
i
)).
c_str
();
const
std
::
string
image_name
=
(
"InputImage"
+
ToString
(
i
)).
c_str
();
const
std
::
string
image_name
=
(
"InputImage"
+
ToString
(
i
)).
c_str
();
concat_axis_size
+=
shapes
[
i
][
axis
];
concat_axis_size
+=
shapes
[
i
][
axis
];
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
input_name
,
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
input_name
,
shapes
[
i
]);
shapes
[
i
]);
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
input_name
,
image_name
,
BufferToImage
<
DeviceType
::
OPENCL
,
T
>
(
net
,
input_name
,
image_name
,
kernels
::
BufferType
::
IN_OUT
);
kernels
::
BufferType
::
IN_OUT
);
}
}
auto
builder
=
OpDefBuilder
(
"Concat"
,
"ConcatTest"
);
auto
builder
=
OpDefBuilder
(
"Concat"
,
"ConcatTest"
);
...
@@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
...
@@ -167,7 +169,8 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
// Run
// Run
net
.
RunOp
(
DeviceType
::
OPENCL
);
net
.
RunOp
(
DeviceType
::
OPENCL
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
DeviceType
::
OPENCL
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
// Check
auto
output
=
net
.
GetOutput
(
"Output"
);
auto
output
=
net
.
GetOutput
(
"Output"
);
...
@@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
...
@@ -182,15 +185,16 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
while
(
output_ptr
!=
(
output
->
data
<
float
>
()
+
output
->
size
()))
{
while
(
output_ptr
!=
(
output
->
data
<
float
>
()
+
output
->
size
()))
{
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
index_t
num_elements
=
index_t
num_elements
=
std
::
accumulate
(
shapes
[
i
].
begin
()
+
axis
,
shapes
[
i
].
end
(),
std
::
accumulate
(
shapes
[
i
].
begin
()
+
axis
,
shapes
[
i
].
end
(),
1
,
1
,
std
::
multiplies
<
index_t
>
());
std
::
multiplies
<
index_t
>
());
const
std
::
string
input_name
=
(
"Input"
+
ToString
(
i
)).
c_str
();
const
std
::
string
input_name
=
(
"Input"
+
ToString
(
i
)).
c_str
();
const
Tensor
*
input_tensor
=
net
.
GetTensor
(
input_name
.
data
());
const
Tensor
*
input_tensor
=
net
.
GetTensor
(
input_name
.
data
());
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
const
float
*
input_ptr
=
input_tensor
->
data
<
float
>
()
+
k
*
num_elements
;
const
float
*
input_ptr
=
input_tensor
->
data
<
float
>
()
+
k
*
num_elements
;
for
(
int
j
=
0
;
j
<
num_elements
;
++
j
)
{
for
(
int
j
=
0
;
j
<
num_elements
;
++
j
)
{
EXPECT_NEAR
(
*
(
input_ptr
+
j
),
*
output_ptr
++
,
1e-2
)
<<
"With index: "
<<
i
<<
", "
<<
j
;
EXPECT_NEAR
(
*
(
input_ptr
+
j
),
*
output_ptr
++
,
1e-2
)
<<
"With index: "
<<
i
<<
", "
<<
j
;
}
}
}
}
k
++
;
k
++
;
...
@@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
...
@@ -198,25 +202,13 @@ void OpenclRandomTest(const std::vector<std::vector<index_t>> &shapes,
}
}
TEST_F
(
ConcatOpTest
,
OPENCLAligned
)
{
TEST_F
(
ConcatOpTest
,
OPENCLAligned
)
{
OpenclRandomTest
<
float
>
({
OpenclRandomTest
<
float
>
({{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}},
3
);
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}
},
3
);
}
}
TEST_F
(
ConcatOpTest
,
OPENCLHalfAligned
)
{
TEST_F
(
ConcatOpTest
,
OPENCLHalfAligned
)
{
OpenclRandomTest
<
half
>
({
OpenclRandomTest
<
half
>
({{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}},
3
);
{
3
,
32
,
32
,
32
},
{
3
,
32
,
32
,
64
}
},
3
);
}
}
TEST_F
(
ConcatOpTest
,
OPENCLUnAligned
)
{
TEST_F
(
ConcatOpTest
,
OPENCLUnAligned
)
{
OpenclRandomTest
<
float
>
({
OpenclRandomTest
<
float
>
({{
3
,
32
,
32
,
13
},
{
3
,
32
,
32
,
17
}},
3
);
{
3
,
32
,
32
,
13
},
{
3
,
32
,
32
,
17
}
},
3
);
}
}
mace/ops/conv_2d.cc
浏览文件 @
baf2dcd1
...
@@ -6,31 +6,38 @@
...
@@ -6,31 +6,38 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
void
Register_Conv2D
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
Conv2dOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
Conv2dOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
Conv2dOp
<
DeviceType
::
CPU
,
half
>
);
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
CPU
,
half
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
Conv2dOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
Conv2dOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
Conv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Conv2D"
)
.
TypeConstraint
<
half
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Conv2D"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
Conv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
Conv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/conv_2d_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -33,9 +33,12 @@ static void Conv2d(int iters,
...
@@ -33,9 +33,12 @@ static void Conv2d(int iters,
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"FilterImage"
)
...
@@ -89,7 +92,7 @@ static void Conv2d(int iters,
...
@@ -89,7 +92,7 @@ static void Conv2d(int iters,
BENCHMARK( \
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE)
\
#define BM_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
BM_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
// ICNet
// ICNet
...
@@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
...
@@ -106,28 +109,29 @@ BM_CONV_2D(1, 3, 512, 512, 7, 7, 2, SAME, 64, half);
BM_CONV_2D
(
1
,
512
,
64
,
64
,
1
,
1
,
1
,
SAME
,
256
,
half
);
BM_CONV_2D
(
1
,
512
,
64
,
64
,
1
,
1
,
1
,
SAME
,
256
,
half
);
// Test RGB <-> YUV
// Test RGB <-> YUV
//BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
//
BM_CONV_2D(1, 3, 2160, 1080, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
//
BM_CONV_2D(1, 3, 480, 480, 1, 1, 1, VALID, 3, float);
//
//
//BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad alignments
// BM_CONV_2D(1, 64, 33, 31, 1, 1, 1, VALID, 128, float); // Test bad
//BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
// alignments
//BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
// BM_CONV_2D(1, 3, 512, 512, 1, 1, 1, VALID, 3, float);
//BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
// BM_CONV_2D(1, 32, 112, 112, 1, 1, 1, VALID, 64, float);
//BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
// BM_CONV_2D(1, 64, 56, 56, 1, 1, 1, VALID, 128, float);
//BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
// BM_CONV_2D(1, 256, 28, 28, 1, 1, 1, VALID, 256, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
// BM_CONV_2D(1, 1024, 7, 7, 1, 1, 1, VALID, 1024, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
// BM_CONV_2D(1, 3, 512, 512, 3, 3, 1, VALID, 3, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 1, SAME, 128, float);
//BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
// BM_CONV_2D(1, 3, 512, 512, 3, 3, 2, VALID, 3, float);
//BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, VALID, 128, float);
//BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 33, 31, 3, 3, 2, SAME, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, VALID, 128, float);
//BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 32, 5, 5, 1, SAME, 128, float);
// BM_CONV_2D(1, 64, 32, 31, 5, 5, 1, SAME, 128, float);
}
// namespace mace
}
// namespace mace
mace/ops/conv_2d_test.cc
浏览文件 @
baf2dcd1
...
@@ -10,7 +10,7 @@ using namespace mace;
...
@@ -10,7 +10,7 @@ using namespace mace;
class
Conv2dOpTest
:
public
OpsTestBase
{};
class
Conv2dOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestSimple3x3VALID
()
{
void
TestSimple3x3VALID
()
{
OpsTestNet
net
;
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
@@ -42,10 +42,9 @@ void TestSimple3x3VALID() {
...
@@ -42,10 +42,9 @@ void TestSimple3x3VALID() {
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
1
},
{
18.1
f
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
1
,
1
},
{
18.1
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestSimple3x3SAME
()
{
void
TestSimple3x3SAME
()
{
OpsTestNet
net
;
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
@@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) {
...
@@ -86,7 +85,7 @@ TEST_F(Conv2dOpTest, NEONSimple) {
}
}
#endif
#endif
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3VALID
()
{
void
TestNHWCSimple3x3VALID
()
{
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
...
@@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() {
...
@@ -100,9 +99,12 @@ void TestNHWCSimple3x3VALID() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"FilterImage"
)
...
@@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() {
...
@@ -117,7 +119,8 @@ void TestNHWCSimple3x3VALID() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
@@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() {
...
@@ -138,7 +141,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3SAME
()
{
void
TestNHWCSimple3x3SAME
()
{
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() {
...
@@ -153,9 +156,12 @@ void TestNHWCSimple3x3SAME() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"FilterImage"
)
...
@@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() {
...
@@ -170,7 +176,8 @@ void TestNHWCSimple3x3SAME() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
@@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) {
...
@@ -204,7 +211,7 @@ TEST_F(Conv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestSimple3x3WithoutBias
()
{
void
TestSimple3x3WithoutBias
()
{
OpsTestNet
net
;
OpsTestNet
net
;
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
...
@@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() {
...
@@ -234,14 +241,13 @@ void TestSimple3x3WithoutBias() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
#ifdef __ARM_NEON
#ifdef __ARM_NEON
TEST_F
(
Conv2dOpTest
,
NEONWithouBias
)
{
TEST_F
(
Conv2dOpTest
,
NEONWithouBias
)
{
TestSimple3x3WithoutBias
<
DeviceType
::
NEON
>
();
TestSimple3x3WithoutBias
<
DeviceType
::
NEON
>
();
}
}
#endif
#endif
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3WithoutBias
()
{
void
TestNHWCSimple3x3WithoutBias
()
{
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() {
...
@@ -255,8 +261,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() {
...
@@ -270,7 +278,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
...
@@ -300,7 +309,7 @@ TEST_F(Conv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
TestCombined3x3
()
{
static
void
TestCombined3x3
()
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -335,17 +344,13 @@ static void TestCombined3x3() {
...
@@ -335,17 +344,13 @@ static void TestCombined3x3() {
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
4.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
9.2
f
,
6.2
f
,
4.2
f
,
6.2
f
,
4.2
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
#ifdef __ARM_NEON
#ifdef __ARM_NEON
TEST_F
(
Conv2dOpTest
,
NEONCombined
)
{
TEST_F
(
Conv2dOpTest
,
NEONCombined
)
{
TestCombined3x3
<
DeviceType
::
NEON
>
();
}
TestCombined3x3
<
DeviceType
::
NEON
>
();
}
#endif
#endif
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestNHWCCombined3x3
()
{
static
void
TestNHWCCombined3x3
()
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() {
...
@@ -353,8 +358,8 @@ static void TestNHWCCombined3x3() {
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
T
>
(
net
.
AddInputFromArray
<
D
,
T
>
(
"Input"
,
{
1
,
5
,
5
,
2
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
"Input"
,
{
1
,
5
,
5
,
2
},
{
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
,
1
});
net
.
AddInputFromArray
<
D
,
T
>
(
net
.
AddInputFromArray
<
D
,
T
>
(
"Filter"
,
{
3
,
3
,
2
,
2
},
"Filter"
,
{
3
,
3
,
2
,
2
},
{
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
{
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
1.0
f
,
0.5
f
,
...
@@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() {
...
@@ -363,9 +368,12 @@ static void TestNHWCCombined3x3() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() {
...
@@ -380,7 +388,8 @@ static void TestNHWCCombined3x3() {
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() {
...
@@ -394,16 +403,13 @@ static void TestNHWCCombined3x3() {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
}
}
// Check
// Check
auto
expected
=
CreateTensor
<
float
>
(
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
3
,
3
,
2
},
{
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
{
1
,
3
,
3
,
2
},
{
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
18.1
f
,
12.1
f
,
6.2
f
,
18.1
f
,
9.2
f
,
12.1
f
,
6.2
f
,
9.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
});
8.1
f
,
4.2
f
,
12.1
f
,
6.2
f
,
8.1
f
,
4.2
f
});
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
}
TEST_F
(
Conv2dOpTest
,
CPUStride2
)
{
TEST_F
(
Conv2dOpTest
,
CPUStride2
)
{
...
@@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) {
...
@@ -414,7 +420,7 @@ TEST_F(Conv2dOpTest, OPENCLStride2) {
TestNHWCCombined3x3
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCCombined3x3
<
DeviceType
::
OPENCL
,
float
>
();
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestConv1x1
()
{
void
TestConv1x1
()
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -435,9 +441,12 @@ void TestConv1x1() {
...
@@ -435,9 +441,12 @@ void TestConv1x1() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -451,7 +460,8 @@ void TestConv1x1() {
...
@@ -451,7 +460,8 @@ void TestConv1x1() {
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2DTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -479,15 +489,11 @@ void TestConv1x1() {
...
@@ -479,15 +489,11 @@ void TestConv1x1() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
TEST_F
(
Conv2dOpTest
,
CPUConv1x1
)
{
TEST_F
(
Conv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TEST_F
(
Conv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
@@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -526,9 +532,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -543,7 +552,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
// Run on device
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
};
...
@@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
...
@@ -592,15 +602,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
float_input_data
;
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
({
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
GenerateRandomRealTypeData
(
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
std
::
vector
<
float
>
float_bias_data
;
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on cpu
// run on cpu
...
@@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
...
@@ -610,9 +625,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
// run on gpu
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
OpDefBuilder
(
"Conv2D"
,
"Conv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
...
@@ -627,7 +645,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &input_shape,
// Run on device
// Run on device
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.5
);
};
};
...
...
mace/ops/core_test.cc
浏览文件 @
baf2dcd1
...
@@ -7,7 +7,6 @@
...
@@ -7,7 +7,6 @@
namespace
mace
{
namespace
mace
{
TEST
(
CoreTest
,
INIT_MODE
)
{
TEST
(
CoreTest
,
INIT_MODE
)
{
std
::
vector
<
OperatorDef
>
op_defs
;
std
::
vector
<
OperatorDef
>
op_defs
;
Workspace
ws
;
Workspace
ws
;
...
@@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) {
...
@@ -18,10 +17,11 @@ TEST(CoreTest, INIT_MODE) {
.
Output
(
"B2IOutput"
)
.
Output
(
"B2IOutput"
)
.
AddIntArg
(
"buffer_type"
,
kernels
::
BufferType
::
FILTER
)
.
AddIntArg
(
"buffer_type"
,
kernels
::
BufferType
::
FILTER
)
.
AddIntArg
(
"mode"
,
static_cast
<
int
>
(
NetMode
::
INIT
))
.
AddIntArg
(
"mode"
,
static_cast
<
int
>
(
NetMode
::
INIT
))
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
Tensor
*
input
=
Tensor
*
input
=
ws
.
CreateTensor
(
"Input"
,
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
float
>::
v
());
ws
.
CreateTensor
(
"Input"
,
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
float
>::
v
());
input
->
Resize
({
1
,
3
,
3
,
3
});
input
->
Resize
({
1
,
3
,
3
,
3
});
{
{
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
input_mapper
(
input
);
...
@@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) {
...
@@ -34,23 +34,26 @@ TEST(CoreTest, INIT_MODE) {
.
Input
(
"B2IOutput"
)
.
Input
(
"B2IOutput"
)
.
Output
(
"Output"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"buffer_type"
,
kernels
::
BufferType
::
FILTER
)
.
AddIntArg
(
"buffer_type"
,
kernels
::
BufferType
::
FILTER
)
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
.
Finalize
(
&
op_defs
[
op_defs
.
size
()
-
1
]);
NetDef
net_def
;
NetDef
net_def
;
for
(
auto
&
op_def
:
op_defs
)
{
for
(
auto
&
op_def
:
op_defs
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def
);
net_def
.
add_op
()
->
CopyFrom
(
op_def
);
}
}
auto
net
=
CreateNet
(
net_def
,
&
ws
,
DeviceType
::
OPENCL
,
NetMode
::
INIT
);
std
::
shared_ptr
<
OperatorRegistry
>
op_registry
(
new
OperatorRegistry
());
auto
net
=
CreateNet
(
op_registry
,
net_def
,
&
ws
,
DeviceType
::
OPENCL
,
NetMode
::
INIT
);
net
->
Run
();
net
->
Run
();
EXPECT_TRUE
(
ws
.
GetTensor
(
"B2IOutput"
)
!=
nullptr
);
EXPECT_TRUE
(
ws
.
GetTensor
(
"B2IOutput"
)
!=
nullptr
);
EXPECT_TRUE
(
ws
.
GetTensor
(
"Output"
)
==
nullptr
);
EXPECT_TRUE
(
ws
.
GetTensor
(
"Output"
)
==
nullptr
);
net
=
CreateNet
(
net_def
,
&
ws
,
DeviceType
::
OPENCL
);
net
=
CreateNet
(
op_registry
,
net_def
,
&
ws
,
DeviceType
::
OPENCL
);
net
->
Run
();
net
->
Run
();
EXPECT_TRUE
(
ws
.
GetTensor
(
"Output"
)
!=
nullptr
);
EXPECT_TRUE
(
ws
.
GetTensor
(
"Output"
)
!=
nullptr
);
ExpectTensorNear
<
float
>
(
*
ws
.
GetTensor
(
"Input"
),
*
ws
.
GetTensor
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
ws
.
GetTensor
(
"Input"
),
*
ws
.
GetTensor
(
"Output"
),
1e-5
);
}
}
}
// namespace mace
}
// namespace mace
mace/ops/depthwise_conv2d.cc
浏览文件 @
baf2dcd1
...
@@ -6,21 +6,26 @@
...
@@ -6,21 +6,26 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"DepthwiseConv2d"
)
void
Register_DepthwiseConv2d
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
DepthwiseConv2dOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"DepthwiseConv2d"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
DepthwiseConv2dOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"DepthwiseConv2d"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"DepthwiseConv2d"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
DepthwiseConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
DepthwiseConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/depthwise_conv2d_test.cc
浏览文件 @
baf2dcd1
...
@@ -26,7 +26,7 @@ void SimpleValidTest() {
...
@@ -26,7 +26,7 @@ void SimpleValidTest() {
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
2
,
2
,
3
},
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
2
,
2
,
3
},
{
1
,
3
,
5
,
7
,
9
,
11
,
2
,
4
,
6
,
8
,
10
,
12
});
{
1
,
3
,
5
,
7
,
9
,
11
,
2
,
4
,
6
,
8
,
10
,
12
});
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
2
,
2
,
2
,
2
},
"Filter"
,
{
2
,
2
,
2
,
2
},
{
1.0
f
,
5.0
f
,
9.0
f
,
13.0
f
,
2.0
f
,
6.0
f
,
10.0
f
,
14.0
f
,
3.0
f
,
7.0
f
,
11.0
f
,
{
1.0
f
,
5.0
f
,
9.0
f
,
13.0
f
,
2.0
f
,
6.0
f
,
10.0
f
,
14.0
f
,
3.0
f
,
7.0
f
,
11.0
f
,
...
@@ -41,12 +41,9 @@ void SimpleValidTest() {
...
@@ -41,12 +41,9 @@ void SimpleValidTest() {
{
196.1
f
,
252.1
f
,
216.2
f
,
280.2
f
,
272.3
f
,
344.3
f
,
296.4
f
,
376.4
f
});
{
196.1
f
,
252.1
f
,
216.2
f
,
280.2
f
,
272.3
f
,
344.3
f
,
296.4
f
,
376.4
f
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
TEST_F
(
DepthwiseConv2dOpTest
,
SimpleCPU
)
{
TEST_F
(
DepthwiseConv2dOpTest
,
SimpleCPU
)
{
SimpleValidTest
<
DeviceType
::
CPU
>
();
}
SimpleValidTest
<
DeviceType
::
CPU
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestNxNS12
(
const
index_t
height
,
const
index_t
width
)
{
void
TestNxNS12
(
const
index_t
height
,
const
index_t
width
)
{
...
@@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) {
...
@@ -72,8 +69,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
multiplier
,
input_channels
,
kernel_h
,
kernel_w
});
{
batch
,
input_channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
multiplier
,
input_channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
multiplier
*
input_channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
multiplier
*
input_channels
});
// Run on device
// Run on device
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
...
@@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) {
...
@@ -93,7 +92,6 @@ void TestNxNS12(const index_t height, const index_t width) {
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
);
func
(
kernel_size
,
kernel_size
,
stride
,
stride
,
SAME
);
}
}
}
}
}
}
#if __ARM_NEON
#if __ARM_NEON
...
...
mace/ops/depthwise_conv_2d_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters,
...
@@ -38,8 +38,8 @@ static void DepthwiseConv2d(int iters,
// Add input data
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
*
channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
*
channels
});
// Warm-up
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
@@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters,
...
@@ -54,23 +54,22 @@ static void DepthwiseConv2d(int iters,
net
.
Sync
();
net
.
Sync
();
}
}
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE,
\
#define BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, \
DEVICE)
\
DEVICE) \
static void
\
static void \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
int iters) {
\
int iters) { \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;
\
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot);
\
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE)));
\
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE,
\
DepthwiseConv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, OC);
\
mace::Padding::P, OC); \
}
\
} \
BENCHMARK(
\
BENCHMARK( \
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
BM_DEPTHWISE_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
#define BM_DEPTHWISE_CONV_2D(N, C, H, W, KH, KW, S, P, OC, TYPE) \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, CPU); \
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, NEON);\
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
BM_DEPTHWISE_CONV_2D_MACRO(N, C, H, W, KH, KW, S, P, OC, TYPE, OPENCL);
BM_DEPTHWISE_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
VALID
,
2
,
float
);
BM_DEPTHWISE_CONV_2D
(
1
,
64
,
32
,
32
,
3
,
3
,
1
,
VALID
,
2
,
float
);
...
...
mace/ops/fused_conv_2d.cc
浏览文件 @
baf2dcd1
...
@@ -6,25 +6,30 @@
...
@@ -6,25 +6,30 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
void
Register_FusedConv2D
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
FusedConv2dOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
FusedConv2dOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
FusedConv2dOp
<
DeviceType
::
CPU
,
half
>
);
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
FusedConv2dOp
<
DeviceType
::
CPU
,
half
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
FusedConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"FusedConv2D"
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
FusedConv2dOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
FusedConv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"FusedConv2D"
)
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
FusedConv2dOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/fused_conv_2d_test.cc
浏览文件 @
baf2dcd1
...
@@ -9,7 +9,7 @@ using namespace mace;
...
@@ -9,7 +9,7 @@ using namespace mace;
class
FusedConv2dOpTest
:
public
OpsTestBase
{};
class
FusedConv2dOpTest
:
public
OpsTestBase
{};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3VALID
()
{
void
TestNHWCSimple3x3VALID
()
{
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
...
@@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() {
...
@@ -23,9 +23,12 @@ void TestNHWCSimple3x3VALID() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
-
0.1
f
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
-
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"FilterImage"
)
...
@@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() {
...
@@ -40,7 +43,8 @@ void TestNHWCSimple3x3VALID() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
...
@@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() {
...
@@ -61,7 +65,7 @@ void TestNHWCSimple3x3VALID() {
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3SAME
()
{
void
TestNHWCSimple3x3SAME
()
{
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() {
...
@@ -76,9 +80,12 @@ void TestNHWCSimple3x3SAME() {
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
-
0.1
f
});
net
.
AddInputFromArray
<
D
,
T
>
(
"Bias"
,
{
1
},
{
-
0.1
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"FilterImage"
)
.
Input
(
"FilterImage"
)
...
@@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() {
...
@@ -93,7 +100,8 @@ void TestNHWCSimple3x3SAME() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
...
@@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() {
...
@@ -111,8 +119,7 @@ void TestNHWCSimple3x3SAME() {
}
}
auto
expected
=
CreateTensor
<
float
>
(
auto
expected
=
CreateTensor
<
float
>
(
{
1
,
3
,
3
,
1
},
{
1
,
3
,
3
,
1
},
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
});
{
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
,
0.0
f
});
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
ExpectTensorNear
<
float
,
T
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.01
);
}
}
...
@@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) {
...
@@ -127,7 +134,7 @@ TEST_F(FusedConv2dOpTest, OPENCLSimple) {
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3SAME
<
DeviceType
::
OPENCL
,
float
>
();
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
TestNHWCSimple3x3WithoutBias
()
{
void
TestNHWCSimple3x3WithoutBias
()
{
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() {
...
@@ -141,8 +148,10 @@ void TestNHWCSimple3x3WithoutBias() {
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
,
1.0
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() {
...
@@ -156,7 +165,8 @@ void TestNHWCSimple3x3WithoutBias() {
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) {
...
@@ -186,7 +196,7 @@ TEST_F(FusedConv2dOpTest, OPENCLWithoutBias) {
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
TestNHWCSimple3x3WithoutBias
<
DeviceType
::
OPENCL
,
float
>
();
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestConv1x1
()
{
void
TestConv1x1
()
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -207,9 +217,12 @@ void TestConv1x1() {
...
@@ -207,9 +217,12 @@ void TestConv1x1() {
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
2
},
{
0.1
f
,
0.2
f
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
float
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
float
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -223,7 +236,8 @@ void TestConv1x1() {
...
@@ -223,7 +236,8 @@ void TestConv1x1() {
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -251,15 +265,11 @@ void TestConv1x1() {
...
@@ -251,15 +265,11 @@ void TestConv1x1() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
TEST_F
(
FusedConv2dOpTest
,
CPUConv1x1
)
{
TEST_F
(
FusedConv2dOpTest
,
CPUConv1x1
)
{
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TestConv1x1
<
DeviceType
::
CPU
>
();
}
TEST_F
(
FusedConv2dOpTest
,
OPENCLConv1x1
)
{
TEST_F
(
FusedConv2dOpTest
,
OPENCLConv1x1
)
{
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
TestConv1x1
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
static
void
TestComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
@@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -298,9 +308,12 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -315,7 +328,8 @@ static void TestComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
// Run on device
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
};
...
@@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
...
@@ -331,7 +345,7 @@ TEST_F(FusedConv2dOpTest, OPENCLUnalignedConvNxNS12) {
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
});
TestComplexConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
107
,
113
,
5
,
7
});
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
static
void
TestHalfComplexConvNxNS12
(
const
std
::
vector
<
index_t
>
&
shape
)
{
testing
::
internal
::
LogToStderr
();
testing
::
internal
::
LogToStderr
();
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
auto
func
=
[
&
](
int
kernel_h
,
int
kernel_w
,
int
stride_h
,
int
stride_w
,
...
@@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -357,15 +371,20 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
std
::
vector
<
float
>
float_input_data
;
std
::
vector
<
float
>
float_input_data
;
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
GenerateRandomRealTypeData
({
batch
,
height
,
width
,
input_channels
},
float_input_data
);
std
::
vector
<
float
>
float_filter_data
;
std
::
vector
<
float
>
float_filter_data
;
GenerateRandomRealTypeData
({
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
GenerateRandomRealTypeData
(
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
std
::
vector
<
float
>
float_bias_data
;
std
::
vector
<
float
>
float_bias_data
;
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
GenerateRandomRealTypeData
({
output_channels
},
float_bias_data
);
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
"Input"
,
{
batch
,
height
,
width
,
input_channels
},
float_input_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Filter"
,
{
kernel_h
,
kernel_w
,
input_channels
,
output_channels
},
float_filter_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
net
.
AddInputFromArray
<
D
,
float
>
(
"Bias"
,
{
output_channels
},
float_bias_data
);
// run on cpu
// run on cpu
...
@@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -375,9 +394,12 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
// run on gpu
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
half
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
half
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
...
@@ -392,7 +414,8 @@ static void TestHalfComplexConvNxNS12(const std::vector<index_t> &shape) {
// Run on device
// Run on device
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.2
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.2
);
};
};
...
@@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
...
@@ -408,7 +431,7 @@ TEST_F(FusedConv2dOpTest, OPENCLHalfAlignedConvNxNS12) {
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
});
TestHalfComplexConvNxNS12
<
DeviceType
::
OPENCL
>
({
32
,
32
,
32
,
64
});
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
TestGeneralConvNxNS12
(
const
std
::
vector
<
index_t
>
&
image_shape
,
static
void
TestGeneralConvNxNS12
(
const
std
::
vector
<
index_t
>
&
image_shape
,
const
std
::
vector
<
index_t
>
&
filter_shape
)
{
const
std
::
vector
<
index_t
>
&
filter_shape
)
{
testing
::
internal
::
LogToStderr
();
testing
::
internal
::
LogToStderr
();
...
@@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
...
@@ -449,9 +472,12 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
// run on gpu
// run on gpu
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
BufferToImage
<
D
,
T
>
(
net
,
"Filter"
,
"FilterImage"
,
kernels
::
BufferType
::
FILTER
);
BufferToImage
<
D
,
T
>
(
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
OpDefBuilder
(
"FusedConv2D"
,
"FusedConv2dTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
...
@@ -466,7 +492,8 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
// Run on device
// Run on device
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
};
};
...
@@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
...
@@ -477,13 +504,11 @@ static void TestGeneralConvNxNS12(const std::vector<index_t> &image_shape,
}
}
TEST_F
(
FusedConv2dOpTest
,
OPENCL7X7ConvNxNS12
)
{
TEST_F
(
FusedConv2dOpTest
,
OPENCL7X7ConvNxNS12
)
{
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
},
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
32
,
32
},
{
7
,
7
,
3
,
64
});
{
7
,
7
,
3
,
64
});
}
}
TEST_F
(
FusedConv2dOpTest
,
OPENCL15X1ConvNxNS12
)
{
TEST_F
(
FusedConv2dOpTest
,
OPENCL15X1ConvNxNS12
)
{
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
40
,
40
},
TestGeneralConvNxNS12
<
DeviceType
::
OPENCL
,
float
>
({
40
,
40
},
{
15
,
1
,
32
,
64
});
{
15
,
1
,
32
,
64
});
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
...
...
mace/ops/global_avg_pooling.cc
浏览文件 @
baf2dcd1
...
@@ -6,16 +6,20 @@
...
@@ -6,16 +6,20 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"GlobalAvgPooling"
)
void
Register_GlobalAvgPooling
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"GlobalAvgPooling"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
GlobalAvgPoolingOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
GlobalAvgPoolingOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"GlobalAvgPooling"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"GlobalAvgPooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
GlobalAvgPoolingOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
GlobalAvgPoolingOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
}
}
// namespace mace
}
// namespace mace
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -22,7 +22,8 @@ static void GlobalAvgPooling(
...
@@ -22,7 +22,8 @@ static void GlobalAvgPooling(
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
mace/ops/image_to_buffer.cc
浏览文件 @
baf2dcd1
...
@@ -6,14 +6,18 @@
...
@@ -6,14 +6,18 @@
namespace
mace
{
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ImageToBuffer"
)
void
Register_ImageToBuffer
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ImageToBuffer"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
ImageToBufferOp
<
DeviceType
::
OPENCL
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ImageToBufferOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ImageToBuffer"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ImageToBuffer"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
ImageToBufferOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
ImageToBufferOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/ops_test_util.h
浏览文件 @
baf2dcd1
...
@@ -10,9 +10,9 @@
...
@@ -10,9 +10,9 @@
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "mace/core/common.h"
#include "mace/core/common.h"
#include "mace/core/net.h"
#include "mace/core/net.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/tensor.h"
#include "mace/core/tensor.h"
#include "mace/core/workspace.h"
#include "mace/core/workspace.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
#include "mace/utils/utils.h"
...
@@ -56,7 +56,8 @@ class OpDefBuilder {
...
@@ -56,7 +56,8 @@ class OpDefBuilder {
return
*
this
;
return
*
this
;
}
}
OpDefBuilder
AddIntsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
int
>
&
values
)
{
OpDefBuilder
AddIntsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
int
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
for
(
auto
value
:
values
)
{
...
@@ -65,7 +66,8 @@ class OpDefBuilder {
...
@@ -65,7 +66,8 @@ class OpDefBuilder {
return
*
this
;
return
*
this
;
}
}
OpDefBuilder
AddFloatsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
float
>
&
values
)
{
OpDefBuilder
AddFloatsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
float
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
for
(
auto
value
:
values
)
{
...
@@ -75,7 +77,7 @@ class OpDefBuilder {
...
@@ -75,7 +77,7 @@ class OpDefBuilder {
}
}
OpDefBuilder
AddStringsArg
(
const
std
::
string
&
name
,
OpDefBuilder
AddStringsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
const
std
::
vector
<
const
char
*>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
for
(
auto
value
:
values
)
{
...
@@ -94,7 +96,7 @@ class OpDefBuilder {
...
@@ -94,7 +96,7 @@ class OpDefBuilder {
class
OpsTestNet
{
class
OpsTestNet
{
public:
public:
OpsTestNet
()
{}
OpsTestNet
()
:
op_registry_
(
new
OperatorRegistry
())
{};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
AddInputFromArray
(
const
std
::
string
&
name
,
void
AddInputFromArray
(
const
std
::
string
&
name
,
...
@@ -135,10 +137,11 @@ class OpsTestNet {
...
@@ -135,10 +137,11 @@ class OpsTestNet {
std
::
mt19937
gen
(
rd
());
std
::
mt19937
gen
(
rd
());
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
std
::
normal_distribution
<
float
>
nd
(
0
,
1
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
std
::
generate
(
[
&
gen
,
&
nd
,
positive
]
{
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
return
half_float
::
half_cast
<
half
>
(
positive
?
std
::
abs
(
nd
(
gen
))
:
nd
(
gen
));
return
half_float
::
half_cast
<
half
>
(
positive
?
std
::
abs
(
nd
(
gen
))
});
:
nd
(
gen
));
});
}
else
{
}
else
{
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
]
{
[
&
gen
,
&
nd
,
positive
]
{
...
@@ -160,7 +163,7 @@ class OpsTestNet {
...
@@ -160,7 +163,7 @@ class OpsTestNet {
for
(
auto
&
op_def_
:
op_defs_
)
{
for
(
auto
&
op_def_
:
op_defs_
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def_
);
net_def
.
add_op
()
->
CopyFrom
(
op_def_
);
}
}
net_
=
CreateNet
(
net_def
,
&
ws_
,
device
);
net_
=
CreateNet
(
op_registry_
,
net_def
,
&
ws_
,
device
);
device_
=
device
;
device_
=
device
;
return
net_
->
Run
();
return
net_
->
Run
();
}
}
...
@@ -182,6 +185,7 @@ class OpsTestNet {
...
@@ -182,6 +185,7 @@ class OpsTestNet {
}
}
public:
public:
std
::
shared_ptr
<
OperatorRegistry
>
op_registry_
;
Workspace
ws_
;
Workspace
ws_
;
std
::
vector
<
OperatorDef
>
op_defs_
;
std
::
vector
<
OperatorDef
>
op_defs_
;
std
::
unique_ptr
<
NetBase
>
net_
;
std
::
unique_ptr
<
NetBase
>
net_
;
...
@@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
...
@@ -211,7 +215,8 @@ void GenerateRandomRealTypeData(const std::vector<index_t> &shape,
res
.
resize
(
size
);
res
.
resize
(
size
);
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
half_float
::
half_cast
<
half
>
(
nd
(
gen
));
});
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
half_float
::
half_cast
<
half
>
(
nd
(
gen
));
});
}
else
{
}
else
{
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
nd
(
gen
);
});
std
::
generate
(
res
.
begin
(),
res
.
end
(),
[
&
gen
,
&
nd
]
{
return
nd
(
gen
);
});
}
}
...
@@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
...
@@ -236,7 +241,8 @@ void GenerateRandomIntTypeData(const std::vector<index_t> &shape,
template
<
typename
T
>
template
<
typename
T
>
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
unique_ptr
<
Tensor
>
CreateTensor
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
const
std
::
vector
<
T
>
&
data
)
{
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
CPU
),
DataTypeToEnum
<
T
>::
v
()));
unique_ptr
<
Tensor
>
res
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
CPU
),
DataTypeToEnum
<
T
>::
v
()));
res
->
Resize
(
shape
);
res
->
Resize
(
shape
);
T
*
input_data
=
res
->
mutable_data
<
T
>
();
T
*
input_data
=
res
->
mutable_data
<
T
>
();
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
memcpy
(
input_data
,
data
.
data
(),
data
.
size
()
*
sizeof
(
T
));
...
@@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) {
...
@@ -268,9 +274,9 @@ inline std::string ShapeToString(const Tensor &x) {
template
<
typename
T
>
template
<
typename
T
>
struct
is_floating_point_type
{
struct
is_floating_point_type
{
static
const
bool
value
=
static
const
bool
value
=
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
float
>::
value
||
std
::
is_same
<
T
,
double
>::
value
std
::
is_same
<
T
,
double
>::
value
||
||
std
::
is_same
<
T
,
half
>::
value
;
std
::
is_same
<
T
,
half
>::
value
;
};
};
template
<
typename
T
>
template
<
typename
T
>
...
@@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) {
...
@@ -293,7 +299,9 @@ inline void AssertSameDims(const Tensor &x, const Tensor &y) {
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
<<
"y.shape [ "
<<
ShapeToString
(
y
)
<<
"]"
;
}
}
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
,
bool
is_fp
=
is_floating_point_type
<
EXP_TYPE
>
::
value
>
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
,
bool
is_fp
=
is_floating_point_type
<
EXP_TYPE
>
::
value
>
struct
Expector
;
struct
Expector
;
// Partial specialization for float and double.
// Partial specialization for float and double.
...
@@ -343,7 +351,6 @@ struct Expector<EXP_TYPE, RES_TYPE, true> {
...
@@ -343,7 +351,6 @@ struct Expector<EXP_TYPE, RES_TYPE, true> {
}
}
}
}
}
}
};
};
template
<
typename
T
>
template
<
typename
T
>
...
@@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
...
@@ -355,8 +362,8 @@ void ExpectTensorNear(const Tensor &x, const Tensor &y, const double abs_err) {
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
>
template
<
typename
EXP_TYPE
,
typename
RES_TYPE
>
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
void
ExpectTensorNear
(
const
Tensor
&
x
,
const
Tensor
&
y
,
const
double
abs_err
)
{
static_assert
(
is_floating_point_type
<
EXP_TYPE
>::
value
static_assert
(
is_floating_point_type
<
EXP_TYPE
>::
value
&&
&&
is_floating_point_type
<
RES_TYPE
>::
value
,
is_floating_point_type
<
RES_TYPE
>::
value
,
"T is not a floating point type"
);
"T is not a floating point type"
);
Expector
<
EXP_TYPE
,
RES_TYPE
>::
Near
(
x
,
y
,
abs_err
);
Expector
<
EXP_TYPE
,
RES_TYPE
>::
Near
(
x
,
y
,
abs_err
);
}
}
...
...
mace/ops/pooling.cc
浏览文件 @
baf2dcd1
...
@@ -6,29 +6,36 @@
...
@@ -6,29 +6,36 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
void
Register_Pooling
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
PoolingOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
PoolingOp
<
DeviceType
::
CPU
,
float
>
);
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
PoolingOp
<
DeviceType
::
CPU
,
half
>
);
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
CPU
,
half
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
PoolingOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
PoolingOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
PoolingOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Pooling"
)
PoolingOp
<
DeviceType
::
OPENCL
,
float
>
);
.
TypeConstraint
<
half
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Pooling"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
PoolingOp
<
DeviceType
::
OPENCL
,
half
>
);
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
PoolingOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/pooling_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -35,7 +35,8 @@ static void Pooling(int iters,
...
@@ -35,7 +35,8 @@ static void Pooling(int iters,
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
mace/ops/pooling_test.cc
浏览文件 @
baf2dcd1
...
@@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) {
...
@@ -29,7 +29,7 @@ TEST_F(PoolingOpTest, MAX_VALID) {
// Add input data
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
4
,
4
,
2
},
"Input"
,
{
1
,
4
,
4
,
2
},
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
8
,
24
,
9
,
25
,
10
,
26
,
11
,
27
,
12
,
28
,
13
,
29
,
14
,
30
,
15
,
31
});
8
,
24
,
9
,
25
,
10
,
26
,
11
,
27
,
12
,
28
,
13
,
29
,
14
,
30
,
15
,
31
});
// Run
// Run
...
@@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) {
...
@@ -42,7 +42,6 @@ TEST_F(PoolingOpTest, MAX_VALID) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
TEST_F
(
PoolingOpTest
,
MAX_SAME
)
{
TEST_F
(
PoolingOpTest
,
MAX_SAME
)
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
...
@@ -122,7 +121,7 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
SimpleMaxPooling3S2
()
{
static
void
SimpleMaxPooling3S2
()
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() {
...
@@ -130,11 +129,12 @@ static void SimpleMaxPooling3S2() {
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
9
,
1
},
"Input"
,
{
1
,
3
,
9
,
1
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
...
@@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() {
...
@@ -145,7 +145,8 @@ static void SimpleMaxPooling3S2() {
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
// Run
// Run
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
...
@@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() {
...
@@ -166,15 +167,13 @@ static void SimpleMaxPooling3S2() {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
TEST_F
(
PoolingOpTest
,
CPUSimpleMaxPooling3S2
)
{
TEST_F
(
PoolingOpTest
,
CPUSimpleMaxPooling3S2
)
{
SimpleMaxPooling3S2
<
CPU
>
();
}
SimpleMaxPooling3S2
<
CPU
>
();
}
TEST_F
(
PoolingOpTest
,
OPENCLSimpleMaxPooling3S2
)
{
TEST_F
(
PoolingOpTest
,
OPENCLSimpleMaxPooling3S2
)
{
SimpleMaxPooling3S2
<
OPENCL
>
();
SimpleMaxPooling3S2
<
OPENCL
>
();
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
MaxPooling3S2
(
const
std
::
vector
<
index_t
>
&
input_shape
,
static
void
MaxPooling3S2
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
int
>
strides
,
const
std
::
vector
<
int
>
strides
,
Padding
padding
)
{
Padding
padding
)
{
...
@@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
...
@@ -211,13 +210,14 @@ static void MaxPooling3S2(const std::vector<index_t> &input_shape,
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
ExpectTensorNear
<
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.001
);
}
}
// TODO(chenghui) : there is a bug.
// TODO(chenghui) : there is a bug.
//TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
//
TEST_F(PoolingOpTest, NEONAlignedMaxPooling3S2) {
// AlignedMaxPooling3S2<NEON>(Padding::VALID);
// AlignedMaxPooling3S2<NEON>(Padding::VALID);
// AlignedMaxPooling3S2<NEON>(Padding::SAME);
// AlignedMaxPooling3S2<NEON>(Padding::SAME);
//}
//}
...
@@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
...
@@ -259,7 +259,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
// Add input data
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
4
,
4
,
2
},
"Input"
,
{
1
,
4
,
4
,
2
},
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
{
0
,
16
,
1
,
17
,
2
,
18
,
3
,
19
,
4
,
20
,
5
,
21
,
6
,
22
,
7
,
23
,
8
,
24
,
9
,
25
,
10
,
26
,
11
,
27
,
12
,
28
,
13
,
29
,
14
,
30
,
15
,
31
});
8
,
24
,
9
,
25
,
10
,
26
,
11
,
27
,
12
,
28
,
13
,
29
,
14
,
30
,
15
,
31
});
// Run
// Run
...
@@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
...
@@ -272,7 +272,7 @@ TEST_F(PoolingOpTest, AVG_VALID) {
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
static
void
SimpleAvgPoolingTest
()
{
static
void
SimpleAvgPoolingTest
()
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
...
@@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() {
...
@@ -282,7 +282,8 @@ static void SimpleAvgPoolingTest() {
"Input"
,
{
1
,
2
,
8
,
1
},
"Input"
,
{
1
,
2
,
8
,
1
},
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
});
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
...
@@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() {
...
@@ -294,7 +295,8 @@ static void SimpleAvgPoolingTest() {
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
4
,
1
},
{
4.5
,
6.5
,
8.5
,
10.5
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
4
,
1
},
{
4.5
,
6.5
,
8.5
,
10.5
});
...
@@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) {
...
@@ -306,11 +308,11 @@ TEST_F(PoolingOpTest, OPENCLSimpleAvgPooling) {
SimpleAvgPoolingTest
<
OPENCL
>
();
SimpleAvgPoolingTest
<
OPENCL
>
();
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
static
void
AvgPoolingTest
(
const
std
::
vector
<
index_t
>
&
shape
,
static
void
AvgPoolingTest
(
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
int
>
&
kernels
,
const
std
::
vector
<
int
>
&
kernels
,
const
std
::
vector
<
int
>
&
strides
,
const
std
::
vector
<
int
>
&
strides
,
Padding
padding
)
{
Padding
padding
)
{
// Construct graph
// Construct graph
OpsTestNet
net
;
OpsTestNet
net
;
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
...
@@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
...
@@ -343,38 +345,49 @@ static void AvgPoolingTest(const std::vector<index_t> &shape,
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
T
>
(
net
,
"OutputImage"
,
"OPENCLOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ExpectTensorNear
<
float
,
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.01
);
ExpectTensorNear
<
float
,
T
>
(
expected
,
*
net
.
GetOutput
(
"OPENCLOutput"
),
0.01
);
}
}
TEST_F
(
PoolingOpTest
,
OPENCLAlignedAvgPooling
)
{
TEST_F
(
PoolingOpTest
,
OPENCLAlignedAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
}
}
TEST_F
(
PoolingOpTest
,
OPENCLHalfAlignedAvgPooling
)
{
TEST_F
(
PoolingOpTest
,
OPENCLHalfAlignedAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
15
,
15
,
128
},
{
4
,
4
},
{
4
,
4
},
Padding
::
SAME
);
}
}
TEST_F
(
PoolingOpTest
,
OPENCLAlignedLargeKernelAvgPooling
)
{
TEST_F
(
PoolingOpTest
,
OPENCLAlignedLargeKernelAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
}
}
TEST_F
(
PoolingOpTest
,
OPENCLHalfAlignedLargeKernelAvgPooling
)
{
TEST_F
(
PoolingOpTest
,
OPENCLHalfAlignedLargeKernelAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
half
>
({
3
,
64
,
64
,
128
},
{
16
,
16
},
{
16
,
16
},
Padding
::
SAME
);
}
}
TEST_F
(
PoolingOpTest
,
OPENCLUnAlignedAvgPooling
)
{
TEST_F
(
PoolingOpTest
,
OPENCLUnAlignedAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
SAME
);
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
2
,
2
},
{
2
,
2
},
Padding
::
SAME
);
}
}
TEST_F
(
PoolingOpTest
,
OPENCLUnAlignedLargeKernelAvgPooling
)
{
TEST_F
(
PoolingOpTest
,
OPENCLUnAlignedLargeKernelAvgPooling
)
{
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
SAME
);
Padding
::
VALID
);
AvgPoolingTest
<
OPENCL
,
float
>
({
3
,
31
,
37
,
128
},
{
8
,
8
},
{
8
,
8
},
Padding
::
SAME
);
}
}
mace/ops/relu.cc
浏览文件 @
baf2dcd1
...
@@ -6,26 +6,32 @@
...
@@ -6,26 +6,32 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
void
Register_Relu
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
ReluOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ReluOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
ReluOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
ReluOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
ReluOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"Relu"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Relu"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
ReluOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
ReluOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/relu_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -19,7 +19,8 @@ static void ReluBenchmark(
...
@@ -19,7 +19,8 @@ static void ReluBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -54,9 +55,9 @@ static void ReluBenchmark(
...
@@ -54,9 +55,9 @@ static void ReluBenchmark(
} \
} \
BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE)
BENCHMARK(BM_RELU_##N##C##H##W##_##TYPE##_##DEVICE)
#define BM_RELU(N, C, H, W, TYPE) \
#define BM_RELU(N, C, H, W, TYPE)
\
BM_RELU_MACRO(N, C, H, W, TYPE, CPU); \
BM_RELU_MACRO(N, C, H, W, TYPE, CPU);
\
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);\
BM_RELU_MACRO(N, C, H, W, TYPE, NEON);
\
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU_MACRO(N, C, H, W, TYPE, OPENCL);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
BM_RELU
(
1
,
1
,
512
,
512
,
float
);
...
...
mace/ops/relu_test.cc
浏览文件 @
baf2dcd1
...
@@ -14,13 +14,13 @@ void TestSimple() {
...
@@ -14,13 +14,13 @@ void TestSimple() {
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
net
.
AddInputFromArray
<
D
,
float
>
(
{
2
,
2
,
2
,
2
},
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -31,7 +31,8 @@ void TestSimple() {
...
@@ -31,7 +31,8 @@ void TestSimple() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -42,38 +43,30 @@ void TestSimple() {
...
@@ -42,38 +43,30 @@ void TestSimple() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
}
}
auto
expected
=
CreateTensor
<
float
>
({
2
,
2
,
2
,
2
},
auto
expected
=
CreateTensor
<
float
>
(
{
0
,
7
,
0
,
6
,
0
,
5
,
0
,
4
,
{
2
,
2
,
2
,
2
},
{
0
,
7
,
0
,
6
,
0
,
5
,
0
,
4
,
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
TEST_F
(
ReluOpTest
,
CPUSimple
)
{
TEST_F
(
ReluOpTest
,
CPUSimple
)
{
TestSimple
<
DeviceType
::
CPU
>
();
}
TestSimple
<
DeviceType
::
CPU
>
();
}
#if __ARM_NEON
#if __ARM_NEON
TEST_F
(
ReluOpTest
,
NEONSimple
)
{
TEST_F
(
ReluOpTest
,
NEONSimple
)
{
TestSimple
<
DeviceType
::
NEON
>
();
}
TestSimple
<
DeviceType
::
NEON
>
();
}
#endif
#endif
TEST_F
(
ReluOpTest
,
OPENCLSimple
)
{
TEST_F
(
ReluOpTest
,
OPENCLSimple
)
{
TestSimple
<
DeviceType
::
OPENCL
>
();
}
TestSimple
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestUnalignedSimple
()
{
void
TestUnalignedSimple
()
{
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
3
,
2
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
});
{
1
,
3
,
2
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -84,7 +77,8 @@ void TestUnalignedSimple() {
...
@@ -84,7 +77,8 @@ void TestUnalignedSimple() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -95,8 +89,7 @@ void TestUnalignedSimple() {
...
@@ -95,8 +89,7 @@ void TestUnalignedSimple() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
}
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
2
,
1
},
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
2
,
1
},
{
0
,
7
,
0
,
6
,
0
,
5
});
{
0
,
7
,
0
,
6
,
0
,
5
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
...
@@ -120,13 +113,13 @@ void TestSimpleReluX() {
...
@@ -120,13 +113,13 @@ void TestSimpleReluX() {
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
net
.
AddInputFromArray
<
D
,
float
>
(
{
2
,
2
,
2
,
2
},
"Input"
,
{
2
,
2
,
2
,
2
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
,
4
,
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
-
3
,
3
,
-
2
,
2
,
-
1
,
1
,
0
,
0
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -138,7 +131,8 @@ void TestSimpleReluX() {
...
@@ -138,7 +131,8 @@ void TestSimpleReluX() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -150,38 +144,31 @@ void TestSimpleReluX() {
...
@@ -150,38 +144,31 @@ void TestSimpleReluX() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
}
}
auto
expected
=
CreateTensor
<
float
>
({
2
,
2
,
2
,
2
},
auto
expected
=
CreateTensor
<
float
>
(
{
0
,
6
,
0
,
6
,
0
,
5
,
0
,
4
,
{
2
,
2
,
2
,
2
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
,
4
,
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
0
,
3
,
0
,
2
,
0
,
1
,
0
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
TEST_F
(
ReluOpTest
,
CPUSimpleReluX
)
{
TEST_F
(
ReluOpTest
,
CPUSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
CPU
>
();
}
TestSimpleReluX
<
DeviceType
::
CPU
>
();
}
#if __ARM_NEON
#if __ARM_NEON
TEST_F
(
ReluOpTest
,
NEONSimpleReluX
)
{
TEST_F
(
ReluOpTest
,
NEONSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
NEON
>
();
}
TestSimpleReluX
<
DeviceType
::
NEON
>
();
}
#endif
#endif
TEST_F
(
ReluOpTest
,
OPENCLSimpleReluX
)
{
TEST_F
(
ReluOpTest
,
OPENCLSimpleReluX
)
{
TestSimpleReluX
<
DeviceType
::
OPENCL
>
();
}
TestSimpleReluX
<
DeviceType
::
OPENCL
>
();
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
TestUnalignedSimpleReluX
()
{
void
TestUnalignedSimpleReluX
()
{
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
7
,
1
},
{
1
,
1
,
7
,
1
},
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
});
{
-
7
,
7
,
-
6
,
6
,
-
5
,
5
,
-
4
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
...
@@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() {
...
@@ -193,7 +180,8 @@ void TestUnalignedSimpleReluX() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
// Transfer output
// Transfer output
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
...
@@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() {
...
@@ -205,8 +193,7 @@ void TestUnalignedSimpleReluX() {
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
}
}
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
7
,
1
},
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
7
,
1
},
{
0
,
6
,
0
,
6
,
0
,
5
,
0
});
{
0
,
6
,
0
,
6
,
0
,
5
,
0
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-5
);
}
}
...
...
mace/ops/resize_bilinear.cc
浏览文件 @
baf2dcd1
...
@@ -6,26 +6,32 @@
...
@@ -6,26 +6,32 @@
namespace
mace
{
namespace
mace
{
REGISTER_CPU_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
void
Register_ResizeBilinear
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Build
(),
.
Device
(
DeviceType
::
CPU
)
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
#if MACE_ENABLE_NEON
#if MACE_ENABLE_NEON
REGISTER_NEON_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
NEON
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
NEON
,
float
>
);
#endif // MACE_ENABLE_NEON
#endif // MACE_ENABLE_NEON
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
float
>
(
"T"
)
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Device
(
DeviceType
::
OPENCL
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters,
...
@@ -26,22 +26,23 @@ static void ResizeBilinearBenchmark(int iters,
net
.
AddInputFromArray
<
D
,
index_t
>
(
"OutSize"
,
{
2
},
net
.
AddInputFromArray
<
D
,
index_t
>
(
"OutSize"
,
{
2
},
{
output_height
,
output_width
});
{
output_height
,
output_width
});
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"OutSize"
)
.
Input
(
"OutSize"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
}
else
{
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
}
// Warm-up
// Warm-up
...
@@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters,
...
@@ -68,8 +69,8 @@ static void ResizeBilinearBenchmark(int iters,
BENCHMARK( \
BENCHMARK( \
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE)
BM_RESIZE_BILINEAR_##N##_##C##_##H0##_##W0##_##H1##_##W1##_##TYPE##_##DEVICE)
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE)
\
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU);
\
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL);
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL);
// SNPE 835 GPU: 6870us
// SNPE 835 GPU: 6870us
...
...
mace/ops/resize_bilinear_test.cc
浏览文件 @
baf2dcd1
...
@@ -80,29 +80,31 @@ void TestRandomResizeBilinear() {
...
@@ -80,29 +80,31 @@ void TestRandomResizeBilinear() {
{
batch
,
in_height
,
in_width
,
channels
});
{
batch
,
in_height
,
in_width
,
channels
});
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Run on CPU
// Run on CPU
net
.
RunOp
(
DeviceType
::
CPU
);
net
.
RunOp
(
DeviceType
::
CPU
);
Tensor
expected
;
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
if
(
D
==
DeviceType
::
OPENCL
)
{
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
}
else
{
// TODO support NEON
// TODO support NEON
}
}
...
...
mace/ops/space_to_batch.cc
浏览文件 @
baf2dcd1
...
@@ -6,13 +6,17 @@
...
@@ -6,13 +6,17 @@
namespace
mace
{
namespace
mace
{
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"SpaceToBatchND"
)
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
)
{
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Build
(),
.
Device
(
DeviceType
::
OPENCL
)
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
float
>
);
.
TypeConstraint
<
float
>
(
"T"
)
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Build
(),
.
TypeConstraint
<
half
>
(
"T"
)
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
float
>
);
.
Build
(),
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
half
>
);
.
Device
(
DeviceType
::
OPENCL
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
OPENCL
,
half
>
);
}
}
// namespace mace
}
// namespace mace
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
baf2dcd1
...
@@ -15,7 +15,8 @@ static void BMSpaceToBatch(
...
@@ -15,7 +15,8 @@ static void BMSpaceToBatch(
OpsTestNet
net
;
OpsTestNet
net
;
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
...
@@ -36,17 +37,19 @@ static void BMSpaceToBatch(
...
@@ -36,17 +37,19 @@ static void BMSpaceToBatch(
net
.
Sync
();
net
.
Sync
();
}
}
#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \
#define BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, DEVICE) \
static void BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
static void \
int iters) { \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE( \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
int iters) { \
mace::testing::ItemsProcessed(tot); \
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
mace::testing::ItemsProcessed(tot); \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
} \
BMSpaceToBatch<DEVICE, TYPE>(iters, N, H, W, C, SHAPE); \
BENCHMARK(BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
} \
BENCHMARK( \
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
BM_SPACE_TO_BATCH_##N##_##H##_##W##_##C##_##SHAPE##_##TYPE##_##DEVICE)
#define BM_SPACE_TO_BATCH(N, H, W, C, SHAPE, TYPE) \
BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL);
BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, TYPE, OPENCL);
BM_SPACE_TO_BATCH
(
128
,
16
,
16
,
128
,
2
,
float
);
BM_SPACE_TO_BATCH
(
128
,
16
,
16
,
128
,
2
,
float
);
...
...
mace/ops/space_to_batch_test.cc
浏览文件 @
baf2dcd1
...
@@ -2,23 +2,23 @@
...
@@ -2,23 +2,23 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
// Copyright (c) 2017 XiaoMi All rights reserved.
//
//
#include <fstream>
#include "gtest/gtest.h"
#include "gtest/gtest.h"
#include "mace/ops/ops_test_util.h"
#include "mace/ops/ops_test_util.h"
#include <fstream>
using
namespace
mace
;
using
namespace
mace
;
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
RunSpaceToBatch
(
const
std
::
vector
<
index_t
>
&
input_shape
,
void
RunSpaceToBatch
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
padding_data
,
const
std
::
vector
<
int
>
&
padding_data
,
const
Tensor
*
expected
)
{
const
Tensor
*
expected
)
{
OpsTestNet
net
;
OpsTestNet
net
;
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
"Input"
,
input_shape
,
input_data
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
...
@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
...
@@ -29,12 +29,13 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-8
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-8
);
}
}
template
<
DeviceType
D
>
template
<
DeviceType
D
>
void
RunBatchToSpace
(
const
std
::
vector
<
index_t
>
&
input_shape
,
void
RunBatchToSpace
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
float
>
&
input_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
...
@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
...
@@ -42,10 +43,10 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
const
Tensor
*
expected
)
{
const
Tensor
*
expected
)
{
OpsTestNet
net
;
OpsTestNet
net
;
// Add input data
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
"Input"
,
input_shape
,
input_data
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
Output
(
"OutputImage"
)
...
@@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
...
@@ -56,33 +57,33 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Run
// Run
net
.
RunOp
(
D
);
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT
);
// Check
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-8
);
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
1e-8
);
}
}
template
<
typename
T
>
template
<
typename
T
>
void
TestBidirectionalTransform
(
const
std
::
vector
<
index_t
>
&
space_shape
,
void
TestBidirectionalTransform
(
const
std
::
vector
<
index_t
>
&
space_shape
,
const
std
::
vector
<
float
>
&
space_data
,
const
std
::
vector
<
float
>
&
space_data
,
const
std
::
vector
<
int
>
&
block_data
,
const
std
::
vector
<
int
>
&
block_data
,
const
std
::
vector
<
int
>
&
padding_data
,
const
std
::
vector
<
int
>
&
padding_data
,
const
std
::
vector
<
index_t
>
&
batch_shape
,
const
std
::
vector
<
index_t
>
&
batch_shape
,
const
std
::
vector
<
float
>
&
batch_data
)
{
const
std
::
vector
<
float
>
&
batch_data
)
{
auto
space_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
auto
space_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
T
>::
v
()));
DataTypeToEnum
<
T
>::
v
()));
space_tensor
->
Resize
(
space_shape
);
space_tensor
->
Resize
(
space_shape
);
{
{
Tensor
::
MappingGuard
space_mapper
(
space_tensor
.
get
());
Tensor
::
MappingGuard
space_mapper
(
space_tensor
.
get
());
T
*
space_ptr
=
space_tensor
->
mutable_data
<
T
>
();
T
*
space_ptr
=
space_tensor
->
mutable_data
<
T
>
();
MACE_CHECK
(
static_cast
<
size_t
>
(
space_tensor
->
size
())
==
space_data
.
size
())
MACE_CHECK
(
static_cast
<
size_t
>
(
space_tensor
->
size
())
==
space_data
.
size
())
<<
"Space tensor size:"
<<
space_tensor
->
size
()
<<
"Space tensor size:"
<<
space_tensor
->
size
()
<<
", space data size:"
<<
space_data
.
size
();
<<
", space data size:"
<<
space_data
.
size
();
memcpy
(
space_ptr
,
space_data
.
data
(),
space_data
.
size
()
*
sizeof
(
T
));
memcpy
(
space_ptr
,
space_data
.
data
(),
space_data
.
size
()
*
sizeof
(
T
));
}
}
auto
batch_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
auto
batch_tensor
=
unique_ptr
<
Tensor
>
(
new
Tensor
(
DataTypeToEnum
<
T
>::
v
()));
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
DataTypeToEnum
<
T
>::
v
()));
batch_tensor
->
Resize
(
batch_shape
);
batch_tensor
->
Resize
(
batch_shape
);
{
{
Tensor
::
MappingGuard
batch_mapper
(
batch_tensor
.
get
());
Tensor
::
MappingGuard
batch_mapper
(
batch_tensor
.
get
());
...
@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
...
@@ -91,113 +92,81 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
memcpy
(
batch_ptr
,
batch_data
.
data
(),
batch_data
.
size
()
*
sizeof
(
T
));
memcpy
(
batch_ptr
,
batch_data
.
data
(),
batch_data
.
size
()
*
sizeof
(
T
));
}
}
RunSpaceToBatch
<
DeviceType
::
OPENCL
>
(
space_shape
,
space_data
,
RunSpaceToBatch
<
DeviceType
::
OPENCL
>
(
space_shape
,
space_data
,
block_data
,
block_data
,
padding_data
,
batch_tensor
.
get
());
padding_data
,
batch_tensor
.
get
());
RunBatchToSpace
<
DeviceType
::
OPENCL
>
(
batch_shape
,
batch_data
,
RunBatchToSpace
<
DeviceType
::
OPENCL
>
(
batch_shape
,
batch_data
,
block_data
,
block_data
,
padding_data
,
space_tensor
.
get
());
padding_data
,
space_tensor
.
get
());
}
}
TEST
(
SpaceToBatchTest
,
SmallData
)
{
TEST
(
SpaceToBatchTest
,
SmallData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
1
,
2
,
3
,
4
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
1
},
{
1
,
2
,
3
,
4
});
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
1
},
{
1
,
2
,
3
,
4
}
);
}
}
TEST
(
SpaceToBatchTest
,
SmallDataWithOnePadding
)
{
TEST
(
SpaceToBatchTest
,
SmallDataWithOnePadding
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
3
,
3
},
{
1
,
2
,
3
,
4
},
{
1
,
0
,
1
,
0
},
{
9
,
1
,
1
,
1
},
{
3
,
3
},
{
0
,
0
,
0
,
0
,
1
,
2
,
0
,
3
,
4
});
{
1
,
0
,
1
,
0
},
{
9
,
1
,
1
,
1
},
{
0
,
0
,
0
,
0
,
1
,
2
,
0
,
3
,
4
}
);
}
}
TEST
(
SpaceToBatchTest
,
SmallDataWithTwoPadding
)
{
TEST
(
SpaceToBatchTest
,
SmallDataWithTwoPadding
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
3
,
4
},
{
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
1
,
1
,
1
,
1
},
{
4
,
2
,
2
,
1
},
{
2
,
2
},
{
0
,
0
,
0
,
4
,
0
,
0
,
3
,
0
,
0
,
2
,
0
,
0
,
1
,
0
,
0
,
0
});
{
1
,
1
,
1
,
1
},
{
4
,
2
,
2
,
1
},
{
0
,
0
,
0
,
4
,
0
,
0
,
3
,
0
,
0
,
2
,
0
,
0
,
1
,
0
,
0
,
0
}
);
}
}
TEST
(
SpaceToBatchTest
,
SmallDataWithLargeImage
)
{
TEST
(
SpaceToBatchTest
,
SmallDataWithLargeImage
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
10
,
1
},
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
{
1
,
2
,
10
,
1
},
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
},
{
2
,
2
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
5
,
1
},
{
0
,
0
,
0
,
0
},
{
1
,
3
,
5
,
7
,
9
,
2
,
4
,
6
,
8
,
10
,
11
,
13
,
15
,
17
,
19
,
12
,
14
,
16
,
18
,
20
});
{
4
,
1
,
5
,
1
},
{
1
,
3
,
5
,
7
,
9
,
2
,
4
,
6
,
8
,
10
,
11
,
13
,
15
,
17
,
19
,
12
,
14
,
16
,
18
,
20
}
);
}
}
TEST
(
SpaceToBatchTest
,
MultiChannelData
)
{
TEST
(
SpaceToBatchTest
,
MultiChannelData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
3
},
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
{
1
,
2
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
{
2
,
2
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
});
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
}
);
}
}
TEST
(
SpaceToBatchTest
,
LargerMultiChannelData
)
{
TEST
(
SpaceToBatchTest
,
LargerMultiChannelData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
4
,
4
,
1
},
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
1
,
4
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
2
,
2
,
1
},
{
0
,
0
,
0
,
0
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
{
4
,
2
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
}
);
}
}
TEST
(
SpaceToBatchTest
,
MultiBatchData
)
{
TEST
(
SpaceToBatchTest
,
MultiBatchData
)
{
TestBidirectionalTransform
<
float
>
({
2
,
2
,
4
,
1
},
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
1
},
{
0
,
0
,
0
,
0
},
{
1
,
3
,
2
,
4
,
5
,
7
,
6
,
8
,
9
,
11
,
10
,
12
,
13
,
15
,
14
,
16
});
{
8
,
1
,
2
,
1
},
{
1
,
3
,
2
,
4
,
5
,
7
,
6
,
8
,
9
,
11
,
10
,
12
,
13
,
15
,
14
,
16
}
);
}
}
TEST
(
SpaceToBatchTest
,
MultiBatchAndChannelData
)
{
TEST
(
SpaceToBatchTest
,
MultiBatchAndChannelData
)
{
TestBidirectionalTransform
<
float
>
({
2
,
2
,
4
,
2
},
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
{
2
,
2
,
4
,
2
},
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
{
2
,
2
},
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
0
,
0
,
0
,
0
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
2
},
{
8
,
1
,
2
,
2
},
{
1
,
2
,
5
,
6
,
3
,
4
,
7
,
8
,
9
,
10
,
13
,
14
,
11
,
12
,
15
,
16
,
{
1
,
2
,
5
,
6
,
3
,
4
,
7
,
8
,
9
,
10
,
13
,
14
,
11
,
12
,
15
,
16
,
17
,
18
,
21
,
22
,
19
,
20
,
23
,
24
,
25
,
26
,
29
,
30
,
27
,
28
,
31
,
32
});
17
,
18
,
21
,
22
,
19
,
20
,
23
,
24
,
25
,
26
,
29
,
30
,
27
,
28
,
31
,
32
}
);
}
}
//TEST(SpaceTobatchTest, CompareTF) {
//
TEST(SpaceTobatchTest, CompareTF) {
//
//
// const std::string space_file = "/data/local/tmp/test/input";
// const std::string space_file = "/data/local/tmp/test/input";
// const std::string batch_file = "/data/local/tmp/test/output";
// const std::string batch_file = "/data/local/tmp/test/output";
// const std::vector<index_t> space_shape = {1, 256, 256, 32};
// const std::vector<index_t> space_shape = {1, 256, 256, 32};
// const int space_size = std::accumulate(space_shape.begin(), space_shape.end(), 1, std::multiplies<int>());
// const int space_size = std::accumulate(space_shape.begin(),
// space_shape.end(), 1, std::multiplies<int>());
// const std::vector<index_t> batch_shape = {4, 130, 130, 32};
// const std::vector<index_t> batch_shape = {4, 130, 130, 32};
// const int batch_size = std::accumulate(batch_shape.begin(), batch_shape.end(), 1, std::multiplies<int>());
// const int batch_size = std::accumulate(batch_shape.begin(),
// batch_shape.end(), 1, std::multiplies<int>());
//
//
// auto space_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// auto space_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v()));
// DataTypeToEnum<float>::v()));
// space_tensor->Resize(space_shape);
// space_tensor->Resize(space_shape);
// std::vector<float> space_data(space_size, 0.0);
// std::vector<float> space_data(space_size, 0.0);
...
@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
...
@@ -216,7 +185,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// VLOG(0) << "open space file failed";
// VLOG(0) << "open space file failed";
// }
// }
//
//
// auto batch_tensor = unique_ptr<Tensor>(new Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// auto batch_tensor = unique_ptr<Tensor>(new
// Tensor(GetDeviceAllocator(DeviceType::OPENCL),
// DataTypeToEnum<float>::v()));
// DataTypeToEnum<float>::v()));
// std::vector<float> batch_data(batch_size, 0.0);
// std::vector<float> batch_data(batch_size, 0.0);
// batch_tensor->Resize(batch_shape);
// batch_tensor->Resize(batch_shape);
...
@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
...
@@ -231,7 +201,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// }
// }
// Tensor::MappingGuard batch_mapper(batch_tensor.get());
// Tensor::MappingGuard batch_mapper(batch_tensor.get());
// float *batch_ptr = batch_tensor->mutable_data<float>();
// float *batch_ptr = batch_tensor->mutable_data<float>();
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) == batch_data.size());
// MACE_CHECK(static_cast<size_t>(batch_tensor->size()) ==
// batch_data.size());
// memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float));
// memcpy(batch_ptr, batch_data.data(), batch_data.size() * sizeof(float));
// }
// }
//
//
...
@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
...
@@ -245,4 +216,3 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
// {2, 2, 2, 2},
// {2, 2, 2, 2},
// space_tensor.get());
// space_tensor.get());
//}
//}
mace/python/tools/model.template
浏览文件 @
baf2dcd1
...
@@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector<mace::ConstTensor> &tensors) {
...
@@ -27,12 +27,12 @@ void Create{{tensor.name}}(std::vector<mace::ConstTensor> &tensors) {
#include "mace/core/public/mace.h"
#include "mace/core/public/mace.h"
namespace {
namespace {
static
void UpdateOp(mace::OperatorDef &op,
void UpdateOp(mace::OperatorDef &op,
const std::string &name,
const std::string &name,
const std::string &type,
const std::string &type,
const std::vector<std::string> &inputs,
const std::vector<std::string> &inputs,
const std::vector<std::string> &outputs,
const std::vector<std::string> &outputs,
const std::vector<mace::DataType> &output_types) {
const std::vector<mace::DataType> &output_types) {
op.set_name(name);
op.set_name(name);
op.set_type(type);
op.set_type(type);
op.set_input(inputs);
op.set_input(inputs);
...
...
tools/bazel-adb-run.sh
浏览文件 @
baf2dcd1
...
@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//}
...
@@ -17,9 +17,8 @@ BAZEL_BIN_PATH=${BAZEL_BIN_PATH#//}
BAZEL_BIN_PATH
=
bazel-bin/
$BAZEL_BIN_PATH
BAZEL_BIN_PATH
=
bazel-bin/
$BAZEL_BIN_PATH
BIN_NAME
=
`
echo
$BAZEL_TARGET
|
cut
-d
:
-f2
`
BIN_NAME
=
`
echo
$BAZEL_TARGET
|
cut
-d
:
-f2
`
ANDROID_ABI
=
armeabi-v7a
ANDROID_ABI
=
arm64-v8a
ANDROID_ABI
=
arm64-v8a
STRIP
=
""
ANDROID_ABI
=
armeabi-v7a
STRIP
=
"--strip always"
STRIP
=
"--strip always"
VLOG_LEVEL
=
0
VLOG_LEVEL
=
0
PROFILINE
=
"--define profiling=true"
PROFILINE
=
"--define profiling=true"
...
@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
...
@@ -31,7 +30,7 @@ bazel build -c opt $STRIP --verbose_failures $BAZEL_TARGET \
--crosstool_top
=
//external:android/crosstool
\
--crosstool_top
=
//external:android/crosstool
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--host_crosstool_top
=
@bazel_tools//tools/cpp:toolchain
\
--cpu
=
$ANDROID_ABI
\
--cpu
=
$ANDROID_ABI
\
--define
neon
=
tru
e
--define
neon
=
fals
e
if
[
$?
-ne
0
]
;
then
if
[
$?
-ne
0
]
;
then
exit
1
exit
1
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录