Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
3e82ad67
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
3e82ad67
编写于
5月 08, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refactor model converter and transformer
上级
04f7a34a
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
2411 addition
and
2802 deletion
+2411
-2802
mace/core/mace.cc
mace/core/mace.cc
+5
-5
mace/ops/fully_connected.cc
mace/ops/fully_connected.cc
+3
-3
mace/ops/fully_connected_benchmark.cc
mace/ops/fully_connected_benchmark.cc
+2
-2
mace/ops/fully_connected_test.cc
mace/ops/fully_connected_test.cc
+6
-6
mace/proto/mace.proto
mace/proto/mace.proto
+1
-0
mace/python/tools/BUILD
mace/python/tools/BUILD
+14
-22
mace/python/tools/caffe_converter_lib.py
mace/python/tools/caffe_converter_lib.py
+0
-1213
mace/python/tools/convert_util.py
mace/python/tools/convert_util.py
+6
-0
mace/python/tools/converter.py
mace/python/tools/converter.py
+72
-16
mace/python/tools/converter_tool/__init__.py
mace/python/tools/converter_tool/__init__.py
+0
-0
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+259
-0
mace/python/tools/converter_tool/caffe_converter.py
mace/python/tools/converter_tool/caffe_converter.py
+508
-0
mace/python/tools/converter_tool/shape_inference.py
mace/python/tools/converter_tool/shape_inference.py
+149
-0
mace/python/tools/converter_tool/tensorflow_converter.py
mace/python/tools/converter_tool/tensorflow_converter.py
+442
-0
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+914
-0
mace/python/tools/memory_optimizer.py
mace/python/tools/memory_optimizer.py
+10
-4
mace/python/tools/source_converter_lib.py
mace/python/tools/source_converter_lib.py
+12
-1
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+0
-1522
mace/test/mace_api_mt_test.cc
mace/test/mace_api_mt_test.cc
+4
-4
mace/test/mace_api_test.cc
mace/test/mace_api_test.cc
+4
-4
未找到文件。
mace/core/mace.cc
浏览文件 @
3e82ad67
...
...
@@ -119,11 +119,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
LOG
(
INFO
)
<<
"MACE version: "
<<
MaceVersion
();
// Set storage path for internal usage
for
(
auto
input_name
:
input_nodes
)
{
ws_
->
CreateTensor
(
MakeString
(
"mace_input_node_"
,
input_name
,
":0"
),
ws_
->
CreateTensor
(
MakeString
(
"mace_input_node_"
,
input_name
),
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
for
(
auto
output_name
:
output_nodes
)
{
ws_
->
CreateTensor
(
MakeString
(
"mace_output_node_"
,
output_name
,
":0"
),
ws_
->
CreateTensor
(
MakeString
(
"mace_output_node_"
,
output_name
),
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
#ifdef MACE_ENABLE_HEXAGON
...
...
@@ -182,7 +182,7 @@ MaceStatus MaceEngine::Impl::Run(
"The Inputs' shape must be 4-dimension with NHWC format,"
" please use 1 to fill missing dimensions"
);
Tensor
*
input_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_input_node_"
,
input
.
first
,
":0"
));
ws_
->
GetTensor
(
MakeString
(
"mace_input_node_"
,
input
.
first
));
input_tensor
->
Resize
(
input
.
second
.
shape
());
{
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
...
...
@@ -199,7 +199,7 @@ MaceStatus MaceEngine::Impl::Run(
" please use 1 to fill missing dimensions"
);
}
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
));
output_tensors
.
push_back
(
output_tensor
);
}
#ifdef MACE_ENABLE_HEXAGON
...
...
@@ -223,7 +223,7 @@ MaceStatus MaceEngine::Impl::Run(
#endif
for
(
auto
&
output
:
*
outputs
)
{
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
));
// save output
if
(
output_tensor
!=
nullptr
&&
output
.
second
.
data
()
!=
nullptr
)
{
Tensor
::
MappingGuard
output_guard
(
output_tensor
);
...
...
mace/ops/fully_connected.cc
浏览文件 @
3e82ad67
...
...
@@ -18,20 +18,20 @@ namespace mace {
namespace
ops
{
void
Register_FullyConnected
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
C
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
ullyConnected
"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FullyConnectedOp
<
DeviceType
::
CPU
,
float
>
);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
C
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
ullyConnected
"
)
.
Device
(
DeviceType
::
GPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
FullyConnectedOp
<
DeviceType
::
GPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
C
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
ullyConnected
"
)
.
Device
(
DeviceType
::
GPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
...
...
mace/ops/fully_connected_benchmark.cc
浏览文件 @
3e82ad67
...
...
@@ -37,7 +37,7 @@ void FCBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
out_channel
});
if
(
D
==
DeviceType
::
CPU
)
{
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
...
...
@@ -52,7 +52,7 @@ void FCBenchmark(
BufferToImage
<
D
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
...
...
mace/ops/fully_connected_test.cc
浏览文件 @
3e82ad67
...
...
@@ -42,7 +42,7 @@ void Simple(const std::vector<index_t> &input_shape,
if
(
D
==
DeviceType
::
CPU
)
{
net
.
Transpose2D
<
D
,
float
>
(
"Weight"
,
"WeightTranspose"
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
...
...
@@ -59,7 +59,7 @@ void Simple(const std::vector<index_t> &input_shape,
BufferToImage
<
D
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
...
...
@@ -142,7 +142,7 @@ void Complex(const index_t batch,
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
net
.
AddRandomInput
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
out_channel
});
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
...
...
@@ -166,7 +166,7 @@ void Complex(const index_t batch,
BufferToImage
<
DeviceType
::
GPU
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
...
...
@@ -231,7 +231,7 @@ void TestWXFormat(const index_t batch,
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
net
.
AddRandomInput
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
out_channel
});
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
...
...
@@ -255,7 +255,7 @@ void TestWXFormat(const index_t batch,
BufferToImage
<
DeviceType
::
GPU
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
...
...
mace/proto/mace.proto
浏览文件 @
3e82ad67
...
...
@@ -10,6 +10,7 @@ enum NetMode {
enum
DeviceType
{
CPU
=
0
;
// In default, we will use CPU.
GPU
=
2
;
HEXAGON
=
3
;
}
enum
DataType
{
...
...
mace/python/tools/BUILD
浏览文件 @
3e82ad67
py_library
(
name
=
"
tf_
converter_lib"
,
name
=
"converter_lib"
,
srcs
=
[
"convert_util.py"
,
"graph_util.py"
,
"tf_converter_lib.py"
,
"tf_dsp_converter_lib.py"
,
"converter_tool/base_converter.py"
,
"converter_tool/shape_inference.py"
,
"converter_tool/tensorflow_converter.py"
,
"converter_tool/caffe_converter.py"
,
"converter_tool/transformer.py"
,
],
srcs_version
=
"PY2AND3"
,
deps
=
[
":memory_optimizer"
,
"//mace/proto:mace_py"
,
],
)
py_library
(
name
=
"caffe_converter_lib"
,
srcs
=
[
"caffe_converter_lib.py"
,
],
srcs_version
=
"PY2AND3"
,
deps
=
[
":memory_optimizer"
,
"//mace/third_party/caffe:caffe_py"
,
],
)
...
...
@@ -37,22 +30,21 @@ py_library(
)
py_binary
(
name
=
"
convert
er"
,
srcs
=
[
"
convert
er.py"
],
name
=
"
memory_optimiz
er"
,
srcs
=
[
"
memory_optimiz
er.py"
],
srcs_version
=
"PY2AND3"
,
deps
=
[
":caffe_converter_lib"
,
":source_converter_lib"
,
":tf_converter_lib"
,
"@six_archive//:six"
,
"//mace/proto:mace_py"
,
],
)
py_binary
(
name
=
"
memory_optimiz
er"
,
srcs
=
[
"
memory_optimiz
er.py"
],
name
=
"
convert
er"
,
srcs
=
[
"
convert
er.py"
],
srcs_version
=
"PY2AND3"
,
deps
=
[
"//mace/proto:mace_py"
,
":converter_lib"
,
":source_converter_lib"
,
"@six_archive//:six"
,
],
)
mace/python/tools/caffe_converter_lib.py
已删除
100644 → 0
浏览文件 @
04f7a34a
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
mace.proto
import
mace_pb2
from
mace.third_party.caffe
import
caffe_pb2
from
mace.python.tools
import
memory_optimizer
import
google.protobuf.text_format
import
numpy
as
np
import
math
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
buffer_type_map
=
{
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
'WEIGHT_HEIGHT'
:
7
,
'WEIGHT_WIDTH'
:
8
,
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
activation_name_map
=
{
'ReLU'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'TanH'
:
'TANH'
,
}
math_type_mode
=
{
0
:
2
,
# PROD
1
:
0
,
# SUM
2
:
5
,
# MAX
}
MACE_INPUT_NODE_NAME
=
"mace_input_node"
MACE_OUTPUT_NODE_NAME
=
"mace_output_node"
OPENCL_IMAGE_MAX_SIZE
=
16384
class
Operator
(
object
):
def
__init__
(
self
,
name
,
type
,
layer
):
self
.
name
=
name
self
.
type
=
type
self
.
layer
=
layer
self
.
parents
=
[]
self
.
children
=
[]
self
.
data
=
[]
self
.
output_shape_map
=
{}
def
add_parent
(
self
,
parent_op
):
self
.
parents
.
append
(
parent_op
)
parent_op
.
children
.
append
(
self
)
def
get_single_parent
(
self
):
if
len
(
self
.
parents
)
!=
1
:
raise
Exception
(
'Operation %s expected single parent, but got %s'
%
(
self
.
name
,
len
(
self
.
parents
)))
return
self
.
parents
[
0
]
def
BlobToNPArray
(
blob
):
if
blob
.
num
!=
0
:
return
(
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
(
blob
.
num
,
blob
.
channels
,
blob
.
height
,
blob
.
width
)))
else
:
return
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
blob
.
shape
.
dim
)
class
Shapes
(
object
):
@
staticmethod
def
conv_pool_shape
(
input_shape
,
filter_shape
,
paddings
,
strides
,
dilations
,
round_func
,
input_format
=
'NHWC'
):
output_shape
=
np
.
zeros_like
(
input_shape
)
output_shape
[
0
]
=
input_shape
[
0
]
if
input_format
==
'NHWC'
:
# input format: NHWC, filter format: HWOI
output_shape
[
1
]
=
int
(
round_func
((
input_shape
[
1
]
+
paddings
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
output_shape
[
3
]
=
filter_shape
[
2
]
elif
input_format
==
'NCHW'
:
# input format: NCHW, filter format: OIHW
output_shape
[
1
]
=
filter_shape
[
0
]
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
0
]
-
filter_shape
[
2
]
-
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
3
]
=
int
(
round_func
((
input_shape
[
3
]
+
paddings
[
1
]
-
filter_shape
[
3
]
-
(
filter_shape
[
3
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
return
output_shape
@
staticmethod
def
fully_connected_shape
(
input_shape
,
weight_shape
,
input_format
=
'NHWC'
):
if
input_format
==
'NHWC'
:
return
[
input_shape
[
0
],
1
,
1
,
weight_shape
[
0
]]
elif
input_format
==
'NCHW'
:
return
[
input_shape
[
0
],
weight_shape
[
0
],
1
,
1
]
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
@
staticmethod
def
concat_shape
(
input_shapes
,
axis
):
output_shape
=
None
for
input_shape
in
input_shapes
:
if
output_shape
is
None
:
output_shape
=
list
(
input_shape
)
else
:
output_shape
[
axis
]
+=
input_shape
[
axis
]
return
output_shape
@
staticmethod
def
slice_shape
(
input_shape
,
num_output
,
input_format
=
'NHWC'
):
if
input_format
==
'NHWC'
:
return
[
input_shape
[
0
],
input_shape
[
1
],
input_shape
[
2
],
input_shape
[
3
]
/
num_output
]
elif
input_format
==
'NCHW'
:
return
[
input_shape
[
0
],
input_shape
[
1
]
/
num_output
,
input_shape
[
2
],
input_shape
[
3
]
]
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
# outputs' name is [op.name + '_' + #]
class
CaffeConverter
(
object
):
def
__init__
(
self
,
caffe_net
,
weights
,
net_def
,
dt
,
device
,
winograd
):
self
.
net_def
=
net_def
self
.
caffe_net
=
caffe_net
self
.
weights
=
weights
self
.
dt
=
dt
self
.
device
=
device
self
.
winograd
=
winograd
self
.
resolved_ops
=
set
()
self
.
ops
=
[]
self
.
inputs_map
=
{}
# caffe op name -> mace inputs' name
# Add Input operations
top_name_map
=
{}
inputs
=
caffe_net
.
input
for
input
in
inputs
:
self
.
ops
.
extend
([
Operator
(
input
,
'Input'
,
None
)])
top_name_map
[
input
]
=
input
layers
=
caffe_net
.
layer
# remove train layers and dropout
layers
=
self
.
remove_unused_layers
(
layers
)
# Construct graph
# Only support single-output layer
# layer with single output often use the same top name.
self
.
ops
.
extend
(
[
Operator
(
layer
.
name
,
layer
.
type
,
layer
)
for
layer
in
layers
])
self
.
ops_map
=
{
op
.
name
:
op
for
op
in
self
.
ops
}
output_op_map
=
{}
for
layer
in
layers
:
op
=
self
.
ops_map
[
layer
.
name
]
for
input_name
in
layer
.
bottom
:
assert
input_name
!=
layer
.
name
parent_op
=
output_op_map
.
get
(
input_name
)
if
parent_op
is
None
:
parent_op
=
self
.
ops_map
[
input_name
]
op
.
add_parent
(
parent_op
)
if
op
.
name
not
in
self
.
inputs_map
:
self
.
inputs_map
[
op
.
name
]
=
[]
self
.
inputs_map
[
op
.
name
].
extend
([
top_name_map
[
input_name
]])
for
i
in
range
(
len
(
layer
.
top
)):
output_name
=
layer
.
top
[
i
]
if
len
(
layer
.
top
)
==
1
:
top_name_map
[
output_name
]
=
op
.
name
else
:
top_name_map
[
output_name
]
=
op
.
name
+
'_'
+
str
(
i
)
if
output_name
==
layer
.
name
:
continue
output_op_map
[
output_name
]
=
op
# Load weights
weights_layers
=
weights
.
layer
for
layer
in
weights_layers
:
if
not
layer
.
blobs
:
continue
if
layer
.
name
in
self
.
ops_map
:
op
=
self
.
ops_map
[
layer
.
name
]
op
.
data
=
[
BlobToNPArray
(
blob
)
for
blob
in
layer
.
blobs
]
# toposort ops
self
.
ops
=
self
.
toposort_ops
()
def
CommonConvert
(
self
,
op
,
mace_type
):
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
data_format_arg
=
op_def
.
arg
.
add
()
data_format_arg
.
name
=
'data_format'
if
self
.
device
==
'cpu'
:
data_format_arg
.
s
=
'NCHW'
else
:
data_format_arg
.
s
=
'NHWC'
op_def
.
name
=
op
.
name
op_def
.
type
=
mace_type
op_def
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
return
op_def
def
remove_unused_layers
(
self
,
layers
):
phase_map
=
{
0
:
'train'
,
1
:
'test'
}
test_layers_names
=
set
()
test_layers
=
[]
for
layer
in
layers
:
phase
=
'test'
if
len
(
layer
.
include
):
phase
=
phase_map
[
layer
.
include
[
0
].
phase
]
if
len
(
layer
.
exclude
):
phase
=
phase_map
[
layer
.
exclude
[
0
].
phase
]
if
phase
==
'test'
and
layer
.
type
!=
'Dropout'
:
test_layers
.
append
(
layer
)
assert
layer
.
name
not
in
test_layers_names
test_layers_names
.
add
(
layer
.
name
)
return
test_layers
def
toposort_ops
(
self
):
sorted_ops
=
[]
temp_visited
=
set
()
visited
=
set
()
def
search
(
op
):
if
op
.
name
in
temp_visited
:
raise
Exception
(
"The model is not DAG"
)
if
op
.
name
in
visited
:
return
temp_visited
.
add
(
op
.
name
)
for
parent_op
in
op
.
parents
:
search
(
parent_op
)
temp_visited
.
remove
(
op
.
name
)
sorted_ops
.
append
(
op
)
visited
.
add
(
op
.
name
)
for
op
in
self
.
ops
:
search
(
op
)
return
sorted_ops
def
add_buffer_to_image
(
self
,
input_name
,
input_type
):
output_name
=
input_name
[:
-
2
]
+
"_b2i"
+
input_name
[
-
2
:]
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'buffer_type'
arg
.
i
=
buffer_type_map
[
input_type
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'mode'
arg
.
i
=
0
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
return
output_name
def
add_image_to_buffer
(
self
,
input_name
,
input_type
):
output_name
=
input_name
[:
-
2
]
+
"_i2b"
+
input_name
[
-
2
:]
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'buffer_type'
arg
.
i
=
buffer_type_map
[
input_type
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
return
output_name
def
add_gpu_input_transform
(
self
,
names
):
for
name
in
names
:
new_input_name
=
MACE_INPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
name
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT_CHANNEL'
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
input_op
=
self
.
ops_map
[
name
]
if
input_op
.
layer
is
not
None
:
output_shape
=
input_op
.
output_shape_map
[
input_op
.
layer
.
top
[
0
]]
else
:
output_shape
=
input_op
.
output_shape_map
[
input_op
.
name
]
self
.
add_output_shape
(
op_def
,
output_shape
)
def
add_gpu_output_transform
(
self
,
names
):
for
name
in
names
:
output_name
=
MACE_OUTPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT_CHANNEL'
]
def
add_tensor
(
self
,
name
,
value
):
tensor
=
self
.
net_def
.
tensors
.
add
()
tensor
.
name
=
name
shape
=
list
(
value
.
shape
)
tensor
.
dims
.
extend
(
shape
)
tensor
.
data_type
=
mace_pb2
.
DT_FLOAT
tensor
.
float_data
.
extend
(
value
.
flat
)
@
staticmethod
def
add_output_shape
(
op_def
,
output_shape
):
mace_output_shape
=
mace_pb2
.
OutputShape
()
mace_output_shape
.
dims
.
extend
(
output_shape
)
op_def
.
output_shape
.
extend
([
mace_output_shape
])
def
add_stride_pad_kernel_arg
(
self
,
param
,
op_def
):
try
:
if
len
(
param
.
stride
)
>
1
or
len
(
param
.
kernel_size
)
>
1
or
len
(
param
.
pad
)
>
1
:
raise
Exception
(
'Mace does not support multiple stride/kernel_size/pad'
)
stride
=
[
param
.
stride
[
0
],
param
.
stride
[
0
]]
if
len
(
param
.
stride
)
else
[
1
,
1
]
pad
=
[
param
.
pad
[
0
]
*
2
,
param
.
pad
[
0
]
*
2
]
if
len
(
param
.
pad
)
else
[
0
,
0
]
kernel
=
[
param
.
kernel_size
[
0
],
param
.
kernel_size
[
0
]]
if
len
(
param
.
kernel_size
)
else
[
0
,
0
]
except
TypeError
:
stride
=
[
param
.
stride
,
param
.
stride
]
pad
=
[
param
.
pad
*
2
,
param
.
pad
*
2
]
kernel
=
[
param
.
kernel_size
,
param
.
kernel_size
]
if
param
.
HasField
(
"stride_h"
)
or
param
.
HasField
(
"stride_w"
):
stride
=
[
param
.
stride_h
,
param
.
stride_w
]
# Pad
if
param
.
HasField
(
"pad_h"
)
or
param
.
HasField
(
"pad_w"
):
pad
=
[
param
.
pad_h
*
2
,
param
.
pad_w
*
2
]
if
op_def
is
not
None
:
strides_arg
=
op_def
.
arg
.
add
()
strides_arg
.
name
=
'strides'
strides_arg
.
ints
.
extend
(
stride
)
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding_values'
padding_arg
.
ints
.
extend
(
pad
)
if
op_def
.
type
==
'Pooling'
:
if
param
.
HasField
(
"kernel_h"
)
or
param
.
HasField
(
"kernel_w"
):
kernel
=
[
param
.
kernel_h
,
param
.
kernel_w
]
return
pad
,
stride
,
kernel
def
convert_conv2d
(
self
,
op
):
use_winograd
=
False
if
self
.
device
==
'cpu'
:
use_winograd
=
self
.
check_winograd_conv
(
op
)
param
=
op
.
layer
.
convolution_param
is_depthwise
=
False
if
param
.
HasField
(
'group'
):
if
param
.
group
==
op
.
data
[
0
].
shape
[
0
]
and
op
.
data
[
0
].
shape
[
1
]
==
1
:
is_depthwise
=
True
else
:
raise
Exception
(
"Mace do not support group convolution yet"
)
if
is_depthwise
:
op_def
=
self
.
CommonConvert
(
op
,
'DepthwiseConv2d'
)
else
:
op_def
=
self
.
CommonConvert
(
op
,
'Conv2D'
)
# Add filter
weight_tensor_name
=
op
.
name
+
'_weight:0'
if
self
.
device
==
'cpu'
:
weight_data
=
op
.
data
[
0
]
else
:
# OIHW -> HWOI
weight_data
=
op
.
data
[
0
].
transpose
((
2
,
3
,
0
,
1
))
if
use_winograd
:
self
.
convert_winograd_conv_filter_cpu
(
op
,
op_def
)
elif
self
.
device
==
'gpu'
:
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
buffer_type
=
"DW_CONV2D_FILTER"
\
if
is_depthwise
else
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
else
:
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
op_def
.
input
.
extend
([
weight_tensor_name
])
# Add Bias
if
len
(
op
.
data
)
==
2
:
bias_tensor_name
=
op
.
name
+
'_bias:0'
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
bias_tensor_name
])
paddings
,
strides
,
_
=
self
.
add_stride_pad_kernel_arg
(
param
,
op_def
)
dilations
=
[
1
,
1
]
if
len
(
param
.
dilation
)
>
0
:
dilation_arg
=
op_def
.
arg
.
add
()
dilation_arg
.
name
=
'dilations'
if
len
(
param
.
dilation
)
==
1
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
0
]]
elif
len
(
param
.
dilation
)
==
2
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
1
]]
dilation_arg
.
ints
.
extend
(
dilations
)
final_op
=
op
self
.
resolved_ops
.
add
(
op
.
name
)
input_format
=
'NCHW'
if
self
.
device
==
'cpu'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
weight_data
.
shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
and
\
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
final_op
=
activation_op
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
op_def
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
def
check_winograd_conv
(
self
,
op
):
param
=
op
.
layer
.
convolution_param
filter_shape
=
np
.
asarray
(
op
.
data
[
0
].
shape
)
if
self
.
device
!=
'cpu'
:
filter_shape
=
filter_shape
[[
2
,
3
,
0
,
1
]]
# OIHW -> HWOI
paddings
,
strides
,
_
=
self
.
add_stride_pad_kernel_arg
(
param
,
None
)
if
param
.
HasField
(
'group'
):
if
param
.
group
==
op
.
data
[
0
].
shape
[
0
]
and
op
.
data
[
0
].
shape
[
1
]
==
1
:
return
False
# Depthwise conv not support winograd
else
:
raise
Exception
(
"Mace do not support group convolution yet"
)
dilations
=
[
1
,
1
]
if
len
(
param
.
dilation
)
>
0
:
if
len
(
param
.
dilation
)
==
1
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
0
]]
elif
len
(
param
.
dilation
)
==
2
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
1
]]
input_format
=
'NCHW'
if
self
.
device
==
'cpu'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
filter_shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
if
self
.
winograd
and
dilations
[
0
]
==
1
and
\
(
dilations
[
0
]
==
dilations
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
]):
if
self
.
device
==
'gpu'
:
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
\
((
output_shape
[
2
]
+
1
)
/
2
)
return
filter_shape
[
0
]
==
3
and
\
filter_shape
[
0
]
==
filter_shape
[
1
]
and
\
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
width
<
OPENCL_IMAGE_MAX_SIZE
)
elif
self
.
device
==
'cpu'
:
return
filter_shape
[
2
]
==
3
and
\
filter_shape
[
2
]
==
filter_shape
[
3
]
and
\
filter_shape
[
0
]
>=
8
and
filter_shape
[
1
]
>=
8
return
False
def
convert_winograd_conv_filter_cpu
(
self
,
op
,
op_def
):
# Add filter
weight_tensor_name
=
op
.
name
+
'_weight:0'
weight_data
=
op
.
data
[
0
]
# OIHW
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
if
input_shape
[
2
]
>
16
and
input_shape
[
3
]
>
16
:
G
=
np
.
array
([
[
1.0
,
0.0
,
0.0
],
[
-
2.0
/
9
,
-
2.0
/
9
,
-
2.0
/
9
],
[
-
2.0
/
9
,
2.0
/
9
,
-
2.0
/
9
],
[
1.0
/
90
,
1.0
/
45
,
2.0
/
45
],
[
1.0
/
90
,
-
1.0
/
45
,
2.0
/
45
],
[
1.0
/
45
,
1.0
/
90
,
1.0
/
180
],
[
1.0
/
45
,
-
1.0
/
90
,
1.0
/
180
],
[
0.0
,
0.0
,
1.0
]
],
dtype
=
np
.
float32
)
new_shape
=
[
64
,
weight_data
.
shape
[
0
],
weight_data
.
shape
[
1
]]
# TOC
else
:
G
=
np
.
array
([
[
1.0
,
0.0
,
0.0
],
[
0.5
,
0.5
,
0.5
],
[
0.5
,
-
0.5
,
0.5
],
[
0.0
,
0.0
,
1.0
],
],
dtype
=
np
.
float32
)
new_shape
=
[
16
,
weight_data
.
shape
[
0
],
weight_data
.
shape
[
1
]]
# TOC
new_weight_value
=
G
.
dot
(
weight_data
).
dot
(
G
.
T
)
# [8, O, I, 8]
new_weight_value
=
new_weight_value
.
transpose
(
0
,
3
,
1
,
2
)
new_weight_value
=
new_weight_value
.
reshape
(
new_shape
)
self
.
add_tensor
(
weight_tensor_name
,
new_weight_value
)
op_def
.
input
.
extend
([
weight_tensor_name
])
winograd_transformed_arg
=
op_def
.
arg
.
add
()
winograd_transformed_arg
.
name
=
'is_filter_transformed'
winograd_transformed_arg
.
i
=
1
def
convert_winograd_conv_gpu
(
self
,
op
):
# Add filter
weight_tensor_name
=
op
.
name
+
'_weight:0'
self
.
add_tensor
(
weight_tensor_name
,
op
.
data
[
0
])
buffer_type
=
"WINOGRAD_FILTER"
filter_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
param
=
op
.
layer
.
convolution_param
paddings
,
strides
,
_
=
self
.
add_stride_pad_kernel_arg
(
param
,
None
)
filter_shape
=
np
.
asarray
(
op
.
data
[
0
].
shape
)
filter_shape
=
filter_shape
[[
2
,
3
,
0
,
1
]]
# OIHW -> HWOI
input_format
=
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
filter_shape
,
paddings
,
strides
,
[
1
,
1
],
math
.
floor
,
input_format
)
# Input transform
wt_op
=
mace_pb2
.
OperatorDef
()
arg
=
wt_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
padding_arg
=
wt_op
.
arg
.
add
()
padding_arg
.
name
=
'padding_values'
padding_arg
.
ints
.
extend
(
paddings
)
wt_op
.
name
=
op
.
name
+
'_input_transform'
wt_op
.
type
=
'WinogradTransform'
wt_op
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
wt_output_name
=
wt_op
.
name
+
":0"
wt_op
.
output
.
extend
([
wt_output_name
])
wt_output_shape
=
mace_pb2
.
OutputShape
()
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
wt_op
.
output_shape
.
extend
([
wt_output_shape
])
# MatMul
matmul_op
=
mace_pb2
.
OperatorDef
()
arg
=
matmul_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
matmul_op
.
name
=
op
.
name
+
'_matmul'
matmul_op
.
type
=
'MatMul'
matmul_op
.
input
.
extend
([
filter_name
,
wt_output_name
])
matmul_output_name
=
matmul_op
.
name
+
":0"
matmul_op
.
output
.
extend
([
matmul_output_name
])
matmul_output_shape
=
mace_pb2
.
OutputShape
()
matmul_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
matmul_op
.
output_shape
.
extend
([
matmul_output_shape
])
# Inverse transform
iwt_op
=
mace_pb2
.
OperatorDef
()
arg
=
iwt_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
batch_arg
=
iwt_op
.
arg
.
add
()
batch_arg
.
name
=
'batch'
batch_arg
.
i
=
output_shape
[
0
]
height_arg
=
iwt_op
.
arg
.
add
()
height_arg
.
name
=
'height'
height_arg
.
i
=
output_shape
[
1
]
width_arg
=
iwt_op
.
arg
.
add
()
width_arg
.
name
=
'width'
width_arg
.
i
=
output_shape
[
2
]
iwt_op
.
name
=
op
.
name
+
'_inverse_transform'
iwt_op
.
type
=
'WinogradInverseTransform'
iwt_op
.
input
.
extend
([
matmul_output_name
])
# Add Bias
if
len
(
op
.
data
)
==
2
:
bias_tensor_name
=
op
.
name
+
'_bias:0'
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
iwt_op
.
input
.
extend
([
output_name
])
final_op
=
op
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
op
.
name
)
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
and
\
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
iwt_op
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
final_op
=
activation_op
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
iwt_op
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
iwt_op
,
output_shape
)
self
.
net_def
.
op
.
extend
([
wt_op
,
matmul_op
,
iwt_op
])
def convert_batchnorm(self, op):
    """Fold a caffe BatchNorm+Scale pair into one mace FoldedBatchNorm op.

    Caffe stores mean/variance in the BatchNorm blobs (scaled by a
    normalization factor in data[2][0]) and gamma/beta in the following
    Scale layer's blobs; both are folded into per-channel scale/offset
    tensors here.  A single trailing activation child is fused as well.
    """
    # Only the BatchNorm+Scale idiom is handled; a lone BatchNorm is rejected.
    if len(op.children) != 1 or op.children[0].type != 'Scale':
        raise Exception('Now only support BatchNorm+Scale')
    op_def = self.CommonConvert(op, 'FoldedBatchNorm')
    scale_op = op.children[0]

    epsilon_value = op.layer.batch_norm_param.eps
    # data[2][0] is caffe's moving-average normalization factor; mean and
    # variance blobs must be divided by it to get the true statistics.
    if op.data[2][0] != 0:
        mean_value = (1. / op.data[2][0]) * op.data[0]
        var_value = (1. / op.data[2][0]) * op.data[1]
    else:
        raise RuntimeError('scalar is zero.')

    gamma_value = scale_op.data[0]
    # beta is optional in the Scale layer; default to zeros.
    beta_value = np.zeros_like(mean_value)
    if len(scale_op.data) == 2:
        beta_value = scale_op.data[1]

    # Fold: y = scale * x + offset, with
    #   scale  = gamma / sqrt(var + eps)
    #   offset = beta - mean * scale
    scale_value = ((
        1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value).reshape(-1)
    offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)
    input_names = [op.name + '_scale:0', op.name + '_offset:0']
    self.add_tensor(input_names[0], scale_value)
    self.add_tensor(input_names[1], offset_value)

    if self.device == 'gpu':
        # GPU consumes arguments through OpenCL images.
        for name in input_names:
            output_name = self.add_buffer_to_image(name, "ARGUMENT")
            op_def.input.extend([output_name])
    else:
        op_def.input.extend([name for name in input_names])

    self.resolved_ops.add(op.name)
    self.resolved_ops.add(scale_op.name)
    final_op = scale_op

    output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]

    # Fuse an immediately-following activation (ReLU/Sigmoid/TanH) into
    # this op via the 'activation' arg.
    if len(self.ops_map[final_op.name].children) == 1 and \
            self.ops_map[final_op.name].children[0].type \
            in activation_name_map:
        activation_op = self.ops_map[final_op.name].children[0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        final_op = activation_op
        final_op.output_shape_map[final_op.layer.top[0]] = output_shape
        self.resolved_ops.add(activation_op.name)
    # NOTE(review): unlike sibling converters, the non-fused path never
    # records scale_op's output shape in its output_shape_map — presumably
    # consumers resolve it elsewhere; verify against callers.

    op_def.output.extend([final_op.name + ':0'])
    self.add_output_shape(op_def, output_shape)
    self.net_def.op.extend([op_def])
def convert_inner_product(self, op):
    """Convert a caffe InnerProduct layer into a mace FC op.

    The weight blob is reshaped to 2-D (out x in); on non-CPU devices it
    is additionally permuted so the inner dimension matches NHWC input
    layout.  An optional bias blob and a trailing activation are fused.
    """
    param = op.layer.inner_product_param
    try:
        # Only the default axis (1) without transpose is supported.
        if param.axis != 1 or param.transpose:
            raise ValueError(
                'Do not support non-default axis and transpose '
                'case for inner product')
    except AttributeError:
        # Older caffe protos may lack these fields entirely.
        pass

    op_def = self.CommonConvert(op, 'FC')
    weight_tensor_name = op.name + '_weight:0'
    if op.data[0].ndim not in [2, 4]:
        raise ValueError('Unexpected weight ndim.')
    # A 4-D weight is only acceptable when it is effectively 2-D, i.e.
    # its leading two dims are [1, 1].
    if op.data[0].ndim == 4 and list(op.data[0].shape[:2]) != [1, 1]:
        raise ValueError(
            'Only support 4D weight with shape [1, 1, *, *]')
    input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]

    weight_data = op.data[0].reshape(-1, op.data[0].shape[-1])
    assert weight_data.shape[1] == (
        input_shape[1] * input_shape[2] * input_shape[3])
    if self.device != 'cpu':
        # Caffe flattens NCHW; reorder the columns to NHWC for GPU.
        weight_data = weight_data.reshape(-1, input_shape[3],
                                          input_shape[1], input_shape[2])
        weight_data = weight_data.transpose((0, 2, 3, 1)).reshape(
            weight_data.shape[0], -1)
    self.add_tensor(weight_tensor_name, weight_data)
    if self.device == 'gpu':
        # OpenCL images have a hardware-limited extent; reject weights
        # that cannot fit either packing orientation.
        if (weight_data.shape[0] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE and \
                (weight_data.shape[1] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE:
            raise Exception(
                'Mace gpu do not support FC with weight shape: ' +
                str(weight_data.shape))
        if input_shape[3] % 4 == 0:
            buffer_type = "WEIGHT_WIDTH"
        else:
            buffer_type = "WEIGHT_HEIGHT"
            weight_type_arg = op_def.arg.add()
            weight_type_arg.name = 'weight_type'
            weight_type_arg.i = buffer_type_map['WEIGHT_HEIGHT']
        if buffer_type == "WEIGHT_HEIGHT" and \
                (weight_data.shape[0] + 3) / 4 > OPENCL_IMAGE_MAX_SIZE:
            raise Exception(
                'Mace gpu do not support FC with weight shape: ' +
                str(weight_data.shape))
        output_name = self.add_buffer_to_image(weight_tensor_name,
                                               buffer_type)
        op_def.input.extend([output_name])
    else:
        op_def.input.extend([weight_tensor_name])

    # Add Bias
    if len(op.data) == 2:
        bias_tensor_name = op.name + '_bias:0'
        bias_data = op.data[1].reshape(-1)
        self.add_tensor(bias_tensor_name, bias_data)
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(bias_tensor_name,
                                                   "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([bias_tensor_name])

    self.resolved_ops.add(op.name)
    input_format = 'NCHW' if self.device == 'cpu' else 'NHWC'
    output_shape = Shapes.fully_connected_shape(input_shape,
                                                weight_data.shape,
                                                input_format)
    op.output_shape_map[op.layer.top[0]] = output_shape
    final_op = op

    # Fuse a single trailing activation child into this FC op.
    if len(self.ops_map[final_op.name].children) == 1 \
            and self.ops_map[final_op.name].children[0].type \
            in activation_name_map:
        activation_op = self.ops_map[final_op.name].children[0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        final_op = activation_op
        final_op.output_shape_map[final_op.layer.top[0]] = output_shape
        self.resolved_ops.add(activation_op.name)

    op_def.output.extend([final_op.name + ':0'])
    self.add_output_shape(op_def, output_shape)
    self.net_def.op.extend([op_def])
def convert_pooling(self, op):
    """Convert a caffe Pooling layer into a mace Pooling op.

    Supports MAX and AVE pooling, including global pooling where the
    kernel is the whole spatial extent of the input.
    """
    op_def = self.CommonConvert(op, 'Pooling')

    param = op.layer.pooling_param
    paddings, strides, kernels = self.add_stride_pad_kernel_arg(param, op_def)
    if param.pool == caffe_pb2.PoolingParameter.MAX:
        pooling_type = "MaxPool"
    elif param.pool == caffe_pb2.PoolingParameter.AVE:
        pooling_type = "AvgPool"
    else:
        # Previously an unsupported pool mode (e.g. STOCHASTIC) left
        # pooling_type unbound and crashed with NameError below.
        raise Exception('Unsupported pooling type: ' + str(param.pool))
    pooling_type_arg = op_def.arg.add()
    pooling_type_arg.name = 'pooling_type'
    pooling_type_arg.i = pooling_type_mode[pooling_type]

    input_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
    if param.HasField('global_pooling') and param.global_pooling:
        # Global pooling: kernel covers the full H x W of the input;
        # spatial dims sit at [2,3] for NCHW (cpu) and [1,2] for NHWC.
        kernels = [input_shape[2], input_shape[3]] \
            if self.device == 'cpu' else \
            [input_shape[1], input_shape[2]]
    kernel_arg = op_def.arg.add()
    kernel_arg.name = 'kernels'
    kernel_arg.ints.extend(kernels)

    if self.device != 'cpu':
        filter_shape = [
            kernels[0], kernels[1], input_shape[3], input_shape[3]
        ]
    else:
        filter_shape = [
            input_shape[1], input_shape[1], kernels[0], kernels[1]
        ]
    input_format = 'NCHW' if self.device == 'cpu' else 'NHWC'
    # Pooling uses ceil rounding for the output spatial size (caffe rule).
    output_shape = Shapes.conv_pool_shape(input_shape, filter_shape,
                                          paddings, strides, [1, 1],
                                          math.ceil, input_format)
    op.output_shape_map[op.layer.top[0]] = output_shape

    op_def.output.extend([op.name + ':0'])
    self.add_output_shape(op_def, output_shape)
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def convert_activation(self, op):
    """Convert a standalone activation layer (ReLU/Sigmoid/TanH) into a
    mace Activation op; the output shape passes through unchanged."""
    act_def = self.CommonConvert(op, 'Activation')

    act_arg = act_def.arg.add()
    act_arg.name = 'activation'
    act_arg.s = activation_name_map[op.type]

    act_def.output.extend([op.name + ':0'])
    shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = shape
    self.add_output_shape(act_def, shape)

    self.net_def.op.extend([act_def])
    self.resolved_ops.add(op.name)
def convert_prelu(self, op):
    """Convert a caffe PReLU layer into a mace Activation op of type PRELU.

    The learned per-channel alpha blob becomes an extra input tensor
    (uploaded as an ARGUMENT image on GPU).
    """
    op_def = self.CommonConvert(op, 'Activation')
    activation_arg = op_def.arg.add()
    activation_arg.name = 'activation'
    activation_arg.s = 'PRELU'
    alpha_tensor_name = op.name + '_alpha:0'
    # Flatten the alpha blob to 1-D for the runtime.
    alpha_data = op.data[0].reshape(-1)
    self.add_tensor(alpha_tensor_name, alpha_data)
    if self.device == 'gpu':
        output_name = self.add_buffer_to_image(alpha_tensor_name, "ARGUMENT")
        op_def.input.extend([output_name])
    else:
        op_def.input.extend([alpha_tensor_name])
    op_def.output.extend([op.name + ':0'])
    # Shape passes through from the single parent.
    output_shape = op.get_single_parent().output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = output_shape
    self.add_output_shape(op_def, output_shape)
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def convert_add(self, op):
    """Convert a caffe Add layer into a mace AddN op (element-wise sum);
    the shape of the first parent passes through."""
    add_def = self.CommonConvert(op, 'AddN')
    add_def.output.extend([op.name + ':0'])

    shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = shape
    self.add_output_shape(add_def, shape)

    self.net_def.op.extend([add_def])
    self.resolved_ops.add(op.name)
def convert_concat(self, op):
    """Convert a caffe Concat layer into a mace Concat op.

    The axis defaults to the channel dimension (1 for NCHW on cpu,
    3 for NHWC otherwise) and may be overridden by the layer's
    concat_param (axis, or the deprecated concat_dim).
    """
    op_def = self.CommonConvert(op, 'Concat')
    axis_arg = op_def.arg.add()
    axis_arg.name = 'axis'
    axis_arg.i = 3 if self.device != 'cpu' else 1
    try:
        # BUG FIX: was 'HasFeild', which raised AttributeError and was
        # silently swallowed below, so explicit axis/concat_dim settings
        # in the model were never honored.  Also read the param from
        # op.layer, not op.
        if op.layer.concat_param.HasField('axis'):
            axis_arg.i = op.layer.concat_param.axis
        elif op.layer.concat_param.HasField('concat_dim'):
            axis_arg.i = op.layer.concat_param.concat_dim
    except AttributeError:
        # Older caffe protos may lack concat_param fields entirely.
        pass

    input_shapes = []
    for i in range(len(op.parents)):
        input_shapes.append(op.parents[i].output_shape_map[op.layer.bottom[i]])
    output_shape = Shapes.concat_shape(input_shapes, axis_arg.i)
    op.output_shape_map[op.layer.top[0]] = output_shape
    self.add_output_shape(op_def, output_shape)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def convert_eltwise(self, op):
    """Convert a caffe Eltwise layer into a mace Eltwise op, carrying the
    operation kind and optional per-input coefficients."""
    eltwise_def = self.CommonConvert(op, 'Eltwise')
    param = op.layer.eltwise_param

    kind_arg = eltwise_def.arg.add()
    kind_arg.name = 'type'
    kind_arg.i = math_type_mode[param.operation]

    # Coefficients are optional; only emit the arg when present.
    if len(param.coeff) > 0:
        coeff_arg = eltwise_def.arg.add()
        coeff_arg.name = 'coeff'
        coeff_arg.floats.extend(list(param.coeff))

    shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = shape
    self.add_output_shape(eltwise_def, shape)
    eltwise_def.output.extend([op.name + ':0'])

    self.net_def.op.extend([eltwise_def])
    self.resolved_ops.add(op.name)
def convert_slice(self, op):
    """Convert a caffe Slice layer into a mace Slice op.

    Only even channel-axis splits are supported (no slice_point, axis
    must be the channel dim); on GPU each output's channel count must
    additionally be a multiple of 4 for image packing.
    """
    op_def = self.CommonConvert(op, 'Slice')
    if op.layer.HasField('slice_param'):
        param = op.layer.slice_param
        if param.HasField('axis') and param.axis != 1:
            raise Exception('Mace do not support slice with axis ' +
                            str(param.axis))
        if len(param.slice_point) > 0:
            raise Exception('Mace do not support slice with slice_point')

    # Channel axis: 1 for NCHW on cpu, 3 for NHWC otherwise.
    axis_arg = op_def.arg.add()
    axis_arg.name = 'axis'
    axis_arg.i = 3 if self.device != 'cpu' else 1

    input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    num_outputs = len(op.layer.top)
    input_channels = input_shape[axis_arg.i]
    if (input_channels % num_outputs) != 0 or \
            (self.device == 'gpu' and
             ((input_channels / num_outputs) % 4 != 0)):
        raise Exception(
            'Mace do not support slice with input shape ' +
            str(input_shape) + ' and number of output ' + str(num_outputs))
    input_format = 'NCHW' if self.device == 'cpu' else 'NHWC'
    output_shape = Shapes.slice_shape(input_shape, num_outputs, input_format)
    # All outputs share the same (evenly split) shape; one output name
    # per top, suffixed with its index.
    for i in range(len(op.layer.top)):
        op.output_shape_map[op.layer.top[i]] = output_shape
        self.add_output_shape(op_def, output_shape)
        op_def.output.extend([op.name + '_' + str(i) + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def convert_normal_op(self, op):
    """Generic conversion for shape-preserving ops (e.g. Softmax): the
    mace op type mirrors the caffe layer type, shape passes through."""
    plain_def = self.CommonConvert(op, op.type)

    shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = shape
    self.add_output_shape(plain_def, shape)
    plain_def.output.extend([op.name + ':0'])

    self.net_def.op.extend([plain_def])
    self.resolved_ops.add(op.name)
def convert_reshape(self, op):
    """Convert a caffe Reshape layer into a mace Reshape op.

    Per caffe semantics a 0 in reshape_param keeps the corresponding
    input dimension; non-zero entries override it.
    """
    op_def = self.CommonConvert(op, 'Reshape')
    input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    # BUG FIX: copy the list — previously output_shape aliased the
    # parent's output_shape_map entry, so the in-place edits below
    # corrupted the recorded shape of the parent op.
    output_shape = list(input_shape)
    shape_param = np.asarray(op.layer.reshape_param.shape.dim)
    for i in range(len(shape_param)):
        if shape_param[i] != 0:
            output_shape[i] = shape_param[i]
    shape_arg = op_def.arg.add()
    shape_arg.name = 'shape'
    shape_arg.ints.extend(output_shape)
    op.output_shape_map[op.layer.top[0]] = output_shape
    self.add_output_shape(op_def, output_shape)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def convert_proposal_op(self, op):
    """Convert a (Faster R-CNN style) Proposal layer; cpu only.

    Copies feat_stride, anchor scales and aspect ratios into op args;
    the output shape is taken from the first parent.
    """
    assert self.device == 'cpu'
    op_def = self.CommonConvert(op, op.type)
    if op.layer.HasField('proposal_param'):
        proposal_param = op.layer.proposal_param
        feat_stride_arg = op_def.arg.add()
        feat_stride_arg.name = 'feat_stride'
        feat_stride_arg.i = proposal_param.feat_stride
        scales_arg = op_def.arg.add()
        scales_arg.name = 'scales'
        scales_arg.ints.extend(list(proposal_param.scales))
        ratios_arg = op_def.arg.add()
        ratios_arg.name = 'ratios'
        ratios_arg.floats.extend(list(proposal_param.ratios))
    output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = output_shape
    self.add_output_shape(op_def, output_shape)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def convert_psroi_align(self, op):
    """Convert a PSROIAlign layer (position-sensitive ROI align); cpu only.

    Copies spatial_scale, output_dim and group_size into op args; the
    output shape is taken from the first parent.
    """
    assert self.device == 'cpu'
    op_def = self.CommonConvert(op, op.type)
    if op.layer.HasField('psroi_align_param'):
        psroi_align_param = op.layer.psroi_align_param
        spatial_scale_arg = op_def.arg.add()
        spatial_scale_arg.name = 'spatial_scale'
        spatial_scale_arg.f = psroi_align_param.spatial_scale
        output_dim_arg = op_def.arg.add()
        output_dim_arg.name = 'output_dim'
        output_dim_arg.i = psroi_align_param.output_dim
        group_size_arg = op_def.arg.add()
        group_size_arg.name = 'group_size'
        group_size_arg.i = psroi_align_param.group_size
    output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
    op.output_shape_map[op.layer.top[0]] = output_shape
    self.add_output_shape(op_def, output_shape)
    op_def.output.extend([op.name + ':0'])
    self.net_def.op.extend([op_def])
    self.resolved_ops.add(op.name)
def replace_in_out_name(self, input_names, output_names):
    """Prefix tensor names matching model inputs/outputs across the net.

    Every op input/output equal to '<name>:0' for a model input gets the
    MACE_INPUT_NODE_NAME prefix, and likewise MACE_OUTPUT_NODE_NAME for
    model outputs, so runtime I/O nodes are unambiguous.
    """
    in_names = set([input_name + ":0" for input_name in input_names])
    out_names = set([output_name + ":0" for output_name in output_names])
    for op in self.net_def.op:
        for i in range(len(op.input)):
            if op.input[i] in in_names:
                op.input[i] = MACE_INPUT_NODE_NAME + '_' + op.input[i]
            # Not elif: a name can in principle be both an input and an
            # output; the second check sees the already-prefixed value.
            if op.input[i] in out_names:
                op.input[i] = MACE_OUTPUT_NODE_NAME + '_' + op.input[i]
        for i in range(len(op.output)):
            if op.output[i] in in_names:
                op.output[i] = MACE_INPUT_NODE_NAME + '_' + op.output[i]
            if op.output[i] in out_names:
                op.output[i] = MACE_OUTPUT_NODE_NAME + '_' + op.output[i]
def add_input_op_shape(self, input_nodes, input_shapes):
    """Record user-supplied input shapes on the Input ops.

    Shapes are given in NHWC; for cpu they are transposed to NCHW
    before being stored.  The shape is keyed by the layer's top name
    when a layer exists, otherwise by the op name.
    """
    assert len(input_nodes) == len(input_shapes)
    for i in range(len(input_nodes)):
        input_op = self.ops_map[input_nodes[i]]
        # NHWC -> NCHW reorder for cpu: [N, C, H, W].
        input_shape = input_shapes[i] if self.device != 'cpu' else \
            [input_shapes[i][0], input_shapes[i][3],
             input_shapes[i][1], input_shapes[i][2]]
        if input_op.layer is not None:
            input_op.output_shape_map[input_op.layer.top[0]] = input_shape
        else:
            input_op.output_shape_map[input_op.name] = input_shape
def add_cpu_input_transform(self, names):
    """Insert a Transpose op per model input to convert NHWC user data
    into the NCHW layout the cpu graph computes in.
    """
    for name in names:
        new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
        op_def = self.net_def.op.add()
        op_def.name = name
        op_def.type = 'Transpose'
        op_def.input.extend([new_input_name])
        op_def.output.extend([name + ':0'])

        dims_arg = op_def.arg.add()
        dims_arg.name = 'dims'
        dims_arg.ints.extend([0, 3, 1, 2])  # NHWC -> NCHW

        # Data-type arg for the transpose kernel.
        arg = op_def.arg.add()
        arg.name = 'T'
        arg.i = self.dt

        input_op = self.ops_map[name]
        if input_op.layer is not None:
            output_shape = input_op.output_shape_map[input_op.layer.top[0]]
        else:
            output_shape = input_op.output_shape_map[input_op.name]
        self.add_output_shape(op_def, output_shape)
def add_cpu_output_transform(self, names):
    """Insert a Transpose op per model output to convert the graph's
    NCHW result back to NHWC for the user.
    """
    for name in names:
        output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
        op_def = self.net_def.op.add()
        # Strip the trailing ':0' to form the op name.
        op_def.name = output_name[:-2]
        op_def.type = 'Transpose'
        op_def.input.extend([name + ':0'])
        op_def.output.extend([output_name])

        dims_arg = op_def.arg.add()
        dims_arg.name = 'dims'
        dims_arg.ints.extend([0, 2, 3, 1])  # NCHW -> NHWC

        input_op = self.ops_map[name]
        if input_op.layer is not None:
            output_shape = input_op.output_shape_map[input_op.layer.top[0]]
        else:
            output_shape = input_op.output_shape_map[input_op.name]
        # Recorded shape is NCHW; emit the transposed (NHWC) shape.
        self.add_output_shape(op_def,
                              [output_shape[0], output_shape[2],
                               output_shape[3], output_shape[1]])
def convert(self, input_nodes, input_shapes, output_nodes):
    """Drive the whole conversion: seed input shapes, add device-specific
    input transforms, dispatch each caffe layer to its converter, then
    add output transforms.

    Layers already marked in self.resolved_ops (e.g. fused children) are
    skipped.  Unknown layer types raise; layers left unresolved at the
    end are reported.
    """
    assert self.ops[0].type == 'Input'
    self.add_input_op_shape(input_nodes, input_shapes)
    if self.device == 'gpu':
        self.add_gpu_input_transform(input_nodes)
    if self.device == 'cpu':
        self.add_cpu_input_transform(input_nodes)

    for op in self.ops:
        if op.name in self.resolved_ops:
            continue
        if op.type == 'Input':
            self.resolved_ops.add(op.name)
        elif op.type == 'Convolution':
            # Prefer the winograd path on gpu when the conv qualifies.
            if self.device == 'gpu' and self.check_winograd_conv(op):
                self.convert_winograd_conv_gpu(op)
            else:
                self.convert_conv2d(op)
        elif op.type == 'BatchNorm':
            self.convert_batchnorm(op)
        elif op.type == 'InnerProduct':
            self.convert_inner_product(op)
        elif op.type == 'Pooling':
            self.convert_pooling(op)
        elif op.type == 'PReLU':
            self.convert_prelu(op)
        elif op.type in ['ReLU', 'Sigmoid', 'TanH']:
            self.convert_activation(op)
        elif op.type == 'Add':
            self.convert_add(op)
        elif op.type == 'Concat':
            self.convert_concat(op)
        elif op.type == 'Eltwise':
            self.convert_eltwise(op)
        elif op.type == 'Slice':
            self.convert_slice(op)
        elif op.type == 'Reshape':
            self.convert_reshape(op)
        elif op.type == 'Proposal':
            self.convert_proposal_op(op)
        elif op.type == 'PSROIAlign':
            self.convert_psroi_align(op)
        elif op.type in ['Softmax']:
            self.convert_normal_op(op)
        else:
            raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))

    if self.device == 'gpu':
        self.add_gpu_output_transform(output_nodes)
    if self.device == 'cpu':
        self.add_cpu_output_transform(output_nodes)

    for op in self.ops:
        if op.name not in self.resolved_ops:
            # Parenthesized form works under both python 2 and 3;
            # also fixes the 'Unresolve' typo.
            print('Unresolved Op: %s with type %s' % (op.name, op.type))
def convert_to_mace_pb(model_file, weight_file, input_node_str,
                       input_shape_str, output_node_str, data_type, device,
                       winograd):
    """Convert a caffe model (prototxt + caffemodel) into a mace NetDef.

    input_node_str/output_node_str are comma-separated node names;
    input_shape_str is colon-separated per-node shapes, each a
    comma-separated int list.  Returns the converted (and
    memory-optimized) NetDef.
    """
    net_def = mace_pb2.NetDef()
    dt = data_type_map[data_type]

    # The prototxt is text-format protobuf.
    caffe_net = caffe_pb2.NetParameter()
    with open(model_file, "r") as f:
        google.protobuf.text_format.Merge(str(f.read()), caffe_net)

    # The caffemodel holds binary-serialized weights.
    weights = caffe_pb2.NetParameter()
    with open(weight_file, "rb") as f:
        weights.MergeFromString(f.read())

    input_nodes = [x for x in input_node_str.split(',')]
    input_shapes = []
    if input_shape_str != "":
        input_shape_strs = [x for x in input_shape_str.split(':')]
        for shape_str in input_shape_strs:
            input_shapes.extend([[int(x) for x in shape_str.split(',')]])
    output_nodes = [x for x in output_node_str.split(',')]
    assert len(input_nodes) == len(input_shapes)

    converter = CaffeConverter(caffe_net, weights, net_def, dt, device,
                               winograd)
    converter.convert(input_nodes, input_shapes, output_nodes)
    # Parenthesized prints work under both python 2 and 3.
    print("PB Converted.")
    if device == 'gpu':
        print("start optimize memory.")
        memory_optimizer.optimize_gpu_memory(net_def)
        print("Memory optimization done.")
    elif device == 'cpu':
        print("start optimize memory.")
        memory_optimizer.optimize_cpu_memory(net_def)
        print("Memory optimization done.")

    return net_def
mace/python/tools/convert_util.py
浏览文件 @
3e82ad67
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tensorflow
as
tf
from
mace.proto
import
mace_pb2
...
...
@@ -40,3 +41,8 @@ def tf_dtype_2_mace_dtype(tf_dtype):
if
not
mace_dtype
:
raise
Exception
(
"Not supported tensorflow dtype: "
+
tf_dtype
)
return
mace_dtype
def mace_check(condition, msg):
    """Assert-style guard: raise Exception(msg) unless condition holds."""
    if condition:
        return
    raise Exception(msg)
mace/python/tools/converter.py
浏览文件 @
3e82ad67
...
...
@@ -16,7 +16,16 @@ import argparse
import
sys
import
hashlib
import
os.path
from
mace.proto
import
mace_pb2
from
mace.python.tools
import
tf_dsp_converter_lib
from
mace.python.tools
import
memory_optimizer
from
mace.python.tools
import
source_converter_lib
from
mace.python.tools.converter_tool
import
base_converter
as
cvt
from
mace.python.tools.converter_tool
import
tensorflow_converter
from
mace.python.tools.converter_tool
import
caffe_converter
from
mace.python.tools.converter_tool
import
transformer
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
...
...
@@ -25,6 +34,12 @@ from mace.python.tools import source_converter_lib
FLAGS
=
None
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
device_type_map
=
{
'cpu'
:
mace_pb2
.
CPU
,
'gpu'
:
mace_pb2
.
GPU
,
'dsp'
:
mace_pb2
.
HEXAGON
}
def
file_checksum
(
fname
):
hash_func
=
hashlib
.
sha256
()
...
...
@@ -34,6 +49,10 @@ def file_checksum(fname):
return
hash_func
.
hexdigest
()
def parse_int_array_from_str(ints_str):
    """Parse a comma-separated string of integers into a list of ints."""
    return list(map(int, ints_str.split(',')))
def
main
(
unused_args
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
...
...
@@ -59,27 +78,64 @@ def main(unused_args):
(
weight_checksum
,
FLAGS
.
weight_checksum
))
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
==
'dsp'
:
print
(
"DSP not support caffe model yet."
)
sys
.
exit
(
-
1
)
if
FLAGS
.
platform
not
in
[
'tensorflow'
,
'caffe'
]:
print
(
"platform %s is not supported."
%
FLAGS
.
platform
)
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
not
in
[
'cpu'
,
'gpu'
,
'dsp'
]:
print
(
"runtime %s is not supported."
%
FLAGS
.
runtime
)
sys
.
exit
(
-
1
)
from
mace.python.tools
import
caffe_converter_lib
output_graph_def
=
caffe_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
elif
FLAGS
.
platform
==
'tensorflow'
:
if
FLAGS
.
runtime
==
'dsp'
:
from
mace.python.tools
import
tf_dsp_converter_lib
if
FLAGS
.
runtime
==
'dsp'
:
if
FLAGS
.
platform
==
'tensorflow'
:
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
else
:
from
mace.python.tools
import
tf_converter_lib
output_graph_def
=
tf_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
print
(
"%s does not support dsp runtime yet."
%
FLAGS
.
platform
)
sys
.
exit
(
-
1
)
else
:
option
=
cvt
.
ConverterOption
()
option
.
data_type
=
data_type_map
[
FLAGS
.
data_type
]
option
.
device
=
device_type_map
[
FLAGS
.
runtime
]
option
.
winograd_enabled
=
bool
(
FLAGS
.
winograd
)
input_node_names
=
FLAGS
.
input_node
.
split
(
','
)
input_node_shapes
=
FLAGS
.
input_shape
.
split
(
':'
)
if
len
(
input_node_names
)
!=
len
(
input_node_shapes
):
raise
Exception
(
'input node count and shape count do not match.'
)
for
i
in
xrange
(
len
(
input_node_names
)):
input_node
=
cvt
.
NodeInfo
()
input_node
.
name
=
input_node_names
[
i
]
input_node
.
shape
=
parse_int_array_from_str
(
FLAGS
.
input_shape
)
option
.
add_input_node
(
input_node
)
output_node_names
=
FLAGS
.
output_node
.
split
(
','
)
for
i
in
xrange
(
len
(
output_node_names
)):
output_node
=
cvt
.
NodeInfo
()
output_node
.
name
=
output_node_names
[
i
]
option
.
add_output_node
(
output_node
)
print
(
"Convert model to mace model."
)
if
FLAGS
.
platform
==
'tensorflow'
:
converter
=
tensorflow_converter
.
TensorflowConverter
(
option
,
FLAGS
.
model_file
)
# noqa
elif
FLAGS
.
platform
==
'caffe'
:
converter
=
caffe_converter
.
CaffeConverter
(
option
,
FLAGS
.
model_file
,
FLAGS
.
weight_file
)
output_graph_def
=
converter
.
run
()
print
(
"Transform model to one that can better run on device."
)
# TODO(liuqi/liyin): transform gpu/cpu and merge their ops
mace_transformer
=
transformer
.
Transformer
(
option
,
output_graph_def
)
output_graph_def
=
mace_transformer
.
run
()
print
"start optimize memory."
if
FLAGS
.
runtime
==
'gpu'
:
memory_optimizer
.
optimize_gpu_memory
(
output_graph_def
)
elif
FLAGS
.
runtime
==
'cpu'
:
memory_optimizer
.
optimize_cpu_memory
(
output_graph_def
)
print
"Memory optimization done."
if
FLAGS
.
output_type
==
'source'
:
source_converter_lib
.
convert_to_source
(
...
...
mace/python/tools/converter_tool/__init__.py
0 → 100644
浏览文件 @
3e82ad67
mace/python/tools/converter_tool/base_converter.py
0 → 100644
浏览文件 @
3e82ad67
from
enum
import
Enum
from
mace.proto
import
mace_pb2
class DataFormat(Enum):
    """Memory layout of 4-D activation tensors."""
    NHWC = 0
    NCHW = 1
class FilterFormat(Enum):
    """Layout of 4-D filter (weight) tensors; letters denote Height,
    Width, Input channels, Output channels."""
    HWIO = 0
    OIHW = 1
    HWOI = 2
class PaddingMode(Enum):
    """Convolution/pooling padding scheme."""
    VALID = 0
    SAME = 1
    FULL = 2
class PoolingType(Enum):
    """Pooling reduction kind.  Values start at 1, matching the runtime."""
    AVG = 1
    MAX = 2
class ActivationType(Enum):
    """Activation function kinds understood by the Activation op."""
    NOOP = 0
    RELU = 1
    RELUX = 2
    PRELU = 3
    TANH = 4
    SIGMOID = 5
class EltwiseType(Enum):
    """Element-wise operation kinds for the Eltwise op."""
    SUM = 0
    SUB = 1
    PROD = 2
    DIV = 3
    MIN = 4
    MAX = 5
    NEG = 6
    ABS = 7
    SQR_DIFF = 8
    POW = 9
# Canonical list of operator type names understood by the mace runtime.
MaceSupportedOps = [
    'Activation',
    'AddN',
    'BatchNorm',
    'BatchToSpaceND',
    'BiasAdd',
    'ChannelShuffle',
    'Concat',
    'Conv2D',
    'Deconv2D',
    'DepthToSpace',
    'DepthwiseConv2d',
    'Dequantize',
    'Eltwise',
    'FoldedBatchNorm',
    'FullyConnected',
    'LocalResponseNorm',
    'MatMul',
    'Pad',
    'Pooling',
    'Proposal',
    'PSROIAlign',
    'Quantize',
    'Requantize',
    'Reshape',
    'ResizeBilinear',
    'Slice',
    'Softmax',
    'SpaceToBatchND',
    'SpaceToDepth',
    'Transpose',
    'WinogradInverseTransform',
    'WinogradTransform',
]

# Functional-API enum with a str mixin, so members can be used directly
# wherever a plain operator-name string is expected (e.g. op_def.type).
MaceOp = Enum('MaceOp', [(op, op) for op in MaceSupportedOps], type=str)
class MaceKeyword(object):
    """Central registry of well-known string constants (node names, op
    types and arg names) shared by the converters and transformer."""
    # node related str
    mace_input_node_name = 'mace_input_node'
    mace_output_node_name = 'mace_output_node'
    mace_buffer_type = 'buffer_type'
    mace_mode = 'mode'
    mace_buffer_to_image = 'BufferToImage'
    mace_image_to_buffer = 'ImageToBuffer'
    # arg related str
    mace_padding_str = 'padding'
    mace_padding_values_str = 'padding_values'
    mace_strides_str = 'strides'
    mace_dilations_str = 'dilations'
    mace_pooling_type_str = 'pooling_type'
    mace_global_pooling_str = 'global_pooling'
    mace_kernel_str = 'kernels'
    mace_data_format_str = 'data_format'
    mace_filter_format_str = 'filter_format'
    mace_element_type_str = 'type'
    mace_activation_type_str = 'activation'
    mace_activation_max_limit_str = 'max_limit'
    mace_resize_size_str = 'size'
    mace_batch_to_space_crops_str = 'crops'
    mace_paddings_str = 'paddings'
    mace_align_corners_str = 'align_corners'
    mace_space_batch_block_shape_str = 'block_shape'
    mace_space_depth_block_size_str = 'block_size'
    mace_constant_value_str = 'constant_value'
    mace_dims_str = 'dims'
    mace_axis_str = 'axis'
    mace_shape_str = 'shape'
    mace_winograd_filter_transformed = 'is_filter_transformed'
class ConverterInterface(object):
    """Base class for converting external models to mace models."""

    def run(self):
        # Subclasses must implement the full conversion and return a NetDef.
        raise NotImplementedError('run')
class NodeInfo(object):
    """Describes a graph endpoint: a node name plus its tensor shape."""

    def __init__(self):
        self._name = None
        self._shape = []

    @property
    def name(self):
        """Node name (None until assigned)."""
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    @property
    def shape(self):
        """Tensor shape as a list of ints (empty by default)."""
        return self._shape

    @shape.setter
    def shape(self, shape):
        self._shape = shape

    def __str__(self):
        return '%s %s' % (self._name, str(self._shape))
class ConverterOption(object):
    """A class for specifying options passed to converter tool.

    Holds input/output NodeInfo maps keyed by node name, the target data
    type and device, and the winograd flag.
    """

    def __init__(self):
        self._input_nodes = {}
        self._output_nodes = {}
        self._data_type = mace_pb2.DT_FLOAT
        self._device = mace_pb2.CPU
        self._winograd_enabled = False

    @property
    def input_nodes(self):
        return self._input_nodes

    @property
    def output_nodes(self):
        return self._output_nodes

    @property
    def data_type(self):
        return self._data_type

    @property
    def device(self):
        return self._device

    @property
    def winograd_enabled(self):
        return self._winograd_enabled

    @input_nodes.setter
    def input_nodes(self, input_nodes):
        for node in input_nodes:
            self._input_nodes[node.name] = node

    def add_input_node(self, input_node):
        self._input_nodes[input_node.name] = input_node

    @output_nodes.setter
    def output_nodes(self, output_nodes):
        # Write the backing dict directly, mirroring the input_nodes
        # setter (previously went through the property getter).
        for node in output_nodes:
            self._output_nodes[node.name] = node

    def add_output_node(self, output_node):
        self._output_nodes[output_node.name] = output_node

    @data_type.setter
    def data_type(self, data_type):
        self._data_type = data_type

    @device.setter
    def device(self, device):
        self._device = device

    @winograd_enabled.setter
    def winograd_enabled(self, winograd_enabled):
        self._winograd_enabled = winograd_enabled
class ConverterUtil(object):
    """Static helpers for reading/writing well-known args on ops and nets."""

    @staticmethod
    def get_arg(op, arg_name):
        # Linear scan over the op's args; first match wins, None if absent.
        for arg in op.arg:
            if arg.name == arg_name:
                return arg
        return None

    @staticmethod
    def add_data_format_arg(op, data_format):
        # Stores the DataFormat enum value as an int arg on the op.
        data_format_arg = op.arg.add()
        data_format_arg.name = MaceKeyword.mace_data_format_str
        data_format_arg.i = data_format.value

    @staticmethod
    def data_format(op):
        # Reverse of add_data_format_arg; None when missing or unrecognized.
        arg = ConverterUtil.get_arg(op, MaceKeyword.mace_data_format_str)
        if arg is None:
            return None
        elif arg.i == DataFormat.NHWC.value:
            return DataFormat.NHWC
        elif arg.i == DataFormat.NCHW.value:
            return DataFormat.NCHW
        else:
            return None

    @staticmethod
    def set_filter_format(net, filter_format):
        # Stores the net-wide FilterFormat enum value as an int arg.
        arg = net.arg.add()
        arg.name = MaceKeyword.mace_filter_format_str
        arg.i = filter_format.value

    @staticmethod
    def filter_format(net):
        # Reverse of set_filter_format; None when missing or unrecognized.
        arg = ConverterUtil.get_arg(net, MaceKeyword.mace_filter_format_str)
        if arg is None:
            return None
        elif arg.i == FilterFormat.HWIO.value:
            return FilterFormat.HWIO
        elif arg.i == FilterFormat.HWOI.value:
            return FilterFormat.HWOI
        elif arg.i == FilterFormat.OIHW.value:
            return FilterFormat.OIHW
        else:
            return None
mace/python/tools/converter_tool/caffe_converter.py
0 → 100644
浏览文件 @
3e82ad67
import
math
import
numpy
as
np
import
google.protobuf.text_format
from
mace.proto
import
mace_pb2
from
mace.third_party.caffe
import
caffe_pb2
from
mace.python.tools.converter_tool
import
base_converter
from
mace.python.tools.converter_tool
import
shape_inference
from
mace.python.tools.converter_tool.base_converter
import
PoolingType
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
# Names of optional caffe layer-parameter fields that are probed
# dynamically (via getattr/HasField) during conversion.
caffe_group_str = 'group'
caffe_kernel_h_str = 'kernel_h'
caffe_kernel_w_str = 'kernel_w'
caffe_stride_h_str = 'stride_h'
caffe_stride_w_str = 'stride_w'
caffe_pad_h_str = 'pad_h'
caffe_pad_w_str = 'pad_w'
class CaffeOperator(object):
    """CaffeOperator merges and provides both layer and weights information.

    Layer records caffe layer proto, while blobs records the weight data in
    format of numpy ndarray.
    """

    def __init__(self):
        self._layer = None
        self._blobs = None

    @property
    def name(self):
        return self._layer.name

    @property
    def type(self):
        return self._layer.type

    @property
    def layer(self):
        return self._layer

    @property
    def blobs(self):
        return self._blobs

    @layer.setter
    def layer(self, layer):
        self._layer = layer

    @blobs.setter
    def blobs(self, blobs):
        # Eagerly convert every proto blob into a numpy ndarray.
        self._blobs = [self.blob_to_nparray(blob) for blob in blobs]

    def get_blob(self, index):
        """Return the index-th weight ndarray; raises if out of range."""
        mace_check(index < len(self._blobs), "blob out of index")
        return self._blobs[index]

    @staticmethod
    def blob_to_nparray(blob):
        # Legacy caffe blobs describe their shape via num/channels/height/
        # width (num != 0); newer ones carry an explicit shape.dim list.
        if blob.num != 0:
            return (np.asarray(blob.data, dtype=np.float32).reshape(
                (blob.num, blob.channels, blob.height, blob.width)))
        else:
            return np.asarray(blob.data, dtype=np.float32).reshape(
                blob.shape.dim)
class
CaffeNet
(
object
):
"""CaffeNet contains caffe operations. Output of each layer has unique
name as we replace duplicated output name with unique one, while keep
mace input/output name which user specifies unchanged."""
def
__init__
(
self
):
self
.
_ops
=
{}
self
.
_consumers
=
{}
# for in-place op, its input name is the same with output name,
# so we change the output name to an alias
self
.
_alias_op_output_name
=
{}
self
.
_used_op_output_name
=
set
()
@
property
def
ops
(
self
):
return
self
.
_ops
.
values
()
def
get_op
(
self
,
op_name
):
return
self
.
_ops
.
get
(
op_name
,
None
)
def
get_consumers
(
self
,
tensor_name
):
return
self
.
_consumers
.
get
(
tensor_name
,
[])
def
add_layer
(
self
,
layer
):
op
=
CaffeOperator
()
op
.
layer
=
layer
self
.
_ops
[
layer
.
name
]
=
op
# change op output name if it is an in-place op
layer
.
bottom
[:]
=
[
self
.
_alias_op_output_name
.
get
(
layer_input
,
layer_input
)
for
layer_input
in
layer
.
bottom
][:]
for
i
in
xrange
(
len
(
layer
.
top
)):
old_name
=
layer
.
top
[
i
]
if
layer
.
type
==
'Input'
:
new_name
=
old_name
else
:
idx
=
0
new_name
=
old_name
+
'#'
+
str
(
idx
)
while
new_name
in
self
.
_used_op_output_name
:
idx
+=
1
new_name
=
old_name
+
'#'
+
str
(
idx
)
layer
.
top
[
i
]
=
new_name
self
.
_alias_op_output_name
[
old_name
]
=
new_name
self
.
_used_op_output_name
.
update
([
new_name
])
for
input_tensor
in
layer
.
bottom
:
if
input_tensor
not
in
self
.
_consumers
:
self
.
_consumers
[
input_tensor
]
=
[]
self
.
_consumers
[
input_tensor
].
append
(
op
)
def
add_blob
(
self
,
weight
):
if
weight
.
name
in
self
.
_ops
:
op
=
self
.
_ops
[
weight
.
name
]
op
.
blobs
=
list
(
weight
.
blobs
)
class
CaffeConverter
(
base_converter
.
ConverterInterface
):
"""A class for convert caffe model to mace model."""
pooling_type_mode
=
{
caffe_pb2
.
PoolingParameter
.
AVE
:
PoolingType
.
AVG
,
caffe_pb2
.
PoolingParameter
.
MAX
:
PoolingType
.
MAX
}
eltwise_type
=
{
caffe_pb2
.
EltwiseParameter
.
PROD
:
EltwiseType
.
PROD
,
caffe_pb2
.
EltwiseParameter
.
SUM
:
EltwiseType
.
SUM
,
caffe_pb2
.
EltwiseParameter
.
MAX
:
EltwiseType
.
MAX
,
}
activation_type
=
{
'ReLU'
:
ActivationType
.
RELU
,
'PReLU'
:
ActivationType
.
PRELU
,
'TanH'
:
ActivationType
.
TANH
,
}
def
__init__
(
self
,
option
,
src_model_file
,
src_weight_file
):
self
.
_op_converters
=
{
'Input'
:
self
.
convert_nop
,
'Convolution'
:
self
.
convert_conv2d
,
'Eltwise'
:
self
.
convert_elementwise
,
'Add'
:
self
.
convert_add
,
'ReLU'
:
self
.
convert_activation
,
'TanH'
:
self
.
convert_activation
,
'Sigmoid'
:
self
.
convert_activation
,
'PReLU'
:
self
.
convert_activation
,
'Pooling'
:
self
.
convert_pooling
,
'Concat'
:
self
.
convert_concat
,
'Slice'
:
self
.
convert_slice
,
'Softmax'
:
self
.
convert_softmax
,
'InnerProduct'
:
self
.
convert_fully_connected
,
'BatchNorm'
:
self
.
convert_folded_batchnorm
,
}
self
.
_option
=
option
self
.
_mace_net_def
=
mace_pb2
.
NetDef
()
ConverterUtil
.
set_filter_format
(
self
.
_mace_net_def
,
FilterFormat
.
OIHW
)
self
.
_caffe_net
=
CaffeNet
()
self
.
_caffe_layers
=
caffe_pb2
.
NetParameter
()
caffe_weights
=
caffe_pb2
.
NetParameter
()
# parse prototxt
with
open
(
src_model_file
,
'rb'
)
as
f
:
google
.
protobuf
.
text_format
.
Merge
(
str
(
f
.
read
()),
self
.
_caffe_layers
)
self
.
filter_test_layers
(
self
.
_caffe_layers
)
for
layer
in
self
.
_caffe_layers
.
layer
:
self
.
_caffe_net
.
add_layer
(
layer
)
# parse model weight
with
open
(
src_weight_file
,
'rb'
)
as
f
:
caffe_weights
.
ParseFromString
(
f
.
read
())
self
.
filter_test_layers
(
caffe_weights
)
for
weight
in
caffe_weights
.
layer
:
self
.
_caffe_net
.
add_blob
(
weight
)
self
.
_skip_ops
=
[]
def
run
(
self
):
self
.
convert_ops
()
shape_inferer
=
shape_inference
.
ShapeInference
(
self
.
_mace_net_def
,
self
.
_option
.
input_nodes
.
values
())
shape_inferer
.
run
()
self
.
replace_output_tensor_name
()
return
self
.
_mace_net_def
@
staticmethod
def
replace_input_name
(
ops
,
src_name
,
dst_name
):
for
op
in
ops
:
for
i
in
xrange
(
len
(
op
.
input
)):
if
op
.
input
[
i
]
==
src_name
:
op
.
input
[
i
]
=
dst_name
def
replace_output_tensor_name
(
self
):
consumers
=
{}
for
op
in
self
.
_mace_net_def
.
op
:
for
input_name
in
op
.
input
:
if
input_name
not
in
consumers
:
consumers
[
input_name
]
=
[]
consumers
[
input_name
].
append
(
op
)
# replace the last op with same prefix name with the original top name
ops
=
[
op
for
op
in
self
.
_mace_net_def
.
op
]
ops
.
reverse
()
visited
=
set
()
for
op
in
ops
:
for
i
in
xrange
(
len
(
op
.
output
)):
original_output_name
=
op
.
output
[
i
].
split
(
'#'
)[
0
]
if
original_output_name
not
in
visited
:
self
.
replace_input_name
(
consumers
.
get
(
op
.
output
[
i
],
[]),
op
.
output
[
i
],
original_output_name
)
op
.
output
[
i
]
=
original_output_name
visited
.
update
([
original_output_name
])
# if user set op name as output node, replace it with op name
for
op
in
self
.
_mace_net_def
.
op
:
if
op
.
name
in
self
.
_option
.
output_nodes
:
if
len
(
op
.
output
)
>
0
:
self
.
replace_input_name
(
consumers
.
get
(
op
.
output
[
0
],
[]),
op
.
output
,
op
.
name
)
op
.
output
[
0
]
=
op
.
name
@
staticmethod
def
filter_test_layers
(
layers
):
phase_map
=
{
0
:
'train'
,
1
:
'test'
}
while
True
:
changed
=
False
for
layer
in
layers
.
layer
:
phase
=
'test'
if
len
(
layer
.
include
):
phase
=
phase_map
[
layer
.
include
[
0
].
phase
]
if
len
(
layer
.
exclude
):
phase
=
phase_map
[
layer
.
exclude
[
0
].
phase
]
if
phase
!=
'test'
or
layer
.
type
==
'Dropout'
:
print
(
"Remove layer %s (%s)"
%
(
layer
.
name
,
layer
.
type
))
layers
.
layer
.
remove
(
layer
)
changed
=
True
break
if
not
changed
:
break
@
staticmethod
def
add_stride_pad_kernel_arg
(
param
,
op_def
):
try
:
if
len
(
param
.
stride
)
>
1
or
len
(
param
.
kernel_size
)
>
1
or
len
(
param
.
pad
)
>
1
:
raise
Exception
(
'Mace does not support multiple stride/kernel_size/pad'
)
stride
=
[
param
.
stride
[
0
],
param
.
stride
[
0
]]
if
len
(
param
.
stride
)
else
[
1
,
1
]
pad
=
[
param
.
pad
[
0
]
*
2
,
param
.
pad
[
0
]
*
2
]
if
len
(
param
.
pad
)
else
[
0
,
0
]
kernel
=
[
param
.
kernel_size
[
0
],
param
.
kernel_size
[
0
]]
if
len
(
param
.
kernel_size
)
else
[
0
,
0
]
except
TypeError
:
stride
=
[
param
.
stride
,
param
.
stride
]
pad
=
[
param
.
pad
*
2
,
param
.
pad
*
2
]
kernel
=
[
param
.
kernel_size
,
param
.
kernel_size
]
if
param
.
HasField
(
caffe_stride_h_str
)
or
param
.
HasField
(
caffe_stride_w_str
):
stride
=
[
param
.
stride_h
,
param
.
stride_w
]
if
param
.
HasField
(
caffe_pad_h_str
)
or
param
.
HasField
(
caffe_pad_w_str
):
pad
=
[
param
.
pad_h
*
2
,
param
.
pad_w
*
2
]
strides_arg
=
op_def
.
arg
.
add
()
strides_arg
.
name
=
MaceKeyword
.
mace_strides_str
strides_arg
.
ints
.
extend
(
stride
)
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
MaceKeyword
.
mace_padding_values_str
padding_arg
.
ints
.
extend
(
pad
)
if
op_def
.
type
==
MaceOp
.
Pooling
.
name
:
if
param
.
HasField
(
caffe_kernel_h_str
)
or
param
.
HasField
(
caffe_kernel_w_str
):
kernel
=
[
param
.
kernel_h
,
param
.
kernel_w
]
kernels_arg
=
op_def
.
arg
.
add
()
kernels_arg
.
name
=
MaceKeyword
.
mace_kernel_str
kernels_arg
.
ints
.
extend
(
kernel
)
if
param
.
HasField
(
'global_pooling'
):
global_pooling_arg
=
op_def
.
arg
.
add
()
global_pooling_arg
.
name
=
MaceKeyword
.
mace_global_pooling_str
global_pooling_arg
.
i
=
1
def
convert_ops
(
self
):
for
layer
in
self
.
_caffe_layers
.
layer
:
caffe_op
=
self
.
_caffe_net
.
get_op
(
layer
.
name
)
if
caffe_op
not
in
self
.
_skip_ops
:
mace_check
(
layer
.
type
in
self
.
_op_converters
,
"Mace does not support caffe op type %s yet"
%
layer
.
type
)
self
.
_op_converters
[
layer
.
type
](
caffe_op
)
def
add_tensor
(
self
,
name
,
shape
,
data_type
,
value
):
tensor
=
self
.
_mace_net_def
.
tensors
.
add
()
tensor
.
name
=
name
tensor
.
dims
.
extend
(
list
(
shape
))
tensor
.
data_type
=
data_type
tensor
.
float_data
.
extend
(
value
.
flat
)
def
convert_nop
(
self
,
layer
):
pass
def
convert_general_op
(
self
,
caffe_op
):
op
=
self
.
_mace_net_def
.
op
.
add
()
op
.
name
=
caffe_op
.
name
op
.
type
=
caffe_op
.
type
op
.
input
.
extend
(
caffe_op
.
layer
.
bottom
)
op
.
output
.
extend
(
caffe_op
.
layer
.
top
)
data_type_arg
=
op
.
arg
.
add
()
data_type_arg
.
name
=
'T'
data_type_arg
.
i
=
self
.
_option
.
data_type
ConverterUtil
.
add_data_format_arg
(
op
,
DataFormat
.
NCHW
)
return
op
def
convert_conv2d
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
param
=
caffe_op
.
layer
.
convolution_param
is_depthwise
=
False
if
param
.
HasField
(
caffe_group_str
):
mace_check
(
param
.
group
==
caffe_op
.
blob
[
0
].
shape
[
1
]
and
caffe_op
.
blob
[
0
].
shape
[
0
]
==
1
,
"Mace do not support group convolution yet"
)
is_depthwise
=
True
if
is_depthwise
:
op
.
type
=
MaceOp
.
DepthwiseConv2d
.
name
else
:
op
.
type
=
MaceOp
.
Conv2D
.
name
self
.
add_stride_pad_kernel_arg
(
param
,
op
)
# dilation is specific for convolution in caffe
dilations
=
[
1
,
1
]
if
len
(
param
.
dilation
)
>
0
:
dilation_arg
=
op
.
arg
.
add
()
dilation_arg
.
name
=
MaceKeyword
.
mace_dilations_str
if
len
(
param
.
dilation
)
==
1
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
0
]]
elif
len
(
param
.
dilation
)
==
2
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
1
]]
dilation_arg
.
ints
.
extend
(
dilations
)
filter_tensor_name
=
op
.
name
+
'_filter'
filter_data
=
caffe_op
.
blobs
[
0
]
self
.
add_tensor
(
filter_tensor_name
,
filter_data
.
shape
,
mace_pb2
.
DT_FLOAT
,
filter_data
)
op
.
input
.
extend
([
filter_tensor_name
])
if
len
(
caffe_op
.
blobs
)
==
2
:
bias_tensor_name
=
op
.
name
+
'_bias'
bias_data
=
caffe_op
.
blobs
[
1
]
self
.
add_tensor
(
bias_tensor_name
,
bias_data
.
shape
,
mace_pb2
.
DT_FLOAT
,
bias_data
)
op
.
input
.
extend
([
bias_tensor_name
])
def
convert_elementwise
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
param
=
caffe_op
.
layer
.
eltwise_param
op
.
type
=
MaceOp
.
Eltwise
.
name
type_arg
=
op
.
arg
.
add
()
type_arg
.
name
=
MaceKeyword
.
mace_element_type_str
type_arg
.
i
=
self
.
eltwise_type
[
param
.
operation
].
value
if
len
(
param
.
coeff
)
>
0
:
coeff_arg
=
op
.
arg
.
add
()
coeff_arg
.
name
=
'coeff'
coeff_arg
.
floats
.
extend
(
list
(
param
.
coeff
))
def
convert_add
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
op
.
type
=
MaceOp
.
AddN
.
name
def
convert_activation
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
op
.
type
=
MaceOp
.
Activation
.
name
type_arg
=
op
.
arg
.
add
()
type_arg
.
name
=
MaceKeyword
.
mace_activation_type_str
type_arg
.
s
=
self
.
activation_type
[
caffe_op
.
type
].
name
if
caffe_op
.
type
==
'PReLU'
:
alpha_tensor_name
=
caffe_op
.
name
+
'_alpha'
alpha_data
=
caffe_op
.
blobs
[
0
]
self
.
add_tensor
(
alpha_tensor_name
,
alpha_data
.
shape
,
mace_pb2
.
DT_FLOAT
,
alpha_data
)
op
.
input
.
extend
([
alpha_tensor_name
])
def
convert_folded_batchnorm
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
op
.
type
=
MaceOp
.
FoldedBatchNorm
.
name
scale_op
=
None
for
consumer
in
self
.
_caffe_net
.
get_consumers
(
caffe_op
.
layer
.
top
[
0
]):
if
consumer
.
type
==
'Scale'
:
scale_op
=
consumer
mace_check
(
scale_op
is
not
None
,
"batchnorm is not followed by scale"
)
self
.
_skip_ops
.
append
(
scale_op
)
epsilon_value
=
caffe_op
.
layer
.
batch_norm_param
.
eps
mace_check
(
caffe_op
.
blobs
[
2
][
0
]
!=
0
,
"batchnorm scalar is zero"
)
mean_value
=
(
1.
/
caffe_op
.
blobs
[
2
][
0
])
*
caffe_op
.
blobs
[
0
]
var_value
=
(
1.
/
caffe_op
.
blobs
[
2
][
0
])
*
caffe_op
.
blobs
[
1
]
gamma_value
=
scale_op
.
blobs
[
0
]
beta_value
=
np
.
zeros_like
(
mean_value
)
if
len
(
scale_op
.
blobs
)
==
2
:
beta_value
=
scale_op
.
blobs
[
1
]
scale_value
=
(
(
1.0
/
np
.
vectorize
(
math
.
sqrt
)(
var_value
+
epsilon_value
))
*
gamma_value
).
reshape
(
-
1
)
offset_value
=
((
-
mean_value
*
scale_value
)
+
beta_value
).
reshape
(
-
1
)
input_names
=
[
op
.
name
+
'_scale'
,
op
.
name
+
'_offset'
]
self
.
add_tensor
(
input_names
[
0
],
scale_value
.
shape
,
mace_pb2
.
DT_FLOAT
,
scale_value
)
self
.
add_tensor
(
input_names
[
1
],
offset_value
.
shape
,
mace_pb2
.
DT_FLOAT
,
offset_value
)
op
.
input
.
extend
([
name
for
name
in
input_names
])
op
.
output
[:]
=
scale_op
.
layer
.
top
[:]
def
convert_pooling
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
param
=
caffe_op
.
layer
.
pooling_param
op
.
type
=
MaceOp
.
Pooling
.
name
self
.
add_stride_pad_kernel_arg
(
param
,
op
)
pooling_type_arg
=
op
.
arg
.
add
()
pooling_type_arg
.
name
=
MaceKeyword
.
mace_pooling_type_str
pooling_type_arg
.
i
=
self
.
pooling_type_mode
[
param
.
pool
].
value
def
convert_softmax
(
self
,
caffe_op
):
self
.
convert_general_op
(
caffe_op
)
def
convert_concat
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
param
=
caffe_op
.
layer
.
concat_param
op
.
type
=
MaceOp
.
Concat
.
name
axis_arg
=
op
.
arg
.
add
()
axis_arg
.
name
=
MaceKeyword
.
mace_axis_str
axis_arg
.
i
=
1
if
param
.
HasField
(
'axis'
):
axis_arg
.
i
=
param
.
axis
elif
param
.
HasField
(
'concat_dim'
):
axis_arg
.
i
=
param
.
concat_dim
mace_check
(
axis_arg
.
i
==
1
,
"only support concat at channel dimension"
)
def
convert_slice
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
op
.
type
=
MaceOp
.
Slice
.
name
if
caffe_op
.
layer
.
HasField
(
'slice_param'
):
param
=
caffe_op
.
layer
.
slice_param
mace_check
(
not
param
.
HasField
(
'axis'
)
or
param
.
axis
==
1
,
"Mace do not support slice with axis %d"
%
param
.
axis
)
mace_check
(
len
(
param
.
slice_point
)
==
0
,
"Mace do not support slice with slice_point"
)
axis_arg
=
op
.
arg
.
add
()
axis_arg
.
name
=
MaceKeyword
.
mace_axis_str
axis_arg
.
i
=
1
def
convert_fully_connected
(
self
,
caffe_op
):
op
=
self
.
convert_general_op
(
caffe_op
)
param
=
caffe_op
.
layer
.
inner_product_param
op
.
type
=
MaceOp
.
FullyConnected
.
name
mace_check
(
param
.
axis
==
1
and
not
param
.
transpose
,
"Do not support non-default axis and transpose"
)
mace_check
(
caffe_op
.
blobs
[
0
].
ndim
in
[
2
,
4
],
"Unexpected fc weigth ndim."
)
if
caffe_op
.
blobs
[
0
].
ndim
==
4
:
mace_check
(
list
(
caffe_op
.
blobs
[
0
].
shape
[:
2
])
==
[
1
,
1
],
"Do not support 4D weight with shape [1, 1, *, *]"
)
weight_tensor_name
=
op
.
name
+
'_weight'
weight_data
=
caffe_op
.
blobs
[
0
].
reshape
(
param
.
num_output
,
-
1
)
self
.
add_tensor
(
weight_tensor_name
,
weight_data
.
shape
,
mace_pb2
.
DT_FLOAT
,
weight_data
)
op
.
input
.
extend
([
weight_tensor_name
])
if
len
(
caffe_op
.
blobs
)
==
2
:
bias_tensor_name
=
op
.
name
+
'_bias'
bias_data
=
caffe_op
.
blobs
[
1
]
self
.
add_tensor
(
bias_tensor_name
,
bias_data
.
shape
,
mace_pb2
.
DT_FLOAT
,
bias_data
)
op
.
input
.
extend
([
bias_tensor_name
])
mace/python/tools/converter_tool/shape_inference.py
0 → 100644
浏览文件 @
3e82ad67
import
math
import
numpy
as
np
from
mace.python.tools.converter_tool.transformer
import
Transformer
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
class
ShapeInference
(
object
):
"""Currently we only use it to infer caffe shape, we use tensorflow engine
to infer tensorflow op shapes, since tensorflow has too many ops."""
def
__init__
(
self
,
net
,
input_nodes
):
self
.
_op_shape_inference
=
{
MaceOp
.
Conv2D
.
name
:
self
.
infer_shape_conv_pool_shape
,
MaceOp
.
Eltwise
.
name
:
self
.
infer_shape_general
,
MaceOp
.
FoldedBatchNorm
.
name
:
self
.
infer_shape_general
,
MaceOp
.
AddN
.
name
:
self
.
infer_shape_general
,
MaceOp
.
Activation
.
name
:
self
.
infer_shape_general
,
MaceOp
.
Pooling
.
name
:
self
.
infer_shape_conv_pool_shape
,
MaceOp
.
Concat
.
name
:
self
.
infer_shape_concat
,
MaceOp
.
Slice
.
name
:
self
.
infer_shape_slice
,
MaceOp
.
Softmax
.
name
:
self
.
infer_shape_general
,
MaceOp
.
FullyConnected
.
name
:
self
.
infer_shape_fully_connected
,
}
self
.
_net
=
net
self
.
_output_shape_cache
=
{}
for
input_node
in
input_nodes
:
input_shape
=
input_node
.
shape
[:]
# transpose input from NCHW to NHWC
Transformer
.
transpose_shape
(
input_shape
,
[
0
,
3
,
1
,
2
])
self
.
_output_shape_cache
[
input_node
.
name
]
=
input_shape
for
tensor
in
net
.
tensors
:
self
.
_output_shape_cache
[
tensor
.
name
]
=
list
(
tensor
.
dims
)
def
run
(
self
):
for
op
in
self
.
_net
.
op
:
mace_check
(
op
.
type
in
self
.
_op_shape_inference
,
"Mace does not support caffe op type %s yet"
%
op
.
type
)
self
.
_op_shape_inference
[
op
.
type
](
op
)
def
add_output_shape
(
self
,
op
,
shapes
):
mace_check
(
len
(
op
.
output
)
==
len
(
shapes
),
"Op %s (%s) output count is different from "
"output shape count"
%
(
op
.
name
,
op
.
type
))
for
i
in
xrange
(
len
(
shapes
)):
output_name
=
op
.
output
[
i
]
output_shape
=
op
.
output_shape
.
add
()
output_shape
.
dims
.
extend
(
shapes
[
i
])
self
.
_output_shape_cache
[
output_name
]
=
shapes
[
i
]
def
infer_shape_general
(
self
,
op
):
if
len
(
op
.
input
)
>
0
:
mace_check
(
op
.
input
[
0
]
in
self
.
_output_shape_cache
,
"%s does not exist"
%
op
.
input
[
0
])
input_shape
=
self
.
_output_shape_cache
[
op
.
input
[
0
]]
self
.
add_output_shape
(
op
,
[
input_shape
])
def
infer_shape_conv_pool_shape
(
self
,
op
):
input_shape
=
self
.
_output_shape_cache
[
op
.
input
[
0
]]
output_shape
=
np
.
zeros_like
(
input_shape
)
if
op
.
type
==
MaceOp
.
Pooling
:
filter_shape
=
list
(
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_kernel_str
).
ints
)
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
:
filter_shape
=
[
input_shape
[
1
],
input_shape
[
1
]]
+
filter_shape
if
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_global_pooling_str
)
\
is
not
None
:
filter_shape
[
2
]
=
input_shape
[
2
]
filter_shape
[
3
]
=
input_shape
[
3
]
else
:
# NHWC
filter_shape
=
filter_shape
+
[
input_shape
[
1
],
input_shape
[
1
]]
if
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_global_pooling_str
)
\
is
not
None
:
filter_shape
[
0
]
=
input_shape
[
1
]
filter_shape
[
1
]
=
input_shape
[
2
]
else
:
filter_shape
=
self
.
_output_shape_cache
[
op
.
input
[
1
]]
paddings
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_padding_values_str
).
ints
# noqa
strides
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_strides_str
).
ints
dilations_arg
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_dilations_str
)
if
dilations_arg
is
not
None
:
dilations
=
dilations_arg
.
ints
else
:
dilations
=
[
1
,
1
]
if
op
.
type
==
MaceOp
.
Pooling
:
round_func
=
math
.
ceil
else
:
round_func
=
math
.
floor
output_shape
[
0
]
=
input_shape
[
0
]
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
\
and
ConverterUtil
.
filter_format
(
self
.
_net
)
==
FilterFormat
.
OIHW
:
# noqa
# filter format: OIHW
output_shape
[
1
]
=
filter_shape
[
0
]
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
0
]
-
filter_shape
[
2
]
-
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
3
]
=
int
(
round_func
((
input_shape
[
3
]
+
paddings
[
1
]
-
filter_shape
[
3
]
-
(
filter_shape
[
3
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
else
:
mace_check
(
False
,
"Mace can only infer shape for"
" NCHW input and OIHW filter"
)
self
.
add_output_shape
(
op
,
[
output_shape
])
def
infer_shape_concat
(
self
,
op
):
output_shape
=
self
.
_output_shape_cache
[
op
.
input
[
0
]]
axis
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_axis_str
).
i
for
input_node
in
op
.
input
:
input_shape
=
self
.
_output_shape_cache
[
input_node
]
output_shape
[
axis
]
+=
input_shape
[
axis
]
self
.
add_output_shape
(
op
,
[
output_shape
])
def
infer_shape_slice
(
self
,
op
):
output_shape
=
self
.
_output_shape_cache
[
op
.
input
[
0
]]
axis
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_axis_str
).
i
output_shape
[
axis
]
/=
len
(
op
.
output
)
output_shapes
=
[]
for
_
in
op
.
output
:
output_shapes
.
append
(
output_shape
)
self
.
add_output_shape
(
op
,
output_shapes
)
def
infer_shape_fully_connected
(
self
,
op
):
input_shape
=
self
.
_output_shape_cache
[
op
.
input
[
0
]]
weight_shape
=
self
.
_output_shape_cache
[
op
.
input
[
1
]]
if
ConverterUtil
.
data_format
(
op
)
==
DataFormat
.
NCHW
:
output_shape
=
[
input_shape
[
0
],
weight_shape
[
0
],
1
,
1
]
else
:
mace_check
(
False
,
"format %s is not supported"
%
ConverterUtil
.
data_format
(
op
))
self
.
add_output_shape
(
op
,
[
output_shape
])
mace/python/tools/converter_tool/tensorflow_converter.py
0 → 100644
浏览文件 @
3e82ad67
import
math
import
numpy
as
np
import
tensorflow
as
tf
from
mace.proto
import
mace_pb2
from
mace.python.tools.converter_tool
import
base_converter
from
mace.python.tools.converter_tool.base_converter
import
PoolingType
from
mace.python.tools.converter_tool.base_converter
import
PaddingMode
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
from
tensorflow.core.framework
import
tensor_shape_pb2
tf_padding_str
=
'padding'
tf_strides_str
=
'strides'
tf_dilations_str
=
'dilations'
tf_data_format_str
=
'data_format'
tf_kernel_str
=
'ksize'
tf_epsilon_str
=
'epsilon'
tf_align_corners
=
'align_corners'
tf_block_size
=
'block_size'
class
TensorflowConverter
(
base_converter
.
ConverterInterface
):
"""A class for convert tensorflow frozen model to mace model.
We use tensorflow engine to infer op output shapes, since they are of
too many types."""
padding_mode
=
{
'VALID'
:
PaddingMode
.
VALID
,
'SAME'
:
PaddingMode
.
SAME
,
'FULL'
:
PaddingMode
.
FULL
}
pooling_type_mode
=
{
'AvgPool'
:
PoolingType
.
AVG
,
'MaxPool'
:
PoolingType
.
MAX
}
eltwise_type
=
{
'Add'
:
EltwiseType
.
SUM
,
'Sub'
:
EltwiseType
.
SUB
,
'Mul'
:
EltwiseType
.
PROD
,
'Div'
:
EltwiseType
.
DIV
,
'Min'
:
EltwiseType
.
MIN
,
'Max'
:
EltwiseType
.
MAX
,
'Neg'
:
EltwiseType
.
NEG
,
'Abs'
:
EltwiseType
.
ABS
,
'RealDiv'
:
EltwiseType
.
DIV
,
'SquaredDifference'
:
EltwiseType
.
SQR_DIFF
,
'Pow'
:
EltwiseType
.
POW
}
activation_type
=
{
'Relu'
:
ActivationType
.
RELU
,
'Relu6'
:
ActivationType
.
RELUX
,
'Tanh'
:
ActivationType
.
TANH
,
'Sigmoid'
:
ActivationType
.
SIGMOID
}
def
__init__
(
self
,
option
,
src_model_file
):
self
.
_op_converters
=
{
'Conv2D'
:
self
.
convert_conv2d
,
'DepthwiseConv2dNative'
:
self
.
convert_conv2d
,
'Conv2DBackpropInput'
:
self
.
convert_conv2d
,
'BiasAdd'
:
self
.
convert_biasadd
,
'Add'
:
self
.
convert_add
,
'Sub'
:
self
.
convert_elementwise
,
'Mul'
:
self
.
convert_elementwise
,
'Div'
:
self
.
convert_elementwise
,
'Min'
:
self
.
convert_elementwise
,
'Max'
:
self
.
convert_elementwise
,
'Neg'
:
self
.
convert_elementwise
,
'Abs'
:
self
.
convert_elementwise
,
'RealDiv'
:
self
.
convert_elementwise
,
'SquaredDifference'
:
self
.
convert_elementwise
,
'Pow'
:
self
.
convert_elementwise
,
'Relu'
:
self
.
convert_activation
,
'Relu6'
:
self
.
convert_activation
,
'Tanh'
:
self
.
convert_activation
,
'Sigmoid'
:
self
.
convert_activation
,
'FusedBatchNorm'
:
self
.
convert_fused_batchnorm
,
'AvgPool'
:
self
.
convert_pooling
,
'MaxPool'
:
self
.
convert_pooling
,
'Squeeze'
:
self
.
convert_identity
,
'Reshape'
:
self
.
convert_reshape
,
'Shape'
:
self
.
convert_nop
,
'Softmax'
:
self
.
convert_softmax
,
'ResizeBilinear'
:
self
.
convert_resize_bilinear
,
'Placeholder'
:
self
.
convert_nop
,
'SpaceToBatchND'
:
self
.
convert_space_batch
,
'BatchToSpaceND'
:
self
.
convert_space_batch
,
'DepthToSpace'
:
self
.
convert_space_depth
,
'SpaceToDepth'
:
self
.
convert_space_depth
,
'Pad'
:
self
.
convert_pad
,
'ConcatV2'
:
self
.
convert_concat
,
'Mean'
:
self
.
convert_mean
,
# Const converter_tool should be placed at the end
'Const'
:
self
.
convert_tensor
,
}
self
.
_option
=
option
self
.
_mace_net_def
=
mace_pb2
.
NetDef
()
ConverterUtil
.
set_filter_format
(
self
.
_mace_net_def
,
FilterFormat
.
HWIO
)
tf_graph_def
=
tf
.
GraphDef
()
with
tf
.
gfile
.
Open
(
src_model_file
,
'rb'
)
as
f
:
tf_graph_def
.
ParseFromString
(
f
.
read
())
self
.
add_shape_info
(
tf_graph_def
)
with
tf
.
Session
()
as
session
:
with
session
.
graph
.
as_default
()
as
graph
:
tf
.
import_graph_def
(
tf_graph_def
,
name
=
''
)
self
.
_tf_graph
=
graph
self
.
_skip_tensor
=
set
()
def
run
(
self
):
with
tf
.
Session
()
as
session
:
self
.
convert_ops
()
self
.
replace_input_output_tensor_name
()
return
self
.
_mace_net_def
def
replace_input_output_tensor_name
(
self
):
for
op
in
self
.
_mace_net_def
.
op
:
for
i
in
xrange
(
len
(
op
.
input
)):
if
op
.
input
[
i
][
-
2
:]
==
':0'
:
op_name
=
op
.
input
[
i
][:
-
2
]
if
op_name
in
self
.
_option
.
input_nodes
:
op
.
input
[
i
]
=
op_name
for
i
in
xrange
(
len
(
op
.
output
)):
if
op
.
output
[
i
][
-
2
:]
==
':0'
:
op_name
=
op
.
output
[
i
][:
-
2
]
if
op_name
in
self
.
_option
.
output_nodes
:
op
.
output
[
i
]
=
op_name
def
add_shape_info
(
self
,
tf_graph_def
):
for
node
in
tf_graph_def
.
node
:
if
node
.
name
in
self
.
_option
.
input_nodes
:
del
node
.
attr
[
'shape'
].
shape
.
dim
[:]
node
.
attr
[
'shape'
].
shape
.
dim
.
extend
([
tensor_shape_pb2
.
TensorShapeProto
.
Dim
(
size
=
i
)
for
i
in
self
.
_option
.
input_nodes
[
node
.
name
].
shape
])
@
staticmethod
def
get_scope
(
tensor_name
):
idx
=
tensor_name
.
rfind
(
'/'
)
if
idx
==
-
1
:
return
tensor_name
else
:
return
tensor_name
[:
idx
]
def convert_ops(self):
    """Walk every operation of the imported TF graph and dispatch it to
    the converter registered for its op type; fail fast on unknown types."""
    for tf_op in self._tf_graph.get_operations():
        converter = self._op_converters.get(tf_op.type)
        mace_check(converter is not None,
                   "Mace does not support tensorflow op type %s yet"
                   % tf_op.type)
        converter(tf_op)
def convert_tensor(self, tf_op):
    # Convert a TF Const op into a MACE weight tensor, unless the tensor
    # was marked to be skipped (i.e. already folded into an op's args).
    output_name = tf_op.outputs[0].name
    if output_name not in self._skip_tensor:
        tensor = self._mace_net_def.tensors.add()
        tensor.name = tf_op.outputs[0].name
        # eval() requires a default session (opened in run()).
        tf_tensor = tf_op.outputs[0].eval()
        tensor.dims.extend(list(tf_tensor.shape))

        tf_dt = tf_op.get_attr('dtype')
        if tf_dt == tf.float32:
            tensor.data_type = mace_pb2.DT_FLOAT
            tensor.float_data.extend(tf_tensor.astype(np.float32).flat)
        elif tf_dt == tf.int32:
            tensor.data_type = mace_pb2.DT_INT32
            tensor.int32_data.extend(tf_tensor.astype(np.int32).flat)
        else:
            # Only float32/int32 constants are representable in mace_pb2 here.
            mace_check(False, "Not supported tensor type: %s" % tf_dt.name)
def add_tensor(self, name, shape, data_type, value):
    """Append a new constant tensor (float payload) to the MACE net.

    `value` must expose a numpy-style `.flat` iterator; its elements are
    stored in `float_data` regardless of the declared `data_type`.
    """
    tensor = self._mace_net_def.tensors.add()
    tensor.name = name
    tensor.data_type = data_type
    tensor.dims.extend(list(shape))
    tensor.float_data.extend(value.flat)
def convert_nop(self, tf_op):
    # Deliberately emit nothing: ops mapped here (Shape, Placeholder, ...)
    # have no runtime counterpart in the MACE graph.
    pass
def convert_general_op(self, tf_op):
    # Create a MACE OperatorDef mirroring a TF op: same name/type,
    # same input/output tensor names, one output_shape and output_type
    # entry per TF output. Callers then overwrite `type` and add
    # op-specific args. Returns the new OperatorDef.
    op = self._mace_net_def.op.add()
    op.name = tf_op.name
    op.type = tf_op.type
    op.input.extend([tf_input.name for tf_input in tf_op.inputs])
    op.output.extend([tf_output.name for tf_output in tf_op.outputs])
    for tf_output in tf_op.outputs:
        output_shape = op.output_shape.add()
        output_shape.dims.extend(tf_output.shape.as_list())
        op.output_type.append(self._option.data_type)

    # 'T' carries the compute data type chosen in the conversion options.
    data_type_arg = op.arg.add()
    data_type_arg.name = 'T'
    data_type_arg.i = self._option.data_type

    # TF graphs are NHWC; the transformer may retarget later.
    ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)

    return op
def convert_identity(self, tf_op):
    """Keep Identity as a pass-through op; the transformer removes it later."""
    self.convert_general_op(tf_op).type = 'Identity'
def convert_conv2d(self, tf_op):
    # Convert Conv2D / DepthwiseConv2dNative / Conv2DBackpropInput into
    # the corresponding MACE conv op, copying padding, strides and
    # (except for deconv) dilations from the TF attributes.
    op = self.convert_general_op(tf_op)
    if tf_op.type == 'DepthwiseConv2dNative':
        op.type = MaceOp.DepthwiseConv2d.name
    elif tf_op.type == 'Conv2DBackpropInput':
        op.type = MaceOp.Deconv2D.name
    else:
        op.type = MaceOp.Conv2D.name

    padding_arg = op.arg.add()
    padding_arg.name = MaceKeyword.mace_padding_str
    padding_arg.i = self.padding_mode[tf_op.get_attr(tf_padding_str)].value
    strides_arg = op.arg.add()
    strides_arg.name = MaceKeyword.mace_strides_str
    # TF strides/dilations are NHWC 4-vectors; only H/W components matter.
    strides_arg.ints.extend(tf_op.get_attr(tf_strides_str)[1:3])
    if op.type != MaceOp.Deconv2D.name:
        dilation_arg = op.arg.add()
        dilation_arg.name = MaceKeyword.mace_dilations_str
        dilation_arg.ints.extend(tf_op.get_attr(tf_dilations_str)[1:3])
def convert_elementwise(self, tf_op):
    """Convert a TF binary elementwise op (Add/Mul/...) into a MACE
    Eltwise op, recording the element operation kind as an arg."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Eltwise.name
    element_type_arg = op.arg.add()
    element_type_arg.name = MaceKeyword.mace_element_type_str
    element_type_arg.i = self.eltwise_type[tf_op.type].value
def convert_biasadd(self, tf_op):
    """Convert TF BiasAdd directly to the MACE BiasAdd op."""
    self.convert_general_op(tf_op).type = MaceOp.BiasAdd.name
def convert_add(self, tf_op):
    """Two-input Add becomes an Eltwise op; three-or-more inputs become
    the dedicated AddN op."""
    if len(tf_op.inputs) == 2:
        self.convert_elementwise(tf_op)
    else:
        self.convert_general_op(tf_op).type = MaceOp.AddN.name
def convert_activation(self, tf_op):
    # Convert Relu/Relu6/Tanh/Sigmoid-style ops into a MACE Activation op
    # whose kind is stored as a string arg. Relu6 additionally carries its
    # fixed upper clamp of 6.0.
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Activation.name

    type_arg = op.arg.add()
    type_arg.name = MaceKeyword.mace_activation_type_str
    type_arg.s = self.activation_type[tf_op.type].name

    if tf_op.type == 'Relu6':
        limit_arg = op.arg.add()
        limit_arg.name = MaceKeyword.mace_activation_max_limit_str
        limit_arg.f = 6.0
def convert_fused_batchnorm(self, tf_op):
    # Fold FusedBatchNorm's four statistics tensors (gamma, beta, mean,
    # variance) into two precomputed tensors so the runtime op is a simple
    # per-channel scale+offset:
    #   scale  = gamma / sqrt(var + epsilon)
    #   offset = beta - mean * scale
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.FoldedBatchNorm.name

    # inputs: [0]=data, [1]=gamma, [2]=beta, [3]=mean, [4]=variance
    gamma_value = tf_op.inputs[1].eval().astype(np.float32)
    beta_value = tf_op.inputs[2].eval().astype(np.float32)
    mean_value = tf_op.inputs[3].eval().astype(np.float32)
    var_value = tf_op.inputs[4].eval().astype(np.float32)
    epsilon_value = tf_op.get_attr(tf_epsilon_str)

    scale_name = self.get_scope(tf_op.name) + '/scale:0'
    offset_name = self.get_scope(tf_op.name) + '/offset:0'
    scale_value = (
        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value)
    offset_value = (-mean_value * scale_value) + beta_value

    self.add_tensor(scale_name, scale_value.shape, mace_pb2.DT_FLOAT,
                    scale_value)
    self.add_tensor(offset_name, offset_value.shape, mace_pb2.DT_FLOAT,
                    offset_value)

    # The original statistics constants must not be emitted as tensors.
    self._skip_tensor.update([inp.name for inp in tf_op.inputs][1:])

    # Rewire the op to consume only data + computed scale/offset, and keep
    # a single output (FusedBatchNorm has extra bookkeeping outputs).
    del op.input[1:]
    op.input.extend([scale_name, offset_name])
    del op.output[1:]
    del op.output_shape[1:]
    del op.output_type[1:]
def convert_pooling(self, tf_op):
    # Convert MaxPool/AvgPool into a MACE Pooling op, copying pooling
    # kind, padding, strides and kernel size from the TF attributes
    # (strides/ksize are NHWC 4-vectors; only H/W components are kept).
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Pooling.name
    pooling_type_arg = op.arg.add()
    pooling_type_arg.name = MaceKeyword.mace_pooling_type_str
    pooling_type_arg.i = self.pooling_type_mode[tf_op.type].value
    padding_arg = op.arg.add()
    padding_arg.name = MaceKeyword.mace_padding_str
    padding_arg.i = self.padding_mode[tf_op.get_attr(tf_padding_str)].value
    strides_arg = op.arg.add()
    strides_arg.name = MaceKeyword.mace_strides_str
    strides_arg.ints.extend(tf_op.get_attr(tf_strides_str)[1:3])
    kernels_arg = op.arg.add()
    kernels_arg.name = MaceKeyword.mace_kernel_str
    kernels_arg.ints.extend(tf_op.get_attr(tf_kernel_str)[1:3])
def convert_softmax(self, tf_op):
    """Convert TF Softmax directly to the MACE Softmax op."""
    self.convert_general_op(tf_op).type = MaceOp.Softmax.name
def convert_resize_bilinear(self, tf_op):
    """Convert ResizeBilinear: the target size constant is folded into an
    arg and dropped from the op inputs (and from the emitted tensors)."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.ResizeBilinear.name
    del op.input[1:]

    size_arg = op.arg.add()
    size_arg.name = MaceKeyword.mace_resize_size_str
    size_value = tf_op.inputs[1].eval().astype(np.int32)
    size_arg.ints.extend(size_value)
    # BUGFIX: was `self._skip_tensor.update(name)`, which iterates the
    # string and adds each CHARACTER to the set, so the size Const was
    # never actually skipped. Use add() (as convert_mean already does).
    self._skip_tensor.add(tf_op.inputs[1].name)

    align_corners_arg = op.arg.add()
    align_corners_arg.name = MaceKeyword.mace_align_corners_str
    align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def
convert_space_batch
(
self
,
tf_op
):
print
"""You might want to try 'flatten_atrous_conv' in
transform graph to turn atrous conv2d into regular conv2d.
This may give you performance benefit on GPU.
(see https://github.com/tensorflow/tensorflow/blob/master/
tensorflow/tools/graph_transforms/README.md#flatten_atrous_conv)
"""
op
=
self
.
convert_general_op
(
tf_op
)
del
op
.
input
[
1
:]
size_arg
=
op
.
arg
.
add
()
size_arg
.
name
=
MaceKeyword
.
mace_space_batch_block_shape_str
size_value
=
tf_op
.
inputs
[
1
].
eval
().
astype
(
np
.
int32
)
size_arg
.
ints
.
extend
(
size_value
)
crops_or_paddings_arg
=
op
.
arg
.
add
()
if
op
.
type
==
'BatchToSpaceND'
:
op
.
type
=
MaceOp
.
BatchToSpaceND
.
name
crops_or_paddings_arg
.
name
=
\
MaceKeyword
.
mace_batch_to_space_crops_str
else
:
op
.
type
=
MaceOp
.
SpaceToBatchND
.
name
crops_or_paddings_arg
.
name
=
MaceKeyword
.
mace_paddings_str
crops_or_paddings_value
=
tf_op
.
inputs
[
2
].
eval
().
astype
(
np
.
int32
).
flat
crops_or_paddings_arg
.
ints
.
extend
(
crops_or_paddings_value
)
self
.
_skip_tensor
.
update
(
tf_op
.
inputs
[
1
].
name
)
self
.
_skip_tensor
.
update
(
tf_op
.
inputs
[
2
].
name
)
def convert_space_depth(self, tf_op):
    """Convert SpaceToDepth / DepthToSpace; both carry a single
    block-size attribute copied into an integer arg."""
    op = self.convert_general_op(tf_op)
    op.type = (MaceOp.SpaceToDepth.name if op.type == 'SpaceToDepth'
               else MaceOp.DepthToSpace.name)
    block_size_arg = op.arg.add()
    block_size_arg.name = MaceKeyword.mace_space_depth_block_size_str
    block_size_arg.i = tf_op.get_attr(tf_block_size)
def convert_pad(self, tf_op):
    """Convert Pad/PadV2: paddings (and the optional constant fill value)
    are folded into args and their Const inputs are skipped."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Pad.name
    del op.input[1:]

    paddings_arg = op.arg.add()
    paddings_arg.name = MaceKeyword.mace_paddings_str
    paddings_value = tf_op.inputs[1].eval().astype(np.int32).flat
    paddings_arg.ints.extend(paddings_value)
    # BUGFIX: was `self._skip_tensor.update(name)`, which adds each
    # character of the name instead of the name itself, so the paddings
    # Const was never skipped. Use add() (as convert_mean already does).
    self._skip_tensor.add(tf_op.inputs[1].name)

    if len(tf_op.inputs) == 3:
        constant_value_arg = op.arg.add()
        constant_value_arg.name = MaceKeyword.mace_constant_value_str
        constant_value = tf_op.inputs[2].eval().astype(np.int32).flat[0]
        constant_value_arg.i = constant_value
        # BUGFIX: same .update(str) -> .add(str) correction as above.
        self._skip_tensor.add(tf_op.inputs[2].name)
def convert_concat(self, tf_op):
    """Convert ConcatV2: the trailing axis Const becomes an int arg.
    Only channel-dimension (axis 3, NHWC) concat is supported."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Concat.name
    del op.input[-1]

    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis = tf_op.inputs[-1].eval().astype(np.int32)
    axis_arg.i = axis
    mace_check(axis == 3, "only support concat at channel dimension")

    # BUGFIX: was `self._skip_tensor.update(name)`, which adds each
    # character of the name instead of the name itself, so the axis Const
    # was never skipped. Use add() (as convert_mean already does).
    self._skip_tensor.add(tf_op.inputs[-1].name)
def convert_reshape(self, tf_op):
    """Convert Reshape. If the shape comes from a Const, it is folded
    into an arg (with -1 rewritten to 1); if it comes from a Shape op,
    the statically-known shape of that op's input is used."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Reshape.name
    del op.input[1:]

    shape_arg = op.arg.add()
    shape_arg.name = MaceKeyword.mace_shape_str
    shape_value = []
    if tf_op.inputs[1].op.type == 'Const':
        shape_value = list(tf_op.inputs[1].eval().astype(np.int32))
        # NOTE(review): -1 ("infer this dim") is replaced by 1 here; this
        # presumably relies on callers only reshaping to fully-known
        # shapes with batch 1 -- confirm before generalizing.
        for i in range(len(shape_value)):
            if shape_value[i] == -1:
                shape_value[i] = 1
        # BUGFIX: was `self._skip_tensor.update(name)`, which adds each
        # character of the name instead of the name itself, so the shape
        # Const was never skipped. Use add() (as convert_mean does).
        self._skip_tensor.add(tf_op.inputs[-1].name)
    elif tf_op.inputs[1].op.type == 'Shape':
        shape_value = list(
            tf_op.inputs[1].op.inputs[0].shape.as_list())

    shape_arg.ints.extend(shape_value)
def convert_mean(self, tf_op):
    # Convert Mean over the spatial dims (reduce dims must be exactly
    # [1, 2], i.e. H and W in NHWC) into a global average Pooling op
    # whose kernel spans the whole input feature map.
    op = self.convert_general_op(tf_op)
    del op.input[1:]
    reduce_dims = tf_op.inputs[1].eval()
    mace_check(reduce_dims[0] == 1 and reduce_dims[1] == 2,
               "Mean only support reduce dim 1, 2")
    op.type = MaceOp.Pooling.name
    pooling_type_arg = op.arg.add()
    pooling_type_arg.name = MaceKeyword.mace_pooling_type_str
    pooling_type_arg.i = PoolingType.AVG.value
    padding_arg = op.arg.add()
    padding_arg.name = MaceKeyword.mace_padding_str
    padding_arg.i = PaddingMode.VALID.value
    strides_arg = op.arg.add()
    strides_arg.name = MaceKeyword.mace_strides_str
    strides_arg.ints.extend([1, 1])
    kernels_arg = op.arg.add()
    kernels_arg.name = MaceKeyword.mace_kernel_str
    # Kernel = the input's full H x W, making the pooling global.
    kernels_arg.ints.extend(tf_op.inputs[0].shape.as_list()[1:3])
    self._skip_tensor.add(tf_op.inputs[1].name)
mace/python/tools/converter_tool/transformer.py
0 → 100644
浏览文件 @
3e82ad67
import
enum
import
numpy
as
np
from
mace.proto
import
mace_pb2
from
mace.python.tools.converter_tool
import
base_converter
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
PaddingMode
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
OPENCL_IMAGE_MAX_SIZE
=
16384
class OpenCLBufferType(enum.Enum):
    # Layouts used when packing tensors into OpenCL images/buffers.
    # Values are part of the serialized model contract -- do not renumber.
    CONV2D_FILTER = 0
    IN_OUT_CHANNEL = 1
    ARGUMENT = 2
    IN_OUT_HEIGHT = 3
    IN_OUT_WIDTH = 4
    WINOGRAD_FILTER = 5
    DW_CONV2D_FILTER = 6
    WEIGHT_HEIGHT = 7
    WEIGHT_WIDTH = 8
class
Transformer
(
base_converter
.
ConverterInterface
):
"""A class for transform naive mace model to optimized model.
This Transformer should be platform irrelevant. So, do not assume
tensor name has suffix like ':0".
"""
def __init__(self, option, model):
    # The passes below run in a fixed order; several depend on earlier
    # passes having normalized the graph (e.g. biasadd folding assumes
    # Add->BiasAdd rewriting already happened).
    # DO NOT reorder the following transformers
    self._registered_transformers = [
        self.remove_identity_op,
        self.transform_global_pooling,
        self.fold_softmax,
        self.fold_batchnorm,
        self.fold_conv_and_bn,  # data_format related
        self.fold_depthwise_conv_and_bn,  # data_format related
        self.transform_gpu_winograd,  # data_format related
        self.transform_add_to_biasadd,
        self.fold_biasadd,
        self.fold_activation,
        self.transpose_filters,
        self.transpose_data_format,
        self.transform_global_conv_to_fc,
        self.transform_buffer_image,
        self.sort_by_execution,
    ]

    self._option = option
    self._model = model

    # Lookup tables rebuilt before every pass by
    # construct_ops_and_consumers().
    self._ops = {}
    self._consts = {}
    self._consumers = {}
    self._producer = {}
    # CPU runs NCHW; GPU (and others) stay NHWC.
    self._target_data_format = DataFormat.NHWC

    if self._option.device == mace_pb2.CPU:
        self._target_data_format = DataFormat.NCHW
def run(self):
    """Apply each registered pass repeatedly until it reports no change
    (a fixed point), rebuilding the graph indexes before every attempt.
    Returns the transformed model."""
    for apply_pass in self._registered_transformers:
        changed = True
        while changed:
            self.construct_ops_and_consumers()
            changed = apply_pass()
    return self._model
def filter_format(self):
    # Decode the model-level filter layout arg into a FilterFormat enum;
    # aborts via mace_check on any unknown value.
    filter_format_value = ConverterUtil.get_arg(self._model,
                                                MaceKeyword.mace_filter_format_str).i  # noqa
    filter_format = None
    if filter_format_value == FilterFormat.HWIO.value:
        filter_format = FilterFormat.HWIO
    elif filter_format_value == FilterFormat.OIHW.value:
        filter_format = FilterFormat.OIHW
    elif filter_format_value == FilterFormat.HWOI.value:
        filter_format = FilterFormat.HWOI
    else:
        mace_check(False, "filter format %d not supported" %
                   filter_format_value)

    return filter_format
def set_filter_format(self, filter_format):
    """Record `filter_format` (a FilterFormat enum) in the model-level
    filter-format arg."""
    ConverterUtil.get_arg(
        self._model, MaceKeyword.mace_filter_format_str).i = \
        filter_format.value
def construct_ops_and_consumers(self):
    """Rebuild the graph indexes used by every pass:
    _ops (name -> op), _consts (name -> tensor),
    _consumers (tensor name -> ops reading it) and
    _producer (tensor name -> op writing it).
    Synthetic 'Input' ops are created for the declared model inputs so
    their tensors also have a producer entry."""
    self._ops.clear()
    self._consumers.clear()
    self._producer.clear()
    for op in self._model.op:
        self._ops[op.name] = op
    for tensor in self._model.tensors:
        self._consts[tensor.name] = tensor
    for op in self._ops.values():
        for input_tensor in op.input:
            if input_tensor not in self._consumers:
                self._consumers[input_tensor] = []
            self._consumers[input_tensor].append(op)
        for output_tensor in op.output:
            self._producer[output_tensor] = op
    for input_node in self._option.input_nodes.values():
        op = mace_pb2.OperatorDef()
        op.name = self.normalize_op_name(input_node.name)
        op.type = 'Input'
        # BUGFIX: was `op.output.extend(input_node.name)` -- extending a
        # repeated string field with a str iterates its CHARACTERS, so
        # the producer table was keyed by the first character of the
        # input name instead of the name itself, breaking every
        # _producer lookup for tensors fed directly by a model input.
        op.output.extend([input_node.name])
        output_shape = op.output_shape.add()
        output_shape.dims.extend(input_node.shape)
        if self._option.device == mace_pb2.CPU:
            # CPU consumes NCHW inputs.
            self.transpose_shape(output_shape.dims, [0, 3, 1, 2])
            ConverterUtil.add_data_format_arg(op, DataFormat.NCHW)
        else:
            ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
        self._producer[op.output[0]] = op
@staticmethod
def replace(obj_list, source, target):
    """Replace every element of `obj_list` equal to `source` with
    `target`, in place."""
    for idx, item in enumerate(obj_list):
        if item == source:
            obj_list[idx] = target
@staticmethod
def transpose_shape(shape, order):
    """Permute `shape` in place so that new[i] == old[order[i]]."""
    shape[:] = [shape[axis] for axis in order]
@staticmethod
def normalize_op_name(name):
    """Make a tensor name legal as an op name (':' is not allowed)."""
    return '_'.join(name.split(':'))
def consumer_count(self, tensor_name):
    """Number of ops that read `tensor_name` as an input (0 if none)."""
    consumers = self._consumers.get(tensor_name, [])
    return len(consumers)
def is_op_output_node(self, op):
    """True iff any output of `op` is one of the model's declared
    output nodes."""
    requested_outputs = set(self._option.output_nodes)
    return any(out in requested_outputs for out in op.output)
def replace_output_node(self, op):
    """if it is an output node, change output node to the op before it"""
    # When removing `op`, its producer must take over op's output name so
    # the declared model output still exists in the graph.
    if self.is_op_output_node(op):
        real_output_node = self._producer[op.input[0]]
        self.replace(real_output_node.output, op.input[0], op.output[0])
        print("change %s to %s" % (real_output_node.name, op.name))
def remove_identity_op(self):
    # Remove one Identity op per call (consumers are rewired to read the
    # Identity's input); returning True makes the driver re-run the pass
    # with freshly rebuilt indexes until no Identity remains.
    net = self._model
    for op in net.op:
        if op.type == 'Identity':
            print("Remove identity: %s(%s)" % (op.name, op.type))
            for consumer_op in self._consumers.get(op.output[0], []):
                Transformer.replace(consumer_op.input,
                                    op.output[0], op.input[0])
            self.replace_output_node(op)
            net.op.remove(op)
            return True

    return False
def transform_global_pooling(self):
    # Resolve "global pooling" markers into concrete kernel sizes equal to
    # the input's spatial extent, honoring the op's data format.
    net = self._model
    for op in net.op:
        if op.type == MaceOp.Pooling.name and \
                ConverterUtil.get_arg(op,
                                      MaceKeyword.mace_global_pooling_str) is not None:  # noqa
            print("Transform global pooling: %s(%s)" % (op.name, op.type))
            input_shape = self._producer[op.input[0]].output_shape[0].dims
            if ConverterUtil.data_format(op) == DataFormat.NHWC:
                kernel_shape = input_shape[1:3]
            else:
                kernel_shape = input_shape[2:4]
            ConverterUtil.get_arg(op,
                                  MaceKeyword.mace_kernel_str).ints[:] \
                = kernel_shape[:]

    return False
def fold_batchnorm(self):
    """Fold an unfused batch norm expressed as
    (x * scale_const) followed by (+ offset_const) into a single
    FoldedBatchNorm op. Folds one match per call; True means "changed,
    run me again"."""
    net = self._model
    for op in net.op:
        if (op.type == MaceOp.Eltwise.name
                and ConverterUtil.get_arg(
                    op, MaceKeyword.mace_element_type_str).i
                == EltwiseType.PROD.value) \
                and len(op.input) == 2 \
                and op.input[1] in self._consts \
                and self.consumer_count(op.output[0]) == 1 \
                and not self.is_op_output_node(op):
            consumer_op = self._consumers[op.output[0]][0]
            # BUGFIX: the SUM check previously read
            # `ConverterUtil.get_arg(op, ...)` -- but `op` is the PROD
            # Eltwise matched above, so that disjunct was always false
            # and only the BiasAdd form was ever folded. It must inspect
            # the consumer's element type.
            if (consumer_op.type == MaceOp.Eltwise.name
                    and ConverterUtil.get_arg(
                        consumer_op,
                        MaceKeyword.mace_element_type_str).i
                    == EltwiseType.SUM.value
                    or consumer_op.type == MaceOp.BiasAdd.name) \
                    and len(consumer_op.input) == 2 \
                    and consumer_op.input[1] in self._consts \
                    and len(self._consts[consumer_op.input[1]].dims) == 1:
                print("Fold batchnorm: %s(%s)" % (op.name, op.type))
                consumer_op.type = MaceOp.FoldedBatchNorm.name
                inputs = [op.input[0], op.input[1], consumer_op.input[1]]
                consumer_op.input[:] = inputs[:]

                net.op.remove(op)
                return True

    return False
def fold_conv_and_bn(self):
    # Fold a FoldedBatchNorm that directly follows a (de)conv into the
    # conv itself: the per-output-channel scale is multiplied into the
    # filter weights, and the batchnorm degenerates into a BiasAdd.
    net = self._model
    for op in net.op:
        if (op.type == MaceOp.Conv2D.name
            or op.type == MaceOp.Deconv2D.name) \
                and self.consumer_count(op.output[0]) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.FoldedBatchNorm.name:
                print("Fold conv and bn: %s(%s)" % (op.name, op.type))
                filter = self._consts[op.input[1]]
                scale = self._consts[consumer_op.input[1]]
                idx = 0
                filter_format = self.filter_format()
                # Walk the flat weight buffer in layout order; the scale
                # index tracks the output-channel dimension.
                if filter_format == FilterFormat.HWIO:
                    for hwi in xrange(filter.dims[0] * filter.dims[1]
                                      * filter.dims[2]):
                        for o in xrange(filter.dims[3]):
                            filter.float_data[idx] *= scale.float_data[o]
                            idx += 1
                elif filter_format == FilterFormat.OIHW:
                    for o in xrange(filter.dims[0]):
                        for hwi in xrange(filter.dims[1] * filter.dims[2]
                                          * filter.dims[3]):
                            filter.float_data[idx] *= scale.float_data[o]
                            idx += 1
                else:
                    mace_check(False, "filter format %s not supported" %
                               filter_format)

                # change BN to BiasAdd
                consumer_op.type = MaceOp.BiasAdd.name
                del consumer_op.input[1]

                # remove scale tensor
                net.tensors.remove(scale)
                return True

    return False
def fold_depthwise_conv_and_bn(self):
    # Depthwise variant of fold_conv_and_bn: output channel = (input
    # channel, multiplier) pair, so the scale index is derived from both.
    net = self._model
    for op in net.op:
        if op.type == MaceOp.DepthwiseConv2d.name \
                and self.consumer_count(op.output[0]) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.FoldedBatchNorm.name:
                print("Fold depthwise conv and bn: %s(%s)"
                      % (op.name, op.type))
                filter = self._consts[op.input[1]]
                scale = self._consts[consumer_op.input[1]]
                idx = 0
                filter_format = self.filter_format()
                if filter_format == FilterFormat.HWIO:
                    # HWIM layout: scale index = i * multiplier + o.
                    for hw in xrange(filter.dims[0] * filter.dims[1]):
                        for i in xrange(filter.dims[2]):
                            for o in xrange(filter.dims[3]):
                                filter.float_data[idx] *= \
                                    scale.float_data[i * filter.dims[3] + o]
                                idx += 1
                elif filter_format == FilterFormat.OIHW:
                    # NOTE(review): the scale index here uses
                    # `i * filter.dims[0] + o` (dims[0] = multiplier in
                    # MIHW); presumably it should mirror the HWIO branch's
                    # channel*multiplier indexing -- verify against the
                    # runtime's depthwise weight layout.
                    for o in xrange(filter.dims[0]):
                        for i in xrange(filter.dims[1]):
                            for hw in xrange(filter.dims[2]
                                             * filter.dims[3]):
                                filter.float_data[idx] *= \
                                    scale.float_data[i * filter.dims[0] + o]
                                idx += 1
                else:
                    mace_check(False, "filter format %s not supported" %
                               filter_format)

                # change BN to BiasAdd
                consumer_op.type = MaceOp.BiasAdd.name
                del consumer_op.input[1]

                # remove scale tensor
                net.tensors.remove(scale)
                return True

    return False
@staticmethod
def sort_feature_map_shape(shape, data_format):
    """Return shape in NHWC order"""
    batch = shape[0]
    if data_format == DataFormat.NHWC:
        height, width, channels = shape[1], shape[2], shape[3]
    else:
        height, width, channels = shape[2], shape[3], shape[1]
    return batch, height, width, channels
@staticmethod
def sort_filter_shape(filter_shape, filter_format):
    """Return filter shape in HWIO order, i.e. the tuple
    (height, width, in_channels, out_channels), for any supported
    layout."""
    if filter_format == FilterFormat.HWIO:
        order = (0, 1, 2, 3)
    elif filter_format == FilterFormat.OIHW:
        order = (2, 3, 1, 0)
    elif filter_format == FilterFormat.HWOI:
        order = (0, 1, 3, 2)
    else:
        mace_check(False, "filter format %s not supported" % filter_format)
    return (filter_shape[order[0]], filter_shape[order[1]],
            filter_shape[order[2]], filter_shape[order[3]])
def check_if_gpu_use_winograd_conv(self, op):
    # Winograd F(2x2, 3x3) is applicable only to 3x3, stride-1,
    # dilation-1 Conv2D, and only when the transformed tiles fit within
    # the OpenCL image size limit.
    if not self._option.winograd_enabled:
        return False
    if op.type != MaceOp.Conv2D.name:
        return False
    filter_shape = self._consts[op.input[1]].dims
    output_shape = op.output_shape[0].dims
    strides = ConverterUtil.get_arg(op, MaceKeyword.mace_strides_str).ints
    dilations_arg = ConverterUtil.get_arg(op,
                                          MaceKeyword.mace_dilations_str)
    if dilations_arg is None:
        dilations = [1, 1]
    else:
        dilations = dilations_arg.ints
    filter_height, filter_width, in_channels, out_channels = \
        Transformer.sort_filter_shape(filter_shape, self.filter_format())
    batch, out_height, out_width, _ = Transformer.sort_feature_map_shape(
        output_shape, ConverterUtil.data_format(op))

    if filter_height != 3 or filter_width != 3 or strides[0] > 1 \
            or strides[1] > 1 or dilations[0] > 1 or dilations[1] > 1:
        return False
    # Number of 2x2 output tiles (py2 integer division).
    width = batch * ((out_height + 1) / 2) * ((out_width + 1) / 2)
    return (16 * in_channels < OPENCL_IMAGE_MAX_SIZE) and \
           (16 * out_channels < OPENCL_IMAGE_MAX_SIZE) and \
           (width < OPENCL_IMAGE_MAX_SIZE)
def transform_gpu_winograd(self):
    """Only gpu needs winograd transform."""
    # Replace each eligible 3x3 Conv2D with the three-op pipeline
    # WinogradTransform -> MatMul -> WinogradInverseTransform, and
    # pre-transpose its filter tensor to OIHW so the runtime can apply
    # the Winograd filter transform.
    net = self._model
    filter_format = self.filter_format()

    if self._option.device == mace_pb2.GPU:
        for op in net.op:
            if op.type == MaceOp.Conv2D.name \
                    and self.check_if_gpu_use_winograd_conv(op):
                print("Transform gpu winograd %s(%s)" % (op.name, op.type))
                output_shape = op.output_shape[0].dims
                filter = self._consts[op.input[1]]
                filter_shape = filter.dims
                data_format = ConverterUtil.data_format(op)
                filter_height, filter_width, in_channels, out_channels = \
                    Transformer.sort_filter_shape(filter_shape,
                                                  filter_format)
                batch, out_height, out_width, _ = \
                    Transformer.sort_feature_map_shape(output_shape,
                                                       data_format)

                # Input transform
                wt_op = net.op.add()
                wt_op.name = op.name + '_input_transform'
                wt_op.type = MaceOp.WinogradTransform.name
                wt_op.input.extend([op.input[0]])
                wt_op.output.extend([wt_op.name])
                wt_output_shape = wt_op.output_shape.add()
                # One column per 2x2 output tile (py2 integer division).
                wt_output_width = batch * (
                    (out_height + 1) / 2) * ((out_width + 1) / 2)
                wt_output_shape.dims.extend(
                    [16, in_channels, wt_output_width, 1])

                arg = wt_op.arg.add()
                arg.name = 'T'
                arg.i = self._option.data_type
                # Carry over whichever padding spec the conv used.
                if ConverterUtil.get_arg(op,
                                         MaceKeyword.mace_padding_str) \
                        is not None:
                    padding_arg = wt_op.arg.add()
                    padding_arg.name = MaceKeyword.mace_padding_str
                    padding_arg.i = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str).i  # noqa
                elif ConverterUtil.get_arg(op, MaceKeyword.mace_padding_values_str) is not None:  # noqa
                    padding_arg = wt_op.arg.add()
                    padding_arg.name = MaceKeyword.mace_padding_values_str
                    padding_arg.ints.extend(ConverterUtil.get_arg(
                        op, MaceKeyword.mace_padding_values_str).ints)

                # MatMul
                matmul_op = net.op.add()
                matmul_op.name = op.name + '_matmul'
                matmul_op.type = MaceOp.MatMul.name
                matmul_op.input.extend([op.input[1], wt_op.output[0]])
                matmul_op.output.extend([matmul_op.name])
                matmul_output_shape = matmul_op.output_shape.add()
                matmul_output_shape.dims.extend(
                    [16, out_channels, wt_output_width, 1])

                arg = matmul_op.arg.add()
                arg.name = 'T'
                arg.i = self._option.data_type
                # Tells the runtime the filter operand still needs the
                # Winograd filter transform applied.
                arg = matmul_op.arg.add()
                arg.name = MaceKeyword.mace_winograd_filter_transformed
                arg.i = 1

                # Inverse transform
                iwt_op = net.op.add()
                iwt_op.name = op.name + '_inverse_transform'
                iwt_op.type = MaceOp.WinogradInverseTransform.name
                iwt_op.input.extend([matmul_op.output[0]])
                # biasadd
                if len(op.input) >= 3:
                    iwt_op.input.extend([op.input[2]])
                iwt_op.output.extend(op.output)
                iwt_output_shape = iwt_op.output_shape.add()
                iwt_output_shape.dims.extend(op.output_shape[0].dims)

                arg = iwt_op.arg.add()
                arg.name = 'T'
                arg.i = self._option.data_type
                batch_arg = iwt_op.arg.add()
                batch_arg.name = 'batch'
                batch_arg.i = batch
                height_arg = iwt_op.arg.add()
                height_arg.name = 'height'
                height_arg.i = out_height
                width_arg = iwt_op.arg.add()
                width_arg.name = 'width'
                width_arg.i = out_width
                ConverterUtil.add_data_format_arg(iwt_op, data_format)

                # Rewrite the stored filter into OIHW order in place.
                filter_data = np.array(filter.float_data).reshape(
                    filter.dims)
                weight_tensor_value = filter_data
                if filter_format == FilterFormat.HWIO:
                    weight_tensor_value = filter_data.transpose(3, 2, 0, 1)
                elif filter_format == FilterFormat.HWOI:
                    weight_tensor_value = filter_data.transpose(2, 3, 0, 1)
                filter.float_data[:] = weight_tensor_value.flat[:]
                filter.dims[:] = weight_tensor_value.shape[:]

                net.op.remove(op)

    return False
def transform_add_to_biasadd(self):
    """Rewrite a two-input 'Add' whose second operand is a 1-D constant
    as a BiasAdd op. One rewrite per call; True means run again."""
    for op in self._model.op:
        if op.type != 'Add' or len(op.input) != 2:
            continue
        bias = self._consts.get(op.input[1])
        if bias is not None and len(bias.dims) == 1:
            print("Transform add to biasadd: %s(%s)" % (op.name, op.type))
            op.type = MaceOp.BiasAdd.name
            return True

    return False
def fold_biasadd(self):
    # Merge a BiasAdd that solely consumes a bias-capable op's output
    # into that op: the bias becomes the op's third input and the op
    # takes over the BiasAdd's name and output tensor.
    net = self._model
    for op in net.op:
        if ((op.type == MaceOp.Conv2D.name
             or op.type == MaceOp.Deconv2D.name
             or op.type == MaceOp.DepthwiseConv2d.name
             or op.type == MaceOp.FullyConnected.name
             or op.type == MaceOp.WinogradInverseTransform.name)
            and len(op.input) == 2) \
                and len(self._consumers.get(op.output[0], [])) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.BiasAdd.name:
                print("Fold biasadd: %s(%s)" % (op.name, op.type))
                op.name = consumer_op.name
                op.input.append(consumer_op.input[1])
                op.output[0] = consumer_op.output[0]
                net.op.remove(consumer_op)
                return True

    return False
def fold_activation(self):
    # Merge an Activation op into its sole producer (conv/fc/bn/winograd)
    # by copying the activation args onto the producer. PRELU is excluded
    # because it carries a weight tensor and cannot be folded this way.
    net = self._model
    for op in net.op:
        if (op.type == MaceOp.Conv2D.name
            or op.type == MaceOp.Deconv2D.name
            or op.type == MaceOp.DepthwiseConv2d.name
            or op.type == MaceOp.FullyConnected.name
            or op.type == MaceOp.FoldedBatchNorm.name
            or op.type == MaceOp.WinogradInverseTransform.name) \
                and len(self._consumers.get(op.output[0], [])) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.Activation.name \
                    and ConverterUtil.get_arg(
                        consumer_op,
                        MaceKeyword.mace_activation_type_str).s != 'PRELU':
                print("Fold activation: %s(%s)" % (op.name, op.type))
                op.name = consumer_op.name
                op.output[0] = consumer_op.output[0]
                for arg in consumer_op.arg:
                    if arg.name == MaceKeyword.mace_activation_type_str \
                            or arg.name == MaceKeyword.mace_activation_max_limit_str:  # noqa
                        op.arg.extend([arg])

                net.op.remove(consumer_op)
                return True

    return False
def transpose_data_format(self):
    # Retarget every op from its recorded data format to
    # self._target_data_format: layout-sensitive args (Pad paddings,
    # Concat/Slice axis) are permuted, 4-D output shapes are transposed,
    # and for NCHW targets explicit Transpose ops are inserted at the
    # model boundary so external I/O stays NHWC.
    net = self._model

    for op in net.op:
        # transpose args
        if op.type == MaceOp.Pad.name:
            for arg in op.arg:
                if arg.name == MaceKeyword.mace_paddings_str \
                        and len(arg.ints) == 4:
                    if ConverterUtil.data_format(op) == DataFormat.NHWC \
                            and self._target_data_format == DataFormat.NCHW:  # noqa
                        print("Transpose pad args: %s(%s)"
                              % (op.name, op.type))
                        self.transpose_shape(arg.ints, [0, 3, 1, 2])
                    elif ConverterUtil.data_format(op) == DataFormat.NCHW \
                            and self._target_data_format == DataFormat.NHWC:  # noqa
                        print("Transpose pad args: %s(%s)"
                              % (op.name, op.type))
                        self.transpose_shape(arg.ints, [0, 2, 3, 1])
        elif op.type == MaceOp.Concat.name or op.type == MaceOp.Slice.name:
            for arg in op.arg:
                if arg.name == MaceKeyword.mace_axis_str:
                    if ConverterUtil.data_format(op) == DataFormat.NHWC \
                            and self._target_data_format == DataFormat.NCHW:  # noqa
                        print("Transpose slice args: %s(%s)"
                              % (op.name, op.type))
                        mace_check(arg.i == 3,
                                   'only support concat at '
                                   'channel dimension')
                        arg.i = 1
                    elif ConverterUtil.data_format(op) == DataFormat.NCHW \
                            and self._target_data_format == DataFormat.NHWC:  # noqa
                        print("Transpose slice args: %s(%s)"
                              % (op.name, op.type))
                        mace_check(arg.i == 1,
                                   "only support concat at "
                                   "channel dimension")
                        arg.i = 3

        # transpose op output shape
        data_format = ConverterUtil.data_format(op)
        if data_format is not None \
                and data_format != self._target_data_format:
            print("Transpose output shapes: %s(%s)" % (op.name, op.type))
            if self._target_data_format == DataFormat.NHWC:
                # NCHW -> NHWC
                for output_shape in op.output_shape:
                    if len(output_shape.dims) == 4:
                        self.transpose_shape(output_shape.dims,
                                             [0, 2, 3, 1])
            else:
                # NHWC -> NCHW
                for output_shape in op.output_shape:
                    if len(output_shape.dims) == 4:
                        self.transpose_shape(output_shape.dims,
                                             [0, 3, 1, 2])
            ConverterUtil.get_arg(op,
                                  MaceKeyword.mace_data_format_str).i = \
                self._target_data_format.value

    # transpose input/output
    if self._target_data_format == DataFormat.NCHW:
        print("Transpose input/output to NCHW")
        for input_node in self._option.input_nodes.values():
            new_input_name = MaceKeyword.mace_input_node_name \
                             + '_' + input_node.name
            op = net.op.add()
            op.name = self.normalize_op_name(input_node.name)
            op.type = MaceOp.Transpose.name
            op.input.extend([new_input_name])
            op.output.extend([input_node.name])
            output_shape = op.output_shape.add()
            output_shape.dims.extend(input_node.shape)
            dims_arg = op.arg.add()
            dims_arg.name = MaceKeyword.mace_dims_str
            dims_arg.ints.extend([0, 3, 1, 2])
            arg = op.arg.add()
            arg.name = 'T'
            arg.i = self._option.data_type
        for output_node in self._option.output_nodes.values():
            output_name = MaceKeyword.mace_output_node_name \
                          + '_' + output_node.name
            op = self._model.op.add()
            op.name = self.normalize_op_name(output_name)
            op.type = MaceOp.Transpose.name
            op.input.extend([output_node.name])
            op.output.extend([output_name])
            output_shape = op.output_shape.add()
            output_shape.dims.extend(
                self._producer[output_node.name].output_shape[0].dims)
            # Producer's shape is NCHW at this point; report NHWC.
            self.transpose_shape(output_shape.dims, [0, 2, 3, 1])
            dims_arg = op.arg.add()
            dims_arg.name = MaceKeyword.mace_dims_str
            dims_arg.ints.extend([0, 2, 3, 1])
            arg = op.arg.add()
            arg.name = 'T'
            arg.i = self._option.data_type
    return False
def
transpose_filters
(
self
):
net
=
self
.
_model
filter_format
=
self
.
filter_format
()
# TODO(liyin/liuqi): remove this if-condition after combine cpu/gpu
if
self
.
_option
.
device
==
mace_pb2
.
CPU
:
print
(
"Transpose filters to OIHW"
)
# transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM)
if
filter_format
==
FilterFormat
.
HWIO
:
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
Conv2D
.
name
\
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
\
or
op
.
type
==
MaceOp
.
DepthwiseConv2d
.
name
:
if
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_winograd_filter_transformed
)
is
None
:
# noqa
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter
.
dims
)
filter_data
=
filter_data
.
transpose
(
3
,
2
,
0
,
1
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
self
.
set_filter_format
(
FilterFormat
.
OIHW
)
elif
self
.
_option
.
device
==
mace_pb2
.
GPU
:
# TODO(liyin/liuqi): remove this whole logic after combine cpu/gpu
print
(
"Transpose filters to HWOI/HWIM"
)
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
Conv2D
.
name
\
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
\
or
op
.
type
==
MaceOp
.
DepthwiseConv2d
.
name
:
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter
.
dims
)
# transpose filter to HWOI/HWIM for
# tensorflow and caffe (OIHW/MIHW)
if
filter_format
==
FilterFormat
.
HWIO
\
and
(
op
.
type
==
MaceOp
.
Conv2D
.
name
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
):
filter_data
=
filter_data
.
transpose
(
0
,
1
,
3
,
2
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
elif
filter_format
==
FilterFormat
.
OIHW
:
if
op
.
type
==
MaceOp
.
Conv2D
.
name
\
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
:
filter_data
=
filter_data
.
transpose
(
2
,
3
,
0
,
1
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
elif
op
.
type
==
MaceOp
.
Depthwiseconv2d
.
name
:
filter_data
=
filter_data
.
transpose
(
2
,
3
,
1
,
0
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
if
op
.
type
==
MaceOp
.
FullyConnected
.
name
:
weight
=
self
.
_consts
[
op
.
input
[
1
]]
input_shape
=
list
(
self
.
_producer
[
op
.
input
[
0
]]
.
output_shape
[
0
].
dims
)
weight_shape
=
[
weight
.
dims
[
0
]]
+
input_shape
[
1
:]
# OCHW -> OHWC
weight_data
=
np
.
array
(
weight
.
float_data
).
reshape
(
weight_shape
)
weight_data
=
weight_data
.
transpose
(
0
,
2
,
3
,
1
)
weight
.
float_data
[:]
=
weight_data
.
flat
self
.
set_filter_format
(
FilterFormat
.
HWOI
)
return
False
def
buffer_to_image
(
self
,
op
,
input_idx
,
input_type
):
net
=
self
.
_model
input_name
=
op
.
input
[
input_idx
]
op_def
=
net
.
op
.
add
()
op_def
.
name
=
input_name
.
replace
(
':'
,
'_'
)
+
"_b2i"
output_name
=
op_def
.
name
op_def
.
type
=
MaceKeyword
.
mace_buffer_to_image
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_buffer_type
arg
.
i
=
input_type
.
value
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_mode
arg
.
i
=
0
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
op
.
input
[
input_idx
]
=
output_name
    def transform_buffer_image(self):
        """GPU-only pass: wrap constant/argument inputs of image-consuming ops
        in BufferToImage ops, and add BufferToImage / ImageToBuffer ops for
        the model's external inputs and outputs.

        Returns False (pass never needs a re-run). No-op on non-GPU devices.
        """
        if self._option.device != mace_pb2.GPU:
            return False
        print("Transform buffer to image")

        net = self._model
        for op in net.op:
            # Per-op constant inputs: filter at index 1, optional bias at
            # index 2 (3 inputs means a bias is present).
            if op.type == MaceOp.Conv2D.name \
                    or op.type == MaceOp.Deconv2D.name:
                self.buffer_to_image(op, 1, OpenCLBufferType.CONV2D_FILTER)
                if len(op.input) >= 3:
                    self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
            elif op.type == MaceOp.DepthwiseConv2d.name:
                self.buffer_to_image(op, 1, OpenCLBufferType.DW_CONV2D_FILTER)
                if len(op.input) >= 3:
                    self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
            elif op.type == MaceOp.BiasAdd.name:
                self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)
            elif op.type == MaceOp.FoldedBatchNorm.name:
                # scale (1), offset (2) and optional extra argument (3).
                self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)
                self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
                if len(op.input) >= 4:
                    self.buffer_to_image(op, 3, OpenCLBufferType.ARGUMENT)
            elif op.type == MaceOp.MatMul.name and \
                    ConverterUtil.get_arg(op, MaceKeyword.mace_winograd_filter_transformed) is not None:  # noqa
                # Winograd path: the pre-transformed filter is input 0.
                self.buffer_to_image(op, 0, OpenCLBufferType.WINOGRAD_FILTER)
            elif op.type == MaceOp.WinogradInverseTransform.name \
                    and len(op.input) >= 2:
                self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)
            elif op.type == MaceOp.FullyConnected.name:
                self.buffer_to_image(op, 1, OpenCLBufferType.WEIGHT_WIDTH)
                if len(op.input) >= 3:
                    self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
            elif op.type == MaceOp.Activation.name:
                # Only PRELU carries a learned alpha tensor as input 1.
                if ConverterUtil.get_arg(op, MaceKeyword.mace_activation_type_str).s == ActivationType.PRELU.name:  # noqa
                    self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)

        # External inputs: feed data through a BufferToImage op whose output
        # keeps the original tensor name, so downstream ops are untouched.
        for input_node in self._option.input_nodes.values():
            new_input_name = MaceKeyword.mace_input_node_name \
                             + '_' + input_node.name
            op_def = self._model.op.add()
            op_def.name = self.normalize_op_name(input_node.name)
            op_def.type = MaceKeyword.mace_buffer_to_image
            op_def.input.extend([new_input_name])
            op_def.output.extend([input_node.name])
            output_shape = op_def.output_shape.add()
            output_shape.dims.extend(input_node.shape)
            arg = op_def.arg.add()
            arg.name = MaceKeyword.mace_buffer_type
            arg.i = OpenCLBufferType.IN_OUT_CHANNEL.value
            arg = op_def.arg.add()
            arg.name = 'T'
            arg.i = self._option.data_type

        # External outputs: convert the image back to a plain buffer under a
        # prefixed output tensor name.
        for output_node in self._option.output_nodes.values():
            output_name = MaceKeyword.mace_output_node_name \
                          + '_' + output_node.name
            op_def = self._model.op.add()
            op_def.name = self.normalize_op_name(output_name)
            op_def.type = MaceKeyword.mace_image_to_buffer
            op_def.input.extend([output_node.name])
            op_def.output.extend([output_name])
            output_shape = op_def.output_shape.add()
            output_shape.dims.extend(output_node.shape)
            arg = op_def.arg.add()
            arg.name = MaceKeyword.mace_buffer_type
            arg.i = OpenCLBufferType.IN_OUT_CHANNEL.value
            arg = op_def.arg.add()
            arg.name = 'T'
            arg.i = self._option.data_type

        return False
    def fold_softmax(self):
        """Fold Reshape ops adjacent to a Softmax into the Softmax itself.

        For every Softmax op:
          * if its sole consumer is a Reshape, adopt the Reshape's target
            shape as the softmax output shape and delete the Reshape;
          * if its producer is a Reshape, read directly from the Reshape's
            input and delete the Reshape;
          * left-pad the output shape with 1s up to rank 4
            (e.g. [N, C] -> [1, 1, N, C]).

        Returns True when anything changed (presumably signalling the driver
        to re-run the transform passes — confirm against the transform loop),
        otherwise False.
        """
        changed = False
        net = self._model
        for op in net.op:
            if op.type == MaceOp.Softmax.name:
                print("Fold softmax: %s(%s)" % (op.name, op.type))
                # Trailing Reshape: only safe when softmax has exactly one
                # consumer.
                if self.consumer_count(op.output[0]) == 1:
                    consumer = self._consumers[op.output[0]][0]
                    if consumer.type == MaceOp.Reshape.name:
                        shape = ConverterUtil.get_arg(consumer, MaceKeyword.mace_shape_str).ints  # noqa
                        del op.output_shape[0].dims[:]
                        op.output_shape[0].dims.extend(shape)
                        self.replace_output_node(consumer)
                        net.op.remove(consumer)
                        changed = True

                # Leading Reshape: bypass it entirely.
                # NOTE(review): self._consumers/_producer are not refreshed
                # here after removals; presumably the driver rebuilds them
                # between passes — confirm.
                producer = self._producer[op.input[0]]
                if producer.type == MaceOp.Reshape.name:
                    op.input[0] = producer.input[0]
                    self.replace_output_node(producer)
                    net.op.remove(producer)
                    changed = True

                # Normalize the output shape to rank 4 by left-padding with 1s.
                if len(op.output_shape[0].dims) < 4:
                    shape = ([1, 1, 1, 1]
                             + list(op.output_shape[0].dims))[-4:]
                    op.output_shape[0].dims[:] = shape[:]
                    changed = True

        if changed:
            return True
        return False
    def transform_global_conv_to_fc(self):
        """Transform global conv to fc should be placed after transposing
        input/output and filter.

        Rewrites a Conv2D whose kernel covers the entire input feature map
        (kernel H/W equal to input H/W, zero padding) as a FullyConnected op,
        collapsing the filter dims to [out_channels, in*kh*kw].
        CPU-only: skipped on GPU. Falls off the end (returns None, which the
        transform driver treats the same as False).
        """
        if self._option.device == mace_pb2.GPU:
            return False
        net = self._model
        for op in net.op:
            if op.type == MaceOp.Conv2D.name:
                producer = self._producer[op.input[0]]
                input_shape = producer.output_shape[0].dims
                # Helpers normalize the layout-dependent dim order;
                # contracts defined elsewhere in this class.
                batch, height, width, channels = self.sort_feature_map_shape(
                    input_shape, ConverterUtil.data_format(producer))
                filter = self._consts[op.input[1]]
                filter_shape = filter.dims
                filter_height, filter_width, in_channels, out_channels = \
                    self.sort_filter_shape(filter_shape, self.filter_format())

                # The rewrite is only valid with no padding: an explicit
                # padding mode other than VALID, or any non-zero padding
                # value, disqualifies the op.
                zero_padding = True
                padding_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str)  # noqa
                if padding_arg is not None:
                    if padding_arg.i != PaddingMode.VALID.value:
                        zero_padding = False
                else:
                    padding_value_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_values_str)  # noqa
                    if padding_value_arg is not None:
                        if not all(v == 0 for v in padding_value_arg.ints):
                            zero_padding = False

                if height == filter_height and width == filter_width \
                        and zero_padding:
                    print("transform global conv to fc %s(%s)"
                          % (op.name, op.type))
                    op.type = MaceOp.FullyConnected.name
                    # Collapse the filter to a 2-D weight:
                    # [O, in_channels * kw * kh]. Element data is reused
                    # as-is; only dims change.
                    filter.dims[:] = [out_channels,
                                      in_channels * filter_width
                                      * filter_height][:]
def
sort_dfs
(
self
,
op
,
visited
,
sorted_nodes
):
visited
.
update
([
op
.
name
])
if
len
(
op
.
input
)
>
0
:
for
input_tensor
in
op
.
input
:
producer_op
=
self
.
_producer
.
get
(
input_tensor
,
None
)
if
producer_op
is
None
:
pass
elif
producer_op
.
name
not
in
visited
:
self
.
sort_dfs
(
producer_op
,
visited
,
sorted_nodes
)
sorted_nodes
.
append
(
op
)
def
sort_by_execution
(
self
):
print
(
"Sort by execution"
)
net
=
self
.
_model
visited
=
set
()
sorted_nodes
=
[]
for
output_node
in
self
.
_option
.
output_nodes
:
output_tensor
=
MaceKeyword
.
mace_output_node_name
\
+
'_'
+
output_node
mace_check
(
output_tensor
in
self
.
_producer
,
"output_tensor %s not existed in model"
%
output_tensor
)
self
.
sort_dfs
(
self
.
_producer
[
output_tensor
],
visited
,
sorted_nodes
)
del
net
.
op
[:]
net
.
op
.
extend
(
sorted_nodes
)
return
False
mace/python/tools/memory_optimizer.py
浏览文件 @
3e82ad67
...
...
@@ -129,7 +129,7 @@ class MemoryOptimizer(object):
self
.
idle_mem
.
remove
(
mem_id
)
if
mem_id
==
-
1
:
mem_id
=
self
.
total_mem_count
mem_id
=
self
.
mem_id_base
()
+
self
.
total_mem_count
self
.
total_mem_count
+=
1
self
.
mem_block
[
mem_id
]
=
op_mem_block
...
...
@@ -147,10 +147,13 @@ class MemoryOptimizer(object):
self
.
add_net_mem_blocks
()
print
(
'total op: %d'
,
len
(
self
.
net_def
.
op
))
print
(
'origin mem: %d, optimized mem: %d'
,
print
(
"total op: %d"
%
len
(
self
.
net_def
.
op
))
print
(
"origin mem: %d, optimized mem: %d"
%
(
self
.
get_total_origin_mem_size
(),
self
.
get_total_optimized_mem_size
())
self
.
get_total_optimized_mem_size
()))
def
mem_id_base
(
self
):
return
0
class
GPUMemoryOptimizer
(
MemoryOptimizer
):
...
...
@@ -189,6 +192,9 @@ class GPUMemoryOptimizer(MemoryOptimizer):
block
.
x
=
self
.
mem_block
[
mem
][
0
]
block
.
y
=
self
.
mem_block
[
mem
][
1
]
def
mem_id_base
(
self
):
return
20000
def
optimize_gpu_memory
(
net_def
):
mem_optimizer
=
GPUMemoryOptimizer
(
net_def
)
...
...
mace/python/tools/source_converter_lib.py
浏览文件 @
3e82ad67
...
...
@@ -84,11 +84,20 @@ def obfuscate_name(net_def):
op
.
output
[
i
]
=
in_out_map
[
op
.
output
[
i
]]
def normalize_op_name(op_name):
    """Strip the trailing ':port' suffix from a TF-style tensor/op name.

    Everything after the LAST ':' is dropped; a name with no ':' is
    returned unchanged.
    """
    head, sep, _tail = op_name.rpartition(':')
    return head if sep else op_name
def
rename_tensor
(
net_def
):
tensor_map
=
{}
for
t
in
net_def
.
tensors
:
if
t
.
name
not
in
tensor_map
:
tensor_map
[
t
.
name
]
=
"_"
+
t
.
name
[:
-
2
].
replace
(
"/"
,
"_"
)
tensor_map
[
t
.
name
]
=
"_"
+
normalize_op_name
(
t
.
name
).
replace
(
"/"
,
"_"
)
t
.
name
=
tensor_map
[
t
.
name
]
for
op
in
net_def
.
op
:
for
i
in
range
(
len
(
op
.
input
)):
...
...
@@ -118,6 +127,8 @@ class TensorInfo:
elif
t
.
data_type
==
mace_pb2
.
DT_UINT8
:
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
else
:
raise
Exception
(
'Tensor data type %s not supported'
%
t
.
data_type
)
def
stringfy
(
value
):
...
...
mace/python/tools/tf_converter_lib.py
已删除
100644 → 0
浏览文件 @
04f7a34a
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
mace.proto
import
mace_pb2
import
tensorflow
as
tf
import
numpy
as
np
import
math
import
copy
from
tensorflow
import
gfile
from
mace.python.tools
import
memory_optimizer
from
tensorflow.core.framework
import
graph_pb2
from
tensorflow.core.framework
import
tensor_shape_pb2
padding_mode
=
{
'VALID'
:
0
,
'SAME'
:
1
,
'FULL'
:
2
}
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
# the order should be the same as
# eltwise type's in mace/kernels/eltwise.h
# and also cwise type's in mace/kernels/cwise.h
# cuz these math ops should have compatible with "EltWise" and "CWise"
math_type_mode
=
{
'ADD'
:
0
,
'SUB'
:
1
,
'MUL'
:
2
,
'DIV'
:
3
,
'MIN'
:
4
,
'MAX'
:
5
,
'NEG'
:
6
,
'ABS'
:
7
,
'SQR_DIFF'
:
8
,
'POW'
:
9
,
}
buffer_type_map
=
{
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
activation_name_map
=
{
'Relu'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'Tanh'
:
'TANH'
,
'Relu6'
:
'RELUX'
}
BATCH_NORM_ORDER
=
[
"Add"
,
"Rsqrt"
,
"Mul"
,
"Mul"
,
"Mul"
,
"Sub"
,
"Add"
]
MACE_INPUT_NODE_NAME
=
"mace_input_node"
MACE_OUTPUT_NODE_NAME
=
"mace_output_node"
OPENCL_IMAGE_MAX_SIZE
=
16384
def get_input_tensor(op, index):
    """Return input `index` of `op`, looking through any chain of Reshape
    ops to the underlying source tensor."""
    tensor = op.inputs[index]
    while tensor.op.type == 'Reshape':
        tensor = tensor.op.inputs[0]
    return tensor
class
TFConverter
(
object
):
def
__init__
(
self
,
graph
,
tf_ops
,
net_def
,
dt
,
device
,
winograd
):
self
.
graph
=
graph
self
.
net_def
=
net_def
self
.
tf_ops
=
tf_ops
self
.
dt
=
dt
self
.
device
=
device
self
.
winograd
=
winograd
self
.
tf_graph
=
{}
self
.
tf_parents
=
{}
self
.
resolved_ops
=
{}
self
.
unused_tensor
=
set
()
self
.
transpose_filter_tensor
=
{}
self
.
reshape_tensor
=
{}
self
.
ops
=
{}
for
op
in
tf_ops
:
self
.
ops
[
op
.
name
]
=
op
for
op
in
tf_ops
:
self
.
resolved_ops
[
op
.
name
]
=
0
for
input
in
op
.
inputs
:
input_name
=
input
.
name
[:
-
2
]
if
input_name
not
in
self
.
tf_graph
:
self
.
tf_graph
[
input_name
]
=
[]
self
.
tf_graph
[
input_name
].
append
(
op
)
if
op
.
name
not
in
self
.
tf_parents
:
self
.
tf_parents
[
op
.
name
]
=
[]
self
.
tf_parents
[
op
.
name
].
append
(
self
.
ops
[
input_name
])
def
add_buffer_to_image
(
self
,
input_name
,
input_type
):
output_name
=
input_name
[:
-
2
]
+
"_b2i"
+
input_name
[
-
2
:]
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'buffer_type'
arg
.
i
=
buffer_type_map
[
input_type
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'mode'
arg
.
i
=
0
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
return
output_name
def
add_image_to_buffer
(
self
,
input_name
,
input_type
):
output_name
=
input_name
[:
-
2
]
+
"_i2b"
+
input_name
[
-
2
:]
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'buffer_type'
arg
.
i
=
buffer_type_map
[
input_type
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
return
output_name
def
add_gpu_input_transform
(
self
,
names
):
for
name
in
names
:
new_input_name
=
MACE_INPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
name
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT_CHANNEL'
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
self
.
add_output_shape
(
self
.
ops
[
name
].
outputs
,
op_def
)
def
add_cpu_input_transform
(
self
,
names
):
for
name
in
names
:
new_input_name
=
MACE_INPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
name
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
.
name
=
'dims'
dims_arg
.
ints
.
extend
([
0
,
3
,
1
,
2
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
self
.
add_output_shape
(
self
.
ops
[
name
].
outputs
,
op_def
)
def
add_gpu_output_transform
(
self
,
names
):
for
name
in
names
:
output_name
=
MACE_OUTPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT_CHANNEL'
]
def
add_cpu_output_transform
(
self
,
names
):
for
name
in
names
:
output_name
=
MACE_OUTPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
.
name
=
'dims'
dims_arg
.
ints
.
extend
([
0
,
2
,
3
,
1
])
output_shapes
=
[]
for
output
in
self
.
ops
[
name
].
outputs
:
old_shape
=
output
.
shape
.
as_list
()
# NCHW -> NHWC
if
len
(
old_shape
)
==
2
:
new_shape
=
[
old_shape
[
0
],
1
,
1
,
old_shape
[
1
]]
else
:
new_shape
=
[
old_shape
[
0
],
old_shape
[
2
],
old_shape
[
3
],
old_shape
[
1
]]
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
new_shape
)
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
def
add_output_shape
(
self
,
outputs
,
op
):
output_shapes
=
[]
for
output
in
outputs
:
old_shape
=
[]
if
isinstance
(
output
,
list
):
old_shape
=
output
elif
isinstance
(
output
,
tf
.
Tensor
):
if
output
.
shape
.
num_elements
()
is
not
None
:
old_shape
=
output
.
shape
.
as_list
()
else
:
raise
ValueError
(
'output type not supported: '
,
type
(
output
))
if
len
(
old_shape
)
==
2
:
old_shape
=
[
old_shape
[
0
],
old_shape
[
1
],
1
,
1
]
if
self
.
device
==
'cpu'
:
# NHWC -> NCHW
old_shape
=
[
old_shape
[
0
],
old_shape
[
3
],
old_shape
[
1
],
old_shape
[
2
]]
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
old_shape
)
output_shapes
.
append
(
output_shape
)
op
.
output_shape
.
extend
(
output_shapes
)
def
add_tensor
(
self
,
name
,
shape
,
tf_dt
,
value
):
tensor
=
self
.
net_def
.
tensors
.
add
()
tensor
.
name
=
name
shape
=
list
(
shape
)
tensor
.
dims
.
extend
(
shape
)
if
tf_dt
==
tf
.
float32
:
tensor
.
data_type
=
mace_pb2
.
DT_FLOAT
tensor
.
float_data
.
extend
(
value
.
flat
)
elif
tf_dt
==
tf
.
int32
:
tensor
.
data_type
=
mace_pb2
.
DT_INT32
tensor
.
int32_data
.
extend
(
value
.
flat
)
else
:
raise
Exception
(
"Not supported tensor type: "
+
tf_dt
.
name
)
def
convert_reshape
(
self
,
op
):
input_tensor
=
get_input_tensor
(
op
,
0
)
shape_tensor
=
get_input_tensor
(
op
,
1
)
shape_value
=
shape_tensor
.
eval
().
astype
(
np
.
int32
)
self
.
unused_tensor
.
add
(
shape_tensor
.
name
)
self
.
reshape_tensor
[
input_tensor
.
name
]
=
shape_value
self
.
resolved_ops
[
op
.
name
]
=
1
def
convert_tensor
(
self
,
op
):
output_name
=
op
.
outputs
[
0
].
name
if
output_name
not
in
self
.
unused_tensor
:
tensor
=
self
.
net_def
.
tensors
.
add
()
tf_tensor
=
op
.
outputs
[
0
].
eval
()
if
output_name
in
self
.
transpose_filter_tensor
:
tf_tensor
=
tf_tensor
.
transpose
(
self
.
transpose_filter_tensor
[
output_name
])
if
output_name
in
self
.
reshape_tensor
:
tf_tensor
=
tf_tensor
.
reshape
(
self
.
reshape_tensor
[
output_name
])
tensor
.
name
=
op
.
outputs
[
0
].
name
shape
=
list
(
tf_tensor
.
shape
)
tensor
.
dims
.
extend
(
shape
)
tf_dt
=
op
.
get_attr
(
'dtype'
)
if
tf_dt
==
tf
.
float32
:
tensor
.
data_type
=
mace_pb2
.
DT_FLOAT
tensor
.
float_data
.
extend
(
tf_tensor
.
astype
(
np
.
float32
).
flat
)
elif
tf_dt
==
tf
.
int32
:
tensor
.
data_type
=
mace_pb2
.
DT_INT32
tensor
.
int32_data
.
extend
(
tf_tensor
.
astype
(
np
.
int32
).
flat
)
else
:
raise
Exception
(
"Not supported tensor type: "
+
tf_dt
.
name
)
self
.
resolved_ops
[
op
.
name
]
=
1
def
check_winograd_conv
(
self
,
op
):
filter_shape
=
get_input_tensor
(
op
,
1
).
shape
.
as_list
()
strides
=
op
.
get_attr
(
'strides'
)[
1
:
3
]
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
if
len
(
output_shape
)
==
0
or
output_shape
[
0
]
is
None
:
return
False
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
if
self
.
winograd
and
op
.
type
!=
'DepthwiseConv2dNative'
and
\
filter_shape
[
0
]
==
3
and
\
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
]):
if
self
.
device
==
'gpu'
:
return
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
width
<
OPENCL_IMAGE_MAX_SIZE
)
elif
self
.
device
==
'cpu'
:
return
filter_shape
[
2
]
>=
8
and
filter_shape
[
3
]
>=
8
return
False
def
convert_winograd_conv_gpu
(
self
,
op
):
filter_tensor
=
get_input_tensor
(
op
,
1
)
filter_shape
=
filter_tensor
.
shape
.
as_list
()
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
self
.
transpose_filter_tensor
[
filter_tensor
.
name
]
=
(
3
,
2
,
0
,
1
)
filter_name
=
self
.
add_buffer_to_image
(
op
.
inputs
[
1
].
name
,
"WINOGRAD_FILTER"
)
# Input transform
wt_op
=
mace_pb2
.
OperatorDef
()
arg
=
wt_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
padding_arg
=
wt_op
.
arg
.
add
()
padding_arg
.
name
=
'padding'
padding_arg
.
i
=
padding_mode
[
op
.
get_attr
(
'padding'
)]
wt_op
.
name
=
op
.
name
+
'_input_transform'
wt_op
.
type
=
'WinogradTransform'
wt_op
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
wt_output_name
=
wt_op
.
name
+
":0"
wt_op
.
output
.
extend
([
wt_output_name
])
wt_output_shape
=
mace_pb2
.
OutputShape
()
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
wt_op
.
output_shape
.
extend
([
wt_output_shape
])
# MatMul
matmul_op
=
mace_pb2
.
OperatorDef
()
arg
=
matmul_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
matmul_op
.
name
=
op
.
name
+
'_matmul'
matmul_op
.
type
=
'MatMul'
matmul_op
.
input
.
extend
([
filter_name
,
wt_output_name
])
matmul_output_name
=
matmul_op
.
name
+
":0"
matmul_op
.
output
.
extend
([
matmul_output_name
])
matmul_output_shape
=
mace_pb2
.
OutputShape
()
matmul_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
matmul_op
.
output_shape
.
extend
([
matmul_output_shape
])
# Inverse transform
iwt_op
=
mace_pb2
.
OperatorDef
()
arg
=
iwt_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
batch_arg
=
iwt_op
.
arg
.
add
()
batch_arg
.
name
=
'batch'
batch_arg
.
i
=
output_shape
[
0
]
height_arg
=
iwt_op
.
arg
.
add
()
height_arg
.
name
=
'height'
height_arg
.
i
=
output_shape
[
1
]
width_arg
=
iwt_op
.
arg
.
add
()
width_arg
.
name
=
'width'
width_arg
.
i
=
output_shape
[
2
]
iwt_op
.
name
=
op
.
name
+
'_inverse_transform'
iwt_op
.
type
=
'WinogradInverseTransform'
iwt_op
.
input
.
extend
([
matmul_output_name
])
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
op
.
name
]
)
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
iwt_op
.
input
.
extend
([
output_name
])
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
fused_act_arg
=
iwt_op
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
if
activation_op
.
type
==
'Relu6'
:
max_limit_arg
=
iwt_op
.
arg
.
add
()
max_limit_arg
.
name
=
'max_limit'
max_limit_arg
.
f
=
6
final_op
=
activation_op
self
.
resolved_ops
[
activation_op
.
name
]
=
1
iwt_op
.
output
.
extend
([
output
.
name
for
output
in
final_op
.
outputs
])
self
.
add_output_shape
(
final_op
.
outputs
,
iwt_op
)
self
.
net_def
.
op
.
extend
([
wt_op
,
matmul_op
,
iwt_op
])
def
convert_conv_winograd_filter_cpu
(
self
,
op
,
op_def
):
weight_tensor
=
get_input_tensor
(
op
,
1
)
weight_tensor_value
=
weight_tensor
.
eval
().
astype
(
np
.
float32
)
input_shape
=
get_input_tensor
(
op
,
0
).
shape
.
as_list
()
output_channels
=
weight_tensor_value
.
shape
[
3
]
input_channels
=
weight_tensor_value
.
shape
[
2
]
# HWIO -> OIHW
weight_tensor_value
=
weight_tensor_value
.
transpose
(
3
,
2
,
0
,
1
)
if
input_shape
[
1
]
>
16
and
input_shape
[
2
]
>
16
:
G
=
np
.
array
([
[
1.0
,
0.0
,
0.0
],
[
-
2.0
/
9
,
-
2.0
/
9
,
-
2.0
/
9
],
[
-
2.0
/
9
,
2.0
/
9
,
-
2.0
/
9
],
[
1.0
/
90
,
1.0
/
45
,
2.0
/
45
],
[
1.0
/
90
,
-
1.0
/
45
,
2.0
/
45
],
[
1.0
/
45
,
1.0
/
90
,
1.0
/
180
],
[
1.0
/
45
,
-
1.0
/
90
,
1.0
/
180
],
[
0.0
,
0.0
,
1.0
]
],
dtype
=
np
.
float32
)
new_shape
=
[
64
,
output_channels
,
input_channels
]
# TOC
else
:
G
=
np
.
array
([
[
1.0
,
0.0
,
0.0
],
[
0.5
,
0.5
,
0.5
],
[
0.5
,
-
0.5
,
0.5
],
[
0.0
,
0.0
,
1.0
],
],
dtype
=
np
.
float32
)
new_shape
=
[
16
,
output_channels
,
input_channels
]
# TOC
new_weight_value
=
G
.
dot
(
weight_tensor_value
).
dot
(
G
.
T
)
# [t, O, I, t]
new_weight_value
=
new_weight_value
.
transpose
(
0
,
3
,
1
,
2
)
new_weight_value
=
new_weight_value
.
reshape
(
new_shape
)
new_tensor_name
=
weight_tensor
.
name
[:
-
2
]
+
'/winograd_transformed:0'
self
.
add_tensor
(
new_tensor_name
,
new_shape
,
tf
.
float32
,
new_weight_value
)
winograd_transformed_arg
=
op_def
.
arg
.
add
()
winograd_transformed_arg
.
name
=
'is_filter_transformed'
winograd_transformed_arg
.
i
=
1
self
.
unused_tensor
.
add
(
weight_tensor
.
name
)
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
op_def
.
input
.
extend
([
new_tensor_name
])
def
convert_conv2d
(
self
,
op
):
use_winograd
=
False
if
self
.
device
==
'cpu'
:
use_winograd
=
self
.
check_winograd_conv
(
op
)
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
op_def
.
name
=
op
.
name
if
op
.
type
==
'DepthwiseConv2dNative'
:
op_def
.
type
=
'DepthwiseConv2d'
else
:
op_def
.
type
=
op
.
type
if
self
.
device
==
'cpu'
and
not
use_winograd
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
elif
op
.
type
==
'Conv2D'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'gpu'
:
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
if
op_def
.
type
==
'DepthwiseConv2d'
:
buffer_type
=
"DW_CONV2D_FILTER"
else
:
buffer_type
=
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
elif
self
.
device
==
'cpu'
and
use_winograd
:
self
.
convert_conv_winograd_filter_cpu
(
op
,
op_def
)
else
:
op_def
.
input
.
extend
(
[
get_input_tensor
(
op
,
i
).
name
for
i
in
range
(
len
(
op
.
inputs
))])
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding'
padding_arg
.
i
=
padding_mode
[
op
.
get_attr
(
'padding'
)]
strides_arg
=
op_def
.
arg
.
add
()
strides_arg
.
name
=
'strides'
strides_arg
.
ints
.
extend
(
op
.
get_attr
(
'strides'
)[
1
:
3
])
data_format_arg
=
op_def
.
arg
.
add
()
data_format_arg
.
name
=
'data_format'
if
self
.
device
==
'cpu'
:
data_format_arg
.
s
=
'NCHW'
else
:
data_format_arg
.
s
=
'NHWC'
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
or
\
(
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Add'
and
len
(
self
.
tf_graph
[
op
.
name
][
0
].
inputs
)
==
2
and
len
(
self
.
graph
.
get_tensor_by_name
(
self
.
tf_graph
[
op
.
name
][
0
].
inputs
[
1
].
name
).
shape
)
==
1
):
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
final_op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
if
activation_op
.
type
==
'Relu6'
:
max_limit_arg
=
op_def
.
arg
.
add
()
max_limit_arg
.
name
=
'max_limit'
max_limit_arg
.
f
=
6
final_op
=
activation_op
self
.
resolved_ops
[
activation_op
.
name
]
=
1
op_def
.
output
.
extend
([
output
.
name
for
output
in
final_op
.
outputs
])
self
.
add_output_shape
(
final_op
.
outputs
,
op_def
)
self
.
net_def
.
op
.
extend
([
op_def
])
def convert_deconv2d(self, op):
    """Convert TF Conv2DBackpropInput into a MACE 'Deconv2D' operator.

    Two TF input layouts are handled:
      * 2 inputs (input, filter): output_shape comes from the op attr.
      * 3 inputs (output_shape, filter, input): the shape tensor is
        evaluated to a constant and marked unused.
    The filter tensor is scheduled for a layout permutation matching the
    target device, and on GPU it is staged into an OpenCL image.  A
    single BiasAdd consumer and a single trailing activation consumer
    are fused into the emitted operator.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = 'Deconv2D'
    out_shape_value = None
    if len(op.inputs) == 2:
        # Static output shape carried as an op attribute.
        out_shape_value = op.get_attr('output_shape')
        # Filter permutation differs per device (CPU runs NCHW).
        if self.device == 'cpu':
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (3, 2, 0, 1)
        else:
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (0, 1, 3, 2)
        if self.device == 'gpu':
            op_def.input.extend([op.inputs[0].name])
            buffer_type = "CONV2D_FILTER"
            # Stage the filter into an OpenCL image for the GPU runtime.
            output_name = self.add_buffer_to_image(
                get_input_tensor(op, 1).name, buffer_type)
            op_def.input.extend([output_name])
        else:
            op_def.input.extend(
                [get_input_tensor(op, i).name
                 for i in range(len(op.inputs))])
    elif len(op.inputs) == 3:
        # Input order here is (output_shape, filter, input): the shape
        # tensor is folded into a constant argument below.
        out_shape_value = \
            get_input_tensor(op, 0).eval().astype(np.int32).flat
        self.unused_tensor.add(op.inputs[0].name)
        if self.device == 'cpu':
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (2, 3, 0, 1)
        else:
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (0, 1, 2, 3)
        if self.device == 'gpu':
            op_def.input.extend([op.inputs[2].name])
            buffer_type = "CONV2D_FILTER"
            output_name = self.add_buffer_to_image(
                get_input_tensor(op, 1).name, buffer_type)
            op_def.input.extend([output_name])
        else:
            # CPU keeps (input, filter) ordering.
            op_def.input.extend([op.inputs[2].name])
            op_def.input.extend([op.inputs[1].name])
    else:
        raise Exception('Too many inputs. Op: %s, type: %s' % (op.name,
                                                               op.type))
    if out_shape_value is not None:
        out_shape_arg = op_def.arg.add()
        out_shape_arg.name = 'output_shape'
        out_shape_arg.ints.extend(out_shape_value)
    padding_arg = op_def.arg.add()
    padding_arg.name = 'padding'
    padding_arg.i = padding_mode[op.get_attr('padding')]
    # TF strides are NHWC 4-vectors; only the H/W components are kept.
    strides_arg = op_def.arg.add()
    strides_arg.name = 'strides'
    strides_arg.ints.extend(op.get_attr('strides')[1:3])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    final_op = op
    self.resolved_ops[op.name] = 1
    # Fuse a single BiasAdd consumer, if any.
    if len(self.tf_graph.get(op.name, [])) == 1 and \
            self.tf_graph[op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1
    # Fuse a single trailing activation (Relu6 adds max_limit=6).
    if len(self.tf_graph.get(final_op.name, [])) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def check_conv_to_fc(self, op):
    """Return True when a CPU Conv2D spans the whole input plane and can
    therefore be rewritten as a fully connected layer.

    Requires: the filter's H/W equal the input's H/W, and either VALID
    padding or a 1x1 kernel.
    """
    if self.device != 'cpu':
        return False
    if op.type != "Conv2D":
        return False
    kernel_dims = get_input_tensor(op, 1).shape.as_list()
    in_dims = get_input_tensor(op, 0).shape.as_list()
    covers_plane = (in_dims[1] == kernel_dims[0]
                    and in_dims[2] == kernel_dims[1])
    padding_ok = (op.get_attr('padding') == 'VALID'
                  or (kernel_dims[0] == 1 and kernel_dims[1] == 1))
    return covers_plane and padding_ok
def convert_global_conv_to_fc(self, op):
    """Rewrite a CPU Conv2D whose kernel covers the whole input plane
    (see check_conv_to_fc) as a MACE 'FC' operator.

    The filter is scheduled for a (3, 2, 0, 1) permutation (TF filters
    are assumed HWIO, giving OIHW) and a reshape into
    [out_channels, in_channels * H * W, 1, 1].  BiasAdd and activation
    consumers are fused into the emitted op.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = 'FC'
    # Permutation (3, 2, 0, 1): HWIO filter -> OIHW (assumed TF layout).
    self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
        (3, 2, 0, 1)
    filter_shape = get_input_tensor(op, 1).shape.as_list()
    # Flatten the kernel into a weight matrix, kept 4-D with a 1x1 tail.
    self.reshape_tensor[get_input_tensor(op, 1).name] = \
        [filter_shape[3], filter_shape[2] * filter_shape[1] *
         filter_shape[0], 1, 1]
    op_def.input.extend(
        [get_input_tensor(op, i).name for i in range(len(op.inputs))])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    # This path is only reachable on CPU (see check_conv_to_fc).
    data_format_arg.s = 'NCHW'
    final_op = op
    self.resolved_ops[op.name] = 1
    # Fuse a single BiasAdd consumer, if any.
    if len(self.tf_graph.get(op.name, [])) == 1 and \
            self.tf_graph[op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1
    # Fuse a single trailing activation (Relu6 adds max_limit=6).
    if len(self.tf_graph.get(final_op.name, [])) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def convert_fused_batchnorm(self, op):
    """Convert TF FusedBatchNorm into a MACE 'FoldedBatchNorm' operator.

    Folds (gamma, beta, mean, variance, epsilon) into two new constant
    tensors registered as '<prefix>/scale:0' and '<prefix>/offset:0':
        scale  = gamma / sqrt(var + epsilon)
        offset = beta - mean * scale
    The four original parameter tensors are marked unused.  A single
    trailing activation consumer is fused into the emitted op.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    op_def.name = op.name
    op_def.type = 'FoldedBatchNorm'
    gamma_tensor = get_input_tensor(op, 1)
    # Inputs 1..4 (gamma, beta, mean, variance) must share one shape.
    for i in range(1, 5):
        input_tensor = get_input_tensor(op, i)
        assert input_tensor.shape == gamma_tensor.shape
        self.unused_tensor.add(input_tensor.name)
    gamma_value = get_input_tensor(op, 1).eval().astype(np.float32)
    beta_value = get_input_tensor(op, 2).eval().astype(np.float32)
    mean_value = get_input_tensor(op, 3).eval().astype(np.float32)
    var_value = get_input_tensor(op, 4).eval().astype(np.float32)
    epsilon_value = op.get_attr('epsilon')
    # scale = gamma / sqrt(var + eps); offset = beta - mean * scale.
    scale_value = (
        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value)
    offset_value = (-mean_value * scale_value) + beta_value
    # Name the folded constants next to the original gamma tensor.
    idx = gamma_tensor.name.rfind('/')
    name_prefix = gamma_tensor.name[:idx] + '/'
    input_names = [name_prefix + 'scale:0', name_prefix + 'offset:0']
    self.add_tensor(input_names[0], gamma_value.shape,
                    gamma_tensor.dtype, scale_value)
    self.add_tensor(input_names[1], gamma_value.shape,
                    gamma_tensor.dtype, offset_value)
    op_def.input.extend([op.inputs[0].name])
    if self.device == 'gpu':
        # Stage both folded constants as OpenCL images.
        for name in input_names:
            output_name = self.add_buffer_to_image(name, "ARGUMENT")
            op_def.input.extend([output_name])
    else:
        op_def.input.extend([name for name in input_names])
    self.resolved_ops[op.name] = 1
    final_op = op
    # Fuse a single trailing activation (Relu6 adds max_limit=6).
    # NOTE(review): indexes self.tf_graph[op.name] directly -- raises
    # KeyError if the op has no consumers; confirm that cannot happen.
    if len(self.tf_graph[op.name]) == 1 \
            and self.tf_graph[op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    op_def.output.extend([final_op.outputs[0].name])
    self.add_output_shape([final_op.outputs[0]], op_def)
    self.net_def.op.extend([op_def])
def convert_batchnorm(self, op):
    """Convert the unfused 'batchnorm/mul' + Add pattern into a MACE
    'FoldedBatchNorm' operator.

    ``op`` is the Mul carrying the precomputed scale; its first consumer
    (an Add carrying the offset) is absorbed, and both ops are marked
    resolved.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    op_def.name = op.name
    op_def.type = 'FoldedBatchNorm'
    add_op = self.tf_graph[op.name][0]
    scale_tensor = get_input_tensor(op, 1)
    offset_tensor = get_input_tensor(add_op, 1)
    input_names = [scale_tensor.name, offset_tensor.name]
    op_def.input.extend([op.inputs[0].name])
    if self.device == 'gpu':
        # Stage scale/offset constants as OpenCL images.
        for name in input_names:
            output_name = self.add_buffer_to_image(name, "ARGUMENT")
            op_def.input.extend([output_name])
    else:
        op_def.input.extend([name for name in input_names])
    self.resolved_ops[op.name] = 1
    self.resolved_ops[add_op.name] = 1
    final_op = add_op
    # NOTE(review): this inspects the consumers of the Mul (op.name),
    # whose only child is the Add absorbed above -- so this activation
    # fusion branch looks unreachable; should it use final_op.name?
    if len(self.tf_graph[op.name]) == 1 \
            and self.tf_graph[op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    op_def.output.extend([final_op.outputs[0].name])
    self.add_output_shape([final_op.outputs[0]], op_def)
    self.net_def.op.extend([op_def])
def convert_pooling(self, op):
    """Translate a TF AvgPool/MaxPool op into a MACE 'Pooling' operator,
    carrying kernel size, strides, padding, pooling kind and the
    device-appropriate data format as arguments.
    """
    pool_def = self.net_def.op.add()
    dtype_arg = pool_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    pool_def.name = op.name
    pool_def.type = 'Pooling'
    for tensor in op.inputs:
        pool_def.input.append(tensor.name)
    for tensor in op.outputs:
        pool_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, pool_def)
    # Pooling kind (avg/max) is keyed by the TF op type.
    kind_arg = pool_def.arg.add()
    kind_arg.name = 'pooling_type'
    kind_arg.i = pooling_type_mode[op.type]
    pad_arg = pool_def.arg.add()
    pad_arg.name = 'padding'
    pad_arg.i = padding_mode[op.get_attr('padding')]
    # TF strides/ksize are NHWC 4-vectors; only H and W are kept.
    stride_arg = pool_def.arg.add()
    stride_arg.name = 'strides'
    stride_arg.ints.extend(op.get_attr('strides')[1:3])
    kernel_arg = pool_def.arg.add()
    kernel_arg.name = 'kernels'
    kernel_arg.ints.extend(op.get_attr('ksize')[1:3])
    layout_arg = pool_def.arg.add()
    layout_arg.name = 'data_format'
    layout_arg.s = 'NCHW' if self.device == 'cpu' else 'NHWC'
    self.resolved_ops[op.name] = 1
def convert_global_avg_pooling(self, op):
    """Emit a MACE 'Pooling' operator implementing global average
    pooling: AvgPool with VALID padding, unit strides, and a kernel
    equal to the input's full H/W extent.
    """
    pool_def = self.net_def.op.add()
    dtype_arg = pool_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    pool_def.name = op.name
    pool_def.type = 'Pooling'
    pool_def.input.append(op.inputs[0].name)
    for tensor in op.outputs:
        pool_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, pool_def)
    kind_arg = pool_def.arg.add()
    kind_arg.name = 'pooling_type'
    kind_arg.i = pooling_type_mode['AvgPool']
    pad_arg = pool_def.arg.add()
    pad_arg.name = 'padding'
    pad_arg.i = padding_mode['VALID']
    stride_arg = pool_def.arg.add()
    stride_arg.name = 'strides'
    stride_arg.ints.extend([1, 1])
    # Kernel spans the entire spatial extent of the input.
    kernel_arg = pool_def.arg.add()
    kernel_arg.name = 'kernels'
    kernel_arg.ints.extend(op.inputs[0].shape.as_list()[1:3])
    layout_arg = pool_def.arg.add()
    layout_arg.name = 'data_format'
    layout_arg.s = 'NCHW' if self.device == 'cpu' else 'NHWC'
    self.resolved_ops[op.name] = 1
def convert_activation(self, op):
    """Map a TF activation op (Relu/Sigmoid/Tanh) onto a MACE
    'Activation' operator; the kind is looked up in activation_name_map.
    """
    act_def = self.net_def.op.add()
    dtype_arg = act_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    act_def.name = op.name
    act_def.type = 'Activation'
    kind_arg = act_def.arg.add()
    kind_arg.name = 'activation'
    kind_arg.s = activation_name_map[op.type]
    for tensor in op.inputs:
        act_def.input.append(tensor.name)
    for tensor in op.outputs:
        act_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, act_def)
    self.resolved_ops[op.name] = 1
def convert_relu6(self, op):
    """Convert TF Relu6 into a MACE 'Activation' operator of kind
    "RELUX" with the clamp value carried in 'max_limit' (6)."""
    act_def = self.net_def.op.add()
    dtype_arg = act_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    act_def.name = op.name
    act_def.type = 'Activation'
    for tensor in op.inputs:
        act_def.input.append(tensor.name)
    for tensor in op.outputs:
        act_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, act_def)
    kind_arg = act_def.arg.add()
    kind_arg.name = 'activation'
    kind_arg.s = "RELUX"
    limit_arg = act_def.arg.add()
    limit_arg.name = 'max_limit'
    limit_arg.f = 6
    self.resolved_ops[op.name] = 1
def convert_add(self, op):
    """Convert a TF Add with three or more operands into a MACE 'AddN'
    operator (two-operand Adds go through convert_eltwise instead)."""
    addn_def = self.net_def.op.add()
    dtype_arg = addn_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    addn_def.name = op.name
    addn_def.type = "AddN"
    for tensor in op.inputs:
        addn_def.input.append(tensor.name)
    for tensor in op.outputs:
        addn_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, addn_def)
    self.resolved_ops[op.name] = 1
def convert_concat(self, op):
    """Convert TF ConcatV2 into a MACE 'Concat' operator.

    The last TF input is the axis scalar; it is folded into the 'axis'
    argument and its tensor marked unused.  On CPU the model runs NCHW,
    so an NHWC channel concat (axis 3) is remapped to axis 1.
    """
    concat_def = self.net_def.op.add()
    dtype_arg = concat_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    concat_def.name = op.name
    concat_def.type = "Concat"
    for tensor in op.inputs[:-1]:
        concat_def.input.append(tensor.name)
    for tensor in op.outputs:
        concat_def.output.append(tensor.name)
    axis_arg = concat_def.arg.add()
    axis_arg.name = 'axis'
    axis_index = len(op.inputs) - 1
    axis = get_input_tensor(op, axis_index).eval().astype(np.int32)
    if self.device == 'cpu' and axis == 3:
        axis = 1
    axis_arg.i = axis
    self.add_output_shape(op.outputs, concat_def)
    self.resolved_ops[op.name] = 1
    self.unused_tensor.add(get_input_tensor(op, axis_index).name)
def convert_resize_bilinear(self, op):
    """Convert TF ResizeBilinear into a MACE 'ResizeBilinear' operator.

    The target-size tensor (input 1) is evaluated into the constant
    'size' argument and marked unused; 'align_corners' is carried over.
    """
    resize_def = self.net_def.op.add()
    dtype_arg = resize_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    resize_def.name = op.name
    resize_def.type = "ResizeBilinear"
    resize_def.input.append(op.inputs[0].name)
    for tensor in op.outputs:
        resize_def.output.append(tensor.name)
    size_arg = resize_def.arg.add()
    size_arg.name = 'size'
    size_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    align_arg = resize_def.arg.add()
    align_arg.name = 'align_corners'
    align_arg.i = op.get_attr('align_corners')
    self.add_output_shape(op.outputs, resize_def)
    self.resolved_ops[op.name] = 1
    self.unused_tensor.add(get_input_tensor(op, 1).name)
def convert_eltwise(self, op, math_type):
    """Convert a unary/binary TF math op into a MACE 'Eltwise' operator.

    ``math_type`` is a key of ``math_type_mode`` (e.g. 'ADD', 'MUL',
    'SUB', 'DIV').  For two-input ops, a scalar (0-d) constant operand
    is folded into the float argument 'x' and marked unused; a 1-D
    Const operand is wired in as a tensor input (staged as an OpenCL
    image on GPU).
    """
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = "Eltwise"
    if len(op.inputs) == 2:
        input_tensor0 = get_input_tensor(op, 0)
        input_tensor1 = get_input_tensor(op, 1)
        x_value = None
        if np.asarray(input_tensor1.shape).size == 0:
            # Second operand is a scalar: fold it into arg 'x'.
            x_value = input_tensor1.eval()
            op_def.input.extend([op.inputs[0].name])
            self.unused_tensor.add(input_tensor1.name)
        elif np.asarray(input_tensor0.shape).size == 0:
            # First operand is a scalar: fold it into arg 'x'.
            x_value = input_tensor0.eval()
            op_def.input.extend([op.inputs[1].name])
            self.unused_tensor.add(input_tensor0.name)
        else:
            # NOTE(review): operands that are neither 1-D nor Const are
            # never wired as inputs in this branch -- confirm such
            # operand combinations cannot reach here.
            if np.asarray(input_tensor0.shape).size == 1 \
                    and input_tensor0.op.type == 'Const':
                if self.device == 'gpu':
                    output_name = self.add_buffer_to_image(
                        input_tensor0.name, "ARGUMENT")
                    op_def.input.extend([output_name])
                else:
                    op_def.input.extend([input_tensor0.name])
            if np.asarray(input_tensor1.shape).size == 1 \
                    and input_tensor1.op.type == 'Const':
                if self.device == 'gpu':
                    output_name = self.add_buffer_to_image(
                        input_tensor1.name, "ARGUMENT")
                    op_def.input.extend([output_name])
                else:
                    op_def.input.extend([input_tensor1.name])
        if x_value is not None:
            x_arg = op_def.arg.add()
            x_arg.name = 'x'
            x_arg.f = x_value
    else:
        # Single-operand math ops (e.g. NEG) pass inputs through.
        op_def.input.extend([input.name for input in op.inputs])
    type_arg = op_def.arg.add()
    type_arg.name = 'type'
    type_arg.i = math_type_mode[math_type]
    op_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, op_def)
    self.resolved_ops[op.name] = 1
def convert_depth_to_space(self, op, d2s):
    """Convert TF DepthToSpace/SpaceToDepth; the TF op type is reused
    as the MACE op type and 'block_size' is carried over.

    NOTE(review): the ``d2s`` flag is currently unused -- the direction
    is implied by ``op.type``; kept for signature compatibility.
    """
    block_def = self.net_def.op.add()
    dtype_arg = block_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    block_def.name = op.name
    block_def.type = op.type
    block_def.input.append(op.inputs[0].name)
    for tensor in op.outputs:
        block_def.output.append(tensor.name)
    bs_arg = block_def.arg.add()
    bs_arg.name = 'block_size'
    bs_arg.i = op.get_attr('block_size')
    self.add_output_shape(op.outputs, block_def)
    self.resolved_ops[op.name] = 1
def convert_bias_add(self, op):
    """Convert TF BiasAdd into a MACE 'BiasAdd' operator.

    On GPU the bias tensor is first staged into an OpenCL image via
    add_buffer_to_image and the image output is wired in instead.
    """
    bias_def = mace_pb2.OperatorDef()
    dtype_arg = bias_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    bias_def.name = op.name
    bias_def.type = "BiasAdd"
    bias_def.input.append(op.inputs[0].name)
    bias_name = get_input_tensor(op, 1).name
    if self.device == 'gpu':
        bias_def.input.append(self.add_buffer_to_image(bias_name,
                                                       "ARGUMENT"))
    else:
        bias_def.input.append(bias_name)
    for tensor in op.outputs:
        bias_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, bias_def)
    self.net_def.op.extend([bias_def])
    self.resolved_ops[op.name] = 1
def convert_space_to_batch(self, op, b2s):
    """Convert TF SpaceToBatchND / BatchToSpaceND.

    Both const side inputs are folded into arguments: input 1 becomes
    'block_shape'; input 2 becomes 'crops' (b2s=True, BatchToSpace) or
    'paddings' (b2s=False, SpaceToBatch).  Both are marked unused.
    """
    stb_def = self.net_def.op.add()
    dtype_arg = stb_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    stb_def.name = op.name
    stb_def.type = op.type
    stb_def.input.append(op.inputs[0].name)
    for tensor in op.outputs:
        stb_def.output.append(tensor.name)
    block_arg = stb_def.arg.add()
    block_arg.name = 'block_shape'
    block_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    trim_arg = stb_def.arg.add()
    trim_arg.name = 'crops' if b2s else 'paddings'
    trim_arg.ints.extend(
        get_input_tensor(op, 2).eval().astype(np.int32).flat)
    self.add_output_shape(op.outputs, stb_def)
    self.resolved_ops[op.name] = 1
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    self.unused_tensor.add(get_input_tensor(op, 2).name)
def is_atrous_conv2d(self, op):
    """Return True when ``op`` is a SpaceToBatchND feeding exactly one
    (depthwise) convolution, i.e. the TF expansion of a dilated conv."""
    if op.type != 'SpaceToBatchND':
        return False
    consumers = self.tf_graph[op.name]
    if len(consumers) != 1:
        return False
    return consumers[0].type in ('Conv2D', 'DepthwiseConv2dNative')
def convert_atrous_conv2d(self, op):
    """Collapse the TF dilated-convolution expansion
    SpaceToBatchND -> (Depthwise)Conv2D [-> BiasAdd] -> BatchToSpaceND
    [-> Relu] into a single MACE conv operator with a 'dilations' arg.

    ``op`` is the SpaceToBatchND; its block-shape tensor supplies the
    dilations and its paddings tensor decides SAME vs VALID.  Raises if
    no matching BatchToSpaceND consumer is found.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    conv_op = self.tf_graph[op.name][0]
    op_def.name = conv_op.name
    if conv_op.type == 'DepthwiseConv2dNative':
        op_def.type = 'DepthwiseConv2d'
    else:
        op_def.type = conv_op.type
    if self.device == 'gpu':
        op_def.input.extend([op.inputs[0].name])
        if op_def.type == 'DepthwiseConv2d':
            buffer_type = "DW_CONV2D_FILTER"
        else:
            # Schedule the filter permutation expected by the GPU kernel.
            self.transpose_filter_tensor[get_input_tensor(conv_op,
                                                          1).name] = \
                (0, 1, 3, 2)
            buffer_type = "CONV2D_FILTER"
        # Stage the filter into an OpenCL image.
        output_name = self.add_buffer_to_image(
            get_input_tensor(conv_op, 1).name, buffer_type)
        op_def.input.extend([output_name])
    else:
        # CPU runs NCHW: permute the filter and wire (input, filter).
        self.transpose_filter_tensor[get_input_tensor(conv_op,
                                                      1).name] = \
            (3, 2, 0, 1)
        op_def.input.extend([get_input_tensor(op, 0).name])
        op_def.input.extend([get_input_tensor(conv_op, 1).name])
    # The SpaceToBatch block shape doubles as the dilation factors.
    dilation_arg = op_def.arg.add()
    dilation_arg.name = 'dilations'
    dilation_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    padding_arg = op_def.arg.add()
    padding_arg.name = 'padding'
    # Heuristic: any positive SpaceToBatch padding implies SAME.
    padding_values = \
        get_input_tensor(op, 2).eval().astype(np.int32).flat
    if len(padding_values) > 0 and padding_values[0] > 0:
        padding_arg.i = padding_mode['SAME']
    else:
        padding_arg.i = padding_mode['VALID']
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    self.unused_tensor.add(get_input_tensor(op, 2).name)
    strides_arg = op_def.arg.add()
    strides_arg.name = 'strides'
    strides_arg.ints.extend([1, 1])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    final_op = conv_op
    self.resolved_ops[op.name] = 1
    self.resolved_ops[conv_op.name] = 1
    # Fuse a single BiasAdd consumer, if any.
    if len(self.tf_graph[final_op.name]
           ) == 1 and self.tf_graph[final_op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[final_op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1
    # Absorb the mandatory closing BatchToSpaceND and its const inputs.
    if len(self.tf_graph[final_op.name]) == 1 and \
            self.tf_graph[final_op.name][0].type == 'BatchToSpaceND':
        final_op = self.tf_graph[final_op.name][0]
        self.resolved_ops[final_op.name] = 1
        self.unused_tensor.add(get_input_tensor(final_op, 1).name)
        self.unused_tensor.add(get_input_tensor(final_op, 2).name)
    else:
        raise Exception('Convert atrous conv error: no BatchToSpaceND op')
    # Fuse a single trailing Relu, if any.
    if len(self.tf_graph[final_op.name]) == 1 and \
            self.tf_graph[final_op.name][0].type == 'Relu':
        relu_op = self.tf_graph[final_op.name][0]
        fused_relu_arg = op_def.arg.add()
        fused_relu_arg.name = 'activation'
        fused_relu_arg.s = "RELU"
        final_op = relu_op
        self.resolved_ops[relu_op.name] = 1
    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def is_softmax(self, op):
    """Return True for the Reshape -> Softmax -> Reshape pattern that
    convert_softmax folds into a single operator."""
    if op.type != 'Softmax':
        return False
    parents = self.tf_parents[op.name]
    if len(parents) != 1 or parents[0].type != 'Reshape':
        return False
    children = self.tf_graph[op.name]
    return len(children) == 1 and children[0].type == 'Reshape'
def convert_softmax(self, softmax_op):
    """Fold the inception-style Reshape -> Softmax -> Reshape pattern
    into a single MACE Softmax operator.

    Both surrounding Reshapes (and, when present, a preceding Squeeze
    plus its Shape sibling) are marked resolved and their shape tensors
    marked unused.  The emitted op consumes the tensor feeding the
    pattern and produces the final Reshape's output; a rank-2 output
    shape is padded to a 4-D [1, 1, H, W] shape.

    Fix: removed a leftover debug statement (``print children_ops``)
    that dumped the raw consumer list on every conversion.
    """
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    # deal with first Reshape op
    parent_reshape_op = self.tf_parents[softmax_op.name][0]
    self.unused_tensor.add(get_input_tensor(parent_reshape_op, 1).name)
    self.resolved_ops[parent_reshape_op.name] = 1
    # FIXME: hardcode for inception_v3
    # remove squeeze if exist
    squeeze_op = self.tf_parents[parent_reshape_op.name][0]
    if squeeze_op.type == 'Squeeze':
        op_def.input.extend([squeeze_op.inputs[0].name])
        self.resolved_ops[squeeze_op.name] = 1
        # remove shape if exist
        children_ops = self.tf_graph[squeeze_op.name]
        if len(children_ops) > 1 and children_ops[0].type == 'Shape':
            self.unused_tensor.add(
                get_input_tensor(children_ops[1], 0).name)
            self.resolved_ops[children_ops[1].name] = 1
    else:
        op_def.input.extend([parent_reshape_op.inputs[0].name])
    # deal with Softmax op
    op_def.name = softmax_op.name
    op_def.type = softmax_op.type
    self.resolved_ops[softmax_op.name] = 1
    # deal with last Reshape op: adopt its output name and shape.
    reshape_op = self.tf_graph[softmax_op.name][0]
    self.unused_tensor.add(get_input_tensor(reshape_op, 1).name)
    shape = [dim.value for dim in reshape_op.outputs[0].shape]
    if len(shape) == 2:
        # Pad a 2-D result to the 4-D layout MACE expects.
        shape = [1, 1, shape[0], shape[1]]
    op_def.output.extend([output.name for output in reshape_op.outputs])
    self.add_output_shape([shape], op_def)
    self.resolved_ops[reshape_op.name] = 1
def convert_pad(self, op):
    """Convert TF Pad into a MACE 'Pad' operator.

    The paddings tensor (input 1) is folded into the 'paddings'
    argument; on non-GPU devices the per-dimension rows are permuted
    from NHWC into NCHW order.  An optional third input supplies
    'constant_value'.  Consumed const inputs are marked unused.

    Fix: the paddings constant was evaluated with an identical
    expression in both device branches; it is now computed once and
    only the row permutation is conditional.
    """
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = "Pad"
    op_def.input.extend([op.inputs[0].name])
    op_def.output.extend([output.name for output in op.outputs])
    paddings_arg = op_def.arg.add()
    paddings_arg.name = 'paddings'
    # Evaluate the paddings constant once for either device path.
    paddings_value = get_input_tensor(op, 1).eval().astype(np.int32)
    if self.device != 'gpu':
        # Reorder the N,H,W,C rows into N,C,H,W for the CPU layout.
        paddings_value = paddings_value[[0, 3, 1, 2]]
    paddings_arg.ints.extend(paddings_value.flat)
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    if len(op.inputs) == 3:
        constant_value_arg = op_def.arg.add()
        constant_value_arg.name = 'constant_value'
        constant_value_arg.i = \
            get_input_tensor(op, 2).eval().astype(np.int32).flat[0]
        self.unused_tensor.add(get_input_tensor(op, 2).name)
    self.add_output_shape(op.outputs, op_def)
    self.resolved_ops[op.name] = 1
def convert_normal_op(self, op):
    """Generic passthrough conversion: emit one MACE op mirroring the
    TF op's type, inputs and outputs unchanged."""
    plain_def = self.net_def.op.add()
    dtype_arg = plain_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    plain_def.name = op.name
    plain_def.type = op.type
    for tensor in op.inputs:
        plain_def.input.append(tensor.name)
    for tensor in op.outputs:
        plain_def.output.append(tensor.name)
    self.add_output_shape(op.outputs, plain_def)
    self.resolved_ops[op.name] = 1
def convert(self, input_nodes, output_nodes):
    """Drive the whole graph conversion.

    Adds the device input transforms, dispatches every unresolved TF op
    to its type-specific converter, converts remaining Const ops to
    tensors, adds the device output transforms, and finally reports any
    op that was never resolved.  Raises on unsupported op types.
    """
    if self.device == 'gpu':
        self.add_gpu_input_transform(input_nodes)
    if self.device == 'cpu':
        self.add_cpu_input_transform(input_nodes)
    # First pass: pattern-match and convert every non-Const op.  The
    # type-specific converters may resolve several ops at once (fusion),
    # which is why resolved_ops is re-checked at the top of the loop.
    for op in self.tf_ops:
        if self.resolved_ops[op.name] == 1:
            continue
        if op.type in ['Placeholder', 'Identity']:
            self.resolved_ops[op.name] = 1
            pass
        elif op.type == 'Const':
            # Consts are converted in the second pass below, once it is
            # known which of them were folded into operator arguments.
            pass
        elif op.type == 'Reshape':
            self.convert_reshape(op)
        elif self.is_atrous_conv2d(op):
            self.convert_atrous_conv2d(op)
        elif self.check_conv_to_fc(op):
            self.convert_global_conv_to_fc(op)
        elif op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
            if self.device == 'gpu' and self.check_winograd_conv(op):
                self.convert_winograd_conv_gpu(op)
            else:
                self.convert_conv2d(op)
        elif op.type == 'Conv2DBackpropInput':
            self.convert_deconv2d(op)
        elif op.type == 'FusedBatchNorm':
            self.convert_fused_batchnorm(op)
        elif op.type == 'Mul' and op.name.find('batchnorm/mul') != -1:
            self.convert_batchnorm(op)
        elif op.type == 'AvgPool' or op.type == 'MaxPool':
            self.convert_pooling(op)
        elif op.type == 'Relu6':
            self.convert_relu6(op)
        elif op.type == 'Add':
            # Two-operand Add is element-wise; 3+ operands become AddN.
            if len(op.inputs) > 2:
                self.convert_add(op)
            else:
                self.convert_eltwise(op, 'ADD')
        elif op.type == 'ConcatV2':
            self.convert_concat(op)
        elif op.type == 'ResizeBilinear':
            self.convert_resize_bilinear(op)
        elif op.type == 'BiasAdd':
            self.convert_bias_add(op)
        elif op.type == 'SpaceToBatchND':
            self.convert_space_to_batch(op, False)
        elif op.type == 'BatchToSpaceND':
            self.convert_space_to_batch(op, True)
        elif op.type == 'DepthToSpace':
            self.convert_depth_to_space(op, True)
        elif op.type == 'SpaceToDepth':
            self.convert_depth_to_space(op, False)
        elif op.type in ['Neg', 'neg', 'Negative', 'negative']:
            self.convert_eltwise(op, 'NEG')
        elif op.type in ['RealDiv', 'Div']:
            self.convert_eltwise(op, 'DIV')
        elif op.type in ['SquaredDifference']:
            self.convert_eltwise(op, 'SQR_DIFF')
        elif op.type in ['Pow']:
            self.convert_eltwise(op, 'POW')
        elif op.type == 'Mul':
            self.convert_eltwise(op, 'MUL')
        elif op.type == 'Sub':
            self.convert_eltwise(op, 'SUB')
        elif self.is_softmax(op):
            self.convert_softmax(op)
        elif op.type in ['Relu', 'Sigmoid', 'Tanh']:
            self.convert_activation(op)
        # FIXME: hardcode for inception_v3
        elif op.type in ['Squeeze', 'Shape']:
            self.resolved_ops[op.name] = 1
        elif op.type == 'Mean':
            # Global avg pooling
            reduce_dims = op.inputs[1].eval()
            if reduce_dims[0] == 1 and reduce_dims[1] == 2:
                self.convert_global_avg_pooling(op)
                self.unused_tensor.add(op.inputs[1].name)
            else:
                raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                              op.type))
        elif op.type == 'Pad':
            self.convert_pad(op)
        # elif op.type in ['']:
        #     self.convert_normal_op(op)
        else:
            raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                          op.type))

    # Second pass: any Const still unresolved is a real weight tensor.
    for op in self.tf_ops:
        if self.resolved_ops[op.name] == 1:
            continue
        elif op.type == 'Const':
            self.convert_tensor(op)
        else:
            raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                          op.type))

    if self.device == 'gpu':
        self.add_gpu_output_transform(output_nodes)
    if self.device == 'cpu':
        self.add_cpu_output_transform(output_nodes)

    # Report (but do not fail on) anything that slipped through.
    for key in self.resolved_ops:
        if self.resolved_ops[key] != 1:
            print 'Unresolve Op: %s' % key
class Optimizer:
    """Post-conversion optimizer for a MACE NetDef.

    Currently performs a single pass that folds a FoldedBatchNorm into
    the DepthwiseConv2d that feeds it, by scaling the conv weights and
    appending the batch-norm offset as a bias input.
    """

    def __init__(self, net_def, device):
        """Index the net: mace_graph maps an input (tensor) name to the
        ops consuming it; tensor_map maps tensor name to tensor."""
        self.net_def = net_def
        self.device = device
        self.mace_graph = {}
        self.tensor_map = {}
        for op in net_def.op:
            for input_name in op.input:
                if input_name not in self.mace_graph:
                    self.mace_graph[input_name] = []
                self.mace_graph[input_name].append(op)
        for tensor in net_def.tensors:
            self.tensor_map[tensor.name] = tensor

    def get_buffer_tensor_name(self, name):
        """Map an op input name back to its backing tensor name.

        On GPU, op inputs refer to BufferToImage outputs; this strips
        the characters at positions [-6:-2] of the name to recover the
        buffer tensor name (NOTE(review): depends on the image-output
        naming scheme used by add_buffer_to_image -- confirm).  On CPU
        the name is already the tensor name.
        """
        if self.device == 'gpu':
            return name[:-6] + name[-2:]
        else:
            return name

    def fold_batch_norm(self):
        """Return a new NetDef with DepthwiseConv2d + FoldedBatchNorm
        pairs fused into a single DepthwiseConv2d.

        For each pair: the conv weights are multiplied in place by the
        batch-norm scale (iteration order matches the flat float_data
        layout for the device's weight format), the offset tensor is
        appended as a bias input, activation/max_limit args are copied
        over, and the conv adopts the batch-norm's output.  The old
        scale tensor and the pre-scaling weight entry are dropped from
        the tensor list; the mutated weight tensor is re-appended.
        """
        unused_tensors = set()
        new_tensors = []
        new_net = mace_pb2.NetDef()
        resolved_ops = set()
        for op in self.net_def.op:
            if op.name in resolved_ops:
                pass
            elif op.type == 'DepthwiseConv2d' and len(op.output) == 1 and \
                    self.mace_graph[op.output[0]][0].type == \
                    'FoldedBatchNorm':
                depthwise_conv2d_op = op
                folded_bn_op = self.mace_graph[op.output[0]][0]
                weight_buffer_name = self.get_buffer_tensor_name(
                    depthwise_conv2d_op.input[1])
                weight_tensor = self.tensor_map[weight_buffer_name]
                scale_buffer_name = self.get_buffer_tensor_name(
                    folded_bn_op.input[1])
                offset_buffer_name = self.get_buffer_tensor_name(
                    folded_bn_op.input[2])
                scale_tensor = self.tensor_map[scale_buffer_name]
                weight_shape = weight_tensor.dims
                idx = 0
                if self.device == 'cpu':
                    # OIHW
                    for oc in range(weight_shape[0]):
                        for ic in range(weight_shape[1]):
                            for i in range(weight_shape[2]):
                                for j in range(weight_shape[3]):
                                    weight_tensor.float_data[idx] *= \
                                        scale_tensor.float_data[
                                            ic * weight_shape[0] + oc]
                                    idx += 1
                else:
                    # HWIO
                    for i in range(weight_shape[0]):
                        for j in range(weight_shape[1]):
                            for ic in range(weight_shape[2]):
                                for oc in range(weight_shape[3]):
                                    weight_tensor.float_data[idx] *= \
                                        scale_tensor.float_data[
                                            ic * weight_shape[3] + oc]
                                    idx += 1
                new_tensors.append(weight_tensor)
                unused_tensors.add(weight_tensor.name)
                unused_tensors.add(scale_tensor.name)
                if self.device == 'gpu':
                    # Drop the scale's BufferToImage op entirely; keep
                    # the offset's (the offset becomes the conv bias).
                    scale_b2i_op = self.mace_graph[scale_buffer_name][0]
                    offset_b2i_op = self.mace_graph[offset_buffer_name][0]
                    resolved_ops.add(scale_b2i_op.name)
                    resolved_ops.add(offset_b2i_op.name)
                    new_net.op.extend([offset_b2i_op])
                resolved_ops.add(depthwise_conv2d_op.name)
                resolved_ops.add(folded_bn_op.name)
                offset_tensor_name = folded_bn_op.input[2]
                depthwise_conv2d_op.input.extend([offset_tensor_name])
                # Carry any fused activation settings over to the conv.
                for arg in folded_bn_op.arg:
                    if arg.name == 'activation':
                        act_arg = depthwise_conv2d_op.arg.add()
                        act_arg.name = arg.name
                        act_arg.s = arg.s
                    elif arg.name == 'max_limit':
                        act_arg = depthwise_conv2d_op.arg.add()
                        act_arg.name = arg.name
                        act_arg.f = arg.f
                depthwise_conv2d_op.output[0] = folded_bn_op.output[0]
                new_net.op.extend([depthwise_conv2d_op])
            else:
                new_net.op.extend([op])
        for tensor in self.net_def.tensors:
            if tensor.name in unused_tensors:
                pass
            else:
                new_net.tensors.extend([tensor])
        for tensor in new_tensors:
            new_net.tensors.extend([tensor])
        return new_net

    def optimize(self):
        """Run all optimization passes and return the optimized NetDef."""
        new_net = self.fold_batch_norm()
        return new_net
def add_shape_info(input_graph_def, input_nodes, input_shapes):
    """Return a copy of ``input_graph_def`` in which every node named in
    ``input_nodes`` gets an explicit 'shape' attribute taken from the
    matching entry of ``input_shapes``.

    The rewritten node keeps only its 'dtype' attribute plus the new
    'shape'; all other attributes are cleared.  Other nodes are copied
    through unchanged.
    """
    rewritten = graph_pb2.GraphDef()
    for node in input_graph_def.node:
        clone = copy.deepcopy(node)
        if node.name in input_nodes:
            shape = input_shapes[input_nodes.index(node.name)]
            clone.attr.clear()
            dims = [tensor_shape_pb2.TensorShapeProto.Dim(size=d)
                    for d in shape]
            clone.attr['shape'].shape.dim.extend(dims)
            clone.attr['dtype'].CopyFrom(node.attr['dtype'])
        rewritten.node.extend([clone])
    return rewritten
def convert_to_mace_pb(model_file, input_node, input_shape, output_node,
                       data_type, device, winograd):
    """Convert a frozen TF GraphDef file into an optimized MACE NetDef.

    model_file   -- path to the frozen .pb file.
    input_node   -- comma-separated input node names.
    input_shape  -- colon-separated list of comma-separated dims, one
                    entry per input node (e.g. "1,224,224,3:1,10").
    output_node  -- comma-separated output node names.
    data_type    -- key of data_type_map selecting the MACE data type.
    device       -- 'cpu' or 'gpu'; also selects the memory optimizer.
    winograd     -- whether Winograd convolution may be used.
    Returns the optimized mace_pb2.NetDef.
    """
    net_def = mace_pb2.NetDef()
    dt = data_type_map[data_type]

    input_graph_def = tf.GraphDef()
    with gfile.Open(model_file, "rb") as f:
        data = f.read()
        input_graph_def.ParseFromString(data)

    input_nodes = [x for x in input_node.split(',')]
    input_shapes = []
    if input_shape != "":
        input_shape_strs = [x for x in input_shape.split(':')]
        for shape_str in input_shape_strs:
            input_shapes.extend([[int(x) for x in shape_str.split(',')]])
    output_nodes = [x for x in output_node.split(',')]
    # One shape per input node is required.
    # NOTE(review): an empty input_shape leaves input_shapes empty while
    # input_nodes is never empty, so this assert would always fire --
    # confirm input_shape is mandatory for all callers.
    assert len(input_nodes) == len(input_shapes)

    input_graph_def = add_shape_info(input_graph_def, input_nodes,
                                     input_shapes)
    with tf.Session() as session:
        with session.graph.as_default() as graph:
            tf.import_graph_def(input_graph_def, name="")
            ops = graph.get_operations()
            converter = TFConverter(graph, ops, net_def, dt, device,
                                    winograd)
            converter.convert(input_nodes, output_nodes)
            optimizer = Optimizer(net_def, device)
            net_def = optimizer.optimize()
            print "Model Converted."
            if device == 'gpu':
                print "start optimize memory."
                memory_optimizer.optimize_gpu_memory(net_def)
                print "Memory optimization done."
            elif device == 'cpu':
                print "start optimize memory."
                memory_optimizer.optimize_cpu_memory(net_def)
                print "Memory optimization done."

    return net_def
mace/test/mace_api_mt_test.cc
浏览文件 @
3e82ad67
...
...
@@ -152,7 +152,7 @@ void CheckOutputs(const NetDef &net_def,
memcpy
(
input_data
.
data
(),
input
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input
.
first
,
":0"
);
input
.
first
);
net
.
AddInputFromArray
<
D
,
float
>
(
input_name
,
input
.
second
.
shape
(),
input_data
);
}
...
...
@@ -181,7 +181,7 @@ void CheckOutputs(const NetDef &net_def,
float
*
data
=
tmp_tensor
->
mutable_data
<
float
>
();
memcpy
(
data
,
output
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output
.
first
,
":0"
);
output
.
first
);
ops
::
test
::
ExpectTensorNear
<
float
>
(
*
tmp_tensor
,
*
net
.
GetOutput
(
output_name
.
data
()),
1e-5
);
...
...
@@ -265,7 +265,7 @@ void MaceRunFunc(const int in_out_size) {
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
++
i
)
{
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input_names
[
i
]
,
":0"
);
input_names
[
i
]);
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
mace
::
kernels
::
IN_OUT_CHANNEL
,
{
mem_map
[
input_names
[
i
]]},
...
...
@@ -281,7 +281,7 @@ void MaceRunFunc(const int in_out_size) {
}
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output_names
[
i
]
,
":0"
);
output_names
[
i
]);
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
}
...
...
mace/test/mace_api_test.cc
浏览文件 @
3e82ad67
...
...
@@ -162,7 +162,7 @@ void CheckOutputs(const NetDef &net_def,
memcpy
(
input_data
.
data
(),
input
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input
.
first
,
":0"
);
input
.
first
);
net
.
AddInputFromArray
<
D
,
float
>
(
input_name
,
input
.
second
.
shape
(),
input_data
);
}
...
...
@@ -191,7 +191,7 @@ void CheckOutputs(const NetDef &net_def,
float
*
data
=
tmp_tensor
->
mutable_data
<
float
>
();
memcpy
(
data
,
output
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output
.
first
,
":0"
);
output
.
first
);
ops
::
test
::
ExpectTensorNear
<
float
>
(
*
tmp_tensor
,
*
net
.
GetOutput
(
output_name
.
data
()),
1e-5
);
...
...
@@ -275,7 +275,7 @@ void MaceRun(const int in_out_size,
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
++
i
)
{
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input_names
[
i
]
,
":0"
);
input_names
[
i
]);
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
mace
::
kernels
::
IN_OUT_CHANNEL
,
{
mem_map
[
input_names
[
i
]]},
...
...
@@ -291,7 +291,7 @@ void MaceRun(const int in_out_size,
}
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output_names
[
i
]
,
":0"
);
output_names
[
i
]);
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录