Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
c3837858
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
c3837858
编写于
5月 15, 2018
作者:
刘
刘琦
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'transform' into 'master'
Refactor model converter and transformer See merge request !477
上级
04f7a34a
3e82ad67
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
2411 addition
and
2802 deletion
+2411
-2802
mace/core/mace.cc
mace/core/mace.cc
+5
-5
mace/ops/fully_connected.cc
mace/ops/fully_connected.cc
+3
-3
mace/ops/fully_connected_benchmark.cc
mace/ops/fully_connected_benchmark.cc
+2
-2
mace/ops/fully_connected_test.cc
mace/ops/fully_connected_test.cc
+6
-6
mace/proto/mace.proto
mace/proto/mace.proto
+1
-0
mace/python/tools/BUILD
mace/python/tools/BUILD
+14
-22
mace/python/tools/caffe_converter_lib.py
mace/python/tools/caffe_converter_lib.py
+0
-1213
mace/python/tools/convert_util.py
mace/python/tools/convert_util.py
+6
-0
mace/python/tools/converter.py
mace/python/tools/converter.py
+72
-16
mace/python/tools/converter_tool/__init__.py
mace/python/tools/converter_tool/__init__.py
+0
-0
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+259
-0
mace/python/tools/converter_tool/caffe_converter.py
mace/python/tools/converter_tool/caffe_converter.py
+508
-0
mace/python/tools/converter_tool/shape_inference.py
mace/python/tools/converter_tool/shape_inference.py
+149
-0
mace/python/tools/converter_tool/tensorflow_converter.py
mace/python/tools/converter_tool/tensorflow_converter.py
+442
-0
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+914
-0
mace/python/tools/memory_optimizer.py
mace/python/tools/memory_optimizer.py
+10
-4
mace/python/tools/source_converter_lib.py
mace/python/tools/source_converter_lib.py
+12
-1
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+0
-1522
mace/test/mace_api_mt_test.cc
mace/test/mace_api_mt_test.cc
+4
-4
mace/test/mace_api_test.cc
mace/test/mace_api_test.cc
+4
-4
未找到文件。
mace/core/mace.cc
浏览文件 @
c3837858
...
@@ -119,11 +119,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
...
@@ -119,11 +119,11 @@ MaceEngine::Impl::Impl(const NetDef *net_def,
LOG
(
INFO
)
<<
"MACE version: "
<<
MaceVersion
();
LOG
(
INFO
)
<<
"MACE version: "
<<
MaceVersion
();
// Set storage path for internal usage
// Set storage path for internal usage
for
(
auto
input_name
:
input_nodes
)
{
for
(
auto
input_name
:
input_nodes
)
{
ws_
->
CreateTensor
(
MakeString
(
"mace_input_node_"
,
input_name
,
":0"
),
ws_
->
CreateTensor
(
MakeString
(
"mace_input_node_"
,
input_name
),
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
}
for
(
auto
output_name
:
output_nodes
)
{
for
(
auto
output_name
:
output_nodes
)
{
ws_
->
CreateTensor
(
MakeString
(
"mace_output_node_"
,
output_name
,
":0"
),
ws_
->
CreateTensor
(
MakeString
(
"mace_output_node_"
,
output_name
),
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
GetDeviceAllocator
(
device_type_
),
DT_FLOAT
);
}
}
#ifdef MACE_ENABLE_HEXAGON
#ifdef MACE_ENABLE_HEXAGON
...
@@ -182,7 +182,7 @@ MaceStatus MaceEngine::Impl::Run(
...
@@ -182,7 +182,7 @@ MaceStatus MaceEngine::Impl::Run(
"The Inputs' shape must be 4-dimension with NHWC format,"
"The Inputs' shape must be 4-dimension with NHWC format,"
" please use 1 to fill missing dimensions"
);
" please use 1 to fill missing dimensions"
);
Tensor
*
input_tensor
=
Tensor
*
input_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_input_node_"
,
input
.
first
,
":0"
));
ws_
->
GetTensor
(
MakeString
(
"mace_input_node_"
,
input
.
first
));
input_tensor
->
Resize
(
input
.
second
.
shape
());
input_tensor
->
Resize
(
input
.
second
.
shape
());
{
{
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
Tensor
::
MappingGuard
input_guard
(
input_tensor
);
...
@@ -199,7 +199,7 @@ MaceStatus MaceEngine::Impl::Run(
...
@@ -199,7 +199,7 @@ MaceStatus MaceEngine::Impl::Run(
" please use 1 to fill missing dimensions"
);
" please use 1 to fill missing dimensions"
);
}
}
Tensor
*
output_tensor
=
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
));
output_tensors
.
push_back
(
output_tensor
);
output_tensors
.
push_back
(
output_tensor
);
}
}
#ifdef MACE_ENABLE_HEXAGON
#ifdef MACE_ENABLE_HEXAGON
...
@@ -223,7 +223,7 @@ MaceStatus MaceEngine::Impl::Run(
...
@@ -223,7 +223,7 @@ MaceStatus MaceEngine::Impl::Run(
#endif
#endif
for
(
auto
&
output
:
*
outputs
)
{
for
(
auto
&
output
:
*
outputs
)
{
Tensor
*
output_tensor
=
Tensor
*
output_tensor
=
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
+
":0"
));
ws_
->
GetTensor
(
MakeString
(
"mace_output_node_"
,
output
.
first
));
// save output
// save output
if
(
output_tensor
!=
nullptr
&&
output
.
second
.
data
()
!=
nullptr
)
{
if
(
output_tensor
!=
nullptr
&&
output
.
second
.
data
()
!=
nullptr
)
{
Tensor
::
MappingGuard
output_guard
(
output_tensor
);
Tensor
::
MappingGuard
output_guard
(
output_tensor
);
...
...
mace/ops/fully_connected.cc
浏览文件 @
c3837858
...
@@ -18,20 +18,20 @@ namespace mace {
...
@@ -18,20 +18,20 @@ namespace mace {
namespace
ops
{
namespace
ops
{
void
Register_FullyConnected
(
OperatorRegistry
*
op_registry
)
{
void
Register_FullyConnected
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
C
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
ullyConnected
"
)
.
Device
(
DeviceType
::
CPU
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
.
Build
(),
FullyConnectedOp
<
DeviceType
::
CPU
,
float
>
);
FullyConnectedOp
<
DeviceType
::
CPU
,
float
>
);
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
C
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
ullyConnected
"
)
.
Device
(
DeviceType
::
GPU
)
.
Device
(
DeviceType
::
GPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
.
Build
(),
FullyConnectedOp
<
DeviceType
::
GPU
,
float
>
);
FullyConnectedOp
<
DeviceType
::
GPU
,
float
>
);
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
C
"
)
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"F
ullyConnected
"
)
.
Device
(
DeviceType
::
GPU
)
.
Device
(
DeviceType
::
GPU
)
.
TypeConstraint
<
half
>
(
"T"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
.
Build
(),
...
...
mace/ops/fully_connected_benchmark.cc
浏览文件 @
c3837858
...
@@ -37,7 +37,7 @@ void FCBenchmark(
...
@@ -37,7 +37,7 @@ void FCBenchmark(
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
out_channel
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
out_channel
});
if
(
D
==
DeviceType
::
CPU
)
{
if
(
D
==
DeviceType
::
CPU
)
{
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
.
Input
(
"Bias"
)
...
@@ -52,7 +52,7 @@ void FCBenchmark(
...
@@ -52,7 +52,7 @@ void FCBenchmark(
BufferToImage
<
D
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
BufferToImage
<
D
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
.
Input
(
"BiasImage"
)
...
...
mace/ops/fully_connected_test.cc
浏览文件 @
c3837858
...
@@ -42,7 +42,7 @@ void Simple(const std::vector<index_t> &input_shape,
...
@@ -42,7 +42,7 @@ void Simple(const std::vector<index_t> &input_shape,
if
(
D
==
DeviceType
::
CPU
)
{
if
(
D
==
DeviceType
::
CPU
)
{
net
.
Transpose2D
<
D
,
float
>
(
"Weight"
,
"WeightTranspose"
);
net
.
Transpose2D
<
D
,
float
>
(
"Weight"
,
"WeightTranspose"
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
.
Input
(
"Bias"
)
...
@@ -59,7 +59,7 @@ void Simple(const std::vector<index_t> &input_shape,
...
@@ -59,7 +59,7 @@ void Simple(const std::vector<index_t> &input_shape,
BufferToImage
<
D
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
BufferToImage
<
D
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
.
Input
(
"BiasImage"
)
...
@@ -142,7 +142,7 @@ void Complex(const index_t batch,
...
@@ -142,7 +142,7 @@ void Complex(const index_t batch,
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
net
.
AddRandomInput
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
out_channel
});
net
.
AddRandomInput
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
out_channel
});
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
.
Input
(
"Bias"
)
...
@@ -166,7 +166,7 @@ void Complex(const index_t batch,
...
@@ -166,7 +166,7 @@ void Complex(const index_t batch,
BufferToImage
<
DeviceType
::
GPU
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
BufferToImage
<
DeviceType
::
GPU
,
float
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
.
Input
(
"BiasImage"
)
...
@@ -231,7 +231,7 @@ void TestWXFormat(const index_t batch,
...
@@ -231,7 +231,7 @@ void TestWXFormat(const index_t batch,
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
"Weight"
,
{
out_channel
,
height
*
width
*
channels
});
net
.
AddRandomInput
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
out_channel
});
net
.
AddRandomInput
<
DeviceType
::
GPU
,
float
>
(
"Bias"
,
{
out_channel
});
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"Input"
)
.
Input
(
"Input"
)
.
Input
(
"Weight"
)
.
Input
(
"Weight"
)
.
Input
(
"Bias"
)
.
Input
(
"Bias"
)
...
@@ -255,7 +255,7 @@ void TestWXFormat(const index_t batch,
...
@@ -255,7 +255,7 @@ void TestWXFormat(const index_t batch,
BufferToImage
<
DeviceType
::
GPU
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
BufferToImage
<
DeviceType
::
GPU
,
T
>
(
&
net
,
"Bias"
,
"BiasImage"
,
kernels
::
BufferType
::
ARGUMENT
);
kernels
::
BufferType
::
ARGUMENT
);
OpDefBuilder
(
"F
C
"
,
"FullyConnectedTest"
)
OpDefBuilder
(
"F
ullyConnected
"
,
"FullyConnectedTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"InputImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"WeightImage"
)
.
Input
(
"BiasImage"
)
.
Input
(
"BiasImage"
)
...
...
mace/proto/mace.proto
浏览文件 @
c3837858
...
@@ -10,6 +10,7 @@ enum NetMode {
...
@@ -10,6 +10,7 @@ enum NetMode {
enum
DeviceType
{
enum
DeviceType
{
CPU
=
0
;
// In default, we will use CPU.
CPU
=
0
;
// In default, we will use CPU.
GPU
=
2
;
GPU
=
2
;
HEXAGON
=
3
;
}
}
enum
DataType
{
enum
DataType
{
...
...
mace/python/tools/BUILD
浏览文件 @
c3837858
py_library
(
py_library
(
name
=
"
tf_
converter_lib"
,
name
=
"converter_lib"
,
srcs
=
[
srcs
=
[
"convert_util.py"
,
"convert_util.py"
,
"graph_util.py"
,
"graph_util.py"
,
"tf_converter_lib.py"
,
"tf_dsp_converter_lib.py"
,
"tf_dsp_converter_lib.py"
,
"converter_tool/base_converter.py"
,
"converter_tool/shape_inference.py"
,
"converter_tool/tensorflow_converter.py"
,
"converter_tool/caffe_converter.py"
,
"converter_tool/transformer.py"
,
],
],
srcs_version
=
"PY2AND3"
,
srcs_version
=
"PY2AND3"
,
deps
=
[
deps
=
[
":memory_optimizer"
,
":memory_optimizer"
,
"//mace/proto:mace_py"
,
"//mace/proto:mace_py"
,
],
)
py_library
(
name
=
"caffe_converter_lib"
,
srcs
=
[
"caffe_converter_lib.py"
,
],
srcs_version
=
"PY2AND3"
,
deps
=
[
":memory_optimizer"
,
"//mace/third_party/caffe:caffe_py"
,
"//mace/third_party/caffe:caffe_py"
,
],
],
)
)
...
@@ -37,22 +30,21 @@ py_library(
...
@@ -37,22 +30,21 @@ py_library(
)
)
py_binary
(
py_binary
(
name
=
"
convert
er"
,
name
=
"
memory_optimiz
er"
,
srcs
=
[
"
convert
er.py"
],
srcs
=
[
"
memory_optimiz
er.py"
],
srcs_version
=
"PY2AND3"
,
srcs_version
=
"PY2AND3"
,
deps
=
[
deps
=
[
":caffe_converter_lib"
,
"//mace/proto:mace_py"
,
":source_converter_lib"
,
":tf_converter_lib"
,
"@six_archive//:six"
,
],
],
)
)
py_binary
(
py_binary
(
name
=
"
memory_optimiz
er"
,
name
=
"
convert
er"
,
srcs
=
[
"
memory_optimiz
er.py"
],
srcs
=
[
"
convert
er.py"
],
srcs_version
=
"PY2AND3"
,
srcs_version
=
"PY2AND3"
,
deps
=
[
deps
=
[
"//mace/proto:mace_py"
,
":converter_lib"
,
":source_converter_lib"
,
"@six_archive//:six"
,
],
],
)
)
mace/python/tools/caffe_converter_lib.py
已删除
100644 → 0
浏览文件 @
04f7a34a
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
mace.proto
import
mace_pb2
from
mace.third_party.caffe
import
caffe_pb2
from
mace.python.tools
import
memory_optimizer
import
google.protobuf.text_format
import
numpy
as
np
import
math
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
buffer_type_map
=
{
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
'WEIGHT_HEIGHT'
:
7
,
'WEIGHT_WIDTH'
:
8
,
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
activation_name_map
=
{
'ReLU'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'TanH'
:
'TANH'
,
}
math_type_mode
=
{
0
:
2
,
# PROD
1
:
0
,
# SUM
2
:
5
,
# MAX
}
MACE_INPUT_NODE_NAME
=
"mace_input_node"
MACE_OUTPUT_NODE_NAME
=
"mace_output_node"
OPENCL_IMAGE_MAX_SIZE
=
16384
class
Operator
(
object
):
def
__init__
(
self
,
name
,
type
,
layer
):
self
.
name
=
name
self
.
type
=
type
self
.
layer
=
layer
self
.
parents
=
[]
self
.
children
=
[]
self
.
data
=
[]
self
.
output_shape_map
=
{}
def
add_parent
(
self
,
parent_op
):
self
.
parents
.
append
(
parent_op
)
parent_op
.
children
.
append
(
self
)
def
get_single_parent
(
self
):
if
len
(
self
.
parents
)
!=
1
:
raise
Exception
(
'Operation %s expected single parent, but got %s'
%
(
self
.
name
,
len
(
self
.
parents
)))
return
self
.
parents
[
0
]
def
BlobToNPArray
(
blob
):
if
blob
.
num
!=
0
:
return
(
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
(
blob
.
num
,
blob
.
channels
,
blob
.
height
,
blob
.
width
)))
else
:
return
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
blob
.
shape
.
dim
)
class
Shapes
(
object
):
@
staticmethod
def
conv_pool_shape
(
input_shape
,
filter_shape
,
paddings
,
strides
,
dilations
,
round_func
,
input_format
=
'NHWC'
):
output_shape
=
np
.
zeros_like
(
input_shape
)
output_shape
[
0
]
=
input_shape
[
0
]
if
input_format
==
'NHWC'
:
# input format: NHWC, filter format: HWOI
output_shape
[
1
]
=
int
(
round_func
((
input_shape
[
1
]
+
paddings
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
output_shape
[
3
]
=
filter_shape
[
2
]
elif
input_format
==
'NCHW'
:
# input format: NCHW, filter format: OIHW
output_shape
[
1
]
=
filter_shape
[
0
]
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
0
]
-
filter_shape
[
2
]
-
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
3
]
=
int
(
round_func
((
input_shape
[
3
]
+
paddings
[
1
]
-
filter_shape
[
3
]
-
(
filter_shape
[
3
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
return
output_shape
@
staticmethod
def
fully_connected_shape
(
input_shape
,
weight_shape
,
input_format
=
'NHWC'
):
if
input_format
==
'NHWC'
:
return
[
input_shape
[
0
],
1
,
1
,
weight_shape
[
0
]]
elif
input_format
==
'NCHW'
:
return
[
input_shape
[
0
],
weight_shape
[
0
],
1
,
1
]
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
@
staticmethod
def
concat_shape
(
input_shapes
,
axis
):
output_shape
=
None
for
input_shape
in
input_shapes
:
if
output_shape
is
None
:
output_shape
=
list
(
input_shape
)
else
:
output_shape
[
axis
]
+=
input_shape
[
axis
]
return
output_shape
@
staticmethod
def
slice_shape
(
input_shape
,
num_output
,
input_format
=
'NHWC'
):
if
input_format
==
'NHWC'
:
return
[
input_shape
[
0
],
input_shape
[
1
],
input_shape
[
2
],
input_shape
[
3
]
/
num_output
]
elif
input_format
==
'NCHW'
:
return
[
input_shape
[
0
],
input_shape
[
1
]
/
num_output
,
input_shape
[
2
],
input_shape
[
3
]
]
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
# outputs' name is [op.name + '_' + #]
class
CaffeConverter
(
object
):
def
__init__
(
self
,
caffe_net
,
weights
,
net_def
,
dt
,
device
,
winograd
):
self
.
net_def
=
net_def
self
.
caffe_net
=
caffe_net
self
.
weights
=
weights
self
.
dt
=
dt
self
.
device
=
device
self
.
winograd
=
winograd
self
.
resolved_ops
=
set
()
self
.
ops
=
[]
self
.
inputs_map
=
{}
# caffe op name -> mace inputs' name
# Add Input operations
top_name_map
=
{}
inputs
=
caffe_net
.
input
for
input
in
inputs
:
self
.
ops
.
extend
([
Operator
(
input
,
'Input'
,
None
)])
top_name_map
[
input
]
=
input
layers
=
caffe_net
.
layer
# remove train layers and dropout
layers
=
self
.
remove_unused_layers
(
layers
)
# Construct graph
# Only support single-output layer
# layer with single output often use the same top name.
self
.
ops
.
extend
(
[
Operator
(
layer
.
name
,
layer
.
type
,
layer
)
for
layer
in
layers
])
self
.
ops_map
=
{
op
.
name
:
op
for
op
in
self
.
ops
}
output_op_map
=
{}
for
layer
in
layers
:
op
=
self
.
ops_map
[
layer
.
name
]
for
input_name
in
layer
.
bottom
:
assert
input_name
!=
layer
.
name
parent_op
=
output_op_map
.
get
(
input_name
)
if
parent_op
is
None
:
parent_op
=
self
.
ops_map
[
input_name
]
op
.
add_parent
(
parent_op
)
if
op
.
name
not
in
self
.
inputs_map
:
self
.
inputs_map
[
op
.
name
]
=
[]
self
.
inputs_map
[
op
.
name
].
extend
([
top_name_map
[
input_name
]])
for
i
in
range
(
len
(
layer
.
top
)):
output_name
=
layer
.
top
[
i
]
if
len
(
layer
.
top
)
==
1
:
top_name_map
[
output_name
]
=
op
.
name
else
:
top_name_map
[
output_name
]
=
op
.
name
+
'_'
+
str
(
i
)
if
output_name
==
layer
.
name
:
continue
output_op_map
[
output_name
]
=
op
# Load weights
weights_layers
=
weights
.
layer
for
layer
in
weights_layers
:
if
not
layer
.
blobs
:
continue
if
layer
.
name
in
self
.
ops_map
:
op
=
self
.
ops_map
[
layer
.
name
]
op
.
data
=
[
BlobToNPArray
(
blob
)
for
blob
in
layer
.
blobs
]
# toposort ops
self
.
ops
=
self
.
toposort_ops
()
def
CommonConvert
(
self
,
op
,
mace_type
):
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
data_format_arg
=
op_def
.
arg
.
add
()
data_format_arg
.
name
=
'data_format'
if
self
.
device
==
'cpu'
:
data_format_arg
.
s
=
'NCHW'
else
:
data_format_arg
.
s
=
'NHWC'
op_def
.
name
=
op
.
name
op_def
.
type
=
mace_type
op_def
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
return
op_def
def
remove_unused_layers
(
self
,
layers
):
phase_map
=
{
0
:
'train'
,
1
:
'test'
}
test_layers_names
=
set
()
test_layers
=
[]
for
layer
in
layers
:
phase
=
'test'
if
len
(
layer
.
include
):
phase
=
phase_map
[
layer
.
include
[
0
].
phase
]
if
len
(
layer
.
exclude
):
phase
=
phase_map
[
layer
.
exclude
[
0
].
phase
]
if
phase
==
'test'
and
layer
.
type
!=
'Dropout'
:
test_layers
.
append
(
layer
)
assert
layer
.
name
not
in
test_layers_names
test_layers_names
.
add
(
layer
.
name
)
return
test_layers
def
toposort_ops
(
self
):
sorted_ops
=
[]
temp_visited
=
set
()
visited
=
set
()
def
search
(
op
):
if
op
.
name
in
temp_visited
:
raise
Exception
(
"The model is not DAG"
)
if
op
.
name
in
visited
:
return
temp_visited
.
add
(
op
.
name
)
for
parent_op
in
op
.
parents
:
search
(
parent_op
)
temp_visited
.
remove
(
op
.
name
)
sorted_ops
.
append
(
op
)
visited
.
add
(
op
.
name
)
for
op
in
self
.
ops
:
search
(
op
)
return
sorted_ops
def
add_buffer_to_image
(
self
,
input_name
,
input_type
):
output_name
=
input_name
[:
-
2
]
+
"_b2i"
+
input_name
[
-
2
:]
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'buffer_type'
arg
.
i
=
buffer_type_map
[
input_type
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'mode'
arg
.
i
=
0
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
return
output_name
def
add_image_to_buffer
(
self
,
input_name
,
input_type
):
output_name
=
input_name
[:
-
2
]
+
"_i2b"
+
input_name
[
-
2
:]
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
input_name
])
op_def
.
output
.
extend
([
output_name
])
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'buffer_type'
arg
.
i
=
buffer_type_map
[
input_type
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
return
output_name
def
add_gpu_input_transform
(
self
,
names
):
for
name
in
names
:
new_input_name
=
MACE_INPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
name
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT_CHANNEL'
]
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
dt
input_op
=
self
.
ops_map
[
name
]
if
input_op
.
layer
is
not
None
:
output_shape
=
input_op
.
output_shape_map
[
input_op
.
layer
.
top
[
0
]]
else
:
output_shape
=
input_op
.
output_shape_map
[
input_op
.
name
]
self
.
add_output_shape
(
op_def
,
output_shape
)
def
add_gpu_output_transform
(
self
,
names
):
for
name
in
names
:
output_name
=
MACE_OUTPUT_NODE_NAME
+
'_'
+
name
+
":0"
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT_CHANNEL'
]
def
add_tensor
(
self
,
name
,
value
):
tensor
=
self
.
net_def
.
tensors
.
add
()
tensor
.
name
=
name
shape
=
list
(
value
.
shape
)
tensor
.
dims
.
extend
(
shape
)
tensor
.
data_type
=
mace_pb2
.
DT_FLOAT
tensor
.
float_data
.
extend
(
value
.
flat
)
@
staticmethod
def
add_output_shape
(
op_def
,
output_shape
):
mace_output_shape
=
mace_pb2
.
OutputShape
()
mace_output_shape
.
dims
.
extend
(
output_shape
)
op_def
.
output_shape
.
extend
([
mace_output_shape
])
def
add_stride_pad_kernel_arg
(
self
,
param
,
op_def
):
try
:
if
len
(
param
.
stride
)
>
1
or
len
(
param
.
kernel_size
)
>
1
or
len
(
param
.
pad
)
>
1
:
raise
Exception
(
'Mace does not support multiple stride/kernel_size/pad'
)
stride
=
[
param
.
stride
[
0
],
param
.
stride
[
0
]]
if
len
(
param
.
stride
)
else
[
1
,
1
]
pad
=
[
param
.
pad
[
0
]
*
2
,
param
.
pad
[
0
]
*
2
]
if
len
(
param
.
pad
)
else
[
0
,
0
]
kernel
=
[
param
.
kernel_size
[
0
],
param
.
kernel_size
[
0
]]
if
len
(
param
.
kernel_size
)
else
[
0
,
0
]
except
TypeError
:
stride
=
[
param
.
stride
,
param
.
stride
]
pad
=
[
param
.
pad
*
2
,
param
.
pad
*
2
]
kernel
=
[
param
.
kernel_size
,
param
.
kernel_size
]
if
param
.
HasField
(
"stride_h"
)
or
param
.
HasField
(
"stride_w"
):
stride
=
[
param
.
stride_h
,
param
.
stride_w
]
# Pad
if
param
.
HasField
(
"pad_h"
)
or
param
.
HasField
(
"pad_w"
):
pad
=
[
param
.
pad_h
*
2
,
param
.
pad_w
*
2
]
if
op_def
is
not
None
:
strides_arg
=
op_def
.
arg
.
add
()
strides_arg
.
name
=
'strides'
strides_arg
.
ints
.
extend
(
stride
)
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding_values'
padding_arg
.
ints
.
extend
(
pad
)
if
op_def
.
type
==
'Pooling'
:
if
param
.
HasField
(
"kernel_h"
)
or
param
.
HasField
(
"kernel_w"
):
kernel
=
[
param
.
kernel_h
,
param
.
kernel_w
]
return
pad
,
stride
,
kernel
def
convert_conv2d
(
self
,
op
):
use_winograd
=
False
if
self
.
device
==
'cpu'
:
use_winograd
=
self
.
check_winograd_conv
(
op
)
param
=
op
.
layer
.
convolution_param
is_depthwise
=
False
if
param
.
HasField
(
'group'
):
if
param
.
group
==
op
.
data
[
0
].
shape
[
0
]
and
op
.
data
[
0
].
shape
[
1
]
==
1
:
is_depthwise
=
True
else
:
raise
Exception
(
"Mace do not support group convolution yet"
)
if
is_depthwise
:
op_def
=
self
.
CommonConvert
(
op
,
'DepthwiseConv2d'
)
else
:
op_def
=
self
.
CommonConvert
(
op
,
'Conv2D'
)
# Add filter
weight_tensor_name
=
op
.
name
+
'_weight:0'
if
self
.
device
==
'cpu'
:
weight_data
=
op
.
data
[
0
]
else
:
# OIHW -> HWOI
weight_data
=
op
.
data
[
0
].
transpose
((
2
,
3
,
0
,
1
))
if
use_winograd
:
self
.
convert_winograd_conv_filter_cpu
(
op
,
op_def
)
elif
self
.
device
==
'gpu'
:
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
buffer_type
=
"DW_CONV2D_FILTER"
\
if
is_depthwise
else
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
else
:
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
op_def
.
input
.
extend
([
weight_tensor_name
])
# Add Bias
if
len
(
op
.
data
)
==
2
:
bias_tensor_name
=
op
.
name
+
'_bias:0'
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
bias_tensor_name
])
paddings
,
strides
,
_
=
self
.
add_stride_pad_kernel_arg
(
param
,
op_def
)
dilations
=
[
1
,
1
]
if
len
(
param
.
dilation
)
>
0
:
dilation_arg
=
op_def
.
arg
.
add
()
dilation_arg
.
name
=
'dilations'
if
len
(
param
.
dilation
)
==
1
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
0
]]
elif
len
(
param
.
dilation
)
==
2
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
1
]]
dilation_arg
.
ints
.
extend
(
dilations
)
final_op
=
op
self
.
resolved_ops
.
add
(
op
.
name
)
input_format
=
'NCHW'
if
self
.
device
==
'cpu'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
weight_data
.
shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
and
\
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
final_op
=
activation_op
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
op_def
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
def
check_winograd_conv
(
self
,
op
):
param
=
op
.
layer
.
convolution_param
filter_shape
=
np
.
asarray
(
op
.
data
[
0
].
shape
)
if
self
.
device
!=
'cpu'
:
filter_shape
=
filter_shape
[[
2
,
3
,
0
,
1
]]
# OIHW -> HWOI
paddings
,
strides
,
_
=
self
.
add_stride_pad_kernel_arg
(
param
,
None
)
if
param
.
HasField
(
'group'
):
if
param
.
group
==
op
.
data
[
0
].
shape
[
0
]
and
op
.
data
[
0
].
shape
[
1
]
==
1
:
return
False
# Depthwise conv not support winograd
else
:
raise
Exception
(
"Mace do not support group convolution yet"
)
dilations
=
[
1
,
1
]
if
len
(
param
.
dilation
)
>
0
:
if
len
(
param
.
dilation
)
==
1
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
0
]]
elif
len
(
param
.
dilation
)
==
2
:
dilations
=
[
param
.
dilation
[
0
],
param
.
dilation
[
1
]]
input_format
=
'NCHW'
if
self
.
device
==
'cpu'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
filter_shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
if
self
.
winograd
and
dilations
[
0
]
==
1
and
\
(
dilations
[
0
]
==
dilations
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
]):
if
self
.
device
==
'gpu'
:
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
\
((
output_shape
[
2
]
+
1
)
/
2
)
return
filter_shape
[
0
]
==
3
and
\
filter_shape
[
0
]
==
filter_shape
[
1
]
and
\
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
width
<
OPENCL_IMAGE_MAX_SIZE
)
elif
self
.
device
==
'cpu'
:
return
filter_shape
[
2
]
==
3
and
\
filter_shape
[
2
]
==
filter_shape
[
3
]
and
\
filter_shape
[
0
]
>=
8
and
filter_shape
[
1
]
>=
8
return
False
def
convert_winograd_conv_filter_cpu
(
self
,
op
,
op_def
):
# Add filter
weight_tensor_name
=
op
.
name
+
'_weight:0'
weight_data
=
op
.
data
[
0
]
# OIHW
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
if
input_shape
[
2
]
>
16
and
input_shape
[
3
]
>
16
:
G
=
np
.
array
([
[
1.0
,
0.0
,
0.0
],
[
-
2.0
/
9
,
-
2.0
/
9
,
-
2.0
/
9
],
[
-
2.0
/
9
,
2.0
/
9
,
-
2.0
/
9
],
[
1.0
/
90
,
1.0
/
45
,
2.0
/
45
],
[
1.0
/
90
,
-
1.0
/
45
,
2.0
/
45
],
[
1.0
/
45
,
1.0
/
90
,
1.0
/
180
],
[
1.0
/
45
,
-
1.0
/
90
,
1.0
/
180
],
[
0.0
,
0.0
,
1.0
]
],
dtype
=
np
.
float32
)
new_shape
=
[
64
,
weight_data
.
shape
[
0
],
weight_data
.
shape
[
1
]]
# TOC
else
:
G
=
np
.
array
([
[
1.0
,
0.0
,
0.0
],
[
0.5
,
0.5
,
0.5
],
[
0.5
,
-
0.5
,
0.5
],
[
0.0
,
0.0
,
1.0
],
],
dtype
=
np
.
float32
)
new_shape
=
[
16
,
weight_data
.
shape
[
0
],
weight_data
.
shape
[
1
]]
# TOC
new_weight_value
=
G
.
dot
(
weight_data
).
dot
(
G
.
T
)
# [8, O, I, 8]
new_weight_value
=
new_weight_value
.
transpose
(
0
,
3
,
1
,
2
)
new_weight_value
=
new_weight_value
.
reshape
(
new_shape
)
self
.
add_tensor
(
weight_tensor_name
,
new_weight_value
)
op_def
.
input
.
extend
([
weight_tensor_name
])
winograd_transformed_arg
=
op_def
.
arg
.
add
()
winograd_transformed_arg
.
name
=
'is_filter_transformed'
winograd_transformed_arg
.
i
=
1
    def convert_winograd_conv_gpu(self, op):
        # Lower a GPU 3x3 Convolution into the three-op Winograd pipeline:
        # WinogradTransform -> MatMul(filter, transformed input)
        # -> WinogradInverseTransform (which also absorbs bias and a fused
        # activation when present).
        # Add filter
        weight_tensor_name = op.name + '_weight:0'
        self.add_tensor(weight_tensor_name, op.data[0])

        buffer_type = "WINOGRAD_FILTER"
        filter_name = self.add_buffer_to_image(weight_tensor_name,
                                               buffer_type)

        param = op.layer.convolution_param
        # op_def=None: only compute paddings/strides, add no args yet.
        paddings, strides, _ = self.add_stride_pad_kernel_arg(param, None)

        filter_shape = np.asarray(op.data[0].shape)
        filter_shape = filter_shape[[2, 3, 0, 1]]  # OIHW -> HWOI

        input_format = 'NHWC'
        output_shape = Shapes.conv_pool_shape(
            op.get_single_parent().output_shape_map[op.layer.bottom[0]],
            filter_shape, paddings, strides, [1, 1], math.floor,
            input_format)

        # Input transform
        wt_op = mace_pb2.OperatorDef()
        arg = wt_op.arg.add()
        arg.name = 'T'
        arg.i = self.dt
        padding_arg = wt_op.arg.add()
        padding_arg.name = 'padding_values'
        padding_arg.ints.extend(paddings)
        wt_op.name = op.name + '_input_transform'
        wt_op.type = 'WinogradTransform'
        wt_op.input.extend([name + ':0' for name in self.inputs_map[op.name]])
        wt_output_name = wt_op.name + ":0"
        wt_op.output.extend([wt_output_name])
        wt_output_shape = mace_pb2.OutputShape()
        # Number of 2x2 output tiles per image, times batch
        # (Python 2 `/` is integer division here).
        wt_output_width = output_shape[0] * ((output_shape[1] + 1) / 2) * \
            ((output_shape[2] + 1) / 2)
        wt_output_shape.dims.extend(
            [16, filter_shape[3], wt_output_width, 1])
        wt_op.output_shape.extend([wt_output_shape])

        # MatMul
        matmul_op = mace_pb2.OperatorDef()
        arg = matmul_op.arg.add()
        arg.name = 'T'
        arg.i = self.dt
        matmul_op.name = op.name + '_matmul'
        matmul_op.type = 'MatMul'
        matmul_op.input.extend([filter_name, wt_output_name])
        matmul_output_name = matmul_op.name + ":0"
        matmul_op.output.extend([matmul_output_name])
        matmul_output_shape = mace_pb2.OutputShape()
        matmul_output_shape.dims.extend(
            [16, filter_shape[2], wt_output_width, 1])
        matmul_op.output_shape.extend([matmul_output_shape])

        # Inverse transform: needs the target NHWC geometry as args.
        iwt_op = mace_pb2.OperatorDef()
        arg = iwt_op.arg.add()
        arg.name = 'T'
        arg.i = self.dt
        batch_arg = iwt_op.arg.add()
        batch_arg.name = 'batch'
        batch_arg.i = output_shape[0]
        height_arg = iwt_op.arg.add()
        height_arg.name = 'height'
        height_arg.i = output_shape[1]
        width_arg = iwt_op.arg.add()
        width_arg.name = 'width'
        width_arg.i = output_shape[2]
        iwt_op.name = op.name + '_inverse_transform'
        iwt_op.type = 'WinogradInverseTransform'
        iwt_op.input.extend([matmul_output_name])

        # Add Bias
        if len(op.data) == 2:
            bias_tensor_name = op.name + '_bias:0'
            bias_data = op.data[1].reshape(-1)
            self.add_tensor(bias_tensor_name, bias_data)
            output_name = self.add_buffer_to_image(bias_tensor_name,
                                                   "ARGUMENT")
            iwt_op.input.extend([output_name])

        final_op = op
        final_op.output_shape_map[final_op.layer.top[0]] = output_shape
        self.resolved_ops.add(op.name)

        # Fuse a single trailing activation child into the inverse
        # transform instead of emitting a separate op.
        if len(self.ops_map[final_op.name].children) == 1 and \
                self.ops_map[final_op.name].children[0].type \
                in activation_name_map:
            activation_op = self.ops_map[final_op.name].children[0]
            fused_act_arg = iwt_op.arg.add()
            fused_act_arg.name = 'activation'
            fused_act_arg.s = activation_name_map[activation_op.type]
            final_op = activation_op
            final_op.output_shape_map[final_op.layer.top[0]] = output_shape
            self.resolved_ops.add(activation_op.name)

        # The pipeline's last op adopts the (possibly fused) final name.
        iwt_op.output.extend([final_op.name + ':0'])
        self.add_output_shape(iwt_op, output_shape)
        self.net_def.op.extend([wt_op, matmul_op, iwt_op])
    def convert_batchnorm(self, op):
        # Fold a Caffe BatchNorm+Scale pair into a single FoldedBatchNorm
        # op with precomputed per-channel scale/offset tensors.
        if len(op.children) != 1 or op.children[0].type != 'Scale':
            raise Exception('Now only support BatchNorm+Scale')
        op_def = self.CommonConvert(op, 'FoldedBatchNorm')
        scale_op = op.children[0]
        epsilon_value = op.layer.batch_norm_param.eps
        # data[2][0] is Caffe's moving-average scale factor; the stored
        # mean/variance blobs must be divided by it.
        if op.data[2][0] != 0:
            mean_value = (1. / op.data[2][0]) * op.data[0]
            var_value = (1. / op.data[2][0]) * op.data[1]
        else:
            raise RuntimeError('scalar is zero.')

        gamma_value = scale_op.data[0]
        # Scale layer without a bias blob implies beta == 0.
        beta_value = np.zeros_like(mean_value)
        if len(scale_op.data) == 2:
            beta_value = scale_op.data[1]

        # Classic folding: y = scale * x + offset where
        # scale = gamma / sqrt(var + eps), offset = beta - mean * scale.
        scale_value = ((1.0 / np.vectorize(math.sqrt)(
            var_value + epsilon_value)) * gamma_value).reshape(-1)
        offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)
        input_names = [op.name + '_scale:0', op.name + '_offset:0']
        self.add_tensor(input_names[0], scale_value)
        self.add_tensor(input_names[1], offset_value)

        if self.device == 'gpu':
            # GPU consumes the constants as OpenCL image arguments.
            for name in input_names:
                output_name = self.add_buffer_to_image(name, "ARGUMENT")
                op_def.input.extend([output_name])
        else:
            op_def.input.extend([name for name in input_names])

        self.resolved_ops.add(op.name)
        self.resolved_ops.add(scale_op.name)
        final_op = scale_op

        # BatchNorm is shape-preserving: reuse the parent's output shape.
        output_shape = op.get_single_parent().output_shape_map[
            op.layer.bottom[0]]

        # Fuse a single trailing activation child into this op.
        if len(self.ops_map[final_op.name].children) == 1 and \
                self.ops_map[final_op.name].children[0].type \
                in activation_name_map:
            activation_op = self.ops_map[final_op.name].children[0]
            fused_act_arg = op_def.arg.add()
            fused_act_arg.name = 'activation'
            fused_act_arg.s = activation_name_map[activation_op.type]
            final_op = activation_op
            final_op.output_shape_map[final_op.layer.top[0]] = output_shape
            self.resolved_ops.add(activation_op.name)

        op_def.output.extend([final_op.name + ':0'])
        self.add_output_shape(op_def, output_shape)
        self.net_def.op.extend([op_def])
def
convert_inner_product
(
self
,
op
):
param
=
op
.
layer
.
inner_product_param
try
:
if
param
.
axis
!=
1
or
param
.
transpose
:
raise
ValueError
(
'Do not support non-default axis and transpose '
'case for innner product'
)
except
AttributeError
:
pass
op_def
=
self
.
CommonConvert
(
op
,
'FC'
)
weight_tensor_name
=
op
.
name
+
'_weight:0'
if
op
.
data
[
0
].
ndim
not
in
[
2
,
4
]:
raise
ValueError
(
'Unexpected weigth ndim.'
)
if
op
.
data
[
0
].
ndim
==
4
and
list
(
op
.
data
[
0
].
shape
[:
2
])
!=
[
1
,
1
]:
raise
ValueError
(
'Do not support 4D weight with shape [1, 1, *, *]'
)
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
weight_data
=
op
.
data
[
0
].
reshape
(
-
1
,
op
.
data
[
0
].
shape
[
-
1
])
assert
weight_data
.
shape
[
1
]
==
(
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
])
if
self
.
device
!=
'cpu'
:
weight_data
=
weight_data
.
reshape
(
-
1
,
input_shape
[
3
],
input_shape
[
1
],
input_shape
[
2
])
weight_data
=
weight_data
.
transpose
((
0
,
2
,
3
,
1
)).
reshape
(
weight_data
.
shape
[
0
],
-
1
)
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
if
self
.
device
==
'gpu'
:
if
(
weight_data
.
shape
[
0
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
and
\
(
weight_data
.
shape
[
1
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
:
raise
Exception
(
'Mace gpu do not support FC with weight shape: '
+
str
(
weight_data
.
shape
))
if
input_shape
[
3
]
%
4
==
0
:
buffer_type
=
"WEIGHT_WIDTH"
else
:
buffer_type
=
"WEIGHT_HEIGHT"
weight_type_arg
=
op_def
.
arg
.
add
()
weight_type_arg
.
name
=
'weight_type'
weight_type_arg
.
i
=
buffer_type_map
[
'WEIGHT_HEIGHT'
]
if
buffer_type
==
"WEIGHT_HEIGHT"
and
\
(
weight_data
.
shape
[
0
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
:
raise
Exception
(
'Mace gpu do not support FC with weight shape: '
+
str
(
weight_data
.
shape
))
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
weight_tensor_name
])
# Add Bias
if
len
(
op
.
data
)
==
2
:
bias_tensor_name
=
op
.
name
+
'_bias:0'
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
bias_tensor_name
])
self
.
resolved_ops
.
add
(
op
.
name
)
input_format
=
'NCHW'
if
self
.
device
==
'cpu'
else
'NHWC'
output_shape
=
Shapes
.
fully_connected_shape
(
input_shape
,
weight_data
.
shape
,
input_format
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
final_op
=
op
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
\
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
s
=
activation_name_map
[
activation_op
.
type
]
final_op
=
activation_op
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
op_def
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
def
convert_pooling
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'Pooling'
)
param
=
op
.
layer
.
pooling_param
paddings
,
strides
,
kernels
=
self
.
add_stride_pad_kernel_arg
(
param
,
op_def
)
if
param
.
pool
==
caffe_pb2
.
PoolingParameter
.
MAX
:
pooling_type
=
"MaxPool"
elif
param
.
pool
==
caffe_pb2
.
PoolingParameter
.
AVE
:
pooling_type
=
"AvgPool"
pooling_type_arg
=
op_def
.
arg
.
add
()
pooling_type_arg
.
name
=
'pooling_type'
pooling_type_arg
.
i
=
pooling_type_mode
[
pooling_type
]
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
if
param
.
HasField
(
'global_pooling'
)
and
param
.
global_pooling
:
kernels
=
[
input_shape
[
2
],
input_shape
[
3
]]
\
if
self
.
device
==
'cpu'
else
\
[
input_shape
[
1
],
input_shape
[
2
]]
kernel_arg
=
op_def
.
arg
.
add
()
kernel_arg
.
name
=
'kernels'
kernel_arg
.
ints
.
extend
(
kernels
)
if
self
.
device
!=
'cpu'
:
filter_shape
=
[
kernels
[
0
],
kernels
[
1
],
input_shape
[
3
],
input_shape
[
3
]
]
else
:
filter_shape
=
[
input_shape
[
1
],
input_shape
[
1
],
kernels
[
0
],
kernels
[
1
]
]
input_format
=
'NCHW'
if
self
.
device
==
'cpu'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
input_shape
,
filter_shape
,
paddings
,
strides
,
[
1
,
1
],
math
.
ceil
,
input_format
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
def
convert_activation
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'Activation'
)
activation_arg
=
op_def
.
arg
.
add
()
activation_arg
.
name
=
'activation'
activation_arg
.
s
=
activation_name_map
[
op
.
type
]
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
def
convert_prelu
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'Activation'
)
activation_arg
=
op_def
.
arg
.
add
()
activation_arg
.
name
=
'activation'
activation_arg
.
s
=
'PRELU'
alpha_tensor_name
=
op
.
name
+
'_alpha:0'
alpha_data
=
op
.
data
[
0
].
reshape
(
-
1
)
self
.
add_tensor
(
alpha_tensor_name
,
alpha_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
alpha_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
alpha_tensor_name
])
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
def
convert_add
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'AddN'
)
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
output_shape
=
op
.
parents
[
0
].
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
def
convert_concat
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'Concat'
)
axis_arg
=
op_def
.
arg
.
add
()
axis_arg
.
name
=
'axis'
axis_arg
.
i
=
3
if
self
.
device
!=
'cpu'
else
1
try
:
if
op
.
layer
.
concat_param
.
HasFeild
(
'axis'
):
axis_arg
.
i
=
op
.
concat_param
.
axis
elif
op
.
layer
.
concat_param
.
HasFeild
(
'concat_dim'
):
axis_arg
.
i
=
op
.
concat_param
.
concat_dim
except
AttributeError
:
pass
input_shapes
=
[]
for
i
in
range
(
len
(
op
.
parents
)):
input_shapes
.
append
(
op
.
parents
[
i
].
output_shape_map
[
op
.
layer
.
bottom
[
i
]])
output_shape
=
Shapes
.
concat_shape
(
input_shapes
,
axis_arg
.
i
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
def
convert_eltwise
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'Eltwise'
)
param
=
op
.
layer
.
eltwise_param
type_arg
=
op_def
.
arg
.
add
()
type_arg
.
name
=
'type'
type_arg
.
i
=
math_type_mode
[
param
.
operation
]
if
len
(
param
.
coeff
)
>
0
:
coeff_arg
=
op_def
.
arg
.
add
()
coeff_arg
.
name
=
'coeff'
coeff_arg
.
floats
.
extend
(
list
(
param
.
coeff
))
output_shape
=
op
.
parents
[
0
].
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
    def convert_slice(self, op):
        # Convert a Caffe Slice layer (even channel split) to a MACE
        # Slice op. Only axis==1 (channels) without explicit slice
        # points is supported; each top gets an equal share.
        op_def = self.CommonConvert(op, 'Slice')
        if op.layer.HasField('slice_param'):
            param = op.layer.slice_param
            if param.HasField('axis') and param.axis != 1:
                raise Exception('Mace do not support slice with axis ' +
                                str(param.axis))
            if len(param.slice_point) > 0:
                raise Exception('Mace do not support slice with slice_point')

        # Channel axis of the device layout: 3 for NHWC, 1 for NCHW/cpu.
        axis_arg = op_def.arg.add()
        axis_arg.name = 'axis'
        axis_arg.i = 3 if self.device != 'cpu' else 1

        input_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
        num_outputs = len(op.layer.top)
        input_channels = input_shape[axis_arg.i]
        # Channels must divide evenly; on GPU each slice must also be a
        # multiple of 4 to fit the RGBA image layout.
        if (input_channels % num_outputs) != 0 or \
                (self.device == 'gpu' and
                 ((input_channels / num_outputs) % 4 != 0)):
            raise Exception(
                'Mace do not support slice with input shape ' +
                str(input_shape) + ' and number of output ' +
                str(num_outputs))
        input_format = 'NCHW' if self.device == 'cpu' else 'NHWC'
        output_shape = Shapes.slice_shape(input_shape, num_outputs,
                                          input_format)
        # Every top shares the same (evenly split) output shape.
        for i in range(len(op.layer.top)):
            op.output_shape_map[op.layer.top[i]] = output_shape
            self.add_output_shape(op_def, output_shape)
            op_def.output.extend([op.name + '_' + str(i) + ':0'])

        self.net_def.op.extend([op_def])
        self.resolved_ops.add(op.name)
def
convert_normal_op
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
op
.
type
)
output_shape
=
op
.
parents
[
0
].
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
def
convert_reshape
(
self
,
op
):
op_def
=
self
.
CommonConvert
(
op
,
'Reshape'
)
input_shape
=
op
.
parents
[
0
].
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
output_shape
=
input_shape
shape_param
=
np
.
asarray
(
op
.
layer
.
reshape_param
.
shape
.
dim
)
for
i
in
range
(
len
(
shape_param
)):
if
shape_param
[
i
]
!=
0
:
output_shape
[
i
]
=
shape_param
[
i
]
shape_arg
=
op_def
.
arg
.
add
()
shape_arg
.
name
=
'shape'
shape_arg
.
ints
.
extend
(
output_shape
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
self
.
net_def
.
op
.
extend
([
op_def
])
self
.
resolved_ops
.
add
(
op
.
name
)
    def convert_proposal_op(self, op):
        # Convert an RCNN-style Proposal layer (cpu only), forwarding
        # feat_stride/scales/ratios as op args.
        assert self.device == 'cpu'
        op_def = self.CommonConvert(op, op.type)
        if op.layer.HasField('proposal_param'):
            proposal_param = op.layer.proposal_param
            feat_stride_arg = op_def.arg.add()
            feat_stride_arg.name = 'feat_stride'
            feat_stride_arg.i = proposal_param.feat_stride
            scales_arg = op_def.arg.add()
            scales_arg.name = 'scales'
            scales_arg.ints.extend(list(proposal_param.scales))
            ratios_arg = op_def.arg.add()
            ratios_arg.name = 'ratios'
            ratios_arg.floats.extend(list(proposal_param.ratios))
        # NOTE(review): reuses the parent's shape verbatim — confirm the
        # runtime recomputes the true proposal count.
        output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
        op.output_shape_map[op.layer.top[0]] = output_shape
        self.add_output_shape(op_def, output_shape)
        op_def.output.extend([op.name + ':0'])
        self.net_def.op.extend([op_def])
        self.resolved_ops.add(op.name)
    def convert_psroi_align(self, op):
        # Convert a position-sensitive ROI Align layer (cpu only),
        # forwarding spatial_scale/output_dim/group_size as op args.
        assert self.device == 'cpu'
        op_def = self.CommonConvert(op, op.type)
        if op.layer.HasField('psroi_align_param'):
            psroi_align_param = op.layer.psroi_align_param
            spatial_scale_arg = op_def.arg.add()
            spatial_scale_arg.name = 'spatial_scale'
            spatial_scale_arg.f = psroi_align_param.spatial_scale
            output_dim_arg = op_def.arg.add()
            output_dim_arg.name = 'output_dim'
            output_dim_arg.i = psroi_align_param.output_dim
            group_size_arg = op_def.arg.add()
            group_size_arg.name = 'group_size'
            group_size_arg.i = psroi_align_param.group_size
        # NOTE(review): reuses the parent's shape verbatim — confirm the
        # runtime derives the pooled output geometry itself.
        output_shape = op.parents[0].output_shape_map[op.layer.bottom[0]]
        op.output_shape_map[op.layer.top[0]] = output_shape
        self.add_output_shape(op_def, output_shape)
        op_def.output.extend([op.name + ':0'])
        self.net_def.op.extend([op_def])
        self.resolved_ops.add(op.name)
def
replace_in_out_name
(
self
,
input_names
,
output_names
):
in_names
=
set
([
input_name
+
":0"
for
input_name
in
input_names
])
out_names
=
set
([
output_name
+
":0"
for
output_name
in
output_names
])
for
op
in
self
.
net_def
.
op
:
for
i
in
range
(
len
(
op
.
input
)):
if
op
.
input
[
i
]
in
in_names
:
op
.
input
[
i
]
=
MACE_INPUT_NODE_NAME
+
'_'
+
op
.
input
[
i
]
if
op
.
input
[
i
]
in
out_names
:
op
.
input
[
i
]
=
MACE_OUTPUT_NODE_NAME
+
'_'
+
op
.
input
[
i
]
for
i
in
range
(
len
(
op
.
output
)):
if
op
.
output
[
i
]
in
in_names
:
op
.
output
[
i
]
=
MACE_INPUT_NODE_NAME
+
'_'
+
op
.
output
[
i
]
if
op
.
output
[
i
]
in
out_names
:
op
.
output
[
i
]
=
MACE_OUTPUT_NODE_NAME
+
'_'
+
op
.
output
[
i
]
    def add_input_op_shape(self, input_nodes, input_shapes):
        # Record the user-provided input shapes on the Input ops.
        # Shapes arrive in NHWC order; for cpu they are permuted to NCHW.
        assert len(input_nodes) == len(input_shapes)
        for i in range(len(input_nodes)):
            input_op = self.ops_map[input_nodes[i]]
            input_shape = input_shapes[i] if self.device != 'cpu' else \
                [input_shapes[i][0], input_shapes[i][3],
                 input_shapes[i][1], input_shapes[i][2]]
            # Synthetic Input ops may have no Caffe layer; key the shape
            # map by the op name instead of the layer's top blob.
            if input_op.layer is not None:
                input_op.output_shape_map[input_op.layer.top[0]] = input_shape
            else:
                input_op.output_shape_map[input_op.name] = input_shape
    def add_cpu_input_transform(self, names):
        # Insert a Transpose op per model input that converts the
        # user-facing NHWC tensor into the cpu runtime's NCHW layout.
        for name in names:
            new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
            op_def = self.net_def.op.add()
            op_def.name = name
            op_def.type = 'Transpose'
            op_def.input.extend([new_input_name])
            op_def.output.extend([name + ':0'])

            dims_arg = op_def.arg.add()
            dims_arg.name = 'dims'
            dims_arg.ints.extend([0, 3, 1, 2])  # NHWC -> NCHW

            arg = op_def.arg.add()
            arg.name = 'T'
            arg.i = self.dt

            # The recorded shape is already NCHW (see add_input_op_shape).
            input_op = self.ops_map[name]
            if input_op.layer is not None:
                output_shape = input_op.output_shape_map[
                    input_op.layer.top[0]]
            else:
                output_shape = input_op.output_shape_map[input_op.name]
            self.add_output_shape(op_def, output_shape)
    def add_cpu_output_transform(self, names):
        # Insert a Transpose op per model output that converts the cpu
        # runtime's NCHW tensor back to the user-facing NHWC layout.
        for name in names:
            output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
            op_def = self.net_def.op.add()
            op_def.name = output_name[:-2]  # strip trailing ':0'
            op_def.type = 'Transpose'
            op_def.input.extend([name + ':0'])
            op_def.output.extend([output_name])

            dims_arg = op_def.arg.add()
            dims_arg.name = 'dims'
            dims_arg.ints.extend([0, 2, 3, 1])  # NCHW -> NHWC

            input_op = self.ops_map[name]
            if input_op.layer is not None:
                output_shape = input_op.output_shape_map[
                    input_op.layer.top[0]]
            else:
                output_shape = input_op.output_shape_map[input_op.name]
            # Declared shape is the NHWC permutation of the NCHW shape.
            self.add_output_shape(op_def,
                                  [output_shape[0], output_shape[2],
                                   output_shape[3], output_shape[1]])
    def convert(self, input_nodes, input_shapes, output_nodes):
        # Main driver: seed input shapes, insert device-specific layout
        # transforms, then dispatch each Caffe layer to its converter.
        # Converters mark handled layers in self.resolved_ops (fused
        # children included), so already-resolved ops are skipped.
        assert self.ops[0].type == 'Input'
        self.add_input_op_shape(input_nodes, input_shapes)
        if self.device == 'gpu':
            self.add_gpu_input_transform(input_nodes)
        if self.device == 'cpu':
            self.add_cpu_input_transform(input_nodes)

        for op in self.ops:
            if op.name in self.resolved_ops:
                continue
            if op.type == 'Input':
                self.resolved_ops.add(op.name)
            elif op.type == 'Convolution':
                # Prefer the Winograd pipeline on GPU when eligible.
                if self.device == 'gpu' and self.check_winograd_conv(op):
                    self.convert_winograd_conv_gpu(op)
                else:
                    self.convert_conv2d(op)
            elif op.type == 'BatchNorm':
                self.convert_batchnorm(op)
            elif op.type == 'InnerProduct':
                self.convert_inner_product(op)
            elif op.type == 'Pooling':
                self.convert_pooling(op)
            elif op.type == 'PReLU':
                self.convert_prelu(op)
            elif op.type in ['ReLU', 'Sigmoid', 'TanH']:
                self.convert_activation(op)
            elif op.type == 'Add':
                self.convert_add(op)
            elif op.type == 'Concat':
                self.convert_concat(op)
            elif op.type == 'Eltwise':
                self.convert_eltwise(op)
            elif op.type == 'Slice':
                self.convert_slice(op)
            elif op.type == 'Reshape':
                self.convert_reshape(op)
            elif op.type == 'Proposal':
                self.convert_proposal_op(op)
            elif op.type == 'PSROIAlign':
                self.convert_psroi_align(op)
            elif op.type in ['Softmax']:
                self.convert_normal_op(op)
            else:
                raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                              op.type))

        if self.device == 'gpu':
            self.add_gpu_output_transform(output_nodes)

        if self.device == 'cpu':
            self.add_cpu_output_transform(output_nodes)

        # Warn (Python 2 print) about layers no converter claimed.
        for op in self.ops:
            if op.name not in self.resolved_ops:
                print 'Unresolve Op: %s with type %s' % (op.name, op.type)
def convert_to_mace_pb(model_file, weight_file, input_node_str,
                       input_shape_str, output_node_str, data_type, device,
                       winograd):
    # Entry point: parse the Caffe prototxt (text) and caffemodel
    # (binary) files, convert to a MACE NetDef, run the device-specific
    # memory optimizer, and return the NetDef.
    # input_node_str/output_node_str: comma-separated node names;
    # input_shape_str: colon-separated shapes, each comma-separated
    # (e.g. "1,224,224,3:1,32,32,3").
    net_def = mace_pb2.NetDef()
    dt = data_type_map[data_type]

    caffe_net = caffe_pb2.NetParameter()
    with open(model_file, "r") as f:
        google.protobuf.text_format.Merge(str(f.read()), caffe_net)

    weights = caffe_pb2.NetParameter()
    with open(weight_file, "rb") as f:
        weights.MergeFromString(f.read())

    input_nodes = [x for x in input_node_str.split(',')]
    input_shapes = []
    if input_shape_str != "":
        input_shape_strs = [x for x in input_shape_str.split(':')]
        for shape_str in input_shape_strs:
            input_shapes.extend([[int(x) for x in shape_str.split(',')]])
    output_nodes = [x for x in output_node_str.split(',')]
    # Every input node must come with a shape.
    assert len(input_nodes) == len(input_shapes)

    converter = CaffeConverter(caffe_net, weights, net_def, dt, device,
                               winograd)
    converter.convert(input_nodes, input_shapes, output_nodes)
    print "PB Converted."
    if device == 'gpu':
        print "start optimize memory."
        memory_optimizer.optimize_gpu_memory(net_def)
        print "Memory optimization done."
    elif device == 'cpu':
        print "start optimize memory."
        memory_optimizer.optimize_cpu_memory(net_def)
        print "Memory optimization done."

    return net_def
mace/python/tools/convert_util.py
浏览文件 @
c3837858
...
@@ -12,6 +12,7 @@
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
tensorflow
as
tf
import
tensorflow
as
tf
from
mace.proto
import
mace_pb2
from
mace.proto
import
mace_pb2
...
@@ -40,3 +41,8 @@ def tf_dtype_2_mace_dtype(tf_dtype):
...
@@ -40,3 +41,8 @@ def tf_dtype_2_mace_dtype(tf_dtype):
if
not
mace_dtype
:
if
not
mace_dtype
:
raise
Exception
(
"Not supported tensorflow dtype: "
+
tf_dtype
)
raise
Exception
(
"Not supported tensorflow dtype: "
+
tf_dtype
)
return
mace_dtype
return
mace_dtype
def mace_check(condition, msg):
    """Raise a generic ``Exception`` carrying ``msg`` when ``condition``
    is falsy; otherwise do nothing (assert-style guard helper)."""
    if condition:
        return
    raise Exception(msg)
mace/python/tools/converter.py
浏览文件 @
c3837858
...
@@ -16,7 +16,16 @@ import argparse
...
@@ -16,7 +16,16 @@ import argparse
import
sys
import
sys
import
hashlib
import
hashlib
import
os.path
import
os.path
from
mace.proto
import
mace_pb2
from
mace.python.tools
import
tf_dsp_converter_lib
from
mace.python.tools
import
memory_optimizer
from
mace.python.tools
import
source_converter_lib
from
mace.python.tools
import
source_converter_lib
from
mace.python.tools.converter_tool
import
base_converter
as
cvt
from
mace.python.tools.converter_tool
import
tensorflow_converter
from
mace.python.tools.converter_tool
import
caffe_converter
from
mace.python.tools.converter_tool
import
transformer
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
# --output quantized_test_dsp.pb \
...
@@ -25,6 +34,12 @@ from mace.python.tools import source_converter_lib
...
@@ -25,6 +34,12 @@ from mace.python.tools import source_converter_lib
FLAGS
=
None
FLAGS
=
None
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
device_type_map
=
{
'cpu'
:
mace_pb2
.
CPU
,
'gpu'
:
mace_pb2
.
GPU
,
'dsp'
:
mace_pb2
.
HEXAGON
}
def
file_checksum
(
fname
):
def
file_checksum
(
fname
):
hash_func
=
hashlib
.
sha256
()
hash_func
=
hashlib
.
sha256
()
...
@@ -34,6 +49,10 @@ def file_checksum(fname):
...
@@ -34,6 +49,10 @@ def file_checksum(fname):
return
hash_func
.
hexdigest
()
return
hash_func
.
hexdigest
()
def
parse_int_array_from_str
(
ints_str
):
return
[
int
(
int_str
)
for
int_str
in
ints_str
.
split
(
','
)]
def
main
(
unused_args
):
def
main
(
unused_args
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
...
@@ -59,27 +78,64 @@ def main(unused_args):
...
@@ -59,27 +78,64 @@ def main(unused_args):
(
weight_checksum
,
FLAGS
.
weight_checksum
))
(
weight_checksum
,
FLAGS
.
weight_checksum
))
sys
.
exit
(
-
1
)
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
==
'dsp'
:
if
FLAGS
.
platform
not
in
[
'tensorflow'
,
'caffe'
]:
print
(
"DSP not support caffe model yet."
)
print
(
"platform %s is not supported."
%
FLAGS
.
platform
)
sys
.
exit
(
-
1
)
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
not
in
[
'cpu'
,
'gpu'
,
'dsp'
]:
print
(
"runtime %s is not supported."
%
FLAGS
.
runtime
)
sys
.
exit
(
-
1
)
from
mace.python.tools
import
caffe_converter_lib
if
FLAGS
.
runtime
==
'dsp'
:
output_graph_def
=
caffe_converter_lib
.
convert_to_mace_pb
(
if
FLAGS
.
platform
==
'tensorflow'
:
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
elif
FLAGS
.
platform
==
'tensorflow'
:
if
FLAGS
.
runtime
==
'dsp'
:
from
mace.python.tools
import
tf_dsp_converter_lib
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
FLAGS
.
dsp_mode
)
else
:
else
:
from
mace.python.tools
import
tf_converter_lib
print
(
"%s does not support dsp runtime yet."
%
FLAGS
.
platform
)
output_graph_def
=
tf_converter_lib
.
convert_to_mace_pb
(
sys
.
exit
(
-
1
)
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
else
:
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
option
=
cvt
.
ConverterOption
()
FLAGS
.
winograd
)
option
.
data_type
=
data_type_map
[
FLAGS
.
data_type
]
option
.
device
=
device_type_map
[
FLAGS
.
runtime
]
option
.
winograd_enabled
=
bool
(
FLAGS
.
winograd
)
input_node_names
=
FLAGS
.
input_node
.
split
(
','
)
input_node_shapes
=
FLAGS
.
input_shape
.
split
(
':'
)
if
len
(
input_node_names
)
!=
len
(
input_node_shapes
):
raise
Exception
(
'input node count and shape count do not match.'
)
for
i
in
xrange
(
len
(
input_node_names
)):
input_node
=
cvt
.
NodeInfo
()
input_node
.
name
=
input_node_names
[
i
]
input_node
.
shape
=
parse_int_array_from_str
(
FLAGS
.
input_shape
)
option
.
add_input_node
(
input_node
)
output_node_names
=
FLAGS
.
output_node
.
split
(
','
)
for
i
in
xrange
(
len
(
output_node_names
)):
output_node
=
cvt
.
NodeInfo
()
output_node
.
name
=
output_node_names
[
i
]
option
.
add_output_node
(
output_node
)
print
(
"Convert model to mace model."
)
if
FLAGS
.
platform
==
'tensorflow'
:
converter
=
tensorflow_converter
.
TensorflowConverter
(
option
,
FLAGS
.
model_file
)
# noqa
elif
FLAGS
.
platform
==
'caffe'
:
converter
=
caffe_converter
.
CaffeConverter
(
option
,
FLAGS
.
model_file
,
FLAGS
.
weight_file
)
output_graph_def
=
converter
.
run
()
print
(
"Transform model to one that can better run on device."
)
# TODO(liuqi/liyin): transform gpu/cpu and merge their ops
mace_transformer
=
transformer
.
Transformer
(
option
,
output_graph_def
)
output_graph_def
=
mace_transformer
.
run
()
print
"start optimize memory."
if
FLAGS
.
runtime
==
'gpu'
:
memory_optimizer
.
optimize_gpu_memory
(
output_graph_def
)
elif
FLAGS
.
runtime
==
'cpu'
:
memory_optimizer
.
optimize_cpu_memory
(
output_graph_def
)
print
"Memory optimization done."
if
FLAGS
.
output_type
==
'source'
:
if
FLAGS
.
output_type
==
'source'
:
source_converter_lib
.
convert_to_source
(
source_converter_lib
.
convert_to_source
(
...
...
mace/python/tools/converter_tool/__init__.py
0 → 100644
浏览文件 @
c3837858
mace/python/tools/converter_tool/base_converter.py
0 → 100644
浏览文件 @
c3837858
from
enum
import
Enum
from
mace.proto
import
mace_pb2
class DataFormat(Enum):
    # Memory layout of activation tensors.
    NHWC = 0
    NCHW = 1
class FilterFormat(Enum):
    # Memory layout of convolution filter tensors
    # (H=height, W=width, I=input channels, O=output channels).
    HWIO = 0
    OIHW = 1
    HWOI = 2
class PaddingMode(Enum):
    # Convolution/pooling padding policies.
    VALID = 0
    SAME = 1
    FULL = 2
class PoolingType(Enum):
    # Pooling reduction kinds (values start at 1, matching the runtime).
    AVG = 1
    MAX = 2
class ActivationType(Enum):
    # Activation kinds; NOOP means no activation is fused.
    NOOP = 0
    RELU = 1
    RELUX = 2
    PRELU = 3
    TANH = 4
    SIGMOID = 5
class EltwiseType(Enum):
    # Element-wise operation kinds for the Eltwise op.
    SUM = 0
    SUB = 1
    PROD = 2
    DIV = 3
    MIN = 4
    MAX = 5
    NEG = 6
    ABS = 7
    SQR_DIFF = 8
    POW = 9
# Canonical list of op type names the MACE runtime understands.
MaceSupportedOps = [
    'Activation',
    'AddN',
    'BatchNorm',
    'BatchToSpaceND',
    'BiasAdd',
    'ChannelShuffle',
    'Concat',
    'Conv2D',
    'Deconv2D',
    'DepthToSpace',
    'DepthwiseConv2d',
    'Dequantize',
    'Eltwise',
    'FoldedBatchNorm',
    'FullyConnected',
    'LocalResponseNorm',
    'MatMul',
    'Pad',
    'Pooling',
    'Proposal',
    'PSROIAlign',
    'Quantize',
    'Requantize',
    'Reshape',
    'ResizeBilinear',
    'Slice',
    'Softmax',
    'SpaceToBatchND',
    'SpaceToDepth',
    'Transpose',
    'WinogradInverseTransform',
    'WinogradTransform',
]

# String-valued enum built from the list above (type=str makes each
# member usable directly as its op-type string).
MaceOp = Enum('MaceOp', [(op, op) for op in MaceSupportedOps], type=str)
class MaceKeyword(object):
    """Namespace of string constants used for MACE node and arg names."""
    # node related str
    mace_input_node_name = 'mace_input_node'
    mace_output_node_name = 'mace_output_node'
    mace_buffer_type = 'buffer_type'
    mace_mode = 'mode'
    mace_buffer_to_image = 'BufferToImage'
    mace_image_to_buffer = 'ImageToBuffer'
    # arg related str
    mace_padding_str = 'padding'
    mace_padding_values_str = 'padding_values'
    mace_strides_str = 'strides'
    mace_dilations_str = 'dilations'
    mace_pooling_type_str = 'pooling_type'
    mace_global_pooling_str = 'global_pooling'
    mace_kernel_str = 'kernels'
    mace_data_format_str = 'data_format'
    mace_filter_format_str = 'filter_format'
    mace_element_type_str = 'type'
    mace_activation_type_str = 'activation'
    mace_activation_max_limit_str = 'max_limit'
    mace_resize_size_str = 'size'
    mace_batch_to_space_crops_str = 'crops'
    mace_paddings_str = 'paddings'
    mace_align_corners_str = 'align_corners'
    mace_space_batch_block_shape_str = 'block_shape'
    mace_space_depth_block_size_str = 'block_size'
    mace_constant_value_str = 'constant_value'
    mace_dims_str = 'dims'
    mace_axis_str = 'axis'
    mace_shape_str = 'shape'
    mace_winograd_filter_transformed = 'is_filter_transformed'
class ConverterInterface(object):
    """Base class for converting external models to mace models."""

    def run(self):
        # Subclasses must implement the actual conversion and return
        # the resulting model definition.
        raise NotImplementedError('run')
class NodeInfo(object):
    """Describes a graph node: its name and its tensor shape.

    Both attributes are exposed as read/write properties; a fresh
    instance has no name (``None``) and an empty shape list.
    """

    def __init__(self):
        self._name = None
        self._shape = []

    @property
    def name(self):
        """The node's name, or ``None`` if not yet assigned."""
        return self._name

    @name.setter
    def name(self, name):
        self._name = name

    @property
    def shape(self):
        """The node's shape as a list of dimensions."""
        return self._shape

    @shape.setter
    def shape(self, shape):
        self._shape = shape

    def __str__(self):
        # "<name> <shape>" — e.g. "input [1, 224, 224, 3]".
        return '%s %s' % (self._name, str(self._shape))
class ConverterOption(object):
    """Options passed to the converter tool.

    Holds the user-declared input/output nodes (keyed by node name), the
    requested data type, target device and whether winograd convolution
    is enabled.
    """

    def __init__(self):
        self._input_nodes = {}    # name -> NodeInfo
        self._output_nodes = {}   # name -> NodeInfo
        self._data_type = mace_pb2.DT_FLOAT
        self._device = mace_pb2.CPU
        self._winograd_enabled = False

    @property
    def input_nodes(self):
        return self._input_nodes

    @property
    def output_nodes(self):
        return self._output_nodes

    @property
    def data_type(self):
        return self._data_type

    @property
    def device(self):
        return self._device

    @property
    def winograd_enabled(self):
        return self._winograd_enabled

    @input_nodes.setter
    def input_nodes(self, input_nodes):
        """Index the given nodes by name (does not clear existing entries)."""
        for node in input_nodes:
            self._input_nodes[node.name] = node

    def add_input_node(self, input_node):
        self._input_nodes[input_node.name] = input_node

    @output_nodes.setter
    def output_nodes(self, output_nodes):
        """Index the given nodes by name (does not clear existing entries)."""
        for node in output_nodes:
            # Fix: write to the backing dict directly, consistent with the
            # input_nodes setter (the original wrote through the property
            # getter, which only worked because the getter returns the dict).
            self._output_nodes[node.name] = node

    def add_output_node(self, output_node):
        self._output_nodes[output_node.name] = output_node

    @data_type.setter
    def data_type(self, data_type):
        self._data_type = data_type

    @device.setter
    def device(self, device):
        self._device = device

    @winograd_enabled.setter
    def winograd_enabled(self, winograd_enabled):
        self._winograd_enabled = winograd_enabled
class ConverterUtil(object):
    """Static helpers for reading and writing proto `arg` entries on ops
    and NetDefs (data format and filter format in particular)."""

    @staticmethod
    def get_arg(op, arg_name):
        """Return the arg of `op` named `arg_name`, or None when absent."""
        return next((a for a in op.arg if a.name == arg_name), None)

    @staticmethod
    def add_data_format_arg(op, data_format):
        """Attach a data_format arg (enum value) to `op`."""
        fmt_arg = op.arg.add()
        fmt_arg.name = MaceKeyword.mace_data_format_str
        fmt_arg.i = data_format.value

    @staticmethod
    def data_format(op):
        """Read the data format of `op`; None when absent or unrecognized."""
        arg = ConverterUtil.get_arg(op, MaceKeyword.mace_data_format_str)
        if arg is None:
            return None
        for fmt in (DataFormat.NHWC, DataFormat.NCHW):
            if arg.i == fmt.value:
                return fmt
        return None

    @staticmethod
    def set_filter_format(net, filter_format):
        """Record the filter layout of `net` as a net-level arg."""
        fmt_arg = net.arg.add()
        fmt_arg.name = MaceKeyword.mace_filter_format_str
        fmt_arg.i = filter_format.value

    @staticmethod
    def filter_format(net):
        """Read the filter layout of `net`; None when absent/unrecognized."""
        arg = ConverterUtil.get_arg(net, MaceKeyword.mace_filter_format_str)
        if arg is None:
            return None
        for fmt in (FilterFormat.HWIO, FilterFormat.HWOI, FilterFormat.OIHW):
            if arg.i == fmt.value:
                return fmt
        return None
mace/python/tools/converter_tool/caffe_converter.py
0 → 100644
浏览文件 @
c3837858
import
math
import
numpy
as
np
import
google.protobuf.text_format
from
mace.proto
import
mace_pb2
from
mace.third_party.caffe
import
caffe_pb2
from
mace.python.tools.converter_tool
import
base_converter
from
mace.python.tools.converter_tool
import
shape_inference
from
mace.python.tools.converter_tool.base_converter
import
PoolingType
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
# Field names of caffe layer parameters (see caffe.proto), used with
# param.HasField(...) below.
caffe_group_str = 'group'
caffe_kernel_h_str = 'kernel_h'
caffe_kernel_w_str = 'kernel_w'
caffe_stride_h_str = 'stride_h'
caffe_stride_w_str = 'stride_w'
caffe_pad_h_str = 'pad_h'
caffe_pad_w_str = 'pad_w'
class CaffeOperator(object):
    """CaffeOperator merges and provides both layer and weights information.
    Layer records caffe layer proto, while blobs records the weight data in
    format of numpy ndarray.
    """

    def __init__(self):
        self._layer = None   # caffe LayerParameter proto
        self._blobs = None   # list of numpy float32 arrays, or None

    @property
    def layer(self):
        return self._layer

    @layer.setter
    def layer(self, layer):
        self._layer = layer

    @property
    def name(self):
        return self._layer.name

    @property
    def type(self):
        return self._layer.type

    @property
    def blobs(self):
        return self._blobs

    @blobs.setter
    def blobs(self, blobs):
        # Eagerly convert every proto blob to a numpy array.
        self._blobs = [self.blob_to_nparray(b) for b in blobs]

    def get_blob(self, index):
        mace_check(index < len(self._blobs), "blob out of index")
        return self._blobs[index]

    @staticmethod
    def blob_to_nparray(blob):
        """Convert a caffe BlobProto to a float32 ndarray, honoring both the
        legacy (num/channels/height/width) and new (shape.dim) layouts."""
        values = np.asarray(blob.data, dtype=np.float32)
        if blob.num != 0:
            # legacy 4-D description
            return values.reshape(
                (blob.num, blob.channels, blob.height, blob.width))
        return values.reshape(blob.shape.dim)
class CaffeNet(object):
    """CaffeNet contains caffe operations. Output of each layer has unique
    name as we replace duplicated output name with unique one, while keep
    mace input/output name which user specifies unchanged."""

    def __init__(self):
        self._ops = {}          # layer name -> CaffeOperator
        self._consumers = {}    # tensor name -> [CaffeOperator] reading it
        # for in-place op, its input name is the same with output name,
        # so we change the output name to an alias
        self._alias_op_output_name = {}
        self._used_op_output_name = set()

    @property
    def ops(self):
        # All registered operators (dict view, unordered).
        return self._ops.values()

    def get_op(self, op_name):
        # Look up an operator by its layer name; None when unknown.
        return self._ops.get(op_name, None)

    def get_consumers(self, tensor_name):
        # Operators that read tensor_name as an input; [] when none.
        return self._consumers.get(tensor_name, [])

    def add_layer(self, layer):
        # Register a layer.  Note the ordering below matters: bottoms are
        # resolved against aliases created by previously-added layers
        # BEFORE this layer's tops are given fresh '#<idx>' aliases.
        op = CaffeOperator()
        op.layer = layer
        self._ops[layer.name] = op

        # change op output name if it is an in-place op
        layer.bottom[:] = [self._alias_op_output_name.get(layer_input,
                                                          layer_input) for
                           layer_input in layer.bottom][:]
        for i in xrange(len(layer.top)):
            old_name = layer.top[i]
            if layer.type == 'Input':
                # user-facing input names are kept unchanged
                new_name = old_name
            else:
                # find the first unused '<name>#<idx>' alias
                idx = 0
                new_name = old_name + '#' + str(idx)
                while new_name in self._used_op_output_name:
                    idx += 1
                    new_name = old_name + '#' + str(idx)
            layer.top[i] = new_name
            self._alias_op_output_name[old_name] = new_name
            self._used_op_output_name.update([new_name])
        for input_tensor in layer.bottom:
            if input_tensor not in self._consumers:
                self._consumers[input_tensor] = []
            self._consumers[input_tensor].append(op)

    def add_blob(self, weight):
        # Attach weight blobs (from the caffemodel) to the matching layer.
        if weight.name in self._ops:
            op = self._ops[weight.name]
            op.blobs = list(weight.blobs)
class CaffeConverter(base_converter.ConverterInterface):
    """A class for convert caffe model to mace model."""

    pooling_type_mode = {
        caffe_pb2.PoolingParameter.AVE: PoolingType.AVG,
        caffe_pb2.PoolingParameter.MAX: PoolingType.MAX
    }
    eltwise_type = {
        caffe_pb2.EltwiseParameter.PROD: EltwiseType.PROD,
        caffe_pb2.EltwiseParameter.SUM: EltwiseType.SUM,
        caffe_pb2.EltwiseParameter.MAX: EltwiseType.MAX,
    }
    activation_type = {
        'ReLU': ActivationType.RELU,
        'PReLU': ActivationType.PRELU,
        'TanH': ActivationType.TANH,
    }

    def __init__(self, option, src_model_file, src_weight_file):
        """Parse prototxt + caffemodel and build the internal CaffeNet.

        Args:
            option: ConverterOption with input/output nodes and data type.
            src_model_file: path to the deploy prototxt.
            src_weight_file: path to the binary caffemodel.
        """
        # caffe layer type -> conversion method
        self._op_converters = {
            'Input': self.convert_nop,
            'Convolution': self.convert_conv2d,
            'Eltwise': self.convert_elementwise,
            'Add': self.convert_add,
            'ReLU': self.convert_activation,
            'TanH': self.convert_activation,
            'Sigmoid': self.convert_activation,
            'PReLU': self.convert_activation,
            'Pooling': self.convert_pooling,
            'Concat': self.convert_concat,
            'Slice': self.convert_slice,
            'Softmax': self.convert_softmax,
            'InnerProduct': self.convert_fully_connected,
            'BatchNorm': self.convert_folded_batchnorm,
        }
        self._option = option
        self._mace_net_def = mace_pb2.NetDef()
        ConverterUtil.set_filter_format(self._mace_net_def, FilterFormat.OIHW)
        self._caffe_net = CaffeNet()
        self._caffe_layers = caffe_pb2.NetParameter()
        caffe_weights = caffe_pb2.NetParameter()

        # parse prototxt
        with open(src_model_file, 'rb') as f:
            google.protobuf.text_format.Merge(
                str(f.read()), self._caffe_layers)
            self.filter_test_layers(self._caffe_layers)
            for layer in self._caffe_layers.layer:
                self._caffe_net.add_layer(layer)

        # parse model weight
        with open(src_weight_file, 'rb') as f:
            caffe_weights.ParseFromString(f.read())
            self.filter_test_layers(caffe_weights)
            for weight in caffe_weights.layer:
                self._caffe_net.add_blob(weight)

        # caffe ops folded into another mace op (e.g. Scale after BatchNorm)
        self._skip_ops = []

    def run(self):
        """Convert all layers, infer shapes and return the mace NetDef."""
        self.convert_ops()
        shape_inferer = shape_inference.ShapeInference(
            self._mace_net_def, self._option.input_nodes.values())
        shape_inferer.run()
        self.replace_output_tensor_name()
        return self._mace_net_def

    @staticmethod
    def replace_input_name(ops, src_name, dst_name):
        """Rename src_name to dst_name in the inputs of every op in ops."""
        for op in ops:
            for i in xrange(len(op.input)):
                if op.input[i] == src_name:
                    op.input[i] = dst_name

    def replace_output_tensor_name(self):
        """Strip the '#<idx>' aliases added by CaffeNet so the emitted model
        uses the original caffe top names, and honor user output nodes that
        were specified by op name."""
        consumers = {}
        for op in self._mace_net_def.op:
            for input_name in op.input:
                if input_name not in consumers:
                    consumers[input_name] = []
                consumers[input_name].append(op)

        # replace the last op with same prefix name with the original top name
        ops = [op for op in self._mace_net_def.op]
        ops.reverse()
        visited = set()
        for op in ops:
            for i in xrange(len(op.output)):
                original_output_name = op.output[i].split('#')[0]
                if original_output_name not in visited:
                    self.replace_input_name(
                        consumers.get(op.output[i], []),
                        op.output[i],
                        original_output_name)
                    op.output[i] = original_output_name
                    visited.update([original_output_name])

        # if user set op name as output node, replace it with op name
        for op in self._mace_net_def.op:
            if op.name in self._option.output_nodes:
                if len(op.output) > 0:
                    # Fix: pass the output tensor name (a string) as the
                    # source name; the original passed the whole repeated
                    # field, so the comparison never matched.
                    self.replace_input_name(
                        consumers.get(op.output[0], []),
                        op.output[0],
                        op.name)
                    op.output[0] = op.name

    @staticmethod
    def filter_test_layers(layers):
        """Drop layers that are train-only or Dropout.  Restarts iteration
        after each removal because the repeated field is mutated in place."""
        phase_map = {0: 'train', 1: 'test'}
        while True:
            changed = False
            for layer in layers.layer:
                phase = 'test'
                if len(layer.include):
                    phase = phase_map[layer.include[0].phase]
                if len(layer.exclude):
                    phase = phase_map[layer.exclude[0].phase]
                if phase != 'test' or layer.type == 'Dropout':
                    print("Remove layer %s (%s)" % (layer.name, layer.type))
                    layers.layer.remove(layer)
                    changed = True
                    break
            if not changed:
                break

    @staticmethod
    def add_stride_pad_kernel_arg(param, op_def):
        """Translate caffe stride/pad/kernel fields to mace args.  Pads are
        doubled because mace stores total (both-sides) padding."""
        try:
            # repeated-field form (ConvolutionParameter)
            if len(param.stride) > 1 or len(param.kernel_size) > 1 or len(
                    param.pad) > 1:
                raise Exception(
                    'Mace does not support multiple stride/kernel_size/pad')
            stride = [param.stride[0], param.stride[0]] if len(
                param.stride) else [1, 1]
            pad = [param.pad[0] * 2, param.pad[0] * 2] if len(
                param.pad) else [0, 0]
            kernel = [param.kernel_size[0], param.kernel_size[0]] if len(
                param.kernel_size) else [0, 0]
        except TypeError:
            # scalar-field form (PoolingParameter)
            stride = [param.stride, param.stride]
            pad = [param.pad * 2, param.pad * 2]
            kernel = [param.kernel_size, param.kernel_size]

        # explicit h/w fields override the square defaults
        if param.HasField(caffe_stride_h_str) or param.HasField(
                caffe_stride_w_str):
            stride = [param.stride_h, param.stride_w]
        if param.HasField(caffe_pad_h_str) or param.HasField(caffe_pad_w_str):
            pad = [param.pad_h * 2, param.pad_w * 2]

        strides_arg = op_def.arg.add()
        strides_arg.name = MaceKeyword.mace_strides_str
        strides_arg.ints.extend(stride)
        padding_arg = op_def.arg.add()
        padding_arg.name = MaceKeyword.mace_padding_values_str
        padding_arg.ints.extend(pad)

        if op_def.type == MaceOp.Pooling.name:
            if param.HasField(caffe_kernel_h_str) or param.HasField(
                    caffe_kernel_w_str):
                kernel = [param.kernel_h, param.kernel_w]
            kernels_arg = op_def.arg.add()
            kernels_arg.name = MaceKeyword.mace_kernel_str
            kernels_arg.ints.extend(kernel)
            if param.HasField('global_pooling'):
                global_pooling_arg = op_def.arg.add()
                global_pooling_arg.name = MaceKeyword.mace_global_pooling_str
                global_pooling_arg.i = 1

    def convert_ops(self):
        """Dispatch every non-skipped caffe layer to its converter."""
        for layer in self._caffe_layers.layer:
            caffe_op = self._caffe_net.get_op(layer.name)
            if caffe_op not in self._skip_ops:
                mace_check(layer.type in self._op_converters,
                           "Mace does not support caffe op type %s yet" %
                           layer.type)
                self._op_converters[layer.type](caffe_op)

    def add_tensor(self, name, shape, data_type, value):
        """Append a constant tensor (numpy array `value`) to the NetDef."""
        tensor = self._mace_net_def.tensors.add()
        tensor.name = name
        tensor.dims.extend(list(shape))
        tensor.data_type = data_type
        tensor.float_data.extend(value.flat)

    def convert_nop(self, layer):
        # Input layers produce no mace op.
        pass

    def convert_general_op(self, caffe_op):
        """Create a mace op with the caffe op's name/type/inputs/outputs and
        the common data-type and data-format args; callers specialize it."""
        op = self._mace_net_def.op.add()
        op.name = caffe_op.name
        op.type = caffe_op.type
        op.input.extend(caffe_op.layer.bottom)
        op.output.extend(caffe_op.layer.top)

        data_type_arg = op.arg.add()
        data_type_arg.name = 'T'
        data_type_arg.i = self._option.data_type
        ConverterUtil.add_data_format_arg(op, DataFormat.NCHW)
        return op

    def convert_conv2d(self, caffe_op):
        """Convert Convolution to Conv2D or DepthwiseConv2d."""
        op = self.convert_general_op(caffe_op)
        param = caffe_op.layer.convolution_param
        is_depthwise = False
        if param.HasField(caffe_group_str):
            # Fix: the original read `caffe_op.blob`, but CaffeOperator only
            # defines `blobs`, so any grouped conv raised AttributeError.
            # NOTE(review): the shape[0]/shape[1] ordering of this depthwise
            # check looks swapped for OIHW caffe weights (out, in/group, h, w)
            # — verify against a real depthwise model.
            mace_check(param.group == caffe_op.blobs[0].shape[1] and
                       caffe_op.blobs[0].shape[0] == 1,
                       "Mace do not support group convolution yet")
            is_depthwise = True

        if is_depthwise:
            op.type = MaceOp.DepthwiseConv2d.name
        else:
            op.type = MaceOp.Conv2D.name

        self.add_stride_pad_kernel_arg(param, op)
        # dilation is specific for convolution in caffe
        dilations = [1, 1]
        if len(param.dilation) > 0:
            dilation_arg = op.arg.add()
            dilation_arg.name = MaceKeyword.mace_dilations_str
            if len(param.dilation) == 1:
                dilations = [param.dilation[0], param.dilation[0]]
            elif len(param.dilation) == 2:
                dilations = [param.dilation[0], param.dilation[1]]
            dilation_arg.ints.extend(dilations)

        filter_tensor_name = op.name + '_filter'
        filter_data = caffe_op.blobs[0]
        self.add_tensor(filter_tensor_name, filter_data.shape,
                        mace_pb2.DT_FLOAT, filter_data)
        op.input.extend([filter_tensor_name])

        if len(caffe_op.blobs) == 2:
            bias_tensor_name = op.name + '_bias'
            bias_data = caffe_op.blobs[1]
            self.add_tensor(bias_tensor_name, bias_data.shape,
                            mace_pb2.DT_FLOAT, bias_data)
            op.input.extend([bias_tensor_name])

    def convert_elementwise(self, caffe_op):
        """Convert Eltwise (PROD/SUM/MAX, optional coefficients)."""
        op = self.convert_general_op(caffe_op)
        param = caffe_op.layer.eltwise_param
        op.type = MaceOp.Eltwise.name

        type_arg = op.arg.add()
        type_arg.name = MaceKeyword.mace_element_type_str
        type_arg.i = self.eltwise_type[param.operation].value
        if len(param.coeff) > 0:
            coeff_arg = op.arg.add()
            coeff_arg.name = 'coeff'
            coeff_arg.floats.extend(list(param.coeff))

    def convert_add(self, caffe_op):
        """Convert Add to AddN."""
        op = self.convert_general_op(caffe_op)
        op.type = MaceOp.AddN.name

    def convert_activation(self, caffe_op):
        """Convert ReLU/TanH/Sigmoid/PReLU; PReLU carries an alpha tensor."""
        op = self.convert_general_op(caffe_op)
        op.type = MaceOp.Activation.name

        type_arg = op.arg.add()
        type_arg.name = MaceKeyword.mace_activation_type_str
        type_arg.s = self.activation_type[caffe_op.type].name

        if caffe_op.type == 'PReLU':
            alpha_tensor_name = caffe_op.name + '_alpha'
            alpha_data = caffe_op.blobs[0]
            self.add_tensor(alpha_tensor_name, alpha_data.shape,
                            mace_pb2.DT_FLOAT, alpha_data)
            op.input.extend([alpha_tensor_name])

    def convert_folded_batchnorm(self, caffe_op):
        """Fold BatchNorm + the following Scale layer into one
        FoldedBatchNorm op with precomputed scale/offset tensors."""
        op = self.convert_general_op(caffe_op)
        op.type = MaceOp.FoldedBatchNorm.name

        # The Scale consumer is absorbed here and skipped later.
        scale_op = None
        for consumer in self._caffe_net.get_consumers(
                caffe_op.layer.top[0]):
            if consumer.type == 'Scale':
                scale_op = consumer
        mace_check(scale_op is not None, "batchnorm is not followed by scale")
        self._skip_ops.append(scale_op)

        # blobs: [0]=mean*f, [1]=variance*f, [2]=scale factor f
        epsilon_value = caffe_op.layer.batch_norm_param.eps
        mace_check(caffe_op.blobs[2][0] != 0, "batchnorm scalar is zero")
        mean_value = (1. / caffe_op.blobs[2][0]) * caffe_op.blobs[0]
        var_value = (1. / caffe_op.blobs[2][0]) * caffe_op.blobs[1]

        gamma_value = scale_op.blobs[0]
        beta_value = np.zeros_like(mean_value)
        if len(scale_op.blobs) == 2:
            beta_value = scale_op.blobs[1]

        # y = scale * x + offset, with
        # scale = gamma / sqrt(var + eps), offset = beta - mean * scale
        scale_value = (
            (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
            gamma_value).reshape(-1)
        offset_value = ((-mean_value * scale_value) + beta_value).reshape(-1)

        input_names = [op.name + '_scale', op.name + '_offset']
        self.add_tensor(input_names[0], scale_value.shape,
                        mace_pb2.DT_FLOAT, scale_value)
        self.add_tensor(input_names[1], offset_value.shape,
                        mace_pb2.DT_FLOAT, offset_value)
        op.input.extend([name for name in input_names])
        # the folded op produces the Scale layer's outputs
        op.output[:] = scale_op.layer.top[:]

    def convert_pooling(self, caffe_op):
        """Convert Pooling (AVE/MAX)."""
        op = self.convert_general_op(caffe_op)
        param = caffe_op.layer.pooling_param
        op.type = MaceOp.Pooling.name

        self.add_stride_pad_kernel_arg(param, op)
        pooling_type_arg = op.arg.add()
        pooling_type_arg.name = MaceKeyword.mace_pooling_type_str
        pooling_type_arg.i = self.pooling_type_mode[param.pool].value

    def convert_softmax(self, caffe_op):
        """Softmax maps 1:1; the generic conversion is sufficient."""
        self.convert_general_op(caffe_op)

    def convert_concat(self, caffe_op):
        """Convert Concat; only the channel axis (1) is supported."""
        op = self.convert_general_op(caffe_op)
        param = caffe_op.layer.concat_param
        op.type = MaceOp.Concat.name

        axis_arg = op.arg.add()
        axis_arg.name = MaceKeyword.mace_axis_str
        axis_arg.i = 1
        if param.HasField('axis'):
            axis_arg.i = param.axis
        elif param.HasField('concat_dim'):
            axis_arg.i = param.concat_dim
        mace_check(axis_arg.i == 1, "only support concat at channel dimension")

    def convert_slice(self, caffe_op):
        """Convert Slice; only even channel-axis splits are supported."""
        op = self.convert_general_op(caffe_op)
        op.type = MaceOp.Slice.name

        if caffe_op.layer.HasField('slice_param'):
            param = caffe_op.layer.slice_param
            mace_check(not param.HasField('axis') or param.axis == 1,
                       "Mace do not support slice with axis %d" % param.axis)
            mace_check(len(param.slice_point) == 0,
                       "Mace do not support slice with slice_point")

        axis_arg = op.arg.add()
        axis_arg.name = MaceKeyword.mace_axis_str
        axis_arg.i = 1

    def convert_fully_connected(self, caffe_op):
        """Convert InnerProduct to FullyConnected; 4D weights of shape
        [1, 1, *, *] are flattened to [num_output, -1]."""
        op = self.convert_general_op(caffe_op)
        param = caffe_op.layer.inner_product_param
        op.type = MaceOp.FullyConnected.name

        mace_check(param.axis == 1 and not param.transpose,
                   "Do not support non-default axis and transpose")
        mace_check(caffe_op.blobs[0].ndim in [2, 4],
                   "Unexpected fc weight ndim.")
        if caffe_op.blobs[0].ndim == 4:
            mace_check(list(caffe_op.blobs[0].shape[:2]) == [1, 1],
                       "Do not support 4D weight with shape [1, 1, *, *]")

        weight_tensor_name = op.name + '_weight'
        weight_data = caffe_op.blobs[0].reshape(param.num_output, -1)
        self.add_tensor(weight_tensor_name, weight_data.shape,
                        mace_pb2.DT_FLOAT, weight_data)
        op.input.extend([weight_tensor_name])

        if len(caffe_op.blobs) == 2:
            bias_tensor_name = op.name + '_bias'
            bias_data = caffe_op.blobs[1]
            self.add_tensor(bias_tensor_name, bias_data.shape,
                            mace_pb2.DT_FLOAT, bias_data)
            op.input.extend([bias_tensor_name])
mace/python/tools/converter_tool/shape_inference.py
0 → 100644
浏览文件 @
c3837858
import
math
import
numpy
as
np
from
mace.python.tools.converter_tool.transformer
import
Transformer
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
class ShapeInference(object):
    """Currently we only use it to infer caffe shape, we use tensorflow engine
    to infer tensorflow op shapes, since tensorflow has too many ops."""

    def __init__(self, net, input_nodes):
        # mace op type -> shape inference method
        self._op_shape_inference = {
            MaceOp.Conv2D.name: self.infer_shape_conv_pool_shape,
            MaceOp.Eltwise.name: self.infer_shape_general,
            MaceOp.FoldedBatchNorm.name: self.infer_shape_general,
            MaceOp.AddN.name: self.infer_shape_general,
            MaceOp.Activation.name: self.infer_shape_general,
            MaceOp.Pooling.name: self.infer_shape_conv_pool_shape,
            MaceOp.Concat.name: self.infer_shape_concat,
            MaceOp.Slice.name: self.infer_shape_slice,
            MaceOp.Softmax.name: self.infer_shape_general,
            MaceOp.FullyConnected.name: self.infer_shape_fully_connected,
        }

        self._net = net
        self._output_shape_cache = {}  # tensor name -> shape
        for input_node in input_nodes:
            input_shape = input_node.shape[:]
            # transpose user-declared input shape from NHWC to NCHW
            # (perm [0, 3, 1, 2] picks [N, C, H, W] out of [N, H, W, C])
            Transformer.transpose_shape(input_shape, [0, 3, 1, 2])
            self._output_shape_cache[input_node.name] = input_shape
        for tensor in net.tensors:
            self._output_shape_cache[tensor.name] = list(tensor.dims)

    def run(self):
        """Infer the output shape of every op, in topological (file) order."""
        for op in self._net.op:
            mace_check(op.type in self._op_shape_inference,
                       "Mace does not support caffe op type %s yet" % op.type)
            self._op_shape_inference[op.type](op)

    def add_output_shape(self, op, shapes):
        """Record shapes (one per output) on the op and in the cache."""
        mace_check(len(op.output) == len(shapes),
                   "Op %s (%s) output count is different from "
                   "output shape count" % (op.name, op.type))
        for i in xrange(len(shapes)):
            output_name = op.output[i]
            output_shape = op.output_shape.add()
            output_shape.dims.extend(shapes[i])
            self._output_shape_cache[output_name] = shapes[i]

    def infer_shape_general(self, op):
        """Output shape equals the first input's shape (elementwise ops)."""
        if len(op.input) > 0:
            mace_check(op.input[0] in self._output_shape_cache,
                       "%s does not exist" % op.input[0])
            input_shape = self._output_shape_cache[op.input[0]]
            self.add_output_shape(op, [input_shape])

    def infer_shape_conv_pool_shape(self, op):
        """Standard conv/pool output-size arithmetic; pooling rounds up,
        convolution rounds down.  Only NCHW input + OIHW filter supported."""
        input_shape = self._output_shape_cache[op.input[0]]
        output_shape = np.zeros_like(input_shape)
        if op.type == MaceOp.Pooling.name:
            # synthesize a filter shape from the kernel arg
            filter_shape = list(
                ConverterUtil.get_arg(op, MaceKeyword.mace_kernel_str).ints)
            if ConverterUtil.data_format(op) == DataFormat.NCHW:
                filter_shape = [input_shape[1], input_shape[1]] + filter_shape
                if ConverterUtil.get_arg(
                        op, MaceKeyword.mace_global_pooling_str) \
                        is not None:
                    filter_shape[2] = input_shape[2]
                    filter_shape[3] = input_shape[3]
            else:  # NHWC
                filter_shape = filter_shape + [input_shape[1], input_shape[1]]
                if ConverterUtil.get_arg(
                        op, MaceKeyword.mace_global_pooling_str) \
                        is not None:
                    filter_shape[0] = input_shape[1]
                    filter_shape[1] = input_shape[2]
        else:
            filter_shape = self._output_shape_cache[op.input[1]]

        paddings = ConverterUtil.get_arg(
            op, MaceKeyword.mace_padding_values_str).ints
        strides = ConverterUtil.get_arg(op, MaceKeyword.mace_strides_str).ints
        dilations_arg = ConverterUtil.get_arg(op,
                                              MaceKeyword.mace_dilations_str)
        if dilations_arg is not None:
            dilations = dilations_arg.ints
        else:
            dilations = [1, 1]
        if op.type == MaceOp.Pooling.name:
            round_func = math.ceil
        else:
            round_func = math.floor

        output_shape[0] = input_shape[0]
        if ConverterUtil.data_format(op) == DataFormat.NCHW \
                and ConverterUtil.filter_format(self._net) == \
                FilterFormat.OIHW:
            # filter format: OIHW
            output_shape[1] = filter_shape[0]
            output_shape[2] = int(
                round_func((input_shape[2] + paddings[0] - filter_shape[2] -
                            (filter_shape[2] - 1) *
                            (dilations[0] - 1)) / float(strides[0]))) + 1
            output_shape[3] = int(
                round_func((input_shape[3] + paddings[1] - filter_shape[3] -
                            (filter_shape[3] - 1) *
                            (dilations[1] - 1)) / float(strides[1]))) + 1
        else:
            mace_check(False,
                       "Mace can only infer shape for"
                       " NCHW input and OIHW filter")

        self.add_output_shape(op, [output_shape])

    def infer_shape_concat(self, op):
        """Concat along `axis`: output dim is the sum over all inputs.

        Fix: work on a copy with the concat axis reset to zero — the
        original mutated the cached shape of input[0] and counted it twice.
        """
        output_shape = list(self._output_shape_cache[op.input[0]])
        axis = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str).i
        output_shape[axis] = 0
        for input_node in op.input:
            input_shape = self._output_shape_cache[input_node]
            output_shape[axis] += input_shape[axis]

        self.add_output_shape(op, [output_shape])

    def infer_shape_slice(self, op):
        """Even split along `axis` into len(op.output) parts.

        Fix: compute on a copy — the original divided the cached input
        shape in place — and give each output its own list instead of
        aliasing one shared one.
        """
        input_shape = self._output_shape_cache[op.input[0]]
        axis = ConverterUtil.get_arg(op, MaceKeyword.mace_axis_str).i
        output_shape = list(input_shape)
        output_shape[axis] = input_shape[axis] // len(op.output)
        output_shapes = [list(output_shape) for _ in op.output]
        self.add_output_shape(op, output_shapes)

    def infer_shape_fully_connected(self, op):
        """FC collapses spatial dims: NCHW output is [N, out_channels, 1, 1]."""
        input_shape = self._output_shape_cache[op.input[0]]
        weight_shape = self._output_shape_cache[op.input[1]]
        if ConverterUtil.data_format(op) == DataFormat.NCHW:
            output_shape = [input_shape[0], weight_shape[0], 1, 1]
        else:
            mace_check(False, "format %s is not supported" %
                       ConverterUtil.data_format(op))
        self.add_output_shape(op, [output_shape])
mace/python/tools/converter_tool/tensorflow_converter.py
0 → 100644
浏览文件 @
c3837858
import
math
import
numpy
as
np
import
tensorflow
as
tf
from
mace.proto
import
mace_pb2
from
mace.python.tools.converter_tool
import
base_converter
from
mace.python.tools.converter_tool.base_converter
import
PoolingType
from
mace.python.tools.converter_tool.base_converter
import
PaddingMode
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
from
tensorflow.core.framework
import
tensor_shape_pb2
# Attribute names read from tensorflow NodeDef attrs during conversion.
tf_padding_str = 'padding'
tf_strides_str = 'strides'
tf_dilations_str = 'dilations'
tf_data_format_str = 'data_format'
tf_kernel_str = 'ksize'
tf_epsilon_str = 'epsilon'
tf_align_corners = 'align_corners'
tf_block_size = 'block_size'
class
TensorflowConverter
(
base_converter
.
ConverterInterface
):
"""A class for convert tensorflow frozen model to mace model.
We use tensorflow engine to infer op output shapes, since they are of
too many types."""
padding_mode
=
{
'VALID'
:
PaddingMode
.
VALID
,
'SAME'
:
PaddingMode
.
SAME
,
'FULL'
:
PaddingMode
.
FULL
}
pooling_type_mode
=
{
'AvgPool'
:
PoolingType
.
AVG
,
'MaxPool'
:
PoolingType
.
MAX
}
eltwise_type
=
{
'Add'
:
EltwiseType
.
SUM
,
'Sub'
:
EltwiseType
.
SUB
,
'Mul'
:
EltwiseType
.
PROD
,
'Div'
:
EltwiseType
.
DIV
,
'Min'
:
EltwiseType
.
MIN
,
'Max'
:
EltwiseType
.
MAX
,
'Neg'
:
EltwiseType
.
NEG
,
'Abs'
:
EltwiseType
.
ABS
,
'RealDiv'
:
EltwiseType
.
DIV
,
'SquaredDifference'
:
EltwiseType
.
SQR_DIFF
,
'Pow'
:
EltwiseType
.
POW
}
activation_type
=
{
'Relu'
:
ActivationType
.
RELU
,
'Relu6'
:
ActivationType
.
RELUX
,
'Tanh'
:
ActivationType
.
TANH
,
'Sigmoid'
:
ActivationType
.
SIGMOID
}
def
__init__
(
self
,
option
,
src_model_file
):
self
.
_op_converters
=
{
'Conv2D'
:
self
.
convert_conv2d
,
'DepthwiseConv2dNative'
:
self
.
convert_conv2d
,
'Conv2DBackpropInput'
:
self
.
convert_conv2d
,
'BiasAdd'
:
self
.
convert_biasadd
,
'Add'
:
self
.
convert_add
,
'Sub'
:
self
.
convert_elementwise
,
'Mul'
:
self
.
convert_elementwise
,
'Div'
:
self
.
convert_elementwise
,
'Min'
:
self
.
convert_elementwise
,
'Max'
:
self
.
convert_elementwise
,
'Neg'
:
self
.
convert_elementwise
,
'Abs'
:
self
.
convert_elementwise
,
'RealDiv'
:
self
.
convert_elementwise
,
'SquaredDifference'
:
self
.
convert_elementwise
,
'Pow'
:
self
.
convert_elementwise
,
'Relu'
:
self
.
convert_activation
,
'Relu6'
:
self
.
convert_activation
,
'Tanh'
:
self
.
convert_activation
,
'Sigmoid'
:
self
.
convert_activation
,
'FusedBatchNorm'
:
self
.
convert_fused_batchnorm
,
'AvgPool'
:
self
.
convert_pooling
,
'MaxPool'
:
self
.
convert_pooling
,
'Squeeze'
:
self
.
convert_identity
,
'Reshape'
:
self
.
convert_reshape
,
'Shape'
:
self
.
convert_nop
,
'Softmax'
:
self
.
convert_softmax
,
'ResizeBilinear'
:
self
.
convert_resize_bilinear
,
'Placeholder'
:
self
.
convert_nop
,
'SpaceToBatchND'
:
self
.
convert_space_batch
,
'BatchToSpaceND'
:
self
.
convert_space_batch
,
'DepthToSpace'
:
self
.
convert_space_depth
,
'SpaceToDepth'
:
self
.
convert_space_depth
,
'Pad'
:
self
.
convert_pad
,
'ConcatV2'
:
self
.
convert_concat
,
'Mean'
:
self
.
convert_mean
,
# Const converter_tool should be placed at the end
'Const'
:
self
.
convert_tensor
,
}
self
.
_option
=
option
self
.
_mace_net_def
=
mace_pb2
.
NetDef
()
ConverterUtil
.
set_filter_format
(
self
.
_mace_net_def
,
FilterFormat
.
HWIO
)
tf_graph_def
=
tf
.
GraphDef
()
with
tf
.
gfile
.
Open
(
src_model_file
,
'rb'
)
as
f
:
tf_graph_def
.
ParseFromString
(
f
.
read
())
self
.
add_shape_info
(
tf_graph_def
)
with
tf
.
Session
()
as
session
:
with
session
.
graph
.
as_default
()
as
graph
:
tf
.
import_graph_def
(
tf_graph_def
,
name
=
''
)
self
.
_tf_graph
=
graph
self
.
_skip_tensor
=
set
()
def
run
(
self
):
with
tf
.
Session
()
as
session
:
self
.
convert_ops
()
self
.
replace_input_output_tensor_name
()
return
self
.
_mace_net_def
def
replace_input_output_tensor_name
(
self
):
for
op
in
self
.
_mace_net_def
.
op
:
for
i
in
xrange
(
len
(
op
.
input
)):
if
op
.
input
[
i
][
-
2
:]
==
':0'
:
op_name
=
op
.
input
[
i
][:
-
2
]
if
op_name
in
self
.
_option
.
input_nodes
:
op
.
input
[
i
]
=
op_name
for
i
in
xrange
(
len
(
op
.
output
)):
if
op
.
output
[
i
][
-
2
:]
==
':0'
:
op_name
=
op
.
output
[
i
][:
-
2
]
if
op_name
in
self
.
_option
.
output_nodes
:
op
.
output
[
i
]
=
op_name
def
add_shape_info
(
self
,
tf_graph_def
):
for
node
in
tf_graph_def
.
node
:
if
node
.
name
in
self
.
_option
.
input_nodes
:
del
node
.
attr
[
'shape'
].
shape
.
dim
[:]
node
.
attr
[
'shape'
].
shape
.
dim
.
extend
([
tensor_shape_pb2
.
TensorShapeProto
.
Dim
(
size
=
i
)
for
i
in
self
.
_option
.
input_nodes
[
node
.
name
].
shape
])
@staticmethod
def get_scope(tensor_name):
    """Return the scope prefix of a tensor name (everything before the
    last '/'), or the full name when it contains no '/'."""
    scope, sep, _ = tensor_name.rpartition('/')
    return scope if sep else tensor_name
def convert_ops(self):
    """Dispatch every TF operation to its registered converter.

    Raises (via mace_check) when the graph contains an op type that has
    no entry in self._op_converters.
    """
    for tf_op in self._tf_graph.get_operations():
        mace_check(tf_op.type in self._op_converters,
                   "Mace does not support tensorflow op type %s yet"
                   % tf_op.type)
        self._op_converters[tf_op.type](tf_op)
def convert_tensor(self, tf_op):
    """Convert a TF Const op into a mace tensor.

    Tensors whose names were recorded in self._skip_tensor (because they
    were folded into op args) are not emitted. Only float32 and int32
    constants are supported.
    """
    output_name = tf_op.outputs[0].name
    if output_name not in self._skip_tensor:
        tensor = self._mace_net_def.tensors.add()
        tensor.name = tf_op.outputs[0].name
        # Requires an active default session (see run()).
        tf_tensor = tf_op.outputs[0].eval()
        tensor.dims.extend(list(tf_tensor.shape))
        tf_dt = tf_op.get_attr('dtype')
        if tf_dt == tf.float32:
            tensor.data_type = mace_pb2.DT_FLOAT
            tensor.float_data.extend(tf_tensor.astype(np.float32).flat)
        elif tf_dt == tf.int32:
            tensor.data_type = mace_pb2.DT_INT32
            tensor.int32_data.extend(tf_tensor.astype(np.int32).flat)
        else:
            mace_check(False, "Not supported tensor type: %s" % tf_dt.name)
def add_tensor(self, name, shape, data_type, value):
    """Append a new const tensor to the net.

    NOTE(review): the data is always written to float_data regardless of
    `data_type`; callers currently only pass DT_FLOAT values — confirm
    before reusing with integer data.
    """
    tensor = self._mace_net_def.tensors.add()
    tensor.name = name
    tensor.dims.extend(list(shape))
    tensor.data_type = data_type
    tensor.float_data.extend(value.flat)
def convert_nop(self, tf_op):
    # Intentionally empty: ops mapped here (Shape, Placeholder, ...) have
    # no mace counterpart and are dropped from the converted graph.
    pass
def convert_general_op(self, tf_op):
    """Create a mace OperatorDef mirroring a TF op: name, type, inputs,
    outputs, output shapes/types, a 'T' data-type arg and an NHWC
    data-format arg. Returns the new op for further specialization."""
    op = self._mace_net_def.op.add()
    op.name = tf_op.name
    op.type = tf_op.type
    op.input.extend([tf_input.name for tf_input in tf_op.inputs])
    op.output.extend([tf_output.name for tf_output in tf_op.outputs])
    for tf_output in tf_op.outputs:
        output_shape = op.output_shape.add()
        output_shape.dims.extend(tf_output.shape.as_list())
        # One output_type entry per output, all using the option's dtype.
        op.output_type.append(self._option.data_type)
    data_type_arg = op.arg.add()
    data_type_arg.name = 'T'
    data_type_arg.i = self._option.data_type
    # TF graphs are NHWC by convention.
    ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
    return op
def convert_identity(self, tf_op):
    """Convert Identity/Reshape-like passthrough ops; the Transformer
    later removes 'Identity' ops entirely."""
    op = self.convert_general_op(tf_op)
    op.type = 'Identity'
def convert_conv2d(self, tf_op):
    """Convert Conv2D / DepthwiseConv2dNative / Conv2DBackpropInput.

    Strides and dilations keep only the H/W components ([1:3] of the
    NHWC attribute); Deconv2D carries no dilation arg.
    """
    op = self.convert_general_op(tf_op)
    if tf_op.type == 'DepthwiseConv2dNative':
        op.type = MaceOp.DepthwiseConv2d.name
    elif tf_op.type == 'Conv2DBackpropInput':
        op.type = MaceOp.Deconv2D.name
    else:
        op.type = MaceOp.Conv2D.name
    padding_arg = op.arg.add()
    padding_arg.name = MaceKeyword.mace_padding_str
    padding_arg.i = self.padding_mode[tf_op.get_attr(tf_padding_str)].value
    strides_arg = op.arg.add()
    strides_arg.name = MaceKeyword.mace_strides_str
    strides_arg.ints.extend(tf_op.get_attr(tf_strides_str)[1:3])
    if op.type != MaceOp.Deconv2D.name:
        dilation_arg = op.arg.add()
        dilation_arg.name = MaceKeyword.mace_dilations_str
        dilation_arg.ints.extend(tf_op.get_attr(tf_dilations_str)[1:3])
def convert_elementwise(self, tf_op):
    """Convert a binary elementwise TF op into a mace Eltwise op whose
    element type is looked up from self.eltwise_type."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Eltwise.name
    type_arg = op.arg.add()
    type_arg.name = MaceKeyword.mace_element_type_str
    type_arg.i = self.eltwise_type[tf_op.type].value
def convert_biasadd(self, tf_op):
    """Convert TF BiasAdd into a mace BiasAdd op."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.BiasAdd.name
def convert_add(self, tf_op):
    """Convert Add/AddN: a 2-input add becomes an Eltwise op, anything
    with more inputs becomes AddN."""
    if len(tf_op.inputs) == 2:
        self.convert_elementwise(tf_op)
    else:
        op = self.convert_general_op(tf_op)
        op.type = MaceOp.AddN.name
def convert_activation(self, tf_op):
    """Convert Relu/Relu6/etc. into a mace Activation op; Relu6
    additionally carries a max-limit arg of 6.0."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Activation.name
    type_arg = op.arg.add()
    type_arg.name = MaceKeyword.mace_activation_type_str
    type_arg.s = self.activation_type[tf_op.type].name
    if tf_op.type == 'Relu6':
        limit_arg = op.arg.add()
        limit_arg.name = MaceKeyword.mace_activation_max_limit_str
        limit_arg.f = 6.0
def convert_fused_batchnorm(self, tf_op):
    """Convert FusedBatchNorm into FoldedBatchNorm.

    Precomputes scale = gamma / sqrt(var + epsilon) and
    offset = beta - mean * scale, stores them as new const tensors, and
    rewires the op to take only [input, scale, offset]. The original
    gamma/beta/mean/var const inputs are marked to be skipped.
    """
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.FoldedBatchNorm.name
    gamma_value = tf_op.inputs[1].eval().astype(np.float32)
    beta_value = tf_op.inputs[2].eval().astype(np.float32)
    mean_value = tf_op.inputs[3].eval().astype(np.float32)
    var_value = tf_op.inputs[4].eval().astype(np.float32)
    epsilon_value = tf_op.get_attr(tf_epsilon_str)

    scale_name = self.get_scope(tf_op.name) + '/scale:0'
    offset_name = self.get_scope(tf_op.name) + '/offset:0'
    scale_value = (
        (1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
        gamma_value)
    offset_value = (-mean_value * scale_value) + beta_value

    self.add_tensor(scale_name, scale_value.shape, mace_pb2.DT_FLOAT,
                    scale_value)
    self.add_tensor(offset_name, offset_value.shape, mace_pb2.DT_FLOAT,
                    offset_value)
    # Skip the original const inputs (update with a list is correct here).
    self._skip_tensor.update([inp.name for inp in tf_op.inputs][1:])

    del op.input[1:]
    op.input.extend([scale_name, offset_name])
    # FusedBatchNorm has extra statistics outputs; keep only output 0.
    del op.output[1:]
    del op.output_shape[1:]
    del op.output_type[1:]
def convert_pooling(self, tf_op):
    """Convert MaxPool/AvgPool into a mace Pooling op with pooling type,
    padding, strides and kernel size (H/W components of NHWC attrs)."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Pooling.name
    pooling_type_arg = op.arg.add()
    pooling_type_arg.name = MaceKeyword.mace_pooling_type_str
    pooling_type_arg.i = self.pooling_type_mode[tf_op.type].value
    padding_arg = op.arg.add()
    padding_arg.name = MaceKeyword.mace_padding_str
    padding_arg.i = self.padding_mode[tf_op.get_attr(tf_padding_str)].value
    strides_arg = op.arg.add()
    strides_arg.name = MaceKeyword.mace_strides_str
    strides_arg.ints.extend(tf_op.get_attr(tf_strides_str)[1:3])
    kernels_arg = op.arg.add()
    kernels_arg.name = MaceKeyword.mace_kernel_str
    kernels_arg.ints.extend(tf_op.get_attr(tf_kernel_str)[1:3])
def convert_softmax(self, tf_op):
    """Convert TF Softmax into a mace Softmax op."""
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Softmax.name
def convert_resize_bilinear(self, tf_op):
    """Convert TF ResizeBilinear into a mace ResizeBilinear op.

    The const size input is folded into the 'resize_size' arg and its
    tensor is excluded from const-tensor conversion.
    """
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.ResizeBilinear.name
    del op.input[1:]
    size_arg = op.arg.add()
    size_arg.name = MaceKeyword.mace_resize_size_str
    size_value = tf_op.inputs[1].eval().astype(np.int32)
    size_arg.ints.extend(size_value)
    # Fix: set.update(str) iterates the string and adds each CHARACTER,
    # so the size tensor was never actually skipped. Use add() instead.
    self._skip_tensor.add(tf_op.inputs[1].name)
    align_corners_arg = op.arg.add()
    align_corners_arg.name = MaceKeyword.mace_align_corners_str
    align_corners_arg.i = tf_op.get_attr(tf_align_corners)
def
convert_space_batch
(
self
,
tf_op
):
print
"""You might want to try 'flatten_atrous_conv' in
transform graph to turn atrous conv2d into regular conv2d.
This may give you performance benefit on GPU.
(see https://github.com/tensorflow/tensorflow/blob/master/
tensorflow/tools/graph_transforms/README.md#flatten_atrous_conv)
"""
op
=
self
.
convert_general_op
(
tf_op
)
del
op
.
input
[
1
:]
size_arg
=
op
.
arg
.
add
()
size_arg
.
name
=
MaceKeyword
.
mace_space_batch_block_shape_str
size_value
=
tf_op
.
inputs
[
1
].
eval
().
astype
(
np
.
int32
)
size_arg
.
ints
.
extend
(
size_value
)
crops_or_paddings_arg
=
op
.
arg
.
add
()
if
op
.
type
==
'BatchToSpaceND'
:
op
.
type
=
MaceOp
.
BatchToSpaceND
.
name
crops_or_paddings_arg
.
name
=
\
MaceKeyword
.
mace_batch_to_space_crops_str
else
:
op
.
type
=
MaceOp
.
SpaceToBatchND
.
name
crops_or_paddings_arg
.
name
=
MaceKeyword
.
mace_paddings_str
crops_or_paddings_value
=
tf_op
.
inputs
[
2
].
eval
().
astype
(
np
.
int32
).
flat
crops_or_paddings_arg
.
ints
.
extend
(
crops_or_paddings_value
)
self
.
_skip_tensor
.
update
(
tf_op
.
inputs
[
1
].
name
)
self
.
_skip_tensor
.
update
(
tf_op
.
inputs
[
2
].
name
)
def convert_space_depth(self, tf_op):
    """Convert SpaceToDepth / DepthToSpace; the block size attribute
    becomes a scalar arg."""
    op = self.convert_general_op(tf_op)
    if op.type == 'SpaceToDepth':
        op.type = MaceOp.SpaceToDepth.name
    else:
        op.type = MaceOp.DepthToSpace.name
    size_arg = op.arg.add()
    size_arg.name = MaceKeyword.mace_space_depth_block_size_str
    size_arg.i = tf_op.get_attr(tf_block_size)
def convert_pad(self, tf_op):
    """Convert TF Pad/PadV2 into a mace Pad op.

    The paddings const input (and optional constant value input) are
    folded into op args and excluded from const-tensor conversion.
    """
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Pad.name
    del op.input[1:]
    paddings_arg = op.arg.add()
    paddings_arg.name = MaceKeyword.mace_paddings_str
    paddings_value = tf_op.inputs[1].eval().astype(np.int32).flat
    paddings_arg.ints.extend(paddings_value)
    # Fix: was set.update(str), which adds each character of the name
    # rather than the name itself, so the tensor was never skipped.
    self._skip_tensor.add(tf_op.inputs[1].name)
    if len(tf_op.inputs) == 3:
        constant_value_arg = op.arg.add()
        constant_value_arg.name = MaceKeyword.mace_constant_value_str
        constant_value = tf_op.inputs[2].eval().astype(np.int32).flat[0]
        constant_value_arg.i = constant_value
        # Same fix as above.
        self._skip_tensor.add(tf_op.inputs[2].name)
def convert_concat(self, tf_op):
    """Convert ConcatV2 into a mace Concat op.

    The trailing axis const input becomes an 'axis' arg; only axis 3
    (channel dimension in NHWC) is supported.
    """
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Concat.name
    del op.input[-1]
    axis_arg = op.arg.add()
    axis_arg.name = MaceKeyword.mace_axis_str
    axis = tf_op.inputs[-1].eval().astype(np.int32)
    axis_arg.i = axis
    mace_check(axis == 3, "only support concat at channel dimension")
    # Fix: was set.update(str), which adds each character of the name
    # rather than the name itself, so the axis tensor was never skipped.
    self._skip_tensor.add(tf_op.inputs[-1].name)
def convert_reshape(self, tf_op):
    """Convert TF Reshape into a mace Reshape op.

    When the shape input is a Const, -1 entries are rewritten to 1
    (assumes the dynamic dim is batch == 1 — TODO confirm); when it is a
    Shape op, the statically known shape of its input is used instead.
    """
    op = self.convert_general_op(tf_op)
    op.type = MaceOp.Reshape.name
    del op.input[1:]
    shape_arg = op.arg.add()
    shape_arg.name = MaceKeyword.mace_shape_str
    shape_value = []
    if tf_op.inputs[1].op.type == 'Const':
        # Replace -1 (inferred dim) with 1.
        shape_value = [1 if dim == -1 else dim
                       for dim in tf_op.inputs[1].eval().astype(np.int32)]
        # Fix: was set.update(str), which adds individual characters of
        # the tensor name, so the shape tensor was never skipped.
        # (inputs[-1] == inputs[1] here: Reshape has exactly two inputs.)
        self._skip_tensor.add(tf_op.inputs[-1].name)
    elif tf_op.inputs[1].op.type == 'Shape':
        shape_value = list(
            tf_op.inputs[1].op.inputs[0].shape.as_list())
    shape_arg.ints.extend(shape_value)
def convert_mean(self, tf_op):
    """Convert Mean over dims [1, 2] (H, W in NHWC) into a global
    average Pooling op whose kernel equals the input spatial size."""
    op = self.convert_general_op(tf_op)
    del op.input[1:]
    reduce_dims = tf_op.inputs[1].eval()
    mace_check(reduce_dims[0] == 1 and reduce_dims[1] == 2,
               "Mean only support reduce dim 1, 2")
    op.type = MaceOp.Pooling.name
    pooling_type_arg = op.arg.add()
    pooling_type_arg.name = MaceKeyword.mace_pooling_type_str
    pooling_type_arg.i = PoolingType.AVG.value
    padding_arg = op.arg.add()
    padding_arg.name = MaceKeyword.mace_padding_str
    padding_arg.i = PaddingMode.VALID.value
    strides_arg = op.arg.add()
    strides_arg.name = MaceKeyword.mace_strides_str
    strides_arg.ints.extend([1, 1])
    kernels_arg = op.arg.add()
    kernels_arg.name = MaceKeyword.mace_kernel_str
    # Kernel = full spatial extent (H, W) -> global pooling.
    kernels_arg.ints.extend(tf_op.inputs[0].shape.as_list()[1:3])
    # Note: correctly uses add() here (unlike some sibling converters).
    self._skip_tensor.add(tf_op.inputs[1].name)
mace/python/tools/converter_tool/transformer.py
(new file, mode 0 → 100644, at commit c3837858)
import
enum
import
numpy
as
np
from
mace.proto
import
mace_pb2
from
mace.python.tools.converter_tool
import
base_converter
from
mace.python.tools.converter_tool.base_converter
import
EltwiseType
from
mace.python.tools.converter_tool.base_converter
import
ActivationType
from
mace.python.tools.converter_tool.base_converter
import
PaddingMode
from
mace.python.tools.converter_tool.base_converter
import
DataFormat
from
mace.python.tools.converter_tool.base_converter
import
FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.convert_util
import
mace_check
# Maximum OpenCL image dimension (in pixels) supported by the target GPUs;
# used to decide whether winograd buffers fit into image memory.
OPENCL_IMAGE_MAX_SIZE = 16384


class OpenCLBufferType(enum.Enum):
    """Layouts used when packing buffers into OpenCL images."""
    CONV2D_FILTER = 0
    IN_OUT_CHANNEL = 1
    ARGUMENT = 2
    IN_OUT_HEIGHT = 3
    IN_OUT_WIDTH = 4
    WINOGRAD_FILTER = 5
    DW_CONV2D_FILTER = 6
    WEIGHT_HEIGHT = 7
    WEIGHT_WIDTH = 8
class
Transformer
(
base_converter
.
ConverterInterface
):
"""A class for transform naive mace model to optimized model.
This Transformer should be platform irrelevant. So, do not assume
tensor name has suffix like ':0".
"""
def __init__(self, option, model):
    """Build a Transformer over a naive mace model.

    option: conversion options (device, data type, input/output nodes).
    model: the mace_pb2.NetDef to transform in place.
    """
    # DO NOT reorder the following transformers: later passes depend on
    # the rewrites performed by earlier ones (e.g. data_format passes
    # must run after the folding passes).
    self._registered_transformers = [
        self.remove_identity_op,
        self.transform_global_pooling,
        self.fold_softmax,
        self.fold_batchnorm,
        self.fold_conv_and_bn,  # data_format related
        self.fold_depthwise_conv_and_bn,  # data_format related
        self.transform_gpu_winograd,  # data_format related
        self.transform_add_to_biasadd,
        self.fold_biasadd,
        self.fold_activation,
        self.transpose_filters,
        self.transpose_data_format,
        self.transform_global_conv_to_fc,
        self.transform_buffer_image,
        self.sort_by_execution,
    ]
    self._option = option
    self._model = model
    # Lookup tables rebuilt by construct_ops_and_consumers() before each
    # transformer pass.
    self._ops = {}
    self._consts = {}
    self._consumers = {}
    self._producer = {}
    # CPU runs NCHW; everything else stays NHWC.
    self._target_data_format = DataFormat.NHWC
    if self._option.device == mace_pb2.CPU:
        self._target_data_format = DataFormat.NCHW
def run(self):
    """Apply each registered transformer to a fixed point (a transformer
    returning True means it changed the graph and is re-run), then
    return the transformed model."""
    for transformer in self._registered_transformers:
        while True:
            # Lookup tables may be invalidated by the previous change.
            self.construct_ops_and_consumers()
            changed = transformer()
            if not changed:
                break
    return self._model
def filter_format(self):
    """Return the model's filter layout as a FilterFormat enum member,
    read from the net-level filter-format arg. Fails via mace_check for
    unknown values."""
    filter_format_value = ConverterUtil.get_arg(
        self._model, MaceKeyword.mace_filter_format_str).i
    for candidate in (FilterFormat.HWIO, FilterFormat.OIHW,
                      FilterFormat.HWOI):
        if filter_format_value == candidate.value:
            return candidate
    mace_check(False,
               "filter format %d not supported" % filter_format_value)
def set_filter_format(self, filter_format):
    """Record the given FilterFormat enum in the net-level
    filter-format arg (assumes the arg already exists)."""
    arg = ConverterUtil.get_arg(self._model,
                                MaceKeyword.mace_filter_format_str)
    arg.i = filter_format.value
def construct_ops_and_consumers(self):
    """Rebuild the op/const/consumer/producer lookup tables from the
    current model, and register a synthetic 'Input' producer op for each
    declared model input.

    Note: self._consts is intentionally not cleared here (tensors are
    only ever removed via net.tensors.remove by the folding passes).
    """
    self._ops.clear()
    self._consumers.clear()
    self._producer.clear()
    for op in self._model.op:
        self._ops[op.name] = op
    for tensor in self._model.tensors:
        self._consts[tensor.name] = tensor
    for op in self._ops.values():
        for input_tensor in op.input:
            if input_tensor not in self._consumers:
                self._consumers[input_tensor] = []
            self._consumers[input_tensor].append(op)
        for output_tensor in op.output:
            self._producer[output_tensor] = op
    for input_node in self._option.input_nodes.values():
        op = mace_pb2.OperatorDef()
        op.name = self.normalize_op_name(input_node.name)
        op.type = 'Input'
        # Fix: extend() with a bare string iterates it and appends every
        # CHARACTER as a separate output, corrupting the producer table
        # (lookups like self._producer[op.input[0]] for a model input
        # would then fail). Wrap the name in a list.
        op.output.extend([input_node.name])
        output_shape = op.output_shape.add()
        output_shape.dims.extend(input_node.shape)
        if self._option.device == mace_pb2.CPU:
            self.transpose_shape(output_shape.dims, [0, 3, 1, 2])
            ConverterUtil.add_data_format_arg(op, DataFormat.NCHW)
        else:
            ConverterUtil.add_data_format_arg(op, DataFormat.NHWC)
        self._producer[op.output[0]] = op
@staticmethod
def replace(obj_list, source, target):
    """Replace every element equal to `source` with `target`, mutating
    `obj_list` (a plain list or protobuf repeated field) in place."""
    for idx, item in enumerate(obj_list):
        if item == source:
            obj_list[idx] = target
@staticmethod
def transpose_shape(shape, order):
    """Permute `shape` in place so that new[i] == old[order[i]] (e.g.
    order [0, 3, 1, 2] converts NHWC dims to NCHW)."""
    shape[:] = [shape[axis] for axis in order]
@staticmethod
def normalize_op_name(name):
    """Make a tensor name usable as an op name by mapping ':' to '_'."""
    return '_'.join(name.split(':'))
def consumer_count(self, tensor_name):
    # Number of ops reading this tensor; 0 when nothing consumes it.
    return len(self._consumers.get(tensor_name, []))
def is_op_output_node(self, op):
    """Whether any of `op`'s output tensors is a declared model output
    node."""
    for output in op.output:
        if output in self._option.output_nodes:
            return True
    return False
def replace_output_node(self, op):
    """if it is an output node, change output node to the op before it"""
    if self.is_op_output_node(op):
        # The op feeding this node keeps producing the graph output:
        # rename the producer's output tensor to this op's output name.
        real_output_node = self._producer[op.input[0]]
        self.replace(real_output_node.output, op.input[0], op.output[0])
        print("change %s to %s" % (real_output_node.name, op.name))
def remove_identity_op(self):
    """Remove one Identity op per pass, rewiring its consumers to read
    the Identity's input directly. Returns True when a change was made
    (the run() loop will call again until False)."""
    net = self._model
    for op in net.op:
        if op.type == 'Identity':
            print("Remove identity: %s(%s)" % (op.name, op.type))
            for consumer_op in self._consumers.get(op.output[0], []):
                Transformer.replace(consumer_op.input, op.output[0],
                                    op.input[0])
            self.replace_output_node(op)
            # Returning immediately keeps the removal-while-iterating
            # safe: the loop never continues past the mutation.
            net.op.remove(op)
            return True
    return False
def transform_global_pooling(self):
    """Give each Pooling op marked as 'global pooling' a concrete kernel
    equal to the spatial dims of its input. Idempotent, so it always
    returns False (no re-run needed)."""
    net = self._model
    for op in net.op:
        if op.type == MaceOp.Pooling.name and \
                ConverterUtil.get_arg(op,
                                      MaceKeyword.mace_global_pooling_str) \
                is not None:  # noqa
            print("Transform global pooling: %s(%s)" % (op.name, op.type))
            input_shape = self._producer[op.input[0]].output_shape[0].dims
            if ConverterUtil.data_format(op) == DataFormat.NHWC:
                kernel_shape = input_shape[1:3]
            else:
                kernel_shape = input_shape[2:4]
            ConverterUtil.get_arg(op,
                                  MaceKeyword.mace_kernel_str).ints[:] \
                = kernel_shape[:]
    return False
def fold_batchnorm(self):
    """Fold Eltwise(PROD, x, scale_const) followed by Eltwise(SUM) or
    BiasAdd with a 1-D const offset into one FoldedBatchNorm op.

    Returns True when a fold was performed (one per pass).
    """
    net = self._model
    for op in net.op:
        if (op.type == MaceOp.Eltwise.name
                and ConverterUtil.get_arg(
                    op, MaceKeyword.mace_element_type_str).i
                == EltwiseType.PROD.value) \
                and len(op.input) == 2 \
                and op.input[1] in self._consts \
                and self.consumer_count(op.output[0]) == 1 \
                and not self.is_op_output_node(op):
            consumer_op = self._consumers[op.output[0]][0]
            # Fix: the SUM check must read the *consumer* op's element
            # type. The original queried `op` (whose type is PROD), so
            # the Eltwise-SUM branch could never match and only BiasAdd
            # consumers were folded.
            if (consumer_op.type == MaceOp.Eltwise.name
                    and ConverterUtil.get_arg(
                        consumer_op, MaceKeyword.mace_element_type_str).i
                    == EltwiseType.SUM.value
                    or consumer_op.type == MaceOp.BiasAdd.name) \
                    and len(consumer_op.input) == 2 \
                    and consumer_op.input[1] in self._consts \
                    and len(self._consts[consumer_op.input[1]].dims) == 1:
                print("Fold batchnorm: %s(%s)" % (op.name, op.type))
                consumer_op.type = MaceOp.FoldedBatchNorm.name
                inputs = [op.input[0], op.input[1], consumer_op.input[1]]
                consumer_op.input[:] = inputs[:]
                net.op.remove(op)
                return True
    return False
def fold_conv_and_bn(self):
    """Fold a Conv2D/Deconv2D followed by FoldedBatchNorm: scale the
    filter weights per output channel and downgrade the BN to BiasAdd.
    One fold per pass; returns True when a change was made."""
    net = self._model
    for op in net.op:
        if (op.type == MaceOp.Conv2D.name
            or op.type == MaceOp.Deconv2D.name) \
                and self.consumer_count(op.output[0]) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.FoldedBatchNorm.name:
                print("Fold conv and bn: %s(%s)" % (op.name, op.type))
                filter = self._consts[op.input[1]]
                scale = self._consts[consumer_op.input[1]]
                idx = 0
                filter_format = self.filter_format()
                # Multiply each filter element by the scale of its
                # output channel; iteration order matches the flat
                # float_data layout of each format.
                if filter_format == FilterFormat.HWIO:
                    for hwi in xrange(filter.dims[0] * filter.dims[1]
                                      * filter.dims[2]):
                        for o in xrange(filter.dims[3]):
                            filter.float_data[idx] *= scale.float_data[o]
                            idx += 1
                elif filter_format == FilterFormat.OIHW:
                    for o in xrange(filter.dims[0]):
                        for hwi in xrange(filter.dims[1] * filter.dims[2]
                                          * filter.dims[3]):
                            filter.float_data[idx] *= scale.float_data[o]
                            idx += 1
                else:
                    mace_check(False, "filter format %s not supported"
                               % filter_format)
                # change BN to BiasAdd
                consumer_op.type = MaceOp.BiasAdd.name
                del consumer_op.input[1]
                # remove scale tensor
                net.tensors.remove(scale)
                return True
    return False
def fold_depthwise_conv_and_bn(self):
    """Fold DepthwiseConv2d followed by FoldedBatchNorm by scaling the
    depthwise filter per (input channel, multiplier) pair, then
    downgrading the BN to BiasAdd. One fold per pass."""
    net = self._model
    for op in net.op:
        if op.type == MaceOp.DepthwiseConv2d.name \
                and self.consumer_count(op.output[0]) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.FoldedBatchNorm.name:
                print("Fold depthwise conv and bn: %s(%s)"
                      % (op.name, op.type))
                filter = self._consts[op.input[1]]
                scale = self._consts[consumer_op.input[1]]
                idx = 0
                filter_format = self.filter_format()
                if filter_format == FilterFormat.HWIO:
                    # HWIM layout: scale index = i * multiplier + o.
                    for hw in xrange(filter.dims[0] * filter.dims[1]):
                        for i in xrange(filter.dims[2]):
                            for o in xrange(filter.dims[3]):
                                filter.float_data[idx] *= scale.float_data[
                                    i * filter.dims[3] + o]
                                idx += 1
                elif filter_format == FilterFormat.OIHW:
                    # NOTE(review): scale index uses dims[0] here while
                    # the HWIO branch uses the multiplier dim — verify
                    # this matches the MIHW depthwise layout.
                    for o in xrange(filter.dims[0]):
                        for i in xrange(filter.dims[1]):
                            for hw in xrange(filter.dims[2]
                                             * filter.dims[3]):
                                filter.float_data[idx] *= scale.float_data[
                                    i * filter.dims[0] + o]
                                idx += 1
                else:
                    mace_check(False, "filter format %s not supported"
                               % filter_format)
                # change BN to BiasAdd
                consumer_op.type = MaceOp.BiasAdd.name
                del consumer_op.input[1]
                # remove scale tensor
                net.tensors.remove(scale)
                return True
    return False
@staticmethod
def sort_feature_map_shape(shape, data_format):
    """Return shape in NHWC order"""
    if data_format == DataFormat.NHWC:
        return shape[0], shape[1], shape[2], shape[3]
    # Any other format is treated as NCHW.
    return shape[0], shape[2], shape[3], shape[1]
@staticmethod
def sort_filter_shape(filter_shape, filter_format):
    """Return filter shape in HWIO order"""
    if filter_format == FilterFormat.HWIO:
        return (filter_shape[0], filter_shape[1],
                filter_shape[2], filter_shape[3])
    elif filter_format == FilterFormat.OIHW:
        return (filter_shape[2], filter_shape[3],
                filter_shape[1], filter_shape[0])
    elif filter_format == FilterFormat.HWOI:
        return (filter_shape[0], filter_shape[1],
                filter_shape[3], filter_shape[2])
    else:
        mace_check(False,
                   "filter format %s not supported" % filter_format)
def check_if_gpu_use_winograd_conv(self, op):
    """Decide whether this Conv2D qualifies for the GPU winograd path:
    3x3 kernel, stride 1, dilation 1, and all winograd buffers fitting
    within OPENCL_IMAGE_MAX_SIZE."""
    if not self._option.winograd_enabled:
        return False
    if op.type != MaceOp.Conv2D.name:
        return False
    filter_shape = self._consts[op.input[1]].dims
    output_shape = op.output_shape[0].dims
    strides = ConverterUtil.get_arg(op,
                                    MaceKeyword.mace_strides_str).ints
    dilations_arg = ConverterUtil.get_arg(op,
                                          MaceKeyword.mace_dilations_str)
    if dilations_arg is None:
        dilations = [1, 1]
    else:
        dilations = dilations_arg.ints
    filter_height, filter_width, in_channels, out_channels = \
        Transformer.sort_filter_shape(filter_shape, self.filter_format())
    batch, out_height, out_width, _ = Transformer.sort_feature_map_shape(
        output_shape, ConverterUtil.data_format(op))
    if filter_height != 3 or filter_width != 3 or strides[0] > 1 \
            or strides[1] > 1 or dilations[0] > 1 or dilations[1] > 1:
        return False
    # Width of the winograd-transformed image: one column per 2x2
    # output tile. (Python 2 integer division via '/'.)
    width = batch * ((out_height + 1) / 2) * ((out_width + 1) / 2)
    return (16 * in_channels < OPENCL_IMAGE_MAX_SIZE) and \
           (16 * out_channels < OPENCL_IMAGE_MAX_SIZE) and \
           (width < OPENCL_IMAGE_MAX_SIZE)
def transform_gpu_winograd(self):
    """Only gpu needs winograd transform."""
    # Replaces each eligible Conv2D with a 3-op pipeline:
    # WinogradTransform -> MatMul (against the pre-transformed filter)
    # -> WinogradInverseTransform, and re-lays-out the filter to OIHW.
    # Always returns False: the rewrite is one-shot per op and the op
    # list is mutated while iterating (removed ops are skipped safely
    # because each conv is handled exactly once).
    net = self._model
    filter_format = self.filter_format()
    if self._option.device == mace_pb2.GPU:
        for op in net.op:
            if op.type == MaceOp.Conv2D.name \
                    and self.check_if_gpu_use_winograd_conv(op):
                print("Transform gpu winograd %s(%s)" % (op.name, op.type))
                output_shape = op.output_shape[0].dims
                filter = self._consts[op.input[1]]
                filter_shape = filter.dims
                data_format = ConverterUtil.data_format(op)
                filter_height, filter_width, in_channels, out_channels = \
                    Transformer.sort_filter_shape(filter_shape,
                                                  filter_format)
                batch, out_height, out_width, _ = \
                    Transformer.sort_feature_map_shape(output_shape,
                                                       data_format)

                # Input transform
                wt_op = net.op.add()
                wt_op.name = op.name + '_input_transform'
                wt_op.type = MaceOp.WinogradTransform.name
                wt_op.input.extend([op.input[0]])
                wt_op.output.extend([wt_op.name])
                wt_output_shape = wt_op.output_shape.add()
                # One column per 2x2 output tile (py2 int division).
                wt_output_width = batch * (
                    (out_height + 1) / 2) * ((out_width + 1) / 2)
                wt_output_shape.dims.extend(
                    [16, in_channels, wt_output_width, 1])
                arg = wt_op.arg.add()
                arg.name = 'T'
                arg.i = self._option.data_type
                # Forward padding info (either style) to the transform.
                if ConverterUtil.get_arg(op,
                                         MaceKeyword.mace_padding_str) \
                        is not None:
                    padding_arg = wt_op.arg.add()
                    padding_arg.name = MaceKeyword.mace_padding_str
                    padding_arg.i = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str).i  # noqa
                elif ConverterUtil.get_arg(op, MaceKeyword.mace_padding_values_str) is not None:  # noqa
                    padding_arg = wt_op.arg.add()
                    padding_arg.name = MaceKeyword.mace_padding_values_str
                    padding_arg.ints.extend(ConverterUtil.get_arg(
                        op, MaceKeyword.mace_padding_values_str).ints)

                # MatMul
                matmul_op = net.op.add()
                matmul_op.name = op.name + '_matmul'
                matmul_op.type = MaceOp.MatMul.name
                matmul_op.input.extend([op.input[1], wt_op.output[0]])
                matmul_op.output.extend([matmul_op.name])
                matmul_output_shape = matmul_op.output_shape.add()
                matmul_output_shape.dims.extend(
                    [16, out_channels, wt_output_width, 1])
                arg = matmul_op.arg.add()
                arg.name = 'T'
                arg.i = self._option.data_type
                arg = matmul_op.arg.add()
                arg.name = MaceKeyword.mace_winograd_filter_transformed
                arg.i = 1

                # Inverse transform
                iwt_op = net.op.add()
                iwt_op.name = op.name + '_inverse_transform'
                iwt_op.type = MaceOp.WinogradInverseTransform.name
                iwt_op.input.extend([matmul_op.output[0]])
                # biasadd
                if len(op.input) >= 3:
                    iwt_op.input.extend([op.input[2]])
                iwt_op.output.extend(op.output)
                iwt_output_shape = iwt_op.output_shape.add()
                iwt_output_shape.dims.extend(op.output_shape[0].dims)
                arg = iwt_op.arg.add()
                arg.name = 'T'
                arg.i = self._option.data_type
                batch_arg = iwt_op.arg.add()
                batch_arg.name = 'batch'
                batch_arg.i = batch
                height_arg = iwt_op.arg.add()
                height_arg.name = 'height'
                height_arg.i = out_height
                width_arg = iwt_op.arg.add()
                width_arg.name = 'width'
                width_arg.i = out_width
                ConverterUtil.add_data_format_arg(iwt_op, data_format)

                # Re-layout the filter tensor to OIHW for the MatMul.
                filter_data = np.array(filter.float_data).reshape(
                    filter.dims)
                weight_tensor_value = filter_data
                if filter_format == FilterFormat.HWIO:
                    weight_tensor_value = filter_data.transpose(3, 2, 0, 1)
                elif filter_format == FilterFormat.HWOI:
                    weight_tensor_value = filter_data.transpose(2, 3, 0, 1)
                filter.float_data[:] = weight_tensor_value.flat[:]
                filter.dims[:] = weight_tensor_value.shape[:]

                net.op.remove(op)
    return False
def transform_add_to_biasadd(self):
    """Rewrite Add(x, const_1d_vector) as BiasAdd. One rewrite per pass;
    returns True when a change was made."""
    for op in self._model.op:
        if op.type != 'Add' or len(op.input) != 2:
            continue
        bias = self._consts.get(op.input[1])
        if bias is not None and len(bias.dims) == 1:
            print("Transform add to biasadd: %s(%s)" % (op.name, op.type))
            op.type = MaceOp.BiasAdd.name
            return True
    return False
def fold_biasadd(self):
    """Merge a BiasAdd into its single producing Conv2D/Deconv2D/
    DepthwiseConv2d/FullyConnected/WinogradInverseTransform op by
    appending the bias input to the producer. One fold per pass."""
    net = self._model
    for op in net.op:
        if ((op.type == MaceOp.Conv2D.name
             or op.type == MaceOp.Deconv2D.name
             or op.type == MaceOp.DepthwiseConv2d.name
             or op.type == MaceOp.FullyConnected.name
             or op.type == MaceOp.WinogradInverseTransform.name)
            and len(op.input) == 2) \
                and len(self._consumers.get(op.output[0], [])) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.BiasAdd.name:
                print("Fold biasadd: %s(%s)" % (op.name, op.type))
                # The producer takes over the BiasAdd's name/output so
                # downstream references stay valid.
                op.name = consumer_op.name
                op.input.append(consumer_op.input[1])
                op.output[0] = consumer_op.output[0]
                net.op.remove(consumer_op)
                return True
    return False
def fold_activation(self):
    """Fuse a non-PRELU Activation into its single producing op by
    copying the activation args onto the producer (PRELU has weights
    and cannot be fused this way). One fold per pass."""
    net = self._model
    for op in net.op:
        if (op.type == MaceOp.Conv2D.name
            or op.type == MaceOp.Deconv2D.name
            or op.type == MaceOp.DepthwiseConv2d.name
            or op.type == MaceOp.FullyConnected.name
            or op.type == MaceOp.FoldedBatchNorm.name
            or op.type == MaceOp.WinogradInverseTransform.name) \
                and len(self._consumers.get(op.output[0], [])) == 1:
            consumer_op = self._consumers[op.output[0]][0]
            if consumer_op.type == MaceOp.Activation.name \
                    and ConverterUtil.get_arg(
                        consumer_op,
                        MaceKeyword.mace_activation_type_str).s != 'PRELU':
                print("Fold activation: %s(%s)" % (op.name, op.type))
                # The producer takes over the Activation's name/output.
                op.name = consumer_op.name
                op.output[0] = consumer_op.output[0]
                for arg in consumer_op.arg:
                    if arg.name == MaceKeyword.mace_activation_type_str \
                            or arg.name == \
                            MaceKeyword.mace_activation_max_limit_str:  # noqa
                        op.arg.extend([arg])
                net.op.remove(consumer_op)
                return True
    return False
def transpose_data_format(self):
    """Convert the whole graph to self._target_data_format: transpose
    layout-sensitive op args (Pad paddings, Concat/Slice axis), rewrite
    every 4-D output shape, and on CPU insert Transpose ops around the
    model inputs/outputs. Idempotent per pass; always returns False."""
    net = self._model

    for op in net.op:
        # transpose args
        if op.type == MaceOp.Pad.name:
            for arg in op.arg:
                if arg.name == MaceKeyword.mace_paddings_str \
                        and len(arg.ints) == 4:
                    if ConverterUtil.data_format(op) == DataFormat.NHWC \
                            and self._target_data_format == DataFormat.NCHW:  # noqa
                        print("Transpose pad args: %s(%s)"
                              % (op.name, op.type))
                        self.transpose_shape(arg.ints, [0, 3, 1, 2])
                    elif ConverterUtil.data_format(op) == DataFormat.NCHW \
                            and self._target_data_format == DataFormat.NHWC:  # noqa
                        print("Transpose pad args: %s(%s)"
                              % (op.name, op.type))
                        self.transpose_shape(arg.ints, [0, 2, 3, 1])
        elif op.type == MaceOp.Concat.name or op.type == MaceOp.Slice.name:
            for arg in op.arg:
                if arg.name == MaceKeyword.mace_axis_str:
                    if ConverterUtil.data_format(op) == DataFormat.NHWC \
                            and self._target_data_format == DataFormat.NCHW:  # noqa
                        print("Transpose slice args: %s(%s)"
                              % (op.name, op.type))
                        mace_check(arg.i == 3,
                                   'only support concat at '
                                   'channel dimension')
                        arg.i = 1
                    elif ConverterUtil.data_format(op) == DataFormat.NCHW \
                            and self._target_data_format == DataFormat.NHWC:  # noqa
                        print("Transpose slice args: %s(%s)"
                              % (op.name, op.type))
                        mace_check(arg.i == 1,
                                   "only support concat at "
                                   "channel dimension")
                        arg.i = 3

        # transpose op output shape
        data_format = ConverterUtil.data_format(op)
        if data_format is not None \
                and data_format != self._target_data_format:
            print("Transpose output shapes: %s(%s)" % (op.name, op.type))
            if self._target_data_format == DataFormat.NHWC:
                # NCHW -> NHWC
                for output_shape in op.output_shape:
                    if len(output_shape.dims) == 4:
                        self.transpose_shape(output_shape.dims,
                                             [0, 2, 3, 1])
            else:
                # NHWC -> NCHW
                for output_shape in op.output_shape:
                    if len(output_shape.dims) == 4:
                        self.transpose_shape(output_shape.dims,
                                             [0, 3, 1, 2])
            ConverterUtil.get_arg(op,
                                  MaceKeyword.mace_data_format_str).i = \
                self._target_data_format.value

    # transpose input/output
    if self._target_data_format == DataFormat.NCHW:
        print("Transpose input/output to NCHW")
        for input_node in self._option.input_nodes.values():
            new_input_name = MaceKeyword.mace_input_node_name \
                + '_' + input_node.name
            op = net.op.add()
            op.name = self.normalize_op_name(input_node.name)
            op.type = MaceOp.Transpose.name
            op.input.extend([new_input_name])
            op.output.extend([input_node.name])
            output_shape = op.output_shape.add()
            # assumes input_node.shape is already in target layout at
            # this point — TODO confirm against construct_ops_and_consumers
            output_shape.dims.extend(input_node.shape)
            dims_arg = op.arg.add()
            dims_arg.name = MaceKeyword.mace_dims_str
            dims_arg.ints.extend([0, 3, 1, 2])
            arg = op.arg.add()
            arg.name = 'T'
            arg.i = self._option.data_type
        for output_node in self._option.output_nodes.values():
            output_name = MaceKeyword.mace_output_node_name \
                + '_' + output_node.name
            op = self._model.op.add()
            op.name = self.normalize_op_name(output_name)
            op.type = MaceOp.Transpose.name
            op.input.extend([output_node.name])
            op.output.extend([output_name])
            output_shape = op.output_shape.add()
            output_shape.dims.extend(
                self._producer[output_node.name].output_shape[0].dims)
            self.transpose_shape(output_shape.dims, [0, 2, 3, 1])
            dims_arg = op.arg.add()
            dims_arg.name = MaceKeyword.mace_dims_str
            dims_arg.ints.extend([0, 2, 3, 1])
            arg = op.arg.add()
            arg.name = 'T'
            arg.i = self._option.data_type
    return False
def transpose_filters(self):
    """Transpose constant filter/weight tensors into the layout the target
    device expects, then record the resulting filter format on the model.

    CPU: HWIO/HWIM -> OIHW/MIHW.  GPU: OIHW/MIHW -> HWOI/HWIM (and
    FullyConnected weights OCHW -> OHWC).

    Returns:
        False, so the enclosing transform loop does not re-run this pass.
    """
    net = self._model
    filter_format = self.filter_format()

    # TODO(liyin/liuqi): remove this if-condition after combine cpu/gpu
    if self._option.device == mace_pb2.CPU:
        print("Transpose filters to OIHW")
        # transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM)
        if filter_format == FilterFormat.HWIO:
            for op in net.op:
                if op.type == MaceOp.Conv2D.name \
                        or op.type == MaceOp.Deconv2D.name \
                        or op.type == MaceOp.DepthwiseConv2d.name:
                    # Winograd-transformed filters already carry a special
                    # layout; leave those untouched.
                    if ConverterUtil.get_arg(op, MaceKeyword.mace_winograd_filter_transformed) is None:  # noqa
                        filter = self._consts[op.input[1]]
                        filter_data = np.array(filter.float_data).reshape(
                            filter.dims)
                        filter_data = filter_data.transpose(3, 2, 0, 1)
                        filter.float_data[:] = filter_data.flat
                        filter.dims[:] = filter_data.shape
            self.set_filter_format(FilterFormat.OIHW)
    elif self._option.device == mace_pb2.GPU:
        # TODO(liyin/liuqi): remove this whole logic after combine cpu/gpu
        print("Transpose filters to HWOI/HWIM")
        for op in net.op:
            if op.type == MaceOp.Conv2D.name \
                    or op.type == MaceOp.Deconv2D.name \
                    or op.type == MaceOp.DepthwiseConv2d.name:
                filter = self._consts[op.input[1]]
                filter_data = np.array(filter.float_data).reshape(
                    filter.dims)
                # transpose filter to HWOI/HWIM for
                # tensorflow and caffe (OIHW/MIHW)
                if filter_format == FilterFormat.HWIO \
                        and (op.type == MaceOp.Conv2D.name
                             or op.type == MaceOp.Deconv2D.name):
                    filter_data = filter_data.transpose(0, 1, 3, 2)
                    filter.float_data[:] = filter_data.flat
                    filter.dims[:] = filter_data.shape
                elif filter_format == FilterFormat.OIHW:
                    if op.type == MaceOp.Conv2D.name \
                            or op.type == MaceOp.Deconv2D.name:
                        filter_data = filter_data.transpose(2, 3, 0, 1)
                        filter.float_data[:] = filter_data.flat
                        filter.dims[:] = filter_data.shape
                    # BUGFIX: was `MaceOp.Depthwiseconv2d` (lowercase 'c'),
                    # which raises AttributeError when this branch runs;
                    # the member is spelled DepthwiseConv2d everywhere
                    # else in this file.
                    elif op.type == MaceOp.DepthwiseConv2d.name:
                        filter_data = filter_data.transpose(2, 3, 1, 0)
                        filter.float_data[:] = filter_data.flat
                        filter.dims[:] = filter_data.shape

            if op.type == MaceOp.FullyConnected.name:
                weight = self._consts[op.input[1]]
                input_shape = list(self._producer[op.input[0]]
                                   .output_shape[0].dims)
                weight_shape = [weight.dims[0]] + input_shape[1:]
                # OCHW -> OHWC
                weight_data = np.array(weight.float_data).reshape(
                    weight_shape)
                weight_data = weight_data.transpose(0, 2, 3, 1)
                weight.float_data[:] = weight_data.flat

        self.set_filter_format(FilterFormat.HWOI)
    return False
def buffer_to_image(self, op, input_idx, input_type):
    """Rewire op.input[input_idx] through a freshly appended BufferToImage
    op of the given OpenCLBufferType, so the consumer reads an image."""
    net = self._model
    src_name = op.input[input_idx]

    b2i_op = net.op.add()
    b2i_op.name = src_name.replace(':', '_') + "_b2i"
    dst_name = b2i_op.name
    b2i_op.type = MaceKeyword.mace_buffer_to_image
    b2i_op.input.extend([src_name])
    b2i_op.output.extend([dst_name])

    type_arg = b2i_op.arg.add()
    type_arg.name = MaceKeyword.mace_buffer_type
    type_arg.i = input_type.value

    mode_arg = b2i_op.arg.add()
    mode_arg.name = MaceKeyword.mace_mode
    mode_arg.i = 0

    dt_arg = b2i_op.arg.add()
    dt_arg.name = 'T'
    dt_arg.i = self._option.data_type

    # Consumer now reads the image-side output of the new op.
    op.input[input_idx] = dst_name
def transform_buffer_image(self):
    """GPU only: wrap filter/argument inputs of relevant ops, all model
    inputs, and all model outputs with BufferToImage/ImageToBuffer ops.

    Returns False so the transform loop does not re-run this pass.
    """
    if self._option.device != mace_pb2.GPU:
        return False
    print("Transform buffer to image")
    net = self._model
    # Per-op-type dispatch: which inputs need an image conversion and
    # with which OpenCL buffer type.
    for op in net.op:
        if op.type == MaceOp.Conv2D.name \
                or op.type == MaceOp.Deconv2D.name:
            self.buffer_to_image(op, 1, OpenCLBufferType.CONV2D_FILTER)
            # Optional bias (third input).
            if len(op.input) >= 3:
                self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
        elif op.type == MaceOp.DepthwiseConv2d.name:
            self.buffer_to_image(op, 1, OpenCLBufferType.DW_CONV2D_FILTER)
            if len(op.input) >= 3:
                self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
        elif op.type == MaceOp.BiasAdd.name:
            self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)
        elif op.type == MaceOp.FoldedBatchNorm.name:
            # scale and offset; optional extra argument input.
            self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)
            self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
            if len(op.input) >= 4:
                self.buffer_to_image(op, 3, OpenCLBufferType.ARGUMENT)
        elif op.type == MaceOp.MatMul.name and \
                ConverterUtil.get_arg(op, MaceKeyword.mace_winograd_filter_transformed) is not None:  # noqa
            # MatMul produced by winograd conv: input 0 is the
            # pre-transformed filter.
            self.buffer_to_image(op, 0, OpenCLBufferType.WINOGRAD_FILTER)
        elif op.type == MaceOp.WinogradInverseTransform.name \
                and len(op.input) >= 2:
            self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)
        elif op.type == MaceOp.FullyConnected.name:
            self.buffer_to_image(op, 1, OpenCLBufferType.WEIGHT_WIDTH)
            if len(op.input) >= 3:
                self.buffer_to_image(op, 2, OpenCLBufferType.ARGUMENT)
        elif op.type == MaceOp.Activation.name:
            # Only PRELU carries a per-channel alpha tensor.
            if ConverterUtil.get_arg(op, MaceKeyword.mace_activation_type_str).s == ActivationType.PRELU.name:  # noqa
                self.buffer_to_image(op, 1, OpenCLBufferType.ARGUMENT)

    # Convert every model input from buffer to image.
    for input_node in self._option.input_nodes.values():
        new_input_name = MaceKeyword.mace_input_node_name \
                         + '_' + input_node.name
        op_def = self._model.op.add()
        op_def.name = self.normalize_op_name(input_node.name)
        op_def.type = MaceKeyword.mace_buffer_to_image
        op_def.input.extend([new_input_name])
        op_def.output.extend([input_node.name])
        output_shape = op_def.output_shape.add()
        output_shape.dims.extend(input_node.shape)
        arg = op_def.arg.add()
        arg.name = MaceKeyword.mace_buffer_type
        arg.i = OpenCLBufferType.IN_OUT_CHANNEL.value
        arg = op_def.arg.add()
        arg.name = 'T'
        arg.i = self._option.data_type

    # Convert every model output from image back to buffer.
    for output_node in self._option.output_nodes.values():
        output_name = MaceKeyword.mace_output_node_name \
                      + '_' + output_node.name
        op_def = self._model.op.add()
        op_def.name = self.normalize_op_name(output_name)
        op_def.type = MaceKeyword.mace_image_to_buffer
        op_def.input.extend([output_node.name])
        op_def.output.extend([output_name])
        output_shape = op_def.output_shape.add()
        output_shape.dims.extend(output_node.shape)
        arg = op_def.arg.add()
        arg.name = MaceKeyword.mace_buffer_type
        arg.i = OpenCLBufferType.IN_OUT_CHANNEL.value
        arg = op_def.arg.add()
        arg.name = 'T'
        arg.i = self._option.data_type

    return False
def fold_softmax(self):
    """Fold Reshape ops surrounding a Softmax into the Softmax itself and
    pad its output shape to rank 4.

    Returns True when anything changed (so the transform loop re-runs),
    else False.
    """
    changed = False
    net = self._model
    for op in net.op:
        if op.type == MaceOp.Softmax.name:
            print("Fold softmax: %s(%s)" % (op.name, op.type))
            # Fold a following Reshape (sole consumer): take over its
            # target shape and drop it.
            if self.consumer_count(op.output[0]) == 1:
                consumer = self._consumers[op.output[0]][0]
                if consumer.type == MaceOp.Reshape.name:
                    shape = ConverterUtil.get_arg(consumer, MaceKeyword.mace_shape_str).ints  # noqa
                    del op.output_shape[0].dims[:]
                    op.output_shape[0].dims.extend(shape)
                    self.replace_output_node(consumer)
                    net.op.remove(consumer)
                    changed = True
            # Fold a preceding Reshape: consume its input directly.
            producer = self._producer[op.input[0]]
            if producer.type == MaceOp.Reshape.name:
                op.input[0] = producer.input[0]
                self.replace_output_node(producer)
                net.op.remove(producer)
                changed = True
            # Left-pad the output shape with 1s to rank 4.
            if len(op.output_shape[0].dims) < 4:
                shape = ([1, 1, 1, 1] + list(op.output_shape[0].dims))[-4:]
                op.output_shape[0].dims[:] = shape[:]
                changed = True
    if changed:
        return True
    return False
def transform_global_conv_to_fc(self):
    """Transform global conv to fc should be placed after transposing
    input/output and filter.

    A Conv2D whose filter covers the whole (unpadded) input acts as a
    fully-connected layer; rewrite its type and flatten the filter dims.

    Returns:
        False.  (FIX: this pass previously fell off the end returning
        None; every sibling pass returns an explicit bool for the
        transform loop, and None/False are equivalent here.)
    """
    if self._option.device == mace_pb2.GPU:
        return False
    net = self._model
    for op in net.op:
        if op.type == MaceOp.Conv2D.name:
            producer = self._producer[op.input[0]]
            input_shape = producer.output_shape[0].dims
            batch, height, width, channels = self.sort_feature_map_shape(
                input_shape, ConverterUtil.data_format(producer))
            filter = self._consts[op.input[1]]
            filter_shape = filter.dims
            filter_height, filter_width, in_channels, out_channels = \
                self.sort_filter_shape(filter_shape, self.filter_format())

            # Global conv only qualifies when no padding is applied.
            zero_padding = True
            padding_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_str)  # noqa
            if padding_arg is not None:
                if padding_arg.i != PaddingMode.VALID.value:
                    zero_padding = False
            else:
                padding_value_arg = ConverterUtil.get_arg(op, MaceKeyword.mace_padding_values_str)  # noqa
                if padding_value_arg is not None:
                    if not all(v == 0 for v in padding_value_arg.ints):
                        zero_padding = False

            if height == filter_height and width == filter_width \
                    and zero_padding:
                print("transform global conv to fc %s(%s)"
                      % (op.name, op.type))
                op.type = MaceOp.FullyConnected.name
                filter.dims[:] = [out_channels,
                                  in_channels * filter_width
                                  * filter_height][:]
    return False
def sort_dfs(self, op, visited, sorted_nodes):
    """Post-order depth-first traversal: visit every unvisited producer
    of `op`'s inputs first, then append `op` to `sorted_nodes`.

    `visited` is a set of op names updated in place; inputs without a
    known producer (e.g. constants) are skipped.
    """
    visited.add(op.name)
    for tensor_name in op.input:
        upstream = self._producer.get(tensor_name)
        if upstream is not None and upstream.name not in visited:
            self.sort_dfs(upstream, visited, sorted_nodes)
    sorted_nodes.append(op)
def sort_by_execution(self):
    """Topologically re-order net.op via post-order DFS from each output
    node; ops unreachable from any output are dropped."""
    print("Sort by execution")
    net = self._model
    seen = set()
    execution_order = []
    for output_node in self._option.output_nodes:
        output_tensor = MaceKeyword.mace_output_node_name \
                        + '_' + output_node
        mace_check(output_tensor in self._producer,
                   "output_tensor %s not existed in model" % output_tensor)
        self.sort_dfs(self._producer[output_tensor], seen, execution_order)

    del net.op[:]
    net.op.extend(execution_order)
    return False
mace/python/tools/memory_optimizer.py
浏览文件 @
c3837858
...
@@ -129,7 +129,7 @@ class MemoryOptimizer(object):
...
@@ -129,7 +129,7 @@ class MemoryOptimizer(object):
self
.
idle_mem
.
remove
(
mem_id
)
self
.
idle_mem
.
remove
(
mem_id
)
if
mem_id
==
-
1
:
if
mem_id
==
-
1
:
mem_id
=
self
.
total_mem_count
mem_id
=
self
.
mem_id_base
()
+
self
.
total_mem_count
self
.
total_mem_count
+=
1
self
.
total_mem_count
+=
1
self
.
mem_block
[
mem_id
]
=
op_mem_block
self
.
mem_block
[
mem_id
]
=
op_mem_block
...
@@ -147,10 +147,13 @@ class MemoryOptimizer(object):
...
@@ -147,10 +147,13 @@ class MemoryOptimizer(object):
self
.
add_net_mem_blocks
()
self
.
add_net_mem_blocks
()
print
(
'total op: %d'
,
len
(
self
.
net_def
.
op
))
print
(
"total op: %d"
%
len
(
self
.
net_def
.
op
))
print
(
'origin mem: %d, optimized mem: %d'
,
print
(
"origin mem: %d, optimized mem: %d"
%
(
self
.
get_total_origin_mem_size
(),
self
.
get_total_origin_mem_size
(),
self
.
get_total_optimized_mem_size
())
self
.
get_total_optimized_mem_size
()))
def
mem_id_base
(
self
):
return
0
class
GPUMemoryOptimizer
(
MemoryOptimizer
):
class
GPUMemoryOptimizer
(
MemoryOptimizer
):
...
@@ -189,6 +192,9 @@ class GPUMemoryOptimizer(MemoryOptimizer):
...
@@ -189,6 +192,9 @@ class GPUMemoryOptimizer(MemoryOptimizer):
block
.
x
=
self
.
mem_block
[
mem
][
0
]
block
.
x
=
self
.
mem_block
[
mem
][
0
]
block
.
y
=
self
.
mem_block
[
mem
][
1
]
block
.
y
=
self
.
mem_block
[
mem
][
1
]
def
mem_id_base
(
self
):
return
20000
def
optimize_gpu_memory
(
net_def
):
def
optimize_gpu_memory
(
net_def
):
mem_optimizer
=
GPUMemoryOptimizer
(
net_def
)
mem_optimizer
=
GPUMemoryOptimizer
(
net_def
)
...
...
mace/python/tools/source_converter_lib.py
浏览文件 @
c3837858
...
@@ -84,11 +84,20 @@ def obfuscate_name(net_def):
...
@@ -84,11 +84,20 @@ def obfuscate_name(net_def):
op
.
output
[
i
]
=
in_out_map
[
op
.
output
[
i
]]
op
.
output
[
i
]
=
in_out_map
[
op
.
output
[
i
]]
def
normalize_op_name
(
op_name
):
idx
=
op_name
.
rfind
(
':'
)
if
idx
==
-
1
:
return
op_name
else
:
return
op_name
[:
idx
]
def
rename_tensor
(
net_def
):
def
rename_tensor
(
net_def
):
tensor_map
=
{}
tensor_map
=
{}
for
t
in
net_def
.
tensors
:
for
t
in
net_def
.
tensors
:
if
t
.
name
not
in
tensor_map
:
if
t
.
name
not
in
tensor_map
:
tensor_map
[
t
.
name
]
=
"_"
+
t
.
name
[:
-
2
].
replace
(
"/"
,
"_"
)
tensor_map
[
t
.
name
]
=
"_"
+
normalize_op_name
(
t
.
name
).
replace
(
"/"
,
"_"
)
t
.
name
=
tensor_map
[
t
.
name
]
t
.
name
=
tensor_map
[
t
.
name
]
for
op
in
net_def
.
op
:
for
op
in
net_def
.
op
:
for
i
in
range
(
len
(
op
.
input
)):
for
i
in
range
(
len
(
op
.
input
)):
...
@@ -118,6 +127,8 @@ class TensorInfo:
...
@@ -118,6 +127,8 @@ class TensorInfo:
elif
t
.
data_type
==
mace_pb2
.
DT_UINT8
:
elif
t
.
data_type
==
mace_pb2
.
DT_UINT8
:
self
.
data
=
bytearray
(
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
else
:
raise
Exception
(
'Tensor data type %s not supported'
%
t
.
data_type
)
def
stringfy
(
value
):
def
stringfy
(
value
):
...
...
mace/python/tools/tf_converter_lib.py
已删除
100644 → 0
浏览文件 @
04f7a34a
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
mace.proto
import
mace_pb2
import
tensorflow
as
tf
import
numpy
as
np
import
math
import
copy
from
tensorflow
import
gfile
from
mace.python.tools
import
memory_optimizer
from
tensorflow.core.framework
import
graph_pb2
from
tensorflow.core.framework
import
tensor_shape_pb2
# Numeric padding-mode codes emitted into the mace model.
padding_mode = {'VALID': 0, 'SAME': 1, 'FULL': 2}
# Pooling kind codes for the mace Pooling op.
pooling_type_mode = {'AvgPool': 1, 'MaxPool': 2}

# the order should be the same as
# eltwise type's in mace/kernels/eltwise.h
# and also cwise type's in mace/kernels/cwise.h
# cuz these math ops should have compatible with "EltWise" and "CWise"
math_type_mode = {
    'ADD': 0,
    'SUB': 1,
    'MUL': 2,
    'DIV': 3,
    'MIN': 4,
    'MAX': 5,
    'NEG': 6,
    'ABS': 7,
    'SQR_DIFF': 8,
    'POW': 9,
}

# OpenCL buffer-to-image conversion kinds (value of the 'buffer_type' arg).
buffer_type_map = {
    'CONV2D_FILTER': 0,
    'IN_OUT_CHANNEL': 1,
    'ARGUMENT': 2,
    'IN_OUT_HEIGHT': 3,
    'IN_OUT_WIDTH': 4,
    'WINOGRAD_FILTER': 5,
    'DW_CONV2D_FILTER': 6,
}

# String name -> mace proto data type.
data_type_map = {'DT_HALF': mace_pb2.DT_HALF, 'DT_FLOAT': mace_pb2.DT_FLOAT}

# TensorFlow activation op type -> mace activation name.
activation_name_map = {
    'Relu': 'RELU',
    'Sigmoid': 'SIGMOID',
    'Tanh': 'TANH',
    'Relu6': 'RELUX'
}

# Op-type sequence that identifies an unfused batch-norm subgraph.
BATCH_NORM_ORDER = ["Add", "Rsqrt", "Mul", "Mul", "Mul", "Sub", "Add"]

# Prefixes for the synthetic input/output transform nodes.
MACE_INPUT_NODE_NAME = "mace_input_node"
MACE_OUTPUT_NODE_NAME = "mace_output_node"

# Maximum OpenCL image dimension assumed by the winograd size checks.
OPENCL_IMAGE_MAX_SIZE = 16384
def get_input_tensor(op, index):
    """Return op's index-th input tensor, looking through any chain of
    Reshape ops back to the underlying source tensor."""
    tensor = op.inputs[index]
    while tensor.op.type == 'Reshape':
        tensor = tensor.op.inputs[0]
    return tensor
class
TFConverter
(
object
):
def __init__(self, graph, tf_ops, net_def, dt, device, winograd):
    """Build consumer/producer indices over the TensorFlow op list.

    Args:
        graph: the tf.Graph being converted (used for tensor lookups).
        tf_ops: TensorFlow ops to convert.
        net_def: the mace NetDef protobuf being populated.
        dt: mace data type written as the 'T' arg on generated ops.
        device: 'cpu' or 'gpu'; drives layout and buffer/image handling.
        winograd: whether winograd convolution may be used.
    """
    self.graph = graph
    self.net_def = net_def
    self.tf_ops = tf_ops
    self.dt = dt
    self.device = device
    self.winograd = winograd
    self.tf_graph = {}  # producer op name -> list of consumer ops
    self.tf_parents = {}  # op name -> list of parent (producer) ops
    self.resolved_ops = {}  # op name -> 1 once converted, else 0
    self.unused_tensor = set()  # tensor names to skip when emitting consts
    self.transpose_filter_tensor = {}  # tensor name -> transpose axes
    self.reshape_tensor = {}  # tensor name -> target shape
    self.ops = {}  # op name -> tf op
    for op in tf_ops:
        self.ops[op.name] = op
    for op in tf_ops:
        self.resolved_ops[op.name] = 0
        for input in op.inputs:
            # Tensor names end in ':<idx>'; strip the 2-char suffix to
            # recover the producing op's name.
            # NOTE(review): assumes a single-digit output index — confirm
            # for graphs with ops that have >9 outputs.
            input_name = input.name[:-2]
            if input_name not in self.tf_graph:
                self.tf_graph[input_name] = []
            self.tf_graph[input_name].append(op)
            if op.name not in self.tf_parents:
                self.tf_parents[op.name] = []
            self.tf_parents[op.name].append(self.ops[input_name])
def add_buffer_to_image(self, input_name, input_type):
    """Append a BufferToImage op converting tensor `input_name` into an
    OpenCL image of kind `input_type`; return the converted tensor name."""
    b2i_name = input_name[:-2] + "_b2i" + input_name[-2:]

    b2i_op = self.net_def.op.add()
    b2i_op.name = b2i_name[:-2]
    b2i_op.type = 'BufferToImage'
    b2i_op.input.extend([input_name])
    b2i_op.output.extend([b2i_name])

    type_arg = b2i_op.arg.add()
    type_arg.name = 'buffer_type'
    type_arg.i = buffer_type_map[input_type]

    mode_arg = b2i_op.arg.add()
    mode_arg.name = 'mode'
    mode_arg.i = 0

    dt_arg = b2i_op.arg.add()
    dt_arg.name = 'T'
    dt_arg.i = self.dt

    return b2i_name
def add_image_to_buffer(self, input_name, input_type):
    """Append an ImageToBuffer op converting image tensor `input_name`
    back into a plain buffer; return the converted tensor name."""
    i2b_name = input_name[:-2] + "_i2b" + input_name[-2:]

    i2b_op = self.net_def.op.add()
    i2b_op.name = i2b_name[:-2]
    i2b_op.type = 'ImageToBuffer'
    i2b_op.input.extend([input_name])
    i2b_op.output.extend([i2b_name])

    type_arg = i2b_op.arg.add()
    type_arg.name = 'buffer_type'
    type_arg.i = buffer_type_map[input_type]

    dt_arg = i2b_op.arg.add()
    dt_arg.name = 'T'
    dt_arg.i = self.dt

    return i2b_name
def add_gpu_input_transform(self, names):
    """Insert a BufferToImage op for each model input so GPU kernels read
    OpenCL images rather than plain buffers."""
    for name in names:
        new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
        op_def = self.net_def.op.add()
        op_def.name = name
        op_def.type = 'BufferToImage'
        op_def.input.extend([new_input_name])
        op_def.output.extend([name + ':0'])

        # NOTE(review): 'epsilon_arg' is a misleading (likely copy-pasted)
        # name -- this is the buffer_type argument; behavior is unaffected.
        epsilon_arg = op_def.arg.add()
        epsilon_arg.name = 'buffer_type'
        epsilon_arg.i = buffer_type_map['IN_OUT_CHANNEL']

        arg = op_def.arg.add()
        arg.name = 'T'
        arg.i = self.dt

        self.add_output_shape(self.ops[name].outputs, op_def)
def add_cpu_input_transform(self, names):
    """Insert a Transpose (NHWC -> NCHW) op for each model input for the
    CPU runtime."""
    for name in names:
        new_input_name = MACE_INPUT_NODE_NAME + '_' + name + ":0"
        op_def = self.net_def.op.add()
        op_def.name = name
        op_def.type = 'Transpose'
        op_def.input.extend([new_input_name])
        op_def.output.extend([name + ':0'])

        dims_arg = op_def.arg.add()
        dims_arg.name = 'dims'
        # NHWC -> NCHW permutation.
        dims_arg.ints.extend([0, 3, 1, 2])

        arg = op_def.arg.add()
        arg.name = 'T'
        arg.i = self.dt

        self.add_output_shape(self.ops[name].outputs, op_def)
def add_gpu_output_transform(self, names):
    """Append an ImageToBuffer op per requested output so GPU image
    results are copied back into plain buffers."""
    for name in names:
        output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"

        transform_def = self.net_def.op.add()
        transform_def.name = output_name[:-2]
        transform_def.type = 'ImageToBuffer'
        transform_def.input.extend([name + ':0'])
        transform_def.output.extend([output_name])

        buffer_type_arg = transform_def.arg.add()
        buffer_type_arg.name = 'buffer_type'
        buffer_type_arg.i = buffer_type_map['IN_OUT_CHANNEL']
def add_cpu_output_transform(self, names):
    """Append a Transpose (NCHW -> NHWC) op per requested output for the
    CPU runtime, filling in the transposed output shapes."""
    for name in names:
        output_name = MACE_OUTPUT_NODE_NAME + '_' + name + ":0"
        op_def = self.net_def.op.add()
        op_def.name = output_name[:-2]
        op_def.type = 'Transpose'
        op_def.input.extend([name + ':0'])
        op_def.output.extend([output_name])

        dims_arg = op_def.arg.add()
        dims_arg.name = 'dims'
        # NCHW -> NHWC permutation.
        dims_arg.ints.extend([0, 2, 3, 1])

        output_shapes = []
        for output in self.ops[name].outputs:
            old_shape = output.shape.as_list()
            # NCHW -> NHWC
            if len(old_shape) == 2:
                # Rank-2 outputs become [N, 1, 1, C].
                new_shape = [old_shape[0], 1, 1, old_shape[1]]
            else:
                new_shape = [old_shape[0], old_shape[2],
                             old_shape[3], old_shape[1]]
            output_shape = mace_pb2.OutputShape()
            output_shape.dims.extend(new_shape)
            output_shapes.append(output_shape)
        op_def.output_shape.extend(output_shapes)
def add_output_shape(self, outputs, op):
    """Copy the shapes of `outputs` (lists or tf.Tensors) onto mace op
    `op`, padding rank-2 shapes to rank 4 and converting NHWC -> NCHW
    for the CPU device."""
    output_shapes = []
    for output in outputs:
        old_shape = []
        if isinstance(output, list):
            old_shape = output
        elif isinstance(output, tf.Tensor):
            # Unknown/partial shapes are left as [] (no dims recorded).
            if output.shape.num_elements() is not None:
                old_shape = output.shape.as_list()
        else:
            raise ValueError('output type not supported: ', type(output))
        if len(old_shape) == 2:
            # Pad rank-2 [N, C] to rank 4.
            old_shape = [old_shape[0], old_shape[1], 1, 1]
        if self.device == 'cpu':
            # NHWC -> NCHW
            old_shape = [old_shape[0], old_shape[3],
                         old_shape[1], old_shape[2]]
        output_shape = mace_pb2.OutputShape()
        output_shape.dims.extend(old_shape)
        output_shapes.append(output_shape)
    op.output_shape.extend(output_shapes)
def add_tensor(self, name, shape, tf_dt, value):
    """Append a constant tensor (float32 or int32 only) to the NetDef."""
    const_tensor = self.net_def.tensors.add()
    const_tensor.name = name
    const_tensor.dims.extend(list(shape))
    if tf_dt == tf.float32:
        const_tensor.data_type = mace_pb2.DT_FLOAT
        const_tensor.float_data.extend(value.flat)
    elif tf_dt == tf.int32:
        const_tensor.data_type = mace_pb2.DT_INT32
        const_tensor.int32_data.extend(value.flat)
    else:
        raise Exception("Not supported tensor type: " + tf_dt.name)
def convert_reshape(self, op):
    """Resolve a constant Reshape without emitting an op: remember the
    target shape for the data tensor and mark the shape const unused."""
    data_tensor = get_input_tensor(op, 0)
    shape_tensor = get_input_tensor(op, 1)
    target_shape = shape_tensor.eval().astype(np.int32)
    self.unused_tensor.add(shape_tensor.name)
    self.reshape_tensor[data_tensor.name] = target_shape
    self.resolved_ops[op.name] = 1
def convert_tensor(self, op):
    """Materialize a tf constant op's value into the NetDef, applying any
    pending transpose/reshape recorded for it; skip tensors previously
    marked unused.  Marks the op resolved either way."""
    output_name = op.outputs[0].name
    if output_name not in self.unused_tensor:
        tensor = self.net_def.tensors.add()
        tf_tensor = op.outputs[0].eval()
        # Apply a filter transpose scheduled by the conv converters.
        if output_name in self.transpose_filter_tensor:
            tf_tensor = tf_tensor.transpose(
                self.transpose_filter_tensor[output_name])
        # Apply a reshape scheduled by convert_reshape.
        if output_name in self.reshape_tensor:
            tf_tensor = tf_tensor.reshape(self.reshape_tensor[output_name])
        tensor.name = op.outputs[0].name
        shape = list(tf_tensor.shape)
        tensor.dims.extend(shape)
        tf_dt = op.get_attr('dtype')
        if tf_dt == tf.float32:
            tensor.data_type = mace_pb2.DT_FLOAT
            tensor.float_data.extend(tf_tensor.astype(np.float32).flat)
        elif tf_dt == tf.int32:
            tensor.data_type = mace_pb2.DT_INT32
            tensor.int32_data.extend(tf_tensor.astype(np.int32).flat)
        else:
            raise Exception("Not supported tensor type: " + tf_dt.name)
    self.resolved_ops[op.name] = 1
def check_winograd_conv(self, op):
    """Decide whether this conv qualifies for the winograd fast path
    (non-depthwise, 3x3 filter, stride 1, plus per-device size limits)."""
    filter_shape = get_input_tensor(op, 1).shape.as_list()
    strides = op.get_attr('strides')[1:3]
    output_shape = op.outputs[0].shape.as_list()
    # Dynamic batch (or unknown shape) cannot be checked statically.
    if len(output_shape) == 0 or output_shape[0] is None:
        return False
    # Number of winograd tiles: batch * ceil(h/2) * ceil(w/2).
    # NOTE(review): relies on Python 2 integer division; under Python 3
    # '/' yields floats here -- confirm intended interpreter.
    width = output_shape[0] * ((output_shape[1] + 1) / 2) * \
        ((output_shape[2] + 1) / 2)
    if self.winograd and op.type != 'DepthwiseConv2dNative' and \
            filter_shape[0] == 3 and \
            (filter_shape[0] == filter_shape[1]) and \
            (strides[0] == 1) and (strides[0] == strides[1]):
        if self.device == 'gpu':
            # Every generated image dimension must fit the OpenCL limit.
            return (16 * filter_shape[2] < OPENCL_IMAGE_MAX_SIZE) and \
                   (16 * filter_shape[3] < OPENCL_IMAGE_MAX_SIZE) and \
                   (width < OPENCL_IMAGE_MAX_SIZE)
        elif self.device == 'cpu':
            # Winograd only pays off with enough in/out channels.
            return filter_shape[2] >= 8 and filter_shape[3] >= 8
    return False
def convert_winograd_conv_gpu(self, op):
    """Expand a qualifying GPU Conv2D into a winograd triple:
    WinogradTransform -> MatMul (with pre-transformed filter) ->
    WinogradInverseTransform, fusing a following BiasAdd and/or
    activation into the inverse transform when present."""
    filter_tensor = get_input_tensor(op, 1)
    filter_shape = filter_tensor.shape.as_list()
    output_shape = op.outputs[0].shape.as_list()

    # Filter: HWIO -> OIHW before uploading as a winograd image.
    self.transpose_filter_tensor[filter_tensor.name] = (3, 2, 0, 1)
    filter_name = self.add_buffer_to_image(op.inputs[1].name,
                                           "WINOGRAD_FILTER")

    # Input transform
    wt_op = mace_pb2.OperatorDef()
    arg = wt_op.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    padding_arg = wt_op.arg.add()
    padding_arg.name = 'padding'
    padding_arg.i = padding_mode[op.get_attr('padding')]
    wt_op.name = op.name + '_input_transform'
    wt_op.type = 'WinogradTransform'
    wt_op.input.extend([op.inputs[0].name])
    wt_output_name = wt_op.name + ":0"
    wt_op.output.extend([wt_output_name])
    wt_output_shape = mace_pb2.OutputShape()
    # batch * ceil(h/2) * ceil(w/2) tiles.
    # NOTE(review): relies on Python 2 integer division for '/ 2'.
    wt_output_width = output_shape[0] * ((output_shape[1] + 1) / 2) * \
        ((output_shape[2] + 1) / 2)
    wt_output_shape.dims.extend([16, filter_shape[2], wt_output_width, 1])
    wt_op.output_shape.extend([wt_output_shape])

    # MatMul
    matmul_op = mace_pb2.OperatorDef()
    arg = matmul_op.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    matmul_op.name = op.name + '_matmul'
    matmul_op.type = 'MatMul'
    matmul_op.input.extend([filter_name, wt_output_name])
    matmul_output_name = matmul_op.name + ":0"
    matmul_op.output.extend([matmul_output_name])
    matmul_output_shape = mace_pb2.OutputShape()
    matmul_output_shape.dims.extend(
        [16, filter_shape[3], wt_output_width, 1])
    matmul_op.output_shape.extend([matmul_output_shape])

    # Inverse transform
    iwt_op = mace_pb2.OperatorDef()
    arg = iwt_op.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    batch_arg = iwt_op.arg.add()
    batch_arg.name = 'batch'
    batch_arg.i = output_shape[0]
    height_arg = iwt_op.arg.add()
    height_arg.name = 'height'
    height_arg.i = output_shape[1]
    width_arg = iwt_op.arg.add()
    width_arg.name = 'width'
    width_arg.i = output_shape[2]
    iwt_op.name = op.name + '_inverse_transform'
    iwt_op.type = 'WinogradInverseTransform'
    iwt_op.input.extend([matmul_output_name])

    final_op = op
    self.resolved_ops[op.name] = 1

    # Fuse a sole-consumer BiasAdd into the inverse transform.
    if len(self.tf_graph[op.name]) == 1 and \
            self.tf_graph[op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[op.name][0]
        output_name = self.add_buffer_to_image(
            get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
        iwt_op.input.extend([output_name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1

    # Fuse a sole-consumer activation (Relu/Relu6/Sigmoid/Tanh).
    if len(self.tf_graph[final_op.name]) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = iwt_op.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = iwt_op.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1

    iwt_op.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, iwt_op)
    self.net_def.op.extend([wt_op, matmul_op, iwt_op])
def convert_conv_winograd_filter_cpu(self, op, op_def):
    """Pre-transform a 3x3 CPU conv filter into winograd domain (F(6x6)
    for large inputs, F(2x2) otherwise), register the new constant, and
    wire op_def to use it instead of the raw filter."""
    weight_tensor = get_input_tensor(op, 1)
    weight_tensor_value = weight_tensor.eval().astype(np.float32)
    input_shape = get_input_tensor(op, 0).shape.as_list()
    output_channels = weight_tensor_value.shape[3]
    input_channels = weight_tensor_value.shape[2]
    # HWIO -> OIHW
    weight_tensor_value = weight_tensor_value.transpose(3, 2, 0, 1)
    if input_shape[1] > 16 and input_shape[2] > 16:
        # 8x3 transform matrix G for the larger-tile winograd variant.
        G = np.array([
            [1.0, 0.0, 0.0],
            [-2.0 / 9, -2.0 / 9, -2.0 / 9],
            [-2.0 / 9, 2.0 / 9, -2.0 / 9],
            [1.0 / 90, 1.0 / 45, 2.0 / 45],
            [1.0 / 90, -1.0 / 45, 2.0 / 45],
            [1.0 / 45, 1.0 / 90, 1.0 / 180],
            [1.0 / 45, -1.0 / 90, 1.0 / 180],
            [0.0, 0.0, 1.0]
        ], dtype=np.float32)
        new_shape = [64, output_channels, input_channels]  # TOC
    else:
        # 4x3 transform matrix G for the small-tile variant.
        G = np.array([
            [1.0, 0.0, 0.0],
            [0.5, 0.5, 0.5],
            [0.5, -0.5, 0.5],
            [0.0, 0.0, 1.0],
        ], dtype=np.float32)
        new_shape = [16, output_channels, input_channels]  # TOC
    # Winograd filter transform: G . g . G^T per (O, I) slice.
    new_weight_value = G.dot(weight_tensor_value).dot(G.T)  # [t, O, I, t]
    new_weight_value = new_weight_value.transpose(0, 3, 1, 2)
    new_weight_value = new_weight_value.reshape(new_shape)
    new_tensor_name = weight_tensor.name[:-2] + '/winograd_transformed:0'
    self.add_tensor(new_tensor_name, new_shape, tf.float32,
                    new_weight_value)

    winograd_transformed_arg = op_def.arg.add()
    winograd_transformed_arg.name = 'is_filter_transformed'
    winograd_transformed_arg.i = 1

    # The raw filter is superseded by the transformed constant.
    self.unused_tensor.add(weight_tensor.name)
    op_def.input.extend([op.inputs[0].name])
    op_def.input.extend([new_tensor_name])
def convert_conv2d(self, op):
    """Convert a tf Conv2D/DepthwiseConv2dNative op, scheduling the
    device-appropriate filter transpose (or CPU winograd transform) and
    fusing a following BiasAdd/broadcast-Add and activation."""
    use_winograd = False
    if self.device == 'cpu':
        use_winograd = self.check_winograd_conv(op)

    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    if op.type == 'DepthwiseConv2dNative':
        op_def.type = 'DepthwiseConv2d'
    else:
        op_def.type = op.type

    # Schedule the filter layout change applied later in convert_tensor.
    if self.device == 'cpu' and not use_winograd:
        # HWIO -> OIHW
        self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
            (3, 2, 0, 1)
    elif op.type == 'Conv2D':
        # HWIO -> HWOI
        self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
            (0, 1, 3, 2)
    if self.device == 'gpu':
        op_def.input.extend([op.inputs[0].name])
        if op_def.type == 'DepthwiseConv2d':
            buffer_type = "DW_CONV2D_FILTER"
        else:
            buffer_type = "CONV2D_FILTER"
        output_name = self.add_buffer_to_image(
            get_input_tensor(op, 1).name, buffer_type)
        op_def.input.extend([output_name])
    elif self.device == 'cpu' and use_winograd:
        self.convert_conv_winograd_filter_cpu(op, op_def)
    else:
        op_def.input.extend(
            [get_input_tensor(op, i).name
             for i in range(len(op.inputs))])

    padding_arg = op_def.arg.add()
    padding_arg.name = 'padding'
    padding_arg.i = padding_mode[op.get_attr('padding')]
    strides_arg = op_def.arg.add()
    strides_arg.name = 'strides'
    strides_arg.ints.extend(op.get_attr('strides')[1:3])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    final_op = op
    self.resolved_ops[op.name] = 1

    # Fuse a sole-consumer BiasAdd, or an Add whose second input is a
    # rank-1 tensor (an effective bias).
    if len(self.tf_graph.get(op.name, [])) == 1 and \
            self.tf_graph[op.name][0].type == 'BiasAdd' or \
            (len(self.tf_graph[op.name]) == 1
             and self.tf_graph[op.name][0].type == 'Add'
             and len(self.tf_graph[op.name][0].inputs) == 2
             and len(self.graph.get_tensor_by_name(
                 self.tf_graph[op.name][0].inputs[1].name).shape) == 1):
        bias_add_op = self.tf_graph[op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1

    # Fuse a sole-consumer activation (Relu/Relu6/Sigmoid/Tanh).
    if len(self.tf_graph.get(final_op.name, [])) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1

    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def convert_deconv2d(self, op):
    """Convert a TF Conv2DBackpropInput (deconvolution) op into a MACE
    'Deconv2D' OperatorDef, fusing a following BiasAdd and activation
    (consumers looked up via self.tf_graph) into the same op when present.

    Two TF input layouts are handled:
      * 2 inputs: (input, filter); output shape comes from the
        'output_shape' attribute.
      * 3 inputs: (output_shape const, filter, input); the const tensor is
        evaluated and recorded as unused.
    """
    op_def = mace_pb2.OperatorDef()
    # Data-type argument shared by all converted ops.
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = 'Deconv2D'
    out_shape_value = None
    if len(op.inputs) == 2:
        out_shape_value = op.get_attr('output_shape')
        # Record the filter transpose to apply later; CPU wants OIHW-style
        # reordering, GPU keeps HW but swaps the last two axes.
        if self.device == 'cpu':
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (3, 2, 0, 1)
        else:
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (0, 1, 3, 2)
        if self.device == 'gpu':
            # GPU: feed the input buffer plus the filter converted to image.
            op_def.input.extend([op.inputs[0].name])
            buffer_type = "CONV2D_FILTER"
            output_name = self.add_buffer_to_image(
                get_input_tensor(op, 1).name, buffer_type)
            op_def.input.extend([output_name])
        else:
            # CPU: pass all inputs through unchanged.
            op_def.input.extend(
                [get_input_tensor(op, i).name for i in range(len(op.inputs))])
    elif len(op.inputs) == 3:
        # Input 0 is a constant output-shape tensor: evaluate it now and
        # drop it from the converted graph.
        out_shape_value = \
            get_input_tensor(op, 0).eval().astype(np.int32).flat
        self.unused_tensor.add(op.inputs[0].name)
        if self.device == 'cpu':
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (2, 3, 0, 1)
        else:
            self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
                (0, 1, 2, 3)
        if self.device == 'gpu':
            # Input 2 is the actual data input in this layout.
            op_def.input.extend([op.inputs[2].name])
            buffer_type = "CONV2D_FILTER"
            output_name = self.add_buffer_to_image(
                get_input_tensor(op, 1).name, buffer_type)
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([op.inputs[2].name])
            op_def.input.extend([op.inputs[1].name])
    else:
        raise Exception('Too many inputs. Op: %s, type: %s' % (op.name,
                                                               op.type))
    if out_shape_value is not None:
        out_shape_arg = op_def.arg.add()
        out_shape_arg.name = 'output_shape'
        out_shape_arg.ints.extend(out_shape_value)
    padding_arg = op_def.arg.add()
    padding_arg.name = 'padding'
    padding_arg.i = padding_mode[op.get_attr('padding')]
    strides_arg = op_def.arg.add()
    strides_arg.name = 'strides'
    # TF strides are NHWC; take only the spatial components.
    strides_arg.ints.extend(op.get_attr('strides')[1:3])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    final_op = op
    self.resolved_ops[op.name] = 1
    # Fuse a single BiasAdd consumer, if any, appending its bias tensor
    # as an extra input.
    if len(self.tf_graph.get(op.name, [])) == 1 and \
            self.tf_graph[op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1
    # Fuse a single trailing activation (Relu/Relu6/...) into this op.
    if len(self.tf_graph.get(final_op.name, [])) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    # Outputs/shape always come from the last fused op in the chain.
    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def check_conv_to_fc(self, op):
    """Decide whether a CPU Conv2D can be lowered to a fully-connected op.

    True when the kernel covers the whole spatial extent of the input and
    either padding is VALID or the kernel is 1x1.
    """
    if self.device != 'cpu' or op.type != "Conv2D":
        return False
    kernel_dims = get_input_tensor(op, 1).shape.as_list()
    in_dims = get_input_tensor(op, 0).shape.as_list()
    # Kernel must match the input height and width exactly.
    if in_dims[1] != kernel_dims[0] or in_dims[2] != kernel_dims[1]:
        return False
    if op.get_attr('padding') == 'VALID':
        return True
    # Non-VALID padding is only acceptable for a 1x1 kernel.
    return kernel_dims[0] == 1 and kernel_dims[1] == 1
def convert_global_conv_to_fc(self, op):
    """Convert a whole-input Conv2D (see check_conv_to_fc) into a MACE 'FC'
    op, fusing a following BiasAdd and activation when present.

    The filter is scheduled for a (3, 2, 0, 1) transpose and a reshape to
    [out_channels, in_channels * kh * kw, 1, 1] so it acts as an FC weight.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = 'FC'
    self.transpose_filter_tensor[get_input_tensor(op, 1).name] = \
        (3, 2, 0, 1)
    filter_shape = get_input_tensor(op, 1).shape.as_list()
    # Collapse the HWIO filter into a 2-D weight matrix (kept 4-D with
    # trailing 1x1 dims).
    self.reshape_tensor[get_input_tensor(op, 1).name] = \
        [filter_shape[3],
         filter_shape[2] * filter_shape[1] * filter_shape[0], 1, 1]
    op_def.input.extend(
        [get_input_tensor(op, i).name for i in range(len(op.inputs))])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    # This path is only reachable on CPU (check_conv_to_fc), hence NCHW.
    data_format_arg.s = 'NCHW'
    final_op = op
    self.resolved_ops[op.name] = 1
    # Fuse a single BiasAdd consumer, if any.
    if len(self.tf_graph.get(op.name, [])) == 1 and \
            self.tf_graph[op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1
    # Fuse a single trailing activation.
    if len(self.tf_graph.get(final_op.name, [])) == 1 and \
            self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def convert_fused_batchnorm(self, op):
    """Convert a TF FusedBatchNorm into a MACE 'FoldedBatchNorm' op.

    The four parameter tensors (gamma, beta, mean, variance; inputs 1-4)
    are evaluated and folded into two new constant tensors:
        scale  = gamma / sqrt(var + epsilon)
        offset = -mean * scale + beta
    A single trailing activation consumer is fused when present.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    op_def.name = op.name
    op_def.type = 'FoldedBatchNorm'
    gamma_tensor = get_input_tensor(op, 1)
    # All four parameter tensors must share gamma's shape; they are all
    # replaced by the folded scale/offset, so mark them unused.
    for i in range(1, 5):
        input_tensor = get_input_tensor(op, i)
        assert input_tensor.shape == gamma_tensor.shape
        self.unused_tensor.add(input_tensor.name)
    gamma_value = get_input_tensor(op, 1).eval().astype(np.float32)
    beta_value = get_input_tensor(op, 2).eval().astype(np.float32)
    mean_value = get_input_tensor(op, 3).eval().astype(np.float32)
    var_value = get_input_tensor(op, 4).eval().astype(np.float32)
    epsilon_value = op.get_attr('epsilon')
    # Element-wise fold of the batch-norm parameters.
    scale_value = ((1.0 / np.vectorize(math.sqrt)(var_value + epsilon_value)) *
                   gamma_value)
    offset_value = (-mean_value * scale_value) + beta_value
    # Name the folded tensors next to the original gamma tensor.
    idx = gamma_tensor.name.rfind('/')
    name_prefix = gamma_tensor.name[:idx] + '/'
    input_names = [name_prefix + 'scale:0', name_prefix + 'offset:0']
    self.add_tensor(input_names[0], gamma_value.shape, gamma_tensor.dtype,
                    scale_value)
    self.add_tensor(input_names[1], gamma_value.shape, gamma_tensor.dtype,
                    offset_value)
    op_def.input.extend([op.inputs[0].name])
    if self.device == 'gpu':
        # GPU consumes scale/offset as ARGUMENT images.
        for name in input_names:
            output_name = self.add_buffer_to_image(name, "ARGUMENT")
            op_def.input.extend([output_name])
    else:
        op_def.input.extend([name for name in input_names])
    self.resolved_ops[op.name] = 1
    final_op = op
    # Fuse a single trailing activation consumer.
    if len(self.tf_graph[op.name]) == 1 \
            and self.tf_graph[op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    # Only the first output is kept (FusedBatchNorm's extra outputs are
    # training-time statistics).
    op_def.output.extend([final_op.outputs[0].name])
    self.add_output_shape([final_op.outputs[0]], op_def)
    self.net_def.op.extend([op_def])
def convert_batchnorm(self, op):
    """Convert a decomposed TF batch-norm (a 'batchnorm/mul' Mul followed
    by an Add) into a MACE 'FoldedBatchNorm' op.

    `op` is the Mul; its single consumer (looked up in self.tf_graph) is
    the Add supplying the offset. Scale comes from the Mul's input 1,
    offset from the Add's input 1. A single trailing activation consumer
    of the Add is fused when present.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    op_def.name = op.name
    op_def.type = 'FoldedBatchNorm'
    add_op = self.tf_graph[op.name][0]
    scale_tensor = get_input_tensor(op, 1)
    offset_tensor = get_input_tensor(add_op, 1)
    input_names = [scale_tensor.name, offset_tensor.name]
    op_def.input.extend([op.inputs[0].name])
    if self.device == 'gpu':
        # GPU consumes scale/offset as ARGUMENT images.
        for name in input_names:
            output_name = self.add_buffer_to_image(name, "ARGUMENT")
            op_def.input.extend([output_name])
    else:
        op_def.input.extend([name for name in input_names])
    self.resolved_ops[op.name] = 1
    self.resolved_ops[add_op.name] = 1
    final_op = add_op
    # BUG FIX: the activation lookup previously used self.tf_graph[op.name],
    # whose only consumer is add_op itself (type 'Add', never in
    # activation_name_map), so activations were never fused here. Look at
    # the consumers of final_op instead, matching the other converters.
    if len(self.tf_graph.get(final_op.name, [])) == 1 \
            and self.tf_graph[final_op.name][0].type in activation_name_map:
        activation_op = self.tf_graph[final_op.name][0]
        fused_act_arg = op_def.arg.add()
        fused_act_arg.name = 'activation'
        fused_act_arg.s = activation_name_map[activation_op.type]
        if activation_op.type == 'Relu6':
            max_limit_arg = op_def.arg.add()
            max_limit_arg.name = 'max_limit'
            max_limit_arg.f = 6
        final_op = activation_op
        self.resolved_ops[activation_op.name] = 1
    op_def.output.extend([final_op.outputs[0].name])
    self.add_output_shape([final_op.outputs[0]], op_def)
    self.net_def.op.extend([op_def])
def convert_pooling(self, op):
    """Convert a TF AvgPool/MaxPool op into a MACE 'Pooling' op, copying
    pooling type, padding, strides and kernel size from the TF attributes
    (spatial components only)."""
    pool_def = self.net_def.op.add()
    dtype_arg = pool_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    pool_def.name = op.name
    pool_def.type = 'Pooling'
    pool_def.input.extend([input.name for input in op.inputs])
    pool_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, pool_def)

    kind_arg = pool_def.arg.add()
    kind_arg.name = 'pooling_type'
    kind_arg.i = pooling_type_mode[op.type]

    pad_arg = pool_def.arg.add()
    pad_arg.name = 'padding'
    pad_arg.i = padding_mode[op.get_attr('padding')]

    stride_arg = pool_def.arg.add()
    stride_arg.name = 'strides'
    stride_arg.ints.extend(op.get_attr('strides')[1:3])

    kernel_arg = pool_def.arg.add()
    kernel_arg.name = 'kernels'
    kernel_arg.ints.extend(op.get_attr('ksize')[1:3])

    layout_arg = pool_def.arg.add()
    layout_arg.name = 'data_format'
    layout_arg.s = 'NCHW' if self.device == 'cpu' else 'NHWC'
    self.resolved_ops[op.name] = 1
def convert_global_avg_pooling(self, op):
    """Convert a TF Mean over the spatial axes into a MACE 'Pooling' op
    performing VALID average pooling with a kernel that spans the whole
    input height and width (stride 1)."""
    pool_def = self.net_def.op.add()
    dtype_arg = pool_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    pool_def.name = op.name
    pool_def.type = 'Pooling'
    pool_def.input.extend([op.inputs[0].name])
    pool_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, pool_def)

    kind_arg = pool_def.arg.add()
    kind_arg.name = 'pooling_type'
    kind_arg.i = pooling_type_mode['AvgPool']

    pad_arg = pool_def.arg.add()
    pad_arg.name = 'padding'
    pad_arg.i = padding_mode['VALID']

    stride_arg = pool_def.arg.add()
    stride_arg.name = 'strides'
    stride_arg.ints.extend([1, 1])

    # Kernel = entire spatial extent of the input.
    kernel_arg = pool_def.arg.add()
    kernel_arg.name = 'kernels'
    kernel_arg.ints.extend(op.inputs[0].shape.as_list()[1:3])

    layout_arg = pool_def.arg.add()
    layout_arg.name = 'data_format'
    layout_arg.s = 'NCHW' if self.device == 'cpu' else 'NHWC'
    self.resolved_ops[op.name] = 1
def convert_activation(self, op):
    """Convert a standalone TF activation (Relu/Sigmoid/Tanh) into a MACE
    'Activation' op; the concrete kind comes from activation_name_map."""
    act_def = self.net_def.op.add()
    dtype_arg = act_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    act_def.name = op.name
    act_def.type = 'Activation'
    kind_arg = act_def.arg.add()
    kind_arg.name = 'activation'
    kind_arg.s = activation_name_map[op.type]
    act_def.input.extend([input.name for input in op.inputs])
    act_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, act_def)
    self.resolved_ops[op.name] = 1
def convert_relu6(self, op):
    """Convert a standalone TF Relu6 into a MACE 'Activation' op of kind
    RELUX with max_limit fixed at 6."""
    act_def = self.net_def.op.add()
    dtype_arg = act_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    act_def.name = op.name
    act_def.type = 'Activation'
    act_def.input.extend([input.name for input in op.inputs])
    act_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, act_def)
    kind_arg = act_def.arg.add()
    kind_arg.name = 'activation'
    kind_arg.s = "RELUX"
    limit_arg = act_def.arg.add()
    limit_arg.name = 'max_limit'
    limit_arg.f = 6
    self.resolved_ops[op.name] = 1
def convert_add(self, op):
    """Convert a TF Add with more than two inputs into a MACE 'AddN' op
    (the two-input case goes through convert_eltwise instead)."""
    add_def = self.net_def.op.add()
    dtype_arg = add_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    add_def.name = op.name
    add_def.type = "AddN"
    add_def.input.extend([input.name for input in op.inputs])
    add_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, add_def)
    self.resolved_ops[op.name] = 1
def convert_concat(self, op):
    """Convert a TF ConcatV2 into a MACE 'Concat' op. The last TF input is
    the constant axis tensor: it is evaluated, remapped from NHWC channel
    axis 3 to NCHW axis 1 on CPU, and marked unused."""
    cat_def = self.net_def.op.add()
    dtype_arg = cat_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    cat_def.name = op.name
    cat_def.type = "Concat"
    # Every input except the trailing axis constant.
    cat_def.input.extend([input.name for input in op.inputs[:-1]])
    cat_def.output.extend([output.name for output in op.outputs])
    axis_arg = cat_def.arg.add()
    axis_arg.name = 'axis'
    last_idx = len(op.inputs) - 1
    axis = get_input_tensor(op, last_idx).eval().astype(np.int32)
    if self.device == 'cpu' and axis == 3:
        # Channel axis moves from NHWC position 3 to NCHW position 1.
        axis = 1
    axis_arg.i = axis
    self.add_output_shape(op.outputs, cat_def)
    self.resolved_ops[op.name] = 1
    self.unused_tensor.add(get_input_tensor(op, last_idx).name)
def convert_resize_bilinear(self, op):
    """Convert a TF ResizeBilinear into a MACE 'ResizeBilinear' op. The
    constant size tensor (input 1) is folded into a 'size' argument and
    marked unused; 'align_corners' is copied from the TF attribute."""
    resize_def = self.net_def.op.add()
    dtype_arg = resize_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    resize_def.name = op.name
    resize_def.type = "ResizeBilinear"
    resize_def.input.extend([op.inputs[0].name])
    resize_def.output.extend([output.name for output in op.outputs])
    size_arg = resize_def.arg.add()
    size_arg.name = 'size'
    size_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    align_arg = resize_def.arg.add()
    align_arg.name = 'align_corners'
    align_arg.i = op.get_attr('align_corners')
    self.add_output_shape(op.outputs, resize_def)
    self.resolved_ops[op.name] = 1
    self.unused_tensor.add(get_input_tensor(op, 1).name)
def convert_eltwise(self, op, math_type):
    """Convert a binary/unary TF math op into a MACE 'Eltwise' op.

    math_type is a key into math_type_mode (e.g. 'ADD', 'MUL', 'SUB').
    For two-input ops, a scalar (rank-0) constant operand is folded into
    an 'x' float argument instead of being kept as a tensor input.
    """
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = "Eltwise"
    if len(op.inputs) == 2:
        input_tensor0 = get_input_tensor(op, 0)
        input_tensor1 = get_input_tensor(op, 1)
        x_value = None
        if np.asarray(input_tensor1.shape).size == 0:
            # Input 1 is a scalar: fold it into the 'x' argument.
            x_value = input_tensor1.eval()
            op_def.input.extend([op.inputs[0].name])
            self.unused_tensor.add(input_tensor1.name)
        elif np.asarray(input_tensor0.shape).size == 0:
            # Input 0 is a scalar: fold it into the 'x' argument.
            x_value = input_tensor0.eval()
            op_def.input.extend([op.inputs[1].name])
            self.unused_tensor.add(input_tensor0.name)
        else:
            # Neither input is scalar: 1-D constants become ARGUMENT
            # images on GPU, plain inputs on CPU.
            # NOTE(review): an input that is neither scalar nor a 1-D
            # Const is never appended to op_def.input here — presumably
            # the converter assumes at least one operand is a constant;
            # verify against the supported model set.
            if np.asarray(input_tensor0.shape).size == 1 \
                    and input_tensor0.op.type == 'Const':
                if self.device == 'gpu':
                    output_name = self.add_buffer_to_image(
                        input_tensor0.name, "ARGUMENT")
                    op_def.input.extend([output_name])
                else:
                    op_def.input.extend([input_tensor0.name])
            if np.asarray(input_tensor1.shape).size == 1 \
                    and input_tensor1.op.type == 'Const':
                if self.device == 'gpu':
                    output_name = self.add_buffer_to_image(
                        input_tensor1.name, "ARGUMENT")
                    op_def.input.extend([output_name])
                else:
                    op_def.input.extend([input_tensor1.name])
        if x_value is not None:
            x_arg = op_def.arg.add()
            x_arg.name = 'x'
            x_arg.f = x_value
    else:
        # Non-binary case: pass all inputs through unchanged.
        op_def.input.extend([input.name for input in op.inputs])
    type_arg = op_def.arg.add()
    type_arg.name = 'type'
    type_arg.i = math_type_mode[math_type]
    op_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, op_def)
    self.resolved_ops[op.name] = 1
def convert_depth_to_space(self, op, d2s):
    """Convert a TF DepthToSpace/SpaceToDepth op, keeping the TF op type
    and copying its 'block_size' attribute.

    NOTE: the `d2s` flag is currently unused — the MACE type is taken
    directly from op.type; the parameter is kept for interface
    compatibility with the dispatch in convert().
    """
    dts_def = self.net_def.op.add()
    dtype_arg = dts_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    dts_def.name = op.name
    dts_def.type = op.type
    dts_def.input.extend([op.inputs[0].name])
    dts_def.output.extend([output.name for output in op.outputs])
    block_arg = dts_def.arg.add()
    block_arg.name = 'block_size'
    block_arg.i = op.get_attr('block_size')
    self.add_output_shape(op.outputs, dts_def)
    self.resolved_ops[op.name] = 1
def convert_bias_add(self, op):
    """Convert a standalone TF BiasAdd (one not fused into a preceding
    conv/FC) into a MACE 'BiasAdd' op; on GPU the bias tensor is first
    converted to an ARGUMENT image."""
    bias_def = mace_pb2.OperatorDef()
    dtype_arg = bias_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    bias_def.name = op.name
    bias_def.type = "BiasAdd"
    bias_def.input.extend([op.inputs[0].name])
    bias_name = get_input_tensor(op, 1).name
    if self.device == 'gpu':
        bias_def.input.extend([self.add_buffer_to_image(bias_name,
                                                        "ARGUMENT")])
    else:
        bias_def.input.extend([bias_name])
    bias_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, bias_def)
    self.net_def.op.extend([bias_def])
    self.resolved_ops[op.name] = 1
def convert_space_to_batch(self, op, b2s):
    """Convert a TF SpaceToBatchND/BatchToSpaceND op, folding the two
    constant inputs into arguments.

    `b2s` selects the name of the second argument: 'crops' for
    BatchToSpaceND, 'paddings' for SpaceToBatchND.
    """
    stb_def = self.net_def.op.add()
    dtype_arg = stb_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    stb_def.name = op.name
    stb_def.type = op.type
    stb_def.input.extend([op.inputs[0].name])
    stb_def.output.extend([output.name for output in op.outputs])
    block_arg = stb_def.arg.add()
    block_arg.name = 'block_shape'
    block_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    pad_arg = stb_def.arg.add()
    pad_arg.name = 'crops' if b2s else 'paddings'
    pad_arg.ints.extend(
        get_input_tensor(op, 2).eval().astype(np.int32).flat)
    self.add_output_shape(op.outputs, stb_def)
    self.resolved_ops[op.name] = 1
    # Both constant inputs were folded into args above.
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    self.unused_tensor.add(get_input_tensor(op, 2).name)
def is_atrous_conv2d(self, op):
    """Return True when `op` is a SpaceToBatchND whose single consumer is
    a Conv2D or DepthwiseConv2dNative — i.e. the TF expansion of a
    dilated (atrous) convolution."""
    if op.type != 'SpaceToBatchND':
        return False
    consumers = self.tf_graph[op.name]
    if len(consumers) != 1:
        return False
    return consumers[0].type in ('Conv2D', 'DepthwiseConv2dNative')
def convert_atrous_conv2d(self, op):
    """Convert the TF SpaceToBatchND -> Conv2D/DepthwiseConv2dNative ->
    BatchToSpaceND pattern (an atrous/dilated convolution) into a single
    MACE conv op with a 'dilations' argument.

    `op` is the SpaceToBatchND; the conv is its single consumer (the
    caller guarantees this via is_atrous_conv2d). A trailing BiasAdd and
    a trailing Relu are fused when present; the BatchToSpaceND is
    mandatory and its constants are dropped.
    """
    op_def = mace_pb2.OperatorDef()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    conv_op = self.tf_graph[op.name][0]
    op_def.name = conv_op.name
    if conv_op.type == 'DepthwiseConv2dNative':
        op_def.type = 'DepthwiseConv2d'
    else:
        op_def.type = conv_op.type
    if self.device == 'gpu':
        # GPU: data input plus the filter converted to the matching image
        # type (depthwise filters use a dedicated layout).
        op_def.input.extend([op.inputs[0].name])
        if op_def.type == 'DepthwiseConv2d':
            buffer_type = "DW_CONV2D_FILTER"
        else:
            self.transpose_filter_tensor[get_input_tensor(conv_op,
                                                          1).name] = \
                (0, 1, 3, 2)
            buffer_type = "CONV2D_FILTER"
        output_name = self.add_buffer_to_image(
            get_input_tensor(conv_op, 1).name, buffer_type)
        op_def.input.extend([output_name])
    else:
        # CPU: schedule an HWIO -> OIHW filter transpose.
        self.transpose_filter_tensor[get_input_tensor(conv_op, 1).name] = \
            (3, 2, 0, 1)
        op_def.input.extend([get_input_tensor(op, 0).name])
        op_def.input.extend([get_input_tensor(conv_op, 1).name])
    # Input 1 of SpaceToBatchND is the block shape == dilation factors.
    dilation_arg = op_def.arg.add()
    dilation_arg.name = 'dilations'
    dilation_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    padding_arg = op_def.arg.add()
    padding_arg.name = 'padding'
    # Non-zero paddings on the SpaceToBatchND imply SAME padding.
    padding_values = get_input_tensor(op, 2).eval().astype(np.int32).flat
    if len(padding_values) > 0 and padding_values[0] > 0:
        padding_arg.i = padding_mode['SAME']
    else:
        padding_arg.i = padding_mode['VALID']
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    self.unused_tensor.add(get_input_tensor(op, 2).name)
    # Atrous convolutions always have unit strides.
    strides_arg = op_def.arg.add()
    strides_arg.name = 'strides'
    strides_arg.ints.extend([1, 1])
    data_format_arg = op_def.arg.add()
    data_format_arg.name = 'data_format'
    if self.device == 'cpu':
        data_format_arg.s = 'NCHW'
    else:
        data_format_arg.s = 'NHWC'
    final_op = conv_op
    self.resolved_ops[op.name] = 1
    self.resolved_ops[conv_op.name] = 1
    # Fuse a single BiasAdd consumer of the conv, if any.
    if len(self.tf_graph[final_op.name]
           ) == 1 and self.tf_graph[final_op.name][0].type == 'BiasAdd':
        bias_add_op = self.tf_graph[final_op.name][0]
        if self.device == 'gpu':
            output_name = self.add_buffer_to_image(
                get_input_tensor(bias_add_op, 1).name, "ARGUMENT")
            op_def.input.extend([output_name])
        else:
            op_def.input.extend([get_input_tensor(bias_add_op, 1).name])
        final_op = bias_add_op
        self.resolved_ops[bias_add_op.name] = 1
    # The closing BatchToSpaceND is required; absorb it and drop its
    # constant inputs.
    if len(self.tf_graph[final_op.name]) == 1 and \
            self.tf_graph[final_op.name][0].type == 'BatchToSpaceND':
        final_op = self.tf_graph[final_op.name][0]
        self.resolved_ops[final_op.name] = 1
        self.unused_tensor.add(get_input_tensor(final_op, 1).name)
        self.unused_tensor.add(get_input_tensor(final_op, 2).name)
    else:
        raise Exception('Convert atrous conv error: no BatchToSpaceND op')
    # Only plain Relu is fused after the BatchToSpaceND on this path.
    if len(self.tf_graph[final_op.name]) == 1 and \
            self.tf_graph[final_op.name][0].type == 'Relu':
        relu_op = self.tf_graph[final_op.name][0]
        fused_relu_arg = op_def.arg.add()
        fused_relu_arg.name = 'activation'
        fused_relu_arg.s = "RELU"
        final_op = relu_op
        self.resolved_ops[relu_op.name] = 1
    op_def.output.extend([output.name for output in final_op.outputs])
    self.add_output_shape(final_op.outputs, op_def)
    self.net_def.op.extend([op_def])
def is_softmax(self, op):
    """Return True for a Softmax sandwiched between two Reshape ops
    (single Reshape parent via tf_parents, single Reshape consumer via
    tf_graph) — the pattern convert_softmax collapses."""
    if op.type != 'Softmax':
        return False
    if len(self.tf_parents[op.name]) != 1:
        return False
    if self.tf_parents[op.name][0].type != 'Reshape':
        return False
    if len(self.tf_graph[op.name]) != 1:
        return False
    return self.tf_graph[op.name][0].type == 'Reshape'
def convert_softmax(self, softmax_op):
    """Convert the Reshape -> Softmax -> Reshape pattern (matched by
    is_softmax) into a single MACE Softmax op, collapsing both Reshapes
    and — for inception_v3-style graphs — an optional preceding Squeeze
    and a sibling Shape/Reshape pair.

    BUG FIX: a leftover debug statement (`print children_ops`) that was
    emitted on every conversion has been removed.
    """
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt

    # deal with first Reshape op
    parent_reshape_op = self.tf_parents[softmax_op.name][0]
    self.unused_tensor.add(get_input_tensor(parent_reshape_op, 1).name)
    self.resolved_ops[parent_reshape_op.name] = 1

    # FIXME: hardcode for inception_v3
    # remove squeeze if exist
    squeeze_op = self.tf_parents[parent_reshape_op.name][0]
    if squeeze_op.type == 'Squeeze':
        op_def.input.extend([squeeze_op.inputs[0].name])
        self.resolved_ops[squeeze_op.name] = 1
        # remove shape if exist
        children_ops = self.tf_graph[squeeze_op.name]
        # NOTE(review): this checks children_ops[0] for 'Shape' but
        # resolves children_ops[1] — presumably [1] is the Reshape fed by
        # the Shape; verify against the inception_v3 graph.
        if len(children_ops) > 1 and children_ops[0].type == 'Shape':
            self.unused_tensor.add(
                get_input_tensor(children_ops[1], 0).name)
            self.resolved_ops[children_ops[1].name] = 1
    else:
        op_def.input.extend([parent_reshape_op.inputs[0].name])

    # deal with Softmax op
    op_def.name = softmax_op.name
    op_def.type = softmax_op.type
    self.resolved_ops[softmax_op.name] = 1

    # deal with last Reshape op
    reshape_op = self.tf_graph[softmax_op.name][0]
    self.unused_tensor.add(get_input_tensor(reshape_op, 1).name)
    shape = [dim.value for dim in reshape_op.outputs[0].shape]
    if len(shape) == 2:
        # Pad a 2-D output shape up to 4-D for MACE.
        shape = [1, 1, shape[0], shape[1]]
    op_def.output.extend([output.name for output in reshape_op.outputs])
    self.add_output_shape([shape], op_def)
    self.resolved_ops[reshape_op.name] = 1
def convert_pad(self, op):
    """Convert a TF Pad/PadV2 into a MACE 'Pad' op.

    The constant paddings tensor (input 1) is evaluated into a 'paddings'
    argument; on CPU its rows are reordered from NHWC to NCHW order. An
    optional constant pad value (input 2, PadV2) becomes 'constant_value'.

    Cleanup: the identical `.eval()` of the paddings tensor was duplicated
    in both device branches; it is now computed once and only the CPU row
    reorder is conditional (behavior unchanged).
    """
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = "Pad"
    op_def.input.extend([op.inputs[0].name])
    op_def.output.extend([output.name for output in op.outputs])
    paddings_arg = op_def.arg.add()
    paddings_arg.name = 'paddings'
    paddings_value = get_input_tensor(op, 1).eval().astype(np.int32)
    if self.device != 'gpu':
        # Reorder pad rows from NHWC to NCHW for the CPU runtime.
        paddings_value = paddings_value[[0, 3, 1, 2]]
    paddings_arg.ints.extend(paddings_value.flat)
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    if len(op.inputs) == 3:
        # PadV2: a third constant input carries the fill value.
        constant_value_arg = op_def.arg.add()
        constant_value_arg.name = 'constant_value'
        constant_value_arg.i = \
            get_input_tensor(op, 2).eval().astype(np.int32).flat[0]
        self.unused_tensor.add(get_input_tensor(op, 2).name)
    self.add_output_shape(op.outputs, op_def)
    self.resolved_ops[op.name] = 1
def convert_normal_op(self, op):
    """Pass-through conversion: copy a TF op into MACE verbatim (same
    type, inputs and outputs). Currently unreferenced by the dispatch in
    convert() but kept as a generic fallback."""
    passthrough_def = self.net_def.op.add()
    dtype_arg = passthrough_def.arg.add()
    dtype_arg.name = 'T'
    dtype_arg.i = self.dt
    passthrough_def.name = op.name
    passthrough_def.type = op.type
    passthrough_def.input.extend([input.name for input in op.inputs])
    passthrough_def.output.extend([output.name for output in op.outputs])
    self.add_output_shape(op.outputs, passthrough_def)
    self.resolved_ops[op.name] = 1
def convert(self, input_nodes, output_nodes):
    """Main conversion driver: walk every TF op, dispatch each to its
    converter, then convert the remaining Const ops into tensors and add
    the device-specific input/output transform ops.

    Ops already handled (e.g. fused into a neighbor) are skipped via
    self.resolved_ops. Any op left unresolved at the end is reported.
    """
    if self.device == 'gpu':
        self.add_gpu_input_transform(input_nodes)
    if self.device == 'cpu':
        self.add_cpu_input_transform(input_nodes)
    for op in self.tf_ops:
        if self.resolved_ops[op.name] == 1:
            continue
        if op.type in ['Placeholder', 'Identity']:
            # No-op nodes: mark resolved and emit nothing.
            self.resolved_ops[op.name] = 1
            pass
        elif op.type == 'Const':
            # Constants are converted in the second pass below.
            pass
        elif op.type == 'Reshape':
            self.convert_reshape(op)
        elif self.is_atrous_conv2d(op):
            self.convert_atrous_conv2d(op)
        elif self.check_conv_to_fc(op):
            self.convert_global_conv_to_fc(op)
        elif op.type == 'Conv2D' or op.type == 'DepthwiseConv2dNative':
            # GPU may use the Winograd path when the shape qualifies.
            if self.device == 'gpu' and self.check_winograd_conv(op):
                self.convert_winograd_conv_gpu(op)
            else:
                self.convert_conv2d(op)
        elif op.type == 'Conv2DBackpropInput':
            self.convert_deconv2d(op)
        elif op.type == 'FusedBatchNorm':
            self.convert_fused_batchnorm(op)
        elif op.type == 'Mul' and op.name.find('batchnorm/mul') != -1:
            # Decomposed batch-norm detected by naming convention.
            self.convert_batchnorm(op)
        elif op.type == 'AvgPool' or op.type == 'MaxPool':
            self.convert_pooling(op)
        elif op.type == 'Relu6':
            self.convert_relu6(op)
        elif op.type == 'Add':
            # Two-operand Add is an Eltwise; more operands become AddN.
            if len(op.inputs) > 2:
                self.convert_add(op)
            else:
                self.convert_eltwise(op, 'ADD')
        elif op.type == 'ConcatV2':
            self.convert_concat(op)
        elif op.type == 'ResizeBilinear':
            self.convert_resize_bilinear(op)
        elif op.type == 'BiasAdd':
            self.convert_bias_add(op)
        elif op.type == 'SpaceToBatchND':
            self.convert_space_to_batch(op, False)
        elif op.type == 'BatchToSpaceND':
            self.convert_space_to_batch(op, True)
        elif op.type == 'DepthToSpace':
            self.convert_depth_to_space(op, True)
        elif op.type == 'SpaceToDepth':
            self.convert_depth_to_space(op, False)
        elif op.type in ['Neg', 'neg', 'Negative', 'negative']:
            self.convert_eltwise(op, 'NEG')
        elif op.type in ['RealDiv', 'Div']:
            self.convert_eltwise(op, 'DIV')
        elif op.type in ['SquaredDifference']:
            self.convert_eltwise(op, 'SQR_DIFF')
        elif op.type in ['Pow']:
            self.convert_eltwise(op, 'POW')
        elif op.type == 'Mul':
            self.convert_eltwise(op, 'MUL')
        elif op.type == 'Sub':
            self.convert_eltwise(op, 'SUB')
        elif self.is_softmax(op):
            self.convert_softmax(op)
        elif op.type in ['Relu', 'Sigmoid', 'Tanh']:
            self.convert_activation(op)
        # FIXME: hardcode for inception_v3
        elif op.type in ['Squeeze', 'Shape']:
            self.resolved_ops[op.name] = 1
        elif op.type == 'Mean':
            # Global avg pooling
            # Only a mean over the spatial axes (1, 2) is supported.
            reduce_dims = op.inputs[1].eval()
            if reduce_dims[0] == 1 and reduce_dims[1] == 2:
                self.convert_global_avg_pooling(op)
                self.unused_tensor.add(op.inputs[1].name)
            else:
                raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                              op.type))
        elif op.type == 'Pad':
            self.convert_pad(op)
        # elif op.type in ['']:
        #     self.convert_normal_op(op)
        else:
            raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                          op.type))

    # Second pass: any Const not consumed/folded above becomes a tensor.
    for op in self.tf_ops:
        if self.resolved_ops[op.name] == 1:
            continue
        elif op.type == 'Const':
            self.convert_tensor(op)
        else:
            raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                          op.type))

    if self.device == 'gpu':
        self.add_gpu_output_transform(output_nodes)
    if self.device == 'cpu':
        self.add_cpu_output_transform(output_nodes)

    # Report anything that slipped through without being converted.
    for key in self.resolved_ops:
        if self.resolved_ops[key] != 1:
            print 'Unresolve Op: %s' % key
class Optimizer:
    """Post-conversion graph optimizer for a MACE NetDef.

    Currently performs a single pass: folding a FoldedBatchNorm that
    directly follows a DepthwiseConv2d into the conv's weights/bias.
    """

    def __init__(self, net_def, device):
        # mace_graph maps an input (tensor) name to the list of ops that
        # consume it; tensor_map maps tensor name to its proto.
        self.net_def = net_def
        self.device = device
        self.mace_graph = {}
        self.tensor_map = {}
        for op in net_def.op:
            for input_name in op.input:
                if input_name not in self.mace_graph:
                    self.mace_graph[input_name] = []
                self.mace_graph[input_name].append(op)
        for tensor in net_def.tensors:
            self.tensor_map[tensor.name] = tensor

    def get_buffer_tensor_name(self, name):
        """Map a GPU image-tensor name back to the underlying buffer
        tensor name; on CPU names are used as-is.

        NOTE(review): the GPU mapping drops characters [-6:-2] of the
        name — presumably stripping an image-name infix added by the
        buffer-to-image pass; confirm against add_buffer_to_image's
        naming scheme.
        """
        if self.device == 'gpu':
            return name[:-6] + name[-2:]
        else:
            return name

    def fold_batch_norm(self):
        """Fold DepthwiseConv2d + FoldedBatchNorm pairs.

        The batch-norm scale is multiplied into the conv weights and the
        offset tensor is appended as the conv's bias input; the conv then
        takes over the batch-norm's output and fused-activation args.
        Returns a new NetDef with the rewritten ops and tensors.
        """
        unused_tensors = set()
        new_tensors = []
        new_net = mace_pb2.NetDef()
        resolved_ops = set()
        for op in self.net_def.op:
            if op.name in resolved_ops:
                pass
            elif op.type == 'DepthwiseConv2d' and len(op.output) == 1 and \
                    self.mace_graph[op.output[0]][0].type == \
                    'FoldedBatchNorm':
                depthwise_conv2d_op = op
                folded_bn_op = self.mace_graph[op.output[0]][0]
                weight_buffer_name = self.get_buffer_tensor_name(
                    depthwise_conv2d_op.input[1])
                weight_tensor = self.tensor_map[weight_buffer_name]
                scale_buffer_name = self.get_buffer_tensor_name(
                    folded_bn_op.input[1])
                offset_buffer_name = self.get_buffer_tensor_name(
                    folded_bn_op.input[2])
                scale_tensor = self.tensor_map[scale_buffer_name]
                weight_shape = weight_tensor.dims
                idx = 0
                # Multiply each weight by the scale of its channel; the
                # channel index depends on the weight layout.
                # NOTE(review): the CPU index `ic * weight_shape[0] + oc`
                # mirrors the HWIO formula with dim 0 in place of dim 3 —
                # verify it matches the depthwise OIHW channel layout.
                if self.device == 'cpu':
                    # OIHW
                    for oc in range(weight_shape[0]):
                        for ic in range(weight_shape[1]):
                            for i in range(weight_shape[2]):
                                for j in range(weight_shape[3]):
                                    weight_tensor.float_data[idx] *= \
                                        scale_tensor.float_data[
                                            ic * weight_shape[0] + oc]
                                    idx += 1
                else:
                    # HWIO
                    for i in range(weight_shape[0]):
                        for j in range(weight_shape[1]):
                            for ic in range(weight_shape[2]):
                                for oc in range(weight_shape[3]):
                                    weight_tensor.float_data[idx] *= \
                                        scale_tensor.float_data[
                                            ic * weight_shape[3] + oc]
                                    idx += 1
                # Re-emit the (mutated) weight tensor and drop the scale.
                new_tensors.append(weight_tensor)
                unused_tensors.add(weight_tensor.name)
                unused_tensors.add(scale_tensor.name)
                if self.device == 'gpu':
                    # Keep only the offset's buffer-to-image op; the
                    # scale is folded away entirely.
                    scale_b2i_op = self.mace_graph[scale_buffer_name][0]
                    offset_b2i_op = self.mace_graph[offset_buffer_name][0]
                    resolved_ops.add(scale_b2i_op.name)
                    resolved_ops.add(offset_b2i_op.name)
                    new_net.op.extend([offset_b2i_op])
                resolved_ops.add(depthwise_conv2d_op.name)
                resolved_ops.add(folded_bn_op.name)
                # The offset becomes the conv's bias input.
                offset_tensor_name = folded_bn_op.input[2]
                depthwise_conv2d_op.input.extend([offset_tensor_name])
                # Carry over the batch-norm's fused-activation args.
                for arg in folded_bn_op.arg:
                    if arg.name == 'activation':
                        act_arg = depthwise_conv2d_op.arg.add()
                        act_arg.name = arg.name
                        act_arg.s = arg.s
                    elif arg.name == 'max_limit':
                        act_arg = depthwise_conv2d_op.arg.add()
                        act_arg.name = arg.name
                        act_arg.f = arg.f
                # The conv now produces the batch-norm's output.
                depthwise_conv2d_op.output[0] = folded_bn_op.output[0]
                new_net.op.extend([depthwise_conv2d_op])
            else:
                new_net.op.extend([op])
        # Copy all surviving tensors, then the rewritten weights.
        for tensor in self.net_def.tensors:
            if tensor.name in unused_tensors:
                pass
            else:
                new_net.tensors.extend([tensor])
        for tensor in new_tensors:
            new_net.tensors.extend([tensor])
        return new_net

    def optimize(self):
        """Run all optimization passes and return the new NetDef."""
        new_net = self.fold_batch_norm()
        return new_net
def add_shape_info(input_graph_def, input_nodes, input_shapes):
    """Return a copy of `input_graph_def` in which every node named in
    `input_nodes` has its attributes replaced by an explicit 'shape'
    (from the matching entry of `input_shapes`) plus the original
    'dtype'; all other nodes are deep-copied unchanged."""
    rewritten = graph_pb2.GraphDef()
    for node in input_graph_def.node:
        if node.name not in input_nodes:
            rewritten.node.extend([copy.deepcopy(node)])
            continue
        shape = input_shapes[input_nodes.index(node.name)]
        placeholder = copy.deepcopy(node)
        # Drop all attrs, then restore only shape and dtype.
        placeholder.attr.clear()
        placeholder.attr['shape'].shape.dim.extend(
            [tensor_shape_pb2.TensorShapeProto.Dim(size=dim)
             for dim in shape])
        placeholder.attr['dtype'].CopyFrom(node.attr['dtype'])
        rewritten.node.extend([placeholder])
    return rewritten
def convert_to_mace_pb(model_file, input_node, input_shape, output_node,
                       data_type, device, winograd):
    """Load a frozen TF GraphDef from `model_file`, convert it to a MACE
    NetDef, run the Optimizer and memory optimization, and return the
    resulting NetDef.

    input_node / output_node: comma-separated node names.
    input_shape: colon-separated shapes, each a comma-separated int list
        (one per input node); may be "" only when there are no inputs.
    data_type: key into data_type_map; device: 'cpu' or 'gpu';
    winograd: enables the GPU Winograd conv path inside TFConverter.
    """
    net_def = mace_pb2.NetDef()
    dt = data_type_map[data_type]

    input_graph_def = tf.GraphDef()
    with gfile.Open(model_file, "rb") as f:
        data = f.read()
        input_graph_def.ParseFromString(data)

    input_nodes = [x for x in input_node.split(',')]
    input_shapes = []
    if input_shape != "":
        input_shape_strs = [x for x in input_shape.split(':')]
        for shape_str in input_shape_strs:
            input_shapes.extend([[int(x) for x in shape_str.split(',')]])
    output_nodes = [x for x in output_node.split(',')]
    # One shape must be supplied per input node.
    assert len(input_nodes) == len(input_shapes)

    # Pin the placeholder shapes so downstream shape inference works.
    input_graph_def = add_shape_info(input_graph_def, input_nodes,
                                     input_shapes)
    with tf.Session() as session:
        with session.graph.as_default() as graph:
            tf.import_graph_def(input_graph_def, name="")
            ops = graph.get_operations()
            converter = TFConverter(graph, ops, net_def, dt, device,
                                    winograd)
            converter.convert(input_nodes, output_nodes)
            optimizer = Optimizer(net_def, device)
            net_def = optimizer.optimize()
            print "Model Converted."
            if device == 'gpu':
                print "start optimize memory."
                memory_optimizer.optimize_gpu_memory(net_def)
                print "Memory optimization done."
            elif device == 'cpu':
                print "start optimize memory."
                memory_optimizer.optimize_cpu_memory(net_def)
                print "Memory optimization done."

    return net_def
mace/test/mace_api_mt_test.cc
浏览文件 @
c3837858
...
@@ -152,7 +152,7 @@ void CheckOutputs(const NetDef &net_def,
...
@@ -152,7 +152,7 @@ void CheckOutputs(const NetDef &net_def,
memcpy
(
input_data
.
data
(),
input
.
second
.
data
().
get
(),
memcpy
(
input_data
.
data
(),
input
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
data_size
*
sizeof
(
float
));
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input
.
first
,
":0"
);
input
.
first
);
net
.
AddInputFromArray
<
D
,
float
>
(
input_name
,
input
.
second
.
shape
(),
net
.
AddInputFromArray
<
D
,
float
>
(
input_name
,
input
.
second
.
shape
(),
input_data
);
input_data
);
}
}
...
@@ -181,7 +181,7 @@ void CheckOutputs(const NetDef &net_def,
...
@@ -181,7 +181,7 @@ void CheckOutputs(const NetDef &net_def,
float
*
data
=
tmp_tensor
->
mutable_data
<
float
>
();
float
*
data
=
tmp_tensor
->
mutable_data
<
float
>
();
memcpy
(
data
,
output
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
memcpy
(
data
,
output
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output
.
first
,
":0"
);
output
.
first
);
ops
::
test
::
ExpectTensorNear
<
float
>
(
*
tmp_tensor
,
ops
::
test
::
ExpectTensorNear
<
float
>
(
*
tmp_tensor
,
*
net
.
GetOutput
(
output_name
.
data
()),
*
net
.
GetOutput
(
output_name
.
data
()),
1e-5
);
1e-5
);
...
@@ -265,7 +265,7 @@ void MaceRunFunc(const int in_out_size) {
...
@@ -265,7 +265,7 @@ void MaceRunFunc(const int in_out_size) {
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
++
i
)
{
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input_names
[
i
]
,
":0"
);
input_names
[
i
]);
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
mace
::
kernels
::
IN_OUT_CHANNEL
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
{
mem_map
[
input_names
[
i
]]},
{
mem_map
[
input_names
[
i
]]},
...
@@ -281,7 +281,7 @@ void MaceRunFunc(const int in_out_size) {
...
@@ -281,7 +281,7 @@ void MaceRunFunc(const int in_out_size) {
}
}
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output_names
[
i
]
,
":0"
);
output_names
[
i
]);
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
}
}
...
...
mace/test/mace_api_test.cc
浏览文件 @
c3837858
...
@@ -162,7 +162,7 @@ void CheckOutputs(const NetDef &net_def,
...
@@ -162,7 +162,7 @@ void CheckOutputs(const NetDef &net_def,
memcpy
(
input_data
.
data
(),
input
.
second
.
data
().
get
(),
memcpy
(
input_data
.
data
(),
input
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
data_size
*
sizeof
(
float
));
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input
.
first
,
":0"
);
input
.
first
);
net
.
AddInputFromArray
<
D
,
float
>
(
input_name
,
input
.
second
.
shape
(),
net
.
AddInputFromArray
<
D
,
float
>
(
input_name
,
input
.
second
.
shape
(),
input_data
);
input_data
);
}
}
...
@@ -191,7 +191,7 @@ void CheckOutputs(const NetDef &net_def,
...
@@ -191,7 +191,7 @@ void CheckOutputs(const NetDef &net_def,
float
*
data
=
tmp_tensor
->
mutable_data
<
float
>
();
float
*
data
=
tmp_tensor
->
mutable_data
<
float
>
();
memcpy
(
data
,
output
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
memcpy
(
data
,
output
.
second
.
data
().
get
(),
data_size
*
sizeof
(
float
));
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output
.
first
,
":0"
);
output
.
first
);
ops
::
test
::
ExpectTensorNear
<
float
>
(
*
tmp_tensor
,
ops
::
test
::
ExpectTensorNear
<
float
>
(
*
tmp_tensor
,
*
net
.
GetOutput
(
output_name
.
data
()),
*
net
.
GetOutput
(
output_name
.
data
()),
1e-5
);
1e-5
);
...
@@ -275,7 +275,7 @@ void MaceRun(const int in_out_size,
...
@@ -275,7 +275,7 @@ void MaceRun(const int in_out_size,
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
++
i
)
{
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
std
::
string
input_name
=
MakeString
(
"mace_input_node_"
,
input_names
[
i
]
,
":0"
);
input_names
[
i
]);
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
mace
::
kernels
::
IN_OUT_CHANNEL
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
{
mem_map
[
input_names
[
i
]]},
{
mem_map
[
input_names
[
i
]]},
...
@@ -291,7 +291,7 @@ void MaceRun(const int in_out_size,
...
@@ -291,7 +291,7 @@ void MaceRun(const int in_out_size,
}
}
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output_names
[
i
]
,
":0"
);
output_names
[
i
]);
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录