Xiaomi / Mace
Commit 08314882
Authored Jan 30, 2018 by liuqi

Add optimizer to fold BN and depthwise conv2d.

Parent: ea917a75
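
The change teaches the converter to fold a FoldedBatchNorm that follows a DepthwiseConv2d into the convolution itself, and to fuse any activation listed in activation_name_map (including Relu6 as RELUX with a max_limit of 6) into the preceding op. The folding relies on convolution being linear in its weights: for per-output-channel scale and offset, scale * conv(x, W) + offset equals conv(x, scale * W) + offset, so the scale can be absorbed into the filter and the offset passed along as a bias input. A minimal numpy sketch (not part of this commit) that checks this identity with the same (KH, KW, IC, M) weight layout and ic * M + oc channel indexing used in the diff below:

import numpy as np

def depthwise_conv2d(x, w):
    # x: (H, W, IC), w: (KH, KW, IC, M) -> out: (H-KH+1, W-KW+1, IC*M), VALID padding.
    # Output channel c = ic * M + m, matching the indexing in fold_batch_norm.
    H, W_, IC = x.shape
    KH, KW, _, M = w.shape
    out = np.zeros((H - KH + 1, W_ - KW + 1, IC * M))
    for i in range(out.shape[0]):
        for j in range(out.shape[1]):
            patch = x[i:i + KH, j:j + KW, :]                 # (KH, KW, IC)
            out[i, j, :] = np.einsum('hwc,hwcm->cm', patch, w).reshape(-1)
    return out

rng = np.random.RandomState(0)
x = rng.randn(5, 5, 3)
w = rng.randn(3, 3, 3, 2)
scale = rng.randn(3 * 2)     # per output channel, like scale_tensor.float_data
offset = rng.randn(3 * 2)    # per output channel, becomes the bias input

# DepthwiseConv2d followed by FoldedBatchNorm ...
bn_after_conv = scale * depthwise_conv2d(x, w) + offset
# ... equals the conv with per-channel-rescaled weights plus a bias,
# which is exactly what the new Optimizer produces.
folded_w = w * scale.reshape(1, 1, 3, 2)
folded = depthwise_conv2d(x, folded_w) + offset
assert np.allclose(bn_after_conv, folded)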
Showing 1 changed file with 134 additions and 23 deletions (+134 −23).
python/tools/tf_converter_lib.py
@@ -33,7 +33,8 @@ data_type_map = {
 activation_name_map = {
   'Relu': 'RELU',
   'Sigmoid': 'SIGMOID',
-  'Tanh': 'TANH'
+  'Tanh': 'TANH',
+  'Relu6': 'RELUX'
 }

 BATCH_NORM_ORDER = ["Add", "Rsqrt", "Mul", "Mul", "Mul", "Sub", "Add"]
@@ -284,13 +285,17 @@ class TFConverter(object):
       self.resolved_ops[bias_add_op.name] = 1

     if len(self.tf_graph[final_op.name]) == 1 \
-        and self.tf_graph[final_op.name][0].type == 'Relu':
-      relu_op = self.tf_graph[final_op.name][0]
-      fused_relu_arg = iwt_op.arg.add()
-      fused_relu_arg.name = 'activation'
-      fused_relu_arg.s = "RELU"
-      final_op = relu_op
-      self.resolved_ops[relu_op.name] = 1
+        and self.tf_graph[final_op.name][0].type in activation_name_map:
+      activation_op = self.tf_graph[final_op.name][0]
+      fused_act_arg = iwt_op.arg.add()
+      fused_act_arg.name = 'activation'
+      fused_act_arg.s = activation_name_map[activation_op.type]
+      if activation_op.type == 'Relu6':
+        max_limit_arg = iwt_op.arg.add()
+        max_limit_arg.name = 'max_limit'
+        max_limit_arg.f = 6
+      final_op = activation_op
+      self.resolved_ops[activation_op.name] = 1

     iwt_op.output.extend([output.name for output in final_op.outputs])
     self.add_output_shape(final_op.outputs, iwt_op)
@@ -338,14 +343,18 @@ class TFConverter(object):
       self.resolved_ops[bias_add_op.name] = 1

     if len(self.tf_graph[final_op.name]) == 1 \
-        and self.tf_graph[final_op.name][0].type == 'Relu':
-      relu_op = self.tf_graph[final_op.name][0]
+        and self.tf_graph[final_op.name][0].type in activation_name_map:
+      activation_op = self.tf_graph[final_op.name][0]
       op_def.type = "FusedConv2D"
-      fused_relu_arg = op_def.arg.add()
-      fused_relu_arg.name = 'activation'
-      fused_relu_arg.s = "RELU"
-      final_op = relu_op
-      self.resolved_ops[relu_op.name] = 1
+      fused_act_arg = op_def.arg.add()
+      fused_act_arg.name = 'activation'
+      fused_act_arg.s = activation_name_map[activation_op.type]
+      if activation_op.type == 'Relu6':
+        max_limit_arg = op_def.arg.add()
+        max_limit_arg.name = 'max_limit'
+        max_limit_arg.f = 6
+      final_op = activation_op
+      self.resolved_ops[activation_op.name] = 1

     op_def.output.extend([output.name for output in final_op.outputs])
     self.add_output_shape(final_op.outputs, op_def)
@@ -397,16 +406,21 @@ class TFConverter(object):
       self.resolved_ops[op.name] = 1
       final_op = op

-      if len(self.tf_graph[op.name]) == 1 and self.tf_graph[op.name][0].type == 'Relu':
-        relu_op = self.tf_graph[op.name][0]
-        final_op = relu_op
-        fused_relu_arg = op_def.arg.add()
-        fused_relu_arg.name = 'activation'
-        fused_relu_arg.s = "RELU"
-        self.resolved_ops[relu_op.name] = 1
+      if len(self.tf_graph[op.name]) == 1 \
+          and self.tf_graph[op.name][0].type in activation_name_map:
+        activation_op = self.tf_graph[op.name][0]
+        fused_act_arg = op_def.arg.add()
+        fused_act_arg.name = 'activation'
+        fused_act_arg.s = activation_name_map[activation_op.type]
+        if activation_op.type == 'Relu6':
+          max_limit_arg = op_def.arg.add()
+          max_limit_arg.name = 'max_limit'
+          max_limit_arg.f = 6
+        final_op = activation_op
+        self.resolved_ops[activation_op.name] = 1

       op_def.output.extend([final_op.outputs[0].name])
-      self.add_output_shape(final_op.outputs, op_def)
+      self.add_output_shape([final_op.outputs[0]], op_def)
       self.net_def.op.extend([op_def])
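
The three hunks above repeat one pattern: when the single consumer of the convolution output is an activation listed in activation_name_map, the converter records an 'activation' string arg on the fused op, and for Relu6 it additionally records a 'max_limit' float arg of 6 so the RELUX output can be clamped. A minimal sketch of that pattern, using a hypothetical ArgStub/OpStub in place of the mace_pb2 protobuf messages (not MACE code):

activation_name_map = {'Relu': 'RELU', 'Sigmoid': 'SIGMOID',
                       'Tanh': 'TANH', 'Relu6': 'RELUX'}

class ArgStub(object):
    # Stand-in for a protobuf Argument: a name plus a string or float value.
    def __init__(self):
        self.name, self.s, self.f = '', '', 0.0

class ArgList(list):
    # Mimics the repeated-field .add() used as op_def.arg.add() in the diff.
    def add(self):
        arg = ArgStub()
        self.append(arg)
        return arg

class OpStub(object):
    def __init__(self):
        self.arg = ArgList()

def fuse_activation(op_def, tf_act_type):
    # Mirrors the added lines: record the activation kind, plus the clamp
    # value when the TensorFlow op was Relu6.
    act_arg = op_def.arg.add()
    act_arg.name = 'activation'
    act_arg.s = activation_name_map[tf_act_type]
    if tf_act_type == 'Relu6':
        max_limit_arg = op_def.arg.add()
        max_limit_arg.name = 'max_limit'
        max_limit_arg.f = 6

op_def = OpStub()
fuse_activation(op_def, 'Relu6')
print([(a.name, a.s or a.f) for a in op_def.arg])
# [('activation', 'RELUX'), ('max_limit', 6)]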
@@ -794,6 +808,101 @@ class TFConverter(object):
     if self.resolved_ops[key] != 1:
       print 'Unresolve Op: %s' % key

+
+class Optimizer:
+  def __init__(self, net_def, device):
+    self.net_def = net_def
+    self.device = device
+    self.mace_graph = {}
+    self.tensor_map = {}
+    for op in net_def.op:
+      for input_name in op.input:
+        if input_name not in self.mace_graph:
+          self.mace_graph[input_name] = []
+        self.mace_graph[input_name].append(op)
+
+    for tensor in net_def.tensors:
+      self.tensor_map[tensor.name] = tensor
+
+  def get_buffer_tensor_name(self, name):
+    if self.device == 'gpu':
+      return name[:-6] + name[-2:]
+    else:
+      return name
+
+  def fold_batch_norm(self):
+    unused_tensors = set()
+    new_tensors = []
+    new_net = mace_pb2.NetDef()
+    resolved_ops = set()
+
+    for op in self.net_def.op:
+      if op.name in resolved_ops:
+        pass
+      elif op.type == 'DepthwiseConv2d' and len(op.output) == 1 \
+          and self.mace_graph[op.output[0]][0].type == 'FoldedBatchNorm':
+        depthwise_conv2d_op = op
+        folded_bn_op = self.mace_graph[op.output[0]][0]
+
+        weight_buffer_name = self.get_buffer_tensor_name(depthwise_conv2d_op.input[1])
+        weight_tensor = self.tensor_map[weight_buffer_name]
+        scale_buffer_name = self.get_buffer_tensor_name(folded_bn_op.input[1])
+        offset_buffer_name = self.get_buffer_tensor_name(folded_bn_op.input[2])
+        scale_tensor = self.tensor_map[scale_buffer_name]
+
+        weight_shape = weight_tensor.dims
+        idx = 0
+        for i in range(weight_shape[0]):
+          for j in range(weight_shape[1]):
+            for ic in range(weight_shape[2]):
+              for oc in range(weight_shape[3]):
+                weight_tensor.float_data[idx] *= scale_tensor.float_data[ic * weight_shape[3] + oc]
+                idx += 1
+
+        new_tensors.append(weight_tensor)
+        unused_tensors.add(weight_tensor.name)
+        unused_tensors.add(scale_tensor.name)
+
+        if self.device == 'gpu':
+          scale_b2i_op = self.mace_graph[scale_buffer_name][0]
+          offset_b2i_op = self.mace_graph[offset_buffer_name][0]
+          resolved_ops.add(scale_b2i_op.name)
+          resolved_ops.add(offset_b2i_op.name)
+          new_net.op.extend([offset_b2i_op])
+
+        resolved_ops.add(depthwise_conv2d_op.name)
+        resolved_ops.add(folded_bn_op.name)
+
+        offset_tensor_name = folded_bn_op.input[2]
+        depthwise_conv2d_op.input.extend([offset_tensor_name])
+
+        for arg in folded_bn_op.arg:
+          if arg.name == 'activation':
+            act_arg = depthwise_conv2d_op.arg.add()
+            act_arg.name = arg.name
+            act_arg.s = arg.s
+          elif arg.name == 'max_limit':
+            act_arg = depthwise_conv2d_op.arg.add()
+            act_arg.name = arg.name
+            act_arg.f = arg.f
+
+        depthwise_conv2d_op.output[0] = folded_bn_op.output[0]
+        new_net.op.extend([depthwise_conv2d_op])
+      else:
+        new_net.op.extend([op])
+
+    for tensor in self.net_def.tensors:
+      if tensor.name in unused_tensors:
+        pass
+      else:
+        new_net.tensors.extend([tensor])
+
+    for tensor in new_tensors:
+      new_net.tensors.extend([tensor])
+
+    return new_net
+
+  def optimize(self):
+    new_net = self.fold_batch_norm()
+    return new_net
+
+
 def convert_to_mace_pb(input_graph_def, input_node, output_node, data_type, device, winograd):
   net_def = mace_pb2.NetDef()
   dt = data_type_map[data_type]
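
The quadruple loop in fold_batch_norm walks weight_tensor.float_data in row-major (KH, KW, IC, M) order, so the running index idx always belongs to output channel ic * M + oc, which is why the scale lookup is scale_tensor.float_data[ic * weight_shape[3] + oc]. A quick numpy check of that index arithmetic (a sketch, not repository code):

import numpy as np

KH, KW, IC, M = 3, 3, 4, 2
w = np.arange(KH * KW * IC * M, dtype=np.float32)          # stand-in for float_data
scale = np.random.RandomState(1).randn(IC * M).astype(np.float32)

# The loop from the diff, applied element by element over the flat buffer.
folded_loop = w.copy()
idx = 0
for i in range(KH):
    for j in range(KW):
        for ic in range(IC):
            for oc in range(M):
                folded_loop[idx] *= scale[ic * M + oc]
                idx += 1

# The same fold expressed as a broadcast over a row-major (KH, KW, IC, M) view.
folded_vec = (w.reshape(KH, KW, IC, M) * scale.reshape(IC, M)).reshape(-1)
assert np.allclose(folded_loop, folded_vec)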
@@ -804,6 +913,8 @@ def convert_to_mace_pb(input_graph_def, input_node, output_node, data_type, devi
   ops = graph.get_operations()
   converter = TFConverter(ops, net_def, dt, device, winograd)
   converter.convert(input_node, output_node)
+  optimizer = Optimizer(net_def, device)
+  net_def = optimizer.optimize()
   print "PB Converted, start optimize memory."
   mem_optimizer = memory_optimizer.MemoryOptimizer(net_def)
   mem_optimizer.optimize()