Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
58f2516e
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
58f2516e
编写于
4月 10, 2018
作者:
叶
叶剑武
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'pycodestyle' into 'master'
Enable python style check See merge request !361
上级
e54825c5
6da30d22
变更
22
显示空白变更内容
内联
并排
Showing
22 changed file
with
4594 addition
and
4287 deletion
+4594
-4287
.gitlab-ci.yml
.gitlab-ci.yml
+7
-1
docker/Dockerfile
docker/Dockerfile
+2
-1
mace/python/tools/binary_codegen.py
mace/python/tools/binary_codegen.py
+62
-61
mace/python/tools/caffe_converter_lib.py
mace/python/tools/caffe_converter_lib.py
+1098
-1024
mace/python/tools/convert_util.py
mace/python/tools/convert_util.py
+0
-1
mace/python/tools/converter.py
mace/python/tools/converter.py
+149
-159
mace/python/tools/dsp_ops.py
mace/python/tools/dsp_ops.py
+60
-62
mace/python/tools/encrypt_opencl_codegen.py
mace/python/tools/encrypt_opencl_codegen.py
+62
-58
mace/python/tools/graph_util.py
mace/python/tools/graph_util.py
+7
-2
mace/python/tools/memory_optimizer.py
mace/python/tools/memory_optimizer.py
+123
-112
mace/python/tools/opencl_codegen.py
mace/python/tools/opencl_codegen.py
+77
-74
mace/python/tools/source_converter_lib.py
mace/python/tools/source_converter_lib.py
+176
-162
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+1162
-1125
mace/python/tools/tf_dsp_converter_lib.py
mace/python/tools/tf_dsp_converter_lib.py
+472
-403
mace/python/tools/tf_ops_stats.py
mace/python/tools/tf_ops_stats.py
+162
-136
tools/bazel_adb_run.py
tools/bazel_adb_run.py
+99
-91
tools/falcon_cli.py
tools/falcon_cli.py
+12
-10
tools/generate_data.py
tools/generate_data.py
+30
-35
tools/mace_tools.py
tools/mace_tools.py
+378
-340
tools/sh_commands.py
tools/sh_commands.py
+152
-119
tools/validate.py
tools/validate.py
+152
-155
tools/wino_conv.py
tools/wino_conv.py
+152
-156
未找到文件。
.gitlab-ci.yml
浏览文件 @
58f2516e
stages
:
-
cpplint
-
pycodestyle
-
ops_test
-
ops_benchmark
...
...
@@ -7,7 +8,12 @@ cpplint:
stage
:
cpplint
script
:
-
curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-
python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
-
python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
pycodestyle
:
stage
:
pycodestyle
script
:
-
pycodestyle $(find -name "*.py")
ops_test
:
stage
:
ops_test
...
...
docker/Dockerfile
浏览文件 @
58f2516e
...
...
@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
scipy
\
jinja2
\
pyyaml
\
sh
sh
\
pycodestyle
# Download tensorflow tools
RUN
wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph
&&
\
...
...
mace/python/tools/binary_codegen.py
浏览文件 @
58f2516e
...
...
@@ -27,28 +27,30 @@ def generate_cpp_source():
print
"Generate binary from"
,
binary_path
idx
=
0
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
idx
+=
8
for
_
in
xrange
(
size
):
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
idx
+=
key_size
params_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
params_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
data_map
[
key
]
=
[]
count
=
params_size
/
4
params
=
struct
.
unpack
(
str
(
count
)
+
"i"
,
binary_array
[
idx
:
idx
+
params_size
])
params
=
struct
.
unpack
(
str
(
count
)
+
"i"
,
binary_array
[
idx
:
idx
+
params_size
])
for
i
in
params
:
data_map
[
key
].
append
(
i
)
idx
+=
params_size
env
=
jinja2
.
Environment
(
loader
=
jinja2
.
FileSystemLoader
(
sys
.
path
[
0
]))
return
env
.
get_template
(
'str2vec_maps.cc.jinja2'
).
render
(
maps
=
data_map
,
data_type
=
'unsigned int'
,
variable_name
=
FLAGS
.
variable_name
)
maps
=
data_map
,
data_type
=
'unsigned int'
,
variable_name
=
FLAGS
.
variable_name
)
def
main
(
unused_args
):
cpp_binary_source
=
generate_cpp_source
()
...
...
@@ -58,14 +60,12 @@ def main(unused_args):
w_file
.
write
(
cpp_binary_source
)
w_file
.
close
()
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--binary_dirs"
,
type
=
str
,
default
=
""
,
help
=
"The binaries file path."
)
"--binary_dirs"
,
type
=
str
,
default
=
""
,
help
=
"The binaries file path."
)
parser
.
add_argument
(
"--binary_file_name"
,
type
=
str
,
...
...
@@ -75,7 +75,8 @@ def parse_args():
"--output_path"
,
type
=
str
,
default
=
""
,
help
=
"The path of generated C++ source file which contains the binary."
)
help
=
"The path of generated C++ source file which contains the binary."
)
parser
.
add_argument
(
"--variable_name"
,
type
=
str
,
...
...
mace/python/tools/caffe_converter_lib.py
浏览文件 @
58f2516e
...
...
@@ -5,32 +5,26 @@ import google.protobuf.text_format
import
numpy
as
np
import
math
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
buffer_type_map
=
{
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
'WEIGHT_HEIGHT'
:
7
,
'WEIGHT_WIDTH'
:
8
,
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
'WEIGHT_HEIGHT'
:
7
,
'WEIGHT_WIDTH'
:
8
,
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
activation_name_map
=
{
'ReLU'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'TanH'
:
'TANH'
,
'ReLU'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'TanH'
:
'TANH'
,
}
MACE_INPUT_NODE_NAME
=
"mace_input_node"
...
...
@@ -38,6 +32,7 @@ MACE_OUTPUT_NODE_NAME = "mace_output_node"
OPENCL_IMAGE_MAX_SIZE
=
16384
class
Operator
(
object
):
def
__init__
(
self
,
name
,
type
,
layer
):
self
.
name
=
name
...
...
@@ -54,37 +49,52 @@ class Operator(object):
def
get_single_parent
(
self
):
if
len
(
self
.
parents
)
!=
1
:
raise
Exception
(
'Operation %s expected single parent, but got %s'
%
(
self
.
name
,
len
(
self
.
parents
)))
raise
Exception
(
'Operation %s expected single parent, but got %s'
%
(
self
.
name
,
len
(
self
.
parents
)))
return
self
.
parents
[
0
]
def
BlobToNPArray
(
blob
):
if
blob
.
num
!=
0
:
return
(
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
(
blob
.
num
,
blob
.
channels
,
blob
.
height
,
blob
.
width
)))
return
(
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
(
blob
.
num
,
blob
.
channels
,
blob
.
height
,
blob
.
width
)))
else
:
return
np
.
asarray
(
blob
.
data
,
dtype
=
np
.
float32
).
reshape
(
blob
.
shape
.
dim
)
class
Shapes
(
object
):
@
staticmethod
def
conv_pool_shape
(
input_shape
,
filter_shape
,
paddings
,
strides
,
dilations
,
round_func
,
input_format
=
'NHWC'
):
def
conv_pool_shape
(
input_shape
,
filter_shape
,
paddings
,
strides
,
dilations
,
round_func
,
input_format
=
'NHWC'
):
output_shape
=
np
.
zeros_like
(
input_shape
)
output_shape
[
0
]
=
input_shape
[
0
]
if
input_format
==
'NHWC'
:
# input format: NHWC, filter format: HWOI
output_shape
[
1
]
=
int
(
round_func
((
input_shape
[
1
]
+
paddings
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
output_shape
[
1
]
=
int
(
round_func
((
input_shape
[
1
]
+
paddings
[
0
]
-
filter_shape
[
0
]
-
(
filter_shape
[
0
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
1
]
-
filter_shape
[
1
]
-
(
filter_shape
[
1
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
output_shape
[
3
]
=
filter_shape
[
2
]
elif
input_format
==
'NCHW'
:
# input format: NCHW, filter format: OIHW
output_shape
[
1
]
=
filter_shape
[
0
]
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
0
]
-
filter_shape
[
2
]
-
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
3
]
=
int
(
round_func
((
input_shape
[
3
]
+
paddings
[
1
]
-
filter_shape
[
3
]
-
(
filter_shape
[
3
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
output_shape
[
2
]
=
int
(
round_func
((
input_shape
[
2
]
+
paddings
[
0
]
-
filter_shape
[
2
]
-
(
filter_shape
[
2
]
-
1
)
*
(
dilations
[
0
]
-
1
))
/
float
(
strides
[
0
])))
+
1
output_shape
[
3
]
=
int
(
round_func
((
input_shape
[
3
]
+
paddings
[
1
]
-
filter_shape
[
3
]
-
(
filter_shape
[
3
]
-
1
)
*
(
dilations
[
1
]
-
1
))
/
float
(
strides
[
1
])))
+
1
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
...
...
@@ -107,12 +117,19 @@ class Shapes(object):
@
staticmethod
def
slice_shape
(
input_shape
,
num_output
,
input_format
=
'NHWC'
):
if
input_format
==
'NHWC'
:
return
[
input_shape
[
0
],
input_shape
[
1
],
input_shape
[
2
],
input_shape
[
3
]
/
num_output
]
return
[
input_shape
[
0
],
input_shape
[
1
],
input_shape
[
2
],
input_shape
[
3
]
/
num_output
]
elif
input_format
==
'NCHW'
:
return
[
input_shape
[
0
],
input_shape
[
1
]
/
num_output
,
input_shape
[
2
],
input_shape
[
3
]]
return
[
input_shape
[
0
],
input_shape
[
1
]
/
num_output
,
input_shape
[
2
],
input_shape
[
3
]
]
else
:
raise
Exception
(
"format %s is not supported"
%
input_format
)
# outputs' name is [op.name + '_' + #]
class
CaffeConverter
(
object
):
def
__init__
(
self
,
caffe_net
,
weights
,
net_def
,
dt
,
device
,
winograd
):
...
...
@@ -140,9 +157,10 @@ class CaffeConverter(object):
# Construct graph
# Only support single-output layer
# layer with single output often use the same top name.
self
.
ops
.
extend
([
Operator
(
layer
.
name
,
layer
.
type
,
layer
)
for
layer
in
layers
])
self
.
ops
.
extend
(
[
Operator
(
layer
.
name
,
layer
.
type
,
layer
)
for
layer
in
layers
])
self
.
ops_map
=
{
op
.
name
:
op
for
op
in
self
.
ops
}
self
.
ops_map
=
{
op
.
name
:
op
for
op
in
self
.
ops
}
output_op_map
=
{}
for
layer
in
layers
:
op
=
self
.
ops_map
[
layer
.
name
]
...
...
@@ -165,7 +183,6 @@ class CaffeConverter(object):
continue
output_op_map
[
output_name
]
=
op
# Load weights
weights_layers
=
weights
.
layer
for
layer
in
weights_layers
:
...
...
@@ -191,7 +208,7 @@ class CaffeConverter(object):
data_format_arg
.
s
=
'NHWC'
op_def
.
name
=
op
.
name
op_def
.
type
=
mace_type
op_def
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
op_def
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
return
op_def
def
remove_unused_layers
(
self
,
layers
):
...
...
@@ -274,7 +291,7 @@ class CaffeConverter(object):
op_def
.
name
=
name
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
name
+
':0'
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
...
...
@@ -290,7 +307,7 @@ class CaffeConverter(object):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
epsilon_arg
=
op_def
.
arg
.
add
()
...
...
@@ -315,11 +332,16 @@ class CaffeConverter(object):
def
add_stride_pad_kernel_arg
(
self
,
param
,
op_def
):
try
:
if
len
(
param
.
stride
)
>
1
or
len
(
param
.
kernel_size
)
>
1
or
len
(
param
.
pad
)
>
1
:
raise
Exception
(
'Mace does not support multiple stride/kernel_size/pad'
)
stride
=
[
param
.
stride
[
0
],
param
.
stride
[
0
]]
if
len
(
param
.
stride
)
else
[
1
,
1
]
pad
=
[
param
.
pad
[
0
]
*
2
,
param
.
pad
[
0
]
*
2
]
if
len
(
param
.
pad
)
else
[
0
,
0
]
kernel
=
[
param
.
kernel_size
[
0
],
param
.
kernel_size
[
0
]]
if
len
(
param
.
kernel_size
)
else
[
0
,
0
]
if
len
(
param
.
stride
)
>
1
or
len
(
param
.
kernel_size
)
>
1
or
len
(
param
.
pad
)
>
1
:
raise
Exception
(
'Mace does not support multiple stride/kernel_size/pad'
)
stride
=
[
param
.
stride
[
0
],
param
.
stride
[
0
]]
if
len
(
param
.
stride
)
else
[
1
,
1
]
pad
=
[
param
.
pad
[
0
]
*
2
,
param
.
pad
[
0
]
*
2
]
if
len
(
param
.
pad
)
else
[
0
,
0
]
kernel
=
[
param
.
kernel_size
[
0
],
param
.
kernel_size
[
0
]]
if
len
(
param
.
kernel_size
)
else
[
0
,
0
]
except
TypeError
:
stride
=
[
param
.
stride
,
param
.
stride
]
pad
=
[
param
.
pad
*
2
,
param
.
pad
*
2
]
...
...
@@ -370,8 +392,10 @@ class CaffeConverter(object):
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
if
self
.
device
==
'gpu'
:
buffer_type
=
"DW_CONV2D_FILTER"
if
is_depthwise
else
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
buffer_type
=
"DW_CONV2D_FILTER"
\
if
is_depthwise
else
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
weight_tensor_name
])
...
...
@@ -382,7 +406,8 @@ class CaffeConverter(object):
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
bias_tensor_name
])
...
...
@@ -401,14 +426,15 @@ class CaffeConverter(object):
self
.
resolved_ops
.
add
(
op
.
name
)
input_format
=
'NCHW'
if
self
.
device
==
'neon'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
weight_data
.
shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
,
weight_data
.
shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
\
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
in
activation_name_map
:
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
and
\
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
if
not
is_depthwise
:
op_def
.
type
=
"FusedConv2D"
...
...
@@ -419,7 +445,7 @@ class CaffeConverter(object):
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
op_def
.
output
.
extend
([
final_op
.
name
+
':0'
])
op_def
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
...
...
@@ -443,17 +469,22 @@ class CaffeConverter(object):
input_format
=
'NCHW'
if
self
.
device
==
'neon'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]],
filter_shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
if
self
.
winograd
and
dilations
[
0
]
==
1
and
(
dilations
[
0
]
==
dilations
[
1
])
and
\
filter_shape
,
paddings
,
strides
,
dilations
,
math
.
floor
,
input_format
)
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
if
self
.
winograd
and
dilations
[
0
]
==
1
and
\
(
dilations
[
0
]
==
dilations
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
]):
if
self
.
device
==
'gpu'
:
return
filter_shape
[
0
]
==
3
and
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
return
filter_shape
[
0
]
==
3
and
\
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
width
<
OPENCL_IMAGE_MAX_SIZE
)
elif
self
.
device
==
'neon'
:
return
filter_shape
[
2
]
==
3
and
(
filter_shape
[
2
]
==
filter_shape
[
3
])
return
filter_shape
[
2
]
==
3
and
(
filter_shape
[
2
]
==
filter_shape
[
3
])
return
False
def
convert_winograd_conv
(
self
,
op
):
...
...
@@ -486,16 +517,20 @@ class CaffeConverter(object):
padding_arg
.
ints
.
extend
(
paddings
)
wt_op
.
name
=
op
.
name
+
'_input_transform'
wt_op
.
type
=
'WinogradTransform'
wt_op
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
wt_op
.
input
.
extend
([
name
+
':0'
for
name
in
self
.
inputs_map
[
op
.
name
]])
wt_output_name
=
wt_op
.
name
+
":0"
wt_op
.
output
.
extend
([
wt_output_name
])
wt_output_shape
=
mace_pb2
.
OutputShape
()
if
self
.
device
!=
'neon'
:
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
else
:
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
2
]
+
1
)
/
2
)
*
((
output_shape
[
3
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
1
],
wt_output_width
,
1
])
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
2
]
+
1
)
/
2
)
*
((
output_shape
[
3
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
1
],
wt_output_width
,
1
])
wt_op
.
output_shape
.
extend
([
wt_output_shape
])
# MatMul
...
...
@@ -510,9 +545,11 @@ class CaffeConverter(object):
matmul_op
.
output
.
extend
([
matmul_output_name
])
matmul_output_shape
=
mace_pb2
.
OutputShape
()
if
self
.
device
!=
'neon'
:
matmul_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
matmul_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
else
:
matmul_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
0
],
wt_output_width
,
1
])
matmul_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
0
],
wt_output_width
,
1
])
matmul_op
.
output_shape
.
extend
([
matmul_output_shape
])
# Inverse transform
...
...
@@ -525,10 +562,12 @@ class CaffeConverter(object):
batch_arg
.
i
=
output_shape
[
0
]
height_arg
=
iwt_op
.
arg
.
add
()
height_arg
.
name
=
'height'
height_arg
.
i
=
output_shape
[
1
]
if
self
.
device
!=
'neon'
else
output_shape
[
2
]
height_arg
.
i
=
output_shape
[
1
]
if
self
.
device
!=
'neon'
else
output_shape
[
2
]
width_arg
=
iwt_op
.
arg
.
add
()
width_arg
.
name
=
'width'
width_arg
.
i
=
output_shape
[
2
]
if
self
.
device
!=
'neon'
else
output_shape
[
3
]
width_arg
.
i
=
output_shape
[
2
]
if
self
.
device
!=
'neon'
else
output_shape
[
3
]
iwt_op
.
name
=
op
.
name
+
'_inverse_transform'
iwt_op
.
type
=
'WinogradInverseTransform'
iwt_op
.
input
.
extend
([
matmul_output_name
])
...
...
@@ -538,15 +577,17 @@ class CaffeConverter(object):
bias_tensor_name
=
op
.
name
+
'_bias:0'
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
iwt_op
.
input
.
extend
([
output_name
])
final_op
=
op
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
op
.
name
)
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
\
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
in
activation_name_map
:
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
and
\
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
iwt_op
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
...
...
@@ -555,7 +596,7 @@ class CaffeConverter(object):
final_op
.
output_shape_map
[
final_op
.
layer
.
top
[
0
]]
=
output_shape
self
.
resolved_ops
.
add
(
activation_op
.
name
)
iwt_op
.
output
.
extend
([
final_op
.
name
+
':0'
])
iwt_op
.
output
.
extend
([
final_op
.
name
+
':0'
])
self
.
add_output_shape
(
iwt_op
,
output_shape
)
self
.
net_def
.
op
.
extend
([
wt_op
,
matmul_op
,
iwt_op
])
...
...
@@ -577,11 +618,11 @@ class CaffeConverter(object):
if
len
(
scale_op
.
data
)
==
2
:
beta_value
=
scale_op
.
data
[
1
]
scale_value
=
(
(
1.0
/
np
.
vectorize
(
math
.
sqrt
)(
var_value
+
epsilon_value
))
*
scale_value
=
(
(
1.0
/
np
.
vectorize
(
math
.
sqrt
)(
var_value
+
epsilon_value
))
*
gamma_value
).
reshape
(
-
1
)
offset_value
=
((
-
mean_value
*
scale_value
)
+
beta_value
).
reshape
(
-
1
)
input_names
=
[
op
.
name
+
'_scale:0'
,
op
.
name
+
'_offset:0'
]
input_names
=
[
op
.
name
+
'_scale:0'
,
op
.
name
+
'_offset:0'
]
self
.
add_tensor
(
input_names
[
0
],
scale_value
)
self
.
add_tensor
(
input_names
[
1
],
offset_value
)
...
...
@@ -596,10 +637,12 @@ class CaffeConverter(object):
self
.
resolved_ops
.
add
(
scale_op
.
name
)
final_op
=
scale_op
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
\
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
in
activation_name_map
:
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
...
...
@@ -616,7 +659,8 @@ class CaffeConverter(object):
param
=
op
.
layer
.
inner_product_param
try
:
if
param
.
axis
!=
1
or
param
.
transpose
:
raise
ValueError
(
'Do not support non-default axis and transpose '
raise
ValueError
(
'Do not support non-default axis and transpose '
'case for innner product'
)
except
AttributeError
:
pass
...
...
@@ -626,20 +670,26 @@ class CaffeConverter(object):
if
op
.
data
[
0
].
ndim
not
in
[
2
,
4
]:
raise
ValueError
(
'Unexpected weigth ndim.'
)
if
op
.
data
[
0
].
ndim
==
4
and
list
(
op
.
data
[
0
].
shape
[:
2
])
!=
[
1
,
1
]:
raise
ValueError
(
'Do not support 4D weight with shape [1, 1, *, *]'
)
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
raise
ValueError
(
'Do not support 4D weight with shape [1, 1, *, *]'
)
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
weight_data
=
op
.
data
[
0
].
reshape
(
-
1
,
op
.
data
[
0
].
shape
[
-
1
])
assert
weight_data
.
shape
[
1
]
==
(
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
])
assert
weight_data
.
shape
[
1
]
==
(
input_shape
[
1
]
*
input_shape
[
2
]
*
input_shape
[
3
])
if
self
.
device
!=
'neon'
:
weight_data
=
weight_data
.
reshape
(
-
1
,
input_shape
[
3
],
input_shape
[
1
],
input_shape
[
2
])
weight_data
=
weight_data
.
transpose
((
0
,
2
,
3
,
1
)).
reshape
(
weight_data
.
shape
[
0
],
-
1
)
weight_data
=
weight_data
.
reshape
(
-
1
,
input_shape
[
3
],
input_shape
[
1
],
input_shape
[
2
])
weight_data
=
weight_data
.
transpose
((
0
,
2
,
3
,
1
)).
reshape
(
weight_data
.
shape
[
0
],
-
1
)
self
.
add_tensor
(
weight_tensor_name
,
weight_data
)
if
self
.
device
==
'gpu'
:
if
(
weight_data
.
shape
[
0
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
\
and
(
weight_data
.
shape
[
1
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
:
raise
Exception
(
'Mace gpu do not support FC with weight shape: '
+
str
(
weight_data
.
shape
))
if
(
weight_data
.
shape
[
0
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
and
\
(
weight_data
.
shape
[
1
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
:
raise
Exception
(
'Mace gpu do not support FC with weight shape: '
+
str
(
weight_data
.
shape
))
if
input_shape
[
3
]
%
4
==
0
:
buffer_type
=
"WEIGHT_WIDTH"
else
:
...
...
@@ -650,9 +700,11 @@ class CaffeConverter(object):
if
buffer_type
==
"WEIGHT_HEIGHT"
and
\
(
weight_data
.
shape
[
0
]
+
3
)
/
4
>
OPENCL_IMAGE_MAX_SIZE
:
raise
Exception
(
'Mace gpu do not support FC with weight shape: '
+
str
(
weight_data
.
shape
))
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
raise
Exception
(
'Mace gpu do not support FC with weight shape: '
+
str
(
weight_data
.
shape
))
output_name
=
self
.
add_buffer_to_image
(
weight_tensor_name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
weight_tensor_name
])
...
...
@@ -663,18 +715,21 @@ class CaffeConverter(object):
bias_data
=
op
.
data
[
1
].
reshape
(
-
1
)
self
.
add_tensor
(
bias_tensor_name
,
bias_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
bias_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
bias_tensor_name
])
self
.
resolved_ops
.
add
(
op
.
name
)
output_shape
=
Shapes
.
fully_connected_shape
(
input_shape
,
weight_data
.
shape
)
output_shape
=
Shapes
.
fully_connected_shape
(
input_shape
,
weight_data
.
shape
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
final_op
=
op
if
len
(
self
.
ops_map
[
final_op
.
name
].
children
)
==
1
\
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
in
activation_name_map
:
and
self
.
ops_map
[
final_op
.
name
].
children
[
0
].
type
\
in
activation_name_map
:
activation_op
=
self
.
ops_map
[
final_op
.
name
].
children
[
0
]
fused_act_arg
=
op_def
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
...
...
@@ -691,7 +746,8 @@ class CaffeConverter(object):
op_def
=
self
.
CommonConvert
(
op
,
'Pooling'
)
param
=
op
.
layer
.
pooling_param
paddings
,
strides
,
kernels
=
self
.
add_stride_pad_kernel_arg
(
param
,
op_def
)
paddings
,
strides
,
kernels
=
self
.
add_stride_pad_kernel_arg
(
param
,
op_def
)
if
param
.
pool
==
caffe_pb2
.
PoolingParameter
.
MAX
:
pooling_type
=
"MaxPool"
elif
param
.
pool
==
caffe_pb2
.
PoolingParameter
.
AVE
:
...
...
@@ -700,7 +756,8 @@ class CaffeConverter(object):
pooling_type_arg
.
name
=
'pooling_type'
pooling_type_arg
.
i
=
pooling_type_mode
[
pooling_type
]
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
input_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
if
param
.
HasField
(
'global_pooling'
)
and
param
.
global_pooling
:
kernels
=
[
input_shape
[
1
],
input_shape
[
2
]]
...
...
@@ -708,12 +765,18 @@ class CaffeConverter(object):
kernel_arg
.
name
=
'kernels'
kernel_arg
.
ints
.
extend
(
kernels
)
filter_shape
=
[
kernels
[
0
],
kernels
[
1
],
input_shape
[
3
],
input_shape
[
3
]]
\
if
self
.
device
!=
'neon'
else
\
[
input_shape
[
1
],
input_shape
[
1
],
kernels
[
0
],
kernels
[
1
]]
if
self
.
device
!=
'neon'
:
filter_shape
=
[
kernels
[
0
],
kernels
[
1
],
input_shape
[
3
],
input_shape
[
3
]
]
else
:
filter_shape
=
[
input_shape
[
1
],
input_shape
[
1
],
kernels
[
0
],
kernels
[
1
]
]
input_format
=
'NCHW'
if
self
.
device
==
'neon'
else
'NHWC'
output_shape
=
Shapes
.
conv_pool_shape
(
input_shape
,
filter_shape
,
paddings
,
strides
,
[
1
,
1
],
math
.
ceil
,
input_format
)
paddings
,
strides
,
[
1
,
1
],
math
.
ceil
,
input_format
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
...
...
@@ -727,7 +790,8 @@ class CaffeConverter(object):
activation_arg
.
name
=
'activation'
activation_arg
.
s
=
activation_name_map
[
op
.
type
]
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
...
...
@@ -742,12 +806,14 @@ class CaffeConverter(object):
alpha_data
=
op
.
data
[
0
].
reshape
(
-
1
)
self
.
add_tensor
(
alpha_tensor_name
,
alpha_data
)
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
alpha_tensor_name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
alpha_tensor_name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
alpha_tensor_name
])
op_def
.
output
.
extend
([
op
.
name
+
':0'
])
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
output_shape
=
op
.
get_single_parent
().
output_shape_map
[
op
.
layer
.
bottom
[
0
]]
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
self
.
net_def
.
op
.
extend
([
op_def
])
...
...
@@ -777,7 +843,8 @@ class CaffeConverter(object):
input_shapes
=
[]
for
i
in
range
(
len
(
op
.
parents
)):
input_shapes
.
append
(
op
.
parents
[
i
].
output_shape_map
[
op
.
layer
.
bottom
[
i
]])
input_shapes
.
append
(
op
.
parents
[
i
].
output_shape_map
[
op
.
layer
.
bottom
[
i
]])
output_shape
=
Shapes
.
concat_shape
(
input_shapes
,
axis_arg
.
i
)
op
.
output_shape_map
[
op
.
layer
.
top
[
0
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
...
...
@@ -808,7 +875,8 @@ class CaffeConverter(object):
if
op
.
layer
.
HasField
(
'slice_param'
):
param
=
op
.
layer
.
slice_param
if
param
.
HasField
(
'axis'
)
and
param
.
axis
!=
1
:
raise
Exception
(
'Mace do not support slice with axis '
+
str
(
param
.
axis
))
raise
Exception
(
'Mace do not support slice with axis '
+
str
(
param
.
axis
))
if
len
(
param
.
slice_point
)
>
0
:
raise
Exception
(
'Mace do not support slice with slice_point'
)
...
...
@@ -820,11 +888,14 @@ class CaffeConverter(object):
num_outputs
=
len
(
op
.
layer
.
top
)
input_channels
=
input_shape
[
axis_arg
.
i
]
if
(
input_channels
%
num_outputs
)
!=
0
or
\
(
self
.
device
==
'gpu'
and
((
input_channels
/
num_outputs
)
%
4
!=
0
)):
raise
Exception
(
'Mace do not support slice with input shape '
+
str
(
input_shape
)
+
' and number of output '
+
str
(
num_outputs
))
(
self
.
device
==
'gpu'
and
((
input_channels
/
num_outputs
)
%
4
!=
0
)):
raise
Exception
(
'Mace do not support slice with input shape '
+
str
(
input_shape
)
+
' and number of output '
+
str
(
num_outputs
))
input_format
=
'NCHW'
if
self
.
device
==
'neon'
else
'NHWC'
output_shape
=
Shapes
.
slice_shape
(
input_shape
,
num_outputs
,
input_format
)
output_shape
=
Shapes
.
slice_shape
(
input_shape
,
num_outputs
,
input_format
)
for
i
in
range
(
len
(
op
.
layer
.
top
)):
op
.
output_shape_map
[
op
.
layer
.
top
[
i
]]
=
output_shape
self
.
add_output_shape
(
op_def
,
output_shape
)
...
...
@@ -925,7 +996,8 @@ class CaffeConverter(object):
for
i
in
range
(
len
(
input_nodes
)):
input_op
=
self
.
ops_map
[
input_nodes
[
i
]]
input_shape
=
input_shapes
[
i
]
if
self
.
device
!=
'neon'
else
\
[
input_shapes
[
i
][
0
],
input_shapes
[
i
][
3
],
input_shapes
[
i
][
1
],
input_shapes
[
i
][
2
]]
[
input_shapes
[
i
][
0
],
input_shapes
[
i
][
3
],
input_shapes
[
i
][
1
],
input_shapes
[
i
][
2
]]
if
input_op
.
layer
is
not
None
:
input_op
.
output_shape_map
[
input_op
.
layer
.
top
[
0
]]
=
input_shape
else
:
...
...
@@ -938,7 +1010,7 @@ class CaffeConverter(object):
op_def
.
name
=
name
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
name
+
':0'
])
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
.
name
=
'dims'
...
...
@@ -954,7 +1026,7 @@ class CaffeConverter(object):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
dims_arg
=
op_def
.
arg
.
add
()
...
...
@@ -1008,7 +1080,8 @@ class CaffeConverter(object):
elif
op
.
type
in
[
'Softmax'
]:
self
.
convert_normal_op
(
op
)
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
if
self
.
device
==
'gpu'
:
self
.
add_output_transform
(
output_nodes
)
...
...
@@ -1024,8 +1097,9 @@ class CaffeConverter(object):
print
'Unresolve Op: %s with type %s'
%
(
op
.
name
,
op
.
type
)
def
convert_to_mace_pb
(
model_file
,
weight_file
,
input_node_str
,
input_shape_str
,
output_node_str
,
data_type
,
device
,
winograd
):
def
convert_to_mace_pb
(
model_file
,
weight_file
,
input_node_str
,
input_shape_str
,
output_node_str
,
data_type
,
device
,
winograd
):
net_def
=
mace_pb2
.
NetDef
()
dt
=
data_type_map
[
data_type
]
...
...
@@ -1046,7 +1120,8 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
output_nodes
=
[
x
for
x
in
output_node_str
.
split
(
','
)]
assert
len
(
input_nodes
)
==
len
(
input_shapes
)
converter
=
CaffeConverter
(
caffe_net
,
weights
,
net_def
,
dt
,
device
,
winograd
)
converter
=
CaffeConverter
(
caffe_net
,
weights
,
net_def
,
dt
,
device
,
winograd
)
converter
.
convert
(
input_nodes
,
input_shapes
,
output_nodes
)
print
"PB Converted."
if
device
==
'gpu'
:
...
...
@@ -1056,4 +1131,3 @@ def convert_to_mace_pb(model_file, weight_file, input_node_str, input_shape_str,
print
"Memory optimization done."
return
net_def
mace/python/tools/convert_util.py
浏览文件 @
58f2516e
...
...
@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
if
not
mace_dtype
:
raise
Exception
(
"Not supported tensorflow dtype: "
+
tf_dtype
)
return
mace_dtype
mace/python/tools/converter.py
浏览文件 @
58f2516e
...
...
@@ -4,10 +4,14 @@ import hashlib
import
os.path
from
mace.python.tools
import
source_converter_lib
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
# --runtime dsp \
# --input_dim input_node,1,28,28,3
FLAGS
=
None
def
file_checksum
(
fname
):
hash_func
=
hashlib
.
sha256
()
with
open
(
fname
,
"rb"
)
as
f
:
...
...
@@ -15,6 +19,7 @@ def file_checksum(fname):
hash_func
.
update
(
chunk
)
return
hash_func
.
hexdigest
()
def
main
(
unused_args
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
...
...
@@ -22,17 +27,21 @@ def main(unused_args):
model_checksum
=
file_checksum
(
FLAGS
.
model_file
)
if
FLAGS
.
model_checksum
!=
""
and
FLAGS
.
model_checksum
!=
model_checksum
:
print
(
"Model checksum mismatch: %s != %s"
%
(
model_checksum
,
FLAGS
.
model_checksum
))
print
(
"Model checksum mismatch: %s != %s"
%
(
model_checksum
,
FLAGS
.
model_checksum
))
sys
.
exit
(
-
1
)
if
FLAGS
.
platform
==
'caffe'
:
if
not
os
.
path
.
isfile
(
FLAGS
.
weight_file
):
print
(
"Input weight file '"
+
FLAGS
.
weight_file
+
"' does not exist!"
)
print
(
"Input weight file '"
+
FLAGS
.
weight_file
+
"' does not exist!"
)
sys
.
exit
(
-
1
)
weight_checksum
=
file_checksum
(
FLAGS
.
weight_file
)
if
FLAGS
.
weight_checksum
!=
""
and
FLAGS
.
weight_checksum
!=
weight_checksum
:
print
(
"Weight checksum mismatch: %s != %s"
%
(
weight_checksum
,
FLAGS
.
weight_checksum
))
if
FLAGS
.
weight_checksum
!=
""
and
\
FLAGS
.
weight_checksum
!=
weight_checksum
:
print
(
"Weight checksum mismatch: %s != %s"
%
(
weight_checksum
,
FLAGS
.
weight_checksum
))
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
==
'dsp'
:
...
...
@@ -41,22 +50,27 @@ def main(unused_args):
from
mace.python.tools
import
caffe_converter_lib
output_graph_def
=
caffe_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
elif
FLAGS
.
platform
==
'tensorflow'
:
if
FLAGS
.
runtime
==
'dsp'
:
from
mace.python.tools
import
tf_dsp_converter_lib
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
else
:
from
mace.python.tools
import
tf_converter_lib
output_graph_def
=
tf_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
if
FLAGS
.
output_type
==
'source'
:
source_converter_lib
.
convert_to_source
(
output_graph_def
,
model_checksum
,
FLAGS
.
template
,
FLAGS
.
obfuscate
,
FLAGS
.
model_tag
,
FLAGS
.
output
,
FLAGS
.
runtime
,
FLAGS
.
embed_model_data
)
source_converter_lib
.
convert_to_source
(
output_graph_def
,
model_checksum
,
FLAGS
.
template
,
FLAGS
.
obfuscate
,
FLAGS
.
model_tag
,
FLAGS
.
output
,
FLAGS
.
runtime
,
FLAGS
.
embed_model_data
)
else
:
with
open
(
FLAGS
.
output
,
"wb"
)
as
f
:
f
.
write
(
output_graph_def
.
SerializeToString
())
...
...
@@ -65,6 +79,7 @@ def main(unused_args):
f
.
write
(
str
(
output_graph_def
))
print
(
"Model conversion is completed."
)
def
str2bool
(
v
):
if
v
.
lower
()
in
(
'yes'
,
'true'
,
't'
,
'y'
,
'1'
):
return
True
...
...
@@ -73,6 +88,7 @@ def str2bool(v):
else
:
raise
argparse
.
ArgumentTypeError
(
'Boolean value expected.'
)
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
...
...
@@ -81,12 +97,10 @@ def parse_args():
"--model_file"
,
type
=
str
,
default
=
""
,
help
=
"TensorFlow
\'
GraphDef
\'
file to load, Caffe prototxt file to load."
)
help
=
"TensorFlow
\'
GraphDef
\'
file to load, "
"Caffe prototxt file to load."
)
parser
.
add_argument
(
"--weight_file"
,
type
=
str
,
default
=
""
,
help
=
"Caffe data file to load."
)
"--weight_file"
,
type
=
str
,
default
=
""
,
help
=
"Caffe data file to load."
)
parser
.
add_argument
(
"--model_checksum"
,
type
=
str
,
...
...
@@ -103,35 +117,23 @@ def parse_args():
default
=
""
,
help
=
"File to save the output graph to."
)
parser
.
add_argument
(
"--runtime"
,
type
=
str
,
default
=
"cpu"
,
help
=
"Runtime: cpu/gpu/dsp"
)
"--runtime"
,
type
=
str
,
default
=
"cpu"
,
help
=
"Runtime: cpu/gpu/dsp"
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"e.g., input_node"
)
parser
.
add_argument
(
"--output_node"
,
type
=
str
,
default
=
"softmax"
,
help
=
"e.g., softmax"
)
"--output_node"
,
type
=
str
,
default
=
"softmax"
,
help
=
"e.g., softmax"
)
parser
.
add_argument
(
"--data_type"
,
type
=
str
,
default
=
'DT_FLOAT'
,
help
=
"e.g., DT_HALF/DT_FLOAT"
)
parser
.
add_argument
(
"--output_type"
,
type
=
str
,
default
=
"pb"
,
help
=
"output type: source/pb"
)
"--output_type"
,
type
=
str
,
default
=
"pb"
,
help
=
"output type: source/pb"
)
parser
.
add_argument
(
"--template"
,
type
=
str
,
default
=
""
,
help
=
"template path"
)
"--template"
,
type
=
str
,
default
=
""
,
help
=
"template path"
)
parser
.
add_argument
(
"--obfuscate"
,
type
=
str2bool
,
...
...
@@ -152,25 +154,13 @@ def parse_args():
default
=
False
,
help
=
"open winograd convolution or not"
)
parser
.
add_argument
(
"--dsp_mode"
,
type
=
int
,
default
=
0
,
help
=
"dsp run mode, defalut=0"
)
"--dsp_mode"
,
type
=
int
,
default
=
0
,
help
=
"dsp run mode, defalut=0"
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
""
,
help
=
"input shape."
)
"--input_shape"
,
type
=
str
,
default
=
""
,
help
=
"input shape."
)
parser
.
add_argument
(
"--platform"
,
type
=
str
,
default
=
"tensorflow"
,
help
=
"tensorflow/caffe"
)
"--platform"
,
type
=
str
,
default
=
"tensorflow"
,
help
=
"tensorflow/caffe"
)
parser
.
add_argument
(
"--embed_model_data"
,
type
=
str2bool
,
default
=
True
,
help
=
"input shape."
)
"--embed_model_data"
,
type
=
str2bool
,
default
=
True
,
help
=
"input shape."
)
return
parser
.
parse_known_args
()
...
...
mace/python/tools/dsp_ops.py
浏览文件 @
58f2516e
class
DspOps
(
object
):
def
__init__
(
self
):
self
.
dsp_ops
=
{
...
...
@@ -18,7 +17,7 @@ class DspOps(object):
'QuantizedAvgPool'
:
'QuantizedAvgPool_8'
,
'QuantizedConcat'
:
'QuantizedConcat_8'
,
'QuantizedBiasAdd'
:
'QuantizedBiasAdd_8p8to32'
,
'QuantizedResizeBilinear'
:
'QuantizedResizeBilinear_8'
,
'QuantizedResizeBilinear'
:
'QuantizedResizeBilinear_8'
,
'QuantizedSpaceToBatchND'
:
'QuantizedSpaceToBatchND_8'
,
'QuantizedBatchToSpaceND'
:
'QuantizedBatchToSpaceND_8'
,
'QuantizedSoftmax'
:
'QuantizedSoftmax_8'
,
...
...
@@ -54,6 +53,7 @@ class DspOps(object):
'Concat'
:
'Concat_f'
,
'AddN'
:
'AddN_f'
,
}
def
has_op
(
self
,
tf_op
):
return
tf_op
in
self
.
dsp_ops
...
...
@@ -61,5 +61,3 @@ class DspOps(object):
if
tf_op
not
in
self
.
dsp_ops
:
raise
Exception
(
'Could not map nn op for: '
,
tf_op
)
return
self
.
dsp_ops
[
tf_op
]
mace/python/tools/encrypt_opencl_codegen.py
浏览文件 @
58f2516e
...
...
@@ -11,10 +11,13 @@ FLAGS = None
encrypt_lookup_table
=
"Xiaomi-AI-Platform-Mace"
def
encrypt_code
(
code_str
):
encrypted_arr
=
[]
for
i
in
range
(
len
(
code_str
)):
encrypted_char
=
hex
(
ord
(
code_str
[
i
])
^
ord
(
encrypt_lookup_table
[
i
%
len
(
encrypt_lookup_table
)]))
encrypted_char
=
hex
(
ord
(
code_str
[
i
])
^
ord
(
encrypt_lookup_table
[
i
%
len
(
encrypt_lookup_table
)]))
encrypted_arr
.
append
(
encrypted_char
)
return
encrypted_arr
...
...
@@ -45,7 +48,8 @@ def main(unused_args):
encrypted_code_maps
[
file_name
[:
-
3
]]
=
encrypted_code_arr
env
=
jinja2
.
Environment
(
loader
=
jinja2
.
FileSystemLoader
(
sys
.
path
[
0
]))
cpp_cl_encrypted_kernel
=
env
.
get_template
(
'str2vec_maps.cc.jinja2'
).
render
(
cpp_cl_encrypted_kernel
=
env
.
get_template
(
'str2vec_maps.cc.jinja2'
).
render
(
maps
=
encrypted_code_maps
,
data_type
=
'unsigned char'
,
variable_name
=
'kEncryptedProgramMap'
)
...
...
mace/python/tools/graph_util.py
浏览文件 @
58f2516e
...
...
@@ -2,18 +2,21 @@ import tensorflow as tf
from
mace.proto
import
mace_pb2
from
collections
import
OrderedDict
def
sort_tf_node
(
node
,
nodes_map
,
ordered_nodes_map
):
if
node
.
name
not
in
ordered_nodes_map
:
for
input_tensor_name
in
node
.
input
:
input_node_name
=
input_tensor_name
.
split
(
':'
)[
0
]
if
':'
in
input_tensor_name
else
input_tensor_name
if
input_node_name
not
in
nodes_map
or
input_node_name
in
ordered_nodes_map
:
if
input_node_name
not
in
nodes_map
or
\
input_node_name
in
ordered_nodes_map
:
continue
input_node
=
nodes_map
[
input_node_name
]
sort_tf_node
(
input_node
,
nodes_map
,
ordered_nodes_map
)
ordered_nodes_map
[
node
.
name
]
=
node
def
sort_tf_graph
(
graph_def
):
nodes_map
=
{}
ordered_nodes_map
=
OrderedDict
()
...
...
@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
for
input_tensor_name
in
node
.
input
:
input_node_name
=
input_tensor_name
.
split
(
':'
)[
0
]
if
':'
in
input_tensor_name
else
input_tensor_name
if
input_node_name
not
in
nodes_map
or
input_node_name
in
ordered_nodes_map
:
if
input_node_name
not
in
nodes_map
or
\
input_node_name
in
ordered_nodes_map
:
continue
input_node
=
nodes_map
[
input_node_name
]
sort_mace_node
(
input_node
,
nodes_map
,
ordered_nodes_map
)
ordered_nodes_map
[
node
.
name
]
=
node
def
sort_mace_graph
(
graph_def
,
output_name
):
nodes_map
=
{}
ordered_nodes_map
=
OrderedDict
()
...
...
mace/python/tools/memory_optimizer.py
浏览文件 @
58f2516e
...
...
@@ -2,6 +2,7 @@ import sys
import
operator
from
mace.proto
import
mace_pb2
class
MemoryOptimizer
(
object
):
def
__init__
(
self
,
net_def
):
self
.
net_def
=
net_def
...
...
@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
mem_size
=
[
0
,
0
]
if
op_type
==
'WinogradTransform'
or
op_type
==
'MatMul'
:
mem_size
[
0
]
=
output_shape
[
2
]
*
output_shape
[
3
]
mem_size
[
1
]
=
output_shape
[
0
]
*
int
((
output_shape
[
1
]
+
3
)
/
4
)
mem_size
[
1
]
=
output_shape
[
0
]
*
int
((
output_shape
[
1
]
+
3
)
/
4
)
else
:
mem_size
[
0
]
=
output_shape
[
2
]
*
int
((
output_shape
[
3
]
+
3
)
/
4
)
mem_size
[
0
]
=
output_shape
[
2
]
*
int
((
output_shape
[
3
]
+
3
)
/
4
)
mem_size
[
1
]
=
output_shape
[
0
]
*
output_shape
[
1
]
return
mem_size
...
...
@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
if
self
.
is_buffer_image_op
(
op
):
continue
if
not
op
.
output_shape
:
print
(
'WARNING: There is no output shape information to do memory optimization.'
)
print
(
'WARNING: There is no output shape information to '
'do memory optimization.'
)
return
if
len
(
op
.
output_shape
)
!=
len
(
op
.
output
):
print
(
'WARNING: the number of output shape is not equal to the number of output.'
)
print
(
'WARNING: the number of output shape is not equal to '
'the number of output.'
)
return
for
i
in
range
(
len
(
op
.
output
)):
op_mem_size
=
self
.
get_mem_size
(
op
.
type
,
op
.
output_shape
[
i
].
dims
)
op_mem_size
=
self
.
get_mem_size
(
op
.
type
,
op
.
output_shape
[
i
].
dims
)
mem_id
=
-
1
if
len
(
self
.
idle_mem
)
>
0
:
best_mem_candidate_id
=
-
1
...
...
@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
best_mem_candidate_shape
=
[]
for
mid
in
self
.
idle_mem
:
reuse_mem_size
=
self
.
mem_block
[
mid
]
resize_mem_size
=
[
max
(
reuse_mem_size
[
0
],
op_mem_size
[
0
]),
max
(
reuse_mem_size
[
1
],
op_mem_size
[
1
])]
delta_mem_area
=
self
.
mem_area
(
resize_mem_size
)
-
self
.
mem_area
(
reuse_mem_size
)
resize_mem_size
=
[
max
(
reuse_mem_size
[
0
],
op_mem_size
[
0
]),
max
(
reuse_mem_size
[
1
],
op_mem_size
[
1
])
]
delta_mem_area
=
self
.
mem_area
(
resize_mem_size
)
-
self
.
mem_area
(
reuse_mem_size
)
if
delta_mem_area
<
best_mem_candidate_delta_area
:
best_mem_candidate_id
=
mid
best_mem_candidate_delta_area
=
delta_mem_area
best_mem_candidate_shape
=
resize_mem_size
if
best_mem_candidate_delta_area
<=
self
.
mem_area
(
op_mem_size
):
if
best_mem_candidate_delta_area
<=
self
.
mem_area
(
op_mem_size
):
# reuse
self
.
mem_block
[
best_mem_candidate_id
]
=
best_mem_candidate_shape
self
.
mem_block
[
best_mem_candidate_id
]
=
best_mem_candidate_shape
mem_id
=
best_mem_candidate_id
self
.
idle_mem
.
remove
(
mem_id
)
...
...
@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
print
mem
,
self
.
mem_block
[
mem
]
optimized_mem_size
+=
reduce
(
operator
.
mul
,
self
.
mem_block
[
mem
],
4
)
print
(
'origin mem: %d, optimized mem: %d'
,
origin_mem_size
,
optimized_mem_size
)
print
(
'origin mem: %d, optimized mem: %d'
,
origin_mem_size
,
optimized_mem_size
)
def
optimize_memory
(
net_def
):
...
...
mace/python/tools/opencl_codegen.py
浏览文件 @
58f2516e
...
...
@@ -27,37 +27,40 @@ def generate_cpp_source():
binary_array
=
np
.
fromfile
(
f
,
dtype
=
np
.
uint8
)
idx
=
0
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
idx
+=
8
for
_
in
xrange
(
size
):
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
idx
+=
key_size
value_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
value_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
maps
[
key
]
=
[]
value
=
struct
.
unpack
(
str
(
value_size
)
+
"B"
,
binary_array
[
idx
:
idx
+
value_size
])
value
=
struct
.
unpack
(
str
(
value_size
)
+
"B"
,
binary_array
[
idx
:
idx
+
value_size
])
idx
+=
value_size
for
ele
in
value
:
maps
[
key
].
append
(
hex
(
ele
))
cl_platform_info_path
=
os
.
path
.
join
(
binary_dir
,
FLAGS
.
platform_info_file_name
)
cl_platform_info_path
=
os
.
path
.
join
(
binary_dir
,
FLAGS
.
platform_info_file_name
)
with
open
(
cl_platform_info_path
,
'r'
)
as
f
:
curr_platform_info
=
f
.
read
()
if
platform_info
!=
""
:
assert
(
curr_platform_info
==
platform_info
)
assert
(
curr_platform_info
==
platform_info
)
platform_info
=
curr_platform_info
env
=
jinja2
.
Environment
(
loader
=
jinja2
.
FileSystemLoader
(
sys
.
path
[
0
]))
return
env
.
get_template
(
'opencl_compiled_kernel.cc.jinja2'
).
render
(
maps
=
maps
,
data_type
=
'unsigned char'
,
variable_name
=
'kCompiledProgramMap'
,
platform_info
=
platform_info
,
maps
=
maps
,
data_type
=
'unsigned char'
,
variable_name
=
'kCompiledProgramMap'
,
platform_info
=
platform_info
,
)
def
main
(
unused_args
):
cpp_cl_binary_source
=
generate_cpp_source
()
...
...
@@ -90,7 +93,7 @@ def parse_args():
"--output_path"
,
type
=
str
,
default
=
"./mace/examples/codegen/opencl/opencl_compiled_program.cc"
,
help
=
"The path of generated C++ header file which contains
cl binaries."
)
help
=
"The path of generated C++ header file for
cl binaries."
)
return
parser
.
parse_known_args
()
...
...
mace/python/tools/source_converter_lib.py
浏览文件 @
58f2516e
...
...
@@ -6,9 +6,9 @@ import hashlib
from
mace.proto
import
mace_pb2
from
jinja2
import
Environment
,
FileSystemLoader
GENERATED_NAME
=
set
()
def
generate_obfuscated_name
(
namespace
,
name
):
md5
=
hashlib
.
md5
()
md5
.
update
(
namespace
)
...
...
@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
GENERATED_NAME
.
add
(
name
)
return
name
def
generate_tensor_map
(
tensors
):
tensor_map
=
{}
for
t
in
tensors
:
if
not
tensor_map
.
has_key
(
t
.
name
)
:
if
t
.
name
not
in
tensor_map
:
tensor_map
[
t
.
name
]
=
generate_obfuscated_name
(
"tensor"
,
t
.
name
)
return
tensor_map
def
generate_in_out_map
(
ops
,
tensor_map
):
in_out_map
=
{}
for
op
in
ops
:
op
.
name
=
generate_obfuscated_name
(
"op"
,
op
.
name
)
for
input_name
in
op
.
input
:
if
not
in_out_map
.
has_key
(
input_name
)
:
if
tensor_map
.
has_key
(
input_name
)
:
if
input_name
not
in
in_out_map
:
if
input_name
in
tensor_map
:
in_out_map
[
input_name
]
=
tensor_map
[
input_name
]
else
:
in_out_map
[
input_name
]
=
generate_obfuscated_name
(
"in"
,
input_name
)
in_out_map
[
input_name
]
=
generate_obfuscated_name
(
"in"
,
input_name
)
for
output_name
in
op
.
output
:
if
not
in_out_map
.
has_key
(
output_name
)
:
if
tensor_map
.
has_key
(
output_name
)
:
if
output_name
not
in
in_out_map
:
if
output_name
in
tensor_map
:
in_out_map
[
output_name
]
=
tensor_map
[
output_name
]
else
:
in_out_map
[
output_name
]
=
generate_obfuscated_name
(
"out"
,
output_name
)
in_out_map
[
output_name
]
=
generate_obfuscated_name
(
"out"
,
output_name
)
return
in_out_map
def
obfuscate_name
(
net_def
):
input_node
=
"mace_input_node"
output_node
=
"mace_output_node"
...
...
@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
if
output_node
not
in
op
.
output
[
i
]:
op
.
output
[
i
]
=
in_out_map
[
op
.
output
[
i
]]
def
rename_tensor
(
net_def
):
tensor_map
=
{}
for
t
in
net_def
.
tensors
:
if
not
tensor_map
.
has_key
(
t
.
name
)
:
if
t
.
name
not
in
tensor_map
:
tensor_map
[
t
.
name
]
=
"_"
+
t
.
name
[:
-
2
].
replace
(
"/"
,
"_"
)
t
.
name
=
tensor_map
[
t
.
name
]
for
op
in
net_def
.
op
:
for
i
in
range
(
len
(
op
.
input
)):
if
tensor_map
.
has_key
(
op
.
input
[
i
])
:
if
op
.
input
[
i
]
in
tensor_map
:
op
.
input
[
i
]
=
tensor_map
[
op
.
input
[
i
]]
for
i
in
range
(
len
(
op
.
output
)):
if
tensor_map
.
has_key
(
op
.
output
[
i
])
:
if
op
.
output
[
i
]
in
tensor_map
:
op
.
output
[
i
]
=
tensor_map
[
op
.
output
[
i
]]
class
TensorInfo
:
def
__init__
(
self
,
id
,
t
,
runtime
):
self
.
id
=
id
...
...
@@ -84,19 +91,26 @@ class TensorInfo:
if
t
.
data_type
==
mace_pb2
.
DT_FLOAT
:
if
runtime
==
'gpu'
:
self
.
data_type
=
mace_pb2
.
DT_HALF
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float16
).
tobytes
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float16
).
tobytes
())
else
:
self
.
data_type
=
mace_pb2
.
DT_FLOAT
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float32
).
tobytes
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float32
).
tobytes
())
elif
t
.
data_type
==
mace_pb2
.
DT_INT32
:
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
int32
).
tobytes
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
int32
).
tobytes
())
elif
t
.
data_type
==
mace_pb2
.
DT_UINT8
:
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
def
stringfy
(
value
):
return
', '
.
join
(
'"{0}"'
.
format
(
w
)
for
w
in
value
)
def
convert_to_source
(
net_def
,
mode_pb_checksum
,
template_dir
,
obfuscate
,
model_tag
,
output
,
runtime
,
embed_model_data
):
def
convert_to_source
(
net_def
,
mode_pb_checksum
,
template_dir
,
obfuscate
,
model_tag
,
output
,
runtime
,
embed_model_data
):
if
obfuscate
:
obfuscate_name
(
net_def
)
else
:
...
...
@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
print
template_dir
# Create the jinja2 environment.
j2_env
=
Environment
(
loader
=
FileSystemLoader
(
template_dir
),
trim_blocks
=
True
)
j2_env
=
Environment
(
loader
=
FileSystemLoader
(
template_dir
),
trim_blocks
=
True
)
j2_env
.
filters
[
'stringfy'
]
=
stringfy
output_dir
=
os
.
path
.
dirname
(
output
)
+
'/'
# generate tensor source files
...
...
@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
model_data
.
extend
(
bytearray
([
0
]
*
padding
))
offset
+=
padding
source
=
j2_env
.
get_template
(
template_name
).
render
(
tensor_info
=
tensor_info
,
tensor
=
t
,
tag
=
model_tag
,
runtime
=
runtime
,
offset
=
offset
,
tensor_info
=
tensor_info
,
tensor
=
t
,
tag
=
model_tag
,
runtime
=
runtime
,
offset
=
offset
,
)
model_data
.
extend
(
tensor_info
.
data
)
offset
+=
len
(
tensor_info
.
data
)
...
...
@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate tensor data
template_name
=
'tensor_data.jinja2'
source
=
j2_env
.
get_template
(
template_name
).
render
(
tag
=
model_tag
,
embed_model_data
=
embed_model_data
,
model_data_size
=
offset
,
model_data
=
model_data
)
tag
=
model_tag
,
embed_model_data
=
embed_model_data
,
model_data_size
=
offset
,
model_data
=
model_data
)
with
open
(
output_dir
+
'tensor_data'
+
'.cc'
,
"wb"
)
as
f
:
f
.
write
(
source
)
if
not
embed_model_data
:
...
...
@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
op_size
=
len
(
net_def
.
op
)
for
start
in
range
(
0
,
op_size
,
10
):
source
=
j2_env
.
get_template
(
template_name
).
render
(
start
=
start
,
end
=
min
(
start
+
10
,
op_size
),
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
start
=
start
,
end
=
min
(
start
+
10
,
op_size
),
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
)
with
open
(
output_dir
+
'op'
+
str
(
counter
)
+
'.cc'
,
"wb"
)
as
f
:
f
.
write
(
source
)
...
...
@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate model source files
template_name
=
'model.jinja2'
tensors
=
[
TensorInfo
(
i
,
net_def
.
tensors
[
i
],
runtime
)
for
i
in
range
(
len
(
net_def
.
tensors
))]
tensors
=
[
TensorInfo
(
i
,
net_def
.
tensors
[
i
],
runtime
)
for
i
in
range
(
len
(
net_def
.
tensors
))
]
source
=
j2_env
.
get_template
(
template_name
).
render
(
tensors
=
tensors
,
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
model_pb_checksum
=
mode_pb_checksum
)
tensors
=
tensors
,
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
model_pb_checksum
=
mode_pb_checksum
)
with
open
(
output
,
"wb"
)
as
f
:
f
.
write
(
source
)
# generate model header file
template_name
=
'model_header.jinja2'
source
=
j2_env
.
get_template
(
template_name
).
render
(
tag
=
model_tag
,
)
source
=
j2_env
.
get_template
(
template_name
).
render
(
tag
=
model_tag
,
)
with
open
(
output_dir
+
model_tag
+
'.h'
,
"wb"
)
as
f
:
f
.
write
(
source
)
mace/python/tools/tf_converter_lib.py
浏览文件 @
58f2516e
...
...
@@ -8,15 +8,8 @@ from mace.python.tools import memory_optimizer
from
tensorflow.core.framework
import
graph_pb2
from
tensorflow.core.framework
import
tensor_shape_pb2
padding_mode
=
{
'VALID'
:
0
,
'SAME'
:
1
,
'FULL'
:
2
}
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
padding_mode
=
{
'VALID'
:
0
,
'SAME'
:
1
,
'FULL'
:
2
}
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
# the order should be the same as
# eltwise type's in mace/kernels/eltwise.h
...
...
@@ -34,25 +27,22 @@ math_type_mode = {
}
buffer_type_map
=
{
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
activation_name_map
=
{
'Relu'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'Tanh'
:
'TANH'
,
'Relu6'
:
'RELUX'
'Relu'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'Tanh'
:
'TANH'
,
'Relu6'
:
'RELUX'
}
BATCH_NORM_ORDER
=
[
"Add"
,
"Rsqrt"
,
"Mul"
,
"Mul"
,
"Mul"
,
"Sub"
,
"Add"
]
...
...
@@ -62,12 +52,14 @@ MACE_OUTPUT_NODE_NAME = "mace_output_node"
OPENCL_IMAGE_MAX_SIZE
=
16384
def
get_input_tensor
(
op
,
index
):
input_tensor
=
op
.
inputs
[
index
]
if
input_tensor
.
op
.
type
==
'Reshape'
:
input_tensor
=
get_input_tensor
(
input_tensor
.
op
,
0
)
return
input_tensor
class
TFConverter
(
object
):
def
__init__
(
self
,
tf_ops
,
net_def
,
dt
,
device
,
winograd
):
self
.
net_def
=
net_def
...
...
@@ -139,7 +131,7 @@ class TFConverter(object):
op_def
.
name
=
name
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
name
+
':0'
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
...
...
@@ -156,7 +148,7 @@ class TFConverter(object):
op_def
.
name
=
name
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
name
+
':0'
])
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
.
name
=
'dims'
...
...
@@ -172,7 +164,7 @@ class TFConverter(object):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
epsilon_arg
=
op_def
.
arg
.
add
()
...
...
@@ -185,7 +177,7 @@ class TFConverter(object):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
dims_arg
=
op_def
.
arg
.
add
()
...
...
@@ -237,7 +229,8 @@ class TFConverter(object):
tensor
=
self
.
net_def
.
tensors
.
add
()
tf_tensor
=
op
.
outputs
[
0
].
eval
()
if
output_name
in
self
.
transpose_filter_tensor
:
tf_tensor
=
tf_tensor
.
transpose
(
self
.
transpose_filter_tensor
[
output_name
])
tf_tensor
=
tf_tensor
.
transpose
(
self
.
transpose_filter_tensor
[
output_name
])
if
output_name
in
self
.
reshape_tensor
:
tf_tensor
=
tf_tensor
.
reshape
(
self
.
reshape_tensor
[
output_name
])
tensor
.
name
=
op
.
outputs
[
0
].
name
...
...
@@ -262,9 +255,11 @@ class TFConverter(object):
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
if
len
(
output_shape
)
==
0
or
output_shape
[
0
]
is
None
:
return
False
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
return
self
.
winograd
and
op
.
type
!=
'DepthwiseConv2dNative'
and
self
.
device
==
'gpu'
and
\
filter_shape
[
0
]
==
3
and
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
return
self
.
winograd
and
op
.
type
!=
'DepthwiseConv2dNative'
and
\
self
.
device
==
'gpu'
and
filter_shape
[
0
]
==
3
and
\
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
])
and
\
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
...
...
@@ -276,7 +271,8 @@ class TFConverter(object):
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
self
.
transpose_filter_tensor
[
filter_tensor
.
name
]
=
(
3
,
2
,
0
,
1
)
filter_name
=
self
.
add_buffer_to_image
(
op
.
inputs
[
1
].
name
,
"WINOGRAD_FILTER"
)
filter_name
=
self
.
add_buffer_to_image
(
op
.
inputs
[
1
].
name
,
"WINOGRAD_FILTER"
)
# Input transform
wt_op
=
mace_pb2
.
OperatorDef
()
...
...
@@ -292,7 +288,8 @@ class TFConverter(object):
wt_output_name
=
wt_op
.
name
+
":0"
wt_op
.
output
.
extend
([
wt_output_name
])
wt_output_shape
=
mace_pb2
.
OutputShape
()
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
wt_op
.
output_shape
.
extend
([
wt_output_shape
])
...
...
@@ -307,7 +304,8 @@ class TFConverter(object):
matmul_output_name
=
matmul_op
.
name
+
":0"
matmul_op
.
output
.
extend
([
matmul_output_name
])
matmul_output_shape
=
mace_pb2
.
OutputShape
()
matmul_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
matmul_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
matmul_op
.
output_shape
.
extend
([
matmul_output_shape
])
# Inverse transform
...
...
@@ -331,15 +329,17 @@ class TFConverter(object):
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
if
len
(
self
.
tf_graph
[
op
.
name
]
)
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
iwt_op
.
input
.
extend
([
output_name
])
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
fused_act_arg
=
iwt_op
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
...
...
@@ -355,7 +355,6 @@ class TFConverter(object):
self
.
add_output_shape
(
final_op
.
outputs
,
iwt_op
)
self
.
net_def
.
op
.
extend
([
wt_op
,
matmul_op
,
iwt_op
])
def
convert_conv2d
(
self
,
op
):
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
...
...
@@ -365,20 +364,28 @@ class TFConverter(object):
if
op
.
type
==
'DepthwiseConv2dNative'
:
op_def
.
type
=
'DepthwiseConv2d'
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
else
:
op_def
.
type
=
op
.
type
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
else
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'gpu'
:
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
buffer_type
=
"DW_CONV2D_FILTER"
if
op_def
.
type
==
'DepthwiseConv2d'
else
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
buffer_type
)
if
op_def
.
type
==
'DepthwiseConv2d'
:
buffer_type
=
"DW_CONV2D_FILTER"
else
:
buffer_type
=
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
i
).
name
for
i
in
range
(
len
(
op
.
inputs
))])
op_def
.
input
.
extend
(
[
get_input_tensor
(
op
,
i
).
name
for
i
in
range
(
len
(
op
.
inputs
))])
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding'
...
...
@@ -395,18 +402,20 @@ class TFConverter(object):
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
final_op
.
name
,
[]))
==
1
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
if
len
(
self
.
tf_graph
.
get
(
final_op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
if
op_def
.
type
==
"Conv2D"
:
op_def
.
type
=
"FusedConv2D"
...
...
@@ -450,17 +459,16 @@ class TFConverter(object):
var_value
=
get_input_tensor
(
op
,
4
).
eval
().
astype
(
np
.
float32
)
epsilon_value
=
op
.
get_attr
(
'epsilon'
)
scale_value
=
(
(
1.0
/
np
.
vectorize
(
math
.
sqrt
)(
var_value
+
epsilon_value
))
*
gamma_value
)
scale_value
=
((
1.0
/
np
.
vectorize
(
math
.
sqrt
)
(
var_value
+
epsilon_value
))
*
gamma_value
)
offset_value
=
(
-
mean_value
*
scale_value
)
+
beta_value
idx
=
gamma_tensor
.
name
.
rfind
(
'/'
)
name_prefix
=
gamma_tensor
.
name
[:
idx
]
+
'/'
input_names
=
[
name_prefix
+
'scale:0'
,
name_prefix
+
'offset:0'
]
self
.
add_tensor
(
input_names
[
0
],
gamma_value
.
sha
pe
,
gamma_tensor
.
dtype
,
scale_value
)
self
.
add_tensor
(
input_names
[
1
],
gamma_value
.
sha
pe
,
gamma_tensor
.
dtype
,
offset_value
)
input_names
=
[
name_prefix
+
'scale:0'
,
name_prefix
+
'offset:0'
]
self
.
add_tensor
(
input_names
[
0
],
gamma_value
.
shape
,
gamma_tensor
.
dty
pe
,
scale_value
)
self
.
add_tensor
(
input_names
[
1
],
gamma_value
.
shape
,
gamma_tensor
.
dty
pe
,
offset_value
)
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
if
self
.
device
==
'gpu'
:
...
...
@@ -495,14 +503,15 @@ class TFConverter(object):
bn_ops
=
[]
bn_ops
.
append
(
op
)
for
i
in
range
(
1
,
3
):
if
len
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
])
==
1
\
and
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
i
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
])
if
len
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
])
==
1
and
\
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
i
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
])
else
:
raise
Exception
(
'Invalid BatchNorm Op'
)
if
len
(
self
.
tf_graph
[
bn_ops
[
2
].
name
])
==
2
\
and
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
3
]
\
and
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
].
type
==
BATCH_NORM_ORDER
[
4
]:
if
len
(
self
.
tf_graph
[
bn_ops
[
2
].
name
])
==
2
and
\
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
].
type
==
\
BATCH_NORM_ORDER
[
3
]
and
\
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
].
type
==
BATCH_NORM_ORDER
[
4
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
])
else
:
...
...
@@ -682,7 +691,8 @@ class TFConverter(object):
op_def
.
output
.
extend
([
output
.
name
for
output
in
op
.
outputs
])
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'size'
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'align_corners'
size_arg
.
i
=
op
.
get_attr
(
'align_corners'
)
...
...
@@ -712,7 +722,7 @@ class TFConverter(object):
else
:
op_def
.
type
=
"CWise"
x_value
=
0
if
len
(
input_tensor1
.
shape
)
==
4
:
if
len
(
input_tensor1
.
shape
)
==
4
:
op_def
.
input
.
extend
([
op
.
inputs
[
1
].
name
])
x_value
=
get_input_tensor
(
op
,
0
).
eval
().
astype
(
np
.
float32
)
else
:
...
...
@@ -752,7 +762,8 @@ class TFConverter(object):
op_def
.
type
=
"BiasAdd"
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
1
).
name
])
...
...
@@ -772,21 +783,24 @@ class TFConverter(object):
op_def
.
output
.
extend
([
output
.
name
for
output
in
op
.
outputs
])
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'block_shape'
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
=
op_def
.
arg
.
add
()
if
b2s
:
size_arg
.
name
=
'crops'
else
:
size_arg
.
name
=
'paddings'
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
)
self
.
add_output_shape
(
op
.
outputs
,
op_def
)
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
unused_tensor
.
add
(
get_input_tensor
(
op
,
1
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
op
,
2
).
name
)
def
is_atrous_conv2d
(
self
,
op
):
return
op
.
type
==
'SpaceToBatchND'
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Conv2D'
return
op
.
type
==
'SpaceToBatchND'
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Conv2D'
def
convert_atrous_conv2d
(
self
,
op
):
op_def
=
mace_pb2
.
OperatorDef
()
...
...
@@ -796,10 +810,12 @@ class TFConverter(object):
conv_op
=
self
.
tf_graph
[
op
.
name
][
0
]
op_def
.
name
=
conv_op
.
name
op_def
.
type
=
conv_op
.
type
self
.
transpose_filter_tensor
[
get_input_tensor
(
conv_op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
conv_op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'gpu'
:
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
conv_op
,
1
).
name
,
"CONV2D_FILTER"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
conv_op
,
1
).
name
,
"CONV2D_FILTER"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
0
).
name
])
...
...
@@ -807,7 +823,8 @@ class TFConverter(object):
dilation_arg
=
op_def
.
arg
.
add
()
dilation_arg
.
name
=
'dilations'
dilation_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
dilation_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding'
padding_values
=
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
...
...
@@ -831,18 +848,20 @@ class TFConverter(object):
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
resolved_ops
[
conv_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BiasAdd'
:
if
len
(
self
.
tf_graph
[
final_op
.
name
]
)
==
1
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BatchToSpaceND'
:
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BatchToSpaceND'
:
final_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
self
.
resolved_ops
[
final_op
.
name
]
=
1
self
.
unused_tensor
.
add
(
get_input_tensor
(
final_op
,
1
).
name
)
...
...
@@ -850,8 +869,8 @@ class TFConverter(object):
else
:
raise
Exception
(
'Convert atrous conv error: no BatchToSpaceND op'
)
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'Relu'
:
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'Relu'
:
relu_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
op_def
.
type
=
"FusedConv2D"
fused_relu_arg
=
op_def
.
arg
.
add
()
...
...
@@ -866,8 +885,10 @@ class TFConverter(object):
def
is_softmax
(
self
,
op
):
return
op
.
type
==
'Softmax'
and
\
len
(
self
.
tf_parents
[
op
.
name
])
==
1
and
self
.
tf_parents
[
op
.
name
][
0
].
type
==
'Reshape'
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Reshape'
len
(
self
.
tf_parents
[
op
.
name
])
==
1
and
\
self
.
tf_parents
[
op
.
name
][
0
].
type
==
'Reshape'
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Reshape'
def
convert_softmax
(
self
,
softmax_op
):
op_def
=
self
.
net_def
.
op
.
add
()
...
...
@@ -890,7 +911,8 @@ class TFConverter(object):
children_ops
=
self
.
tf_graph
[
squeeze_op
.
name
]
print
children_ops
if
len
(
children_ops
)
>
1
and
children_ops
[
0
].
type
==
'Shape'
:
self
.
unused_tensor
.
add
(
get_input_tensor
(
children_ops
[
1
],
0
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
children_ops
[
1
],
0
).
name
)
self
.
resolved_ops
[
children_ops
[
1
].
name
]
=
1
else
:
op_def
.
input
.
extend
([
parent_reshape_op
.
inputs
[
0
].
name
])
...
...
@@ -999,11 +1021,13 @@ class TFConverter(object):
self
.
convert_global_avg_pooling
(
op
)
self
.
unused_tensor
.
add
(
op
.
inputs
[
1
].
name
)
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
#elif op.type in ['']:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
# elif op.type in ['']:
# self.convert_normal_op(op)
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
for
op
in
self
.
tf_ops
:
if
self
.
resolved_ops
[
op
.
name
]
==
1
:
...
...
@@ -1011,7 +1035,8 @@ class TFConverter(object):
elif
op
.
type
==
'Const'
:
self
.
convert_tensor
(
op
)
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
if
self
.
device
==
'gpu'
:
self
.
add_gpu_output_transform
(
output_nodes
)
...
...
@@ -1026,6 +1051,7 @@ class TFConverter(object):
if
self
.
resolved_ops
[
key
]
!=
1
:
print
'Unresolve Op: %s'
%
key
class
Optimizer
:
def
__init__
(
self
,
net_def
,
device
):
self
.
net_def
=
net_def
...
...
@@ -1056,14 +1082,17 @@ class Optimizer:
for
op
in
self
.
net_def
.
op
:
if
op
.
name
in
resolved_ops
:
pass
elif
op
.
type
==
'DepthwiseConv2d'
and
len
(
op
.
output
)
==
1
\
and
self
.
mace_graph
[
op
.
output
[
0
]][
0
].
type
==
'FoldedBatchNorm'
:
elif
op
.
type
==
'DepthwiseConv2d'
and
len
(
op
.
output
)
==
1
and
\
self
.
mace_graph
[
op
.
output
[
0
]][
0
].
type
==
'FoldedBatchNorm'
:
depthwise_conv2d_op
=
op
folded_bn_op
=
self
.
mace_graph
[
op
.
output
[
0
]][
0
]
weight_buffer_name
=
self
.
get_buffer_tensor_name
(
depthwise_conv2d_op
.
input
[
1
])
weight_buffer_name
=
self
.
get_buffer_tensor_name
(
depthwise_conv2d_op
.
input
[
1
])
weight_tensor
=
self
.
tensor_map
[
weight_buffer_name
]
scale_buffer_name
=
self
.
get_buffer_tensor_name
(
folded_bn_op
.
input
[
1
])
offset_buffer_name
=
self
.
get_buffer_tensor_name
(
folded_bn_op
.
input
[
2
])
scale_buffer_name
=
self
.
get_buffer_tensor_name
(
folded_bn_op
.
input
[
1
])
offset_buffer_name
=
self
.
get_buffer_tensor_name
(
folded_bn_op
.
input
[
2
])
scale_tensor
=
self
.
tensor_map
[
scale_buffer_name
]
weight_shape
=
weight_tensor
.
dims
idx
=
0
...
...
@@ -1072,14 +1101,18 @@ class Optimizer:
for
ic
in
range
(
weight_shape
[
1
]):
for
i
in
range
(
weight_shape
[
2
]):
for
j
in
range
(
weight_shape
[
3
]):
weight_tensor
.
float_data
[
idx
]
*=
scale_tensor
.
float_data
[
ic
*
weight_shape
[
0
]
+
oc
]
weight_tensor
.
float_data
[
idx
]
*=
scale_tensor
.
float_data
[
ic
*
weight_shape
[
0
]
+
oc
]
idx
+=
1
else
:
# HWIO
for
i
in
range
(
weight_shape
[
0
]):
for
j
in
range
(
weight_shape
[
1
]):
for
ic
in
range
(
weight_shape
[
2
]):
for
oc
in
range
(
weight_shape
[
3
]):
weight_tensor
.
float_data
[
idx
]
*=
scale_tensor
.
float_data
[
ic
*
weight_shape
[
3
]
+
oc
]
weight_tensor
.
float_data
[
idx
]
*=
scale_tensor
.
float_data
[
ic
*
weight_shape
[
3
]
+
oc
]
idx
+=
1
new_tensors
.
append
(
weight_tensor
)
...
...
@@ -1129,6 +1162,7 @@ class Optimizer:
new_net
=
self
.
fold_batch_norm
()
return
new_net
def
add_shape_info
(
input_graph_def
,
input_nodes
,
input_shapes
):
inputs_replaced_graph
=
graph_pb2
.
GraphDef
()
for
node
in
input_graph_def
.
node
:
...
...
@@ -1138,7 +1172,8 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
placeholder_node
=
copy
.
deepcopy
(
node
)
placeholder_node
.
attr
.
clear
()
placeholder_node
.
attr
[
'shape'
].
shape
.
dim
.
extend
([
tensor_shape_pb2
.
TensorShapeProto
.
Dim
(
size
=
i
)
for
i
in
input_shape
tensor_shape_pb2
.
TensorShapeProto
.
Dim
(
size
=
i
)
for
i
in
input_shape
])
placeholder_node
.
attr
[
'dtype'
].
CopyFrom
(
node
.
attr
[
'dtype'
])
inputs_replaced_graph
.
node
.
extend
([
placeholder_node
])
...
...
@@ -1147,7 +1182,8 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
return
inputs_replaced_graph
def
convert_to_mace_pb
(
model_file
,
input_node
,
input_shape
,
output_node
,
data_type
,
device
,
winograd
):
def
convert_to_mace_pb
(
model_file
,
input_node
,
input_shape
,
output_node
,
data_type
,
device
,
winograd
):
net_def
=
mace_pb2
.
NetDef
()
dt
=
data_type_map
[
data_type
]
...
...
@@ -1165,7 +1201,8 @@ def convert_to_mace_pb(model_file, input_node, input_shape, output_node, data_ty
output_nodes
=
[
x
for
x
in
output_node
.
split
(
','
)]
assert
len
(
input_nodes
)
==
len
(
input_shapes
)
input_graph_def
=
add_shape_info
(
input_graph_def
,
input_nodes
,
input_shapes
)
input_graph_def
=
add_shape_info
(
input_graph_def
,
input_nodes
,
input_shapes
)
with
tf
.
Session
()
as
session
:
with
session
.
graph
.
as_default
()
as
graph
:
tf
.
import_graph_def
(
input_graph_def
,
name
=
""
)
...
...
mace/python/tools/tf_dsp_converter_lib.py
浏览文件 @
58f2516e
...
...
@@ -6,8 +6,10 @@ from dsp_ops import DspOps
from
mace.python.tools
import
graph_util
from
mace.python.tools.convert_util
import
tf_dtype_2_mace_dtype
# converter --input ../libcv/quantized_model.pb --output quantized_model_dsp.pb \
# --runtime dsp --input_node input_node --output_node output_node
# converter --input ../libcv/quantized_model.pb \
# --output quantized_model_dsp.pb \
# --runtime dsp --input_node input_node \
# --output_node output_node
padding_mode
=
{
'NA'
:
0
,
...
...
@@ -18,24 +20,29 @@ padding_mode = {
'SAME_CAFFE'
:
5
}
def
get_tensor_name_from_op
(
op_name
,
port
):
return
op_name
+
':'
+
str
(
port
)
def
get_node_from_map
(
op_map
,
op_or_tensor_name
):
op_name
=
op_or_tensor_name
.
split
(
':'
)[
0
]
return
op_map
[
op_name
]
def
get_op_and_port_from_tensor
(
tensor_name
):
op
,
port
=
tensor_name
.
split
(
':'
)
port
=
int
(
port
)
return
op
,
port
def
max_elem_size
(
tensor
):
if
len
(
tensor
.
shape
.
as_list
())
==
0
:
return
tensor
.
dtype
.
size
else
:
return
reduce
(
mul
,
tensor
.
shape
.
as_list
())
*
tensor
.
dtype
.
size
def
find_dtype
(
tensor_dtype
):
if
tensor_dtype
==
tf
.
float32
:
return
mace_pb2
.
DT_FLOAT
...
...
@@ -46,20 +53,24 @@ def find_dtype(tensor_dtype):
else
:
raise
Exception
(
'Unsupported data type: '
,
tensor_dtype
)
def
has_padding_and_strides
(
op
):
return
'padding'
in
op
.
node_def
.
attr
and
'strides'
in
op
.
node_def
.
attr
def
is_node_flatten_reshape
(
op
):
return
op
.
type
==
'Reshape'
and
len
(
op
.
outputs
[
0
].
shape
)
==
1
def
get_input_tensor
(
op
,
index
):
input_tensor
=
op
.
inputs
[
index
]
if
input_tensor
.
op
.
type
==
'Reshape'
:
input_tensor
=
get_input_tensor
(
input_tensor
.
op
,
0
)
return
input_tensor
def
add_shape_const_node
(
net_def
,
op
,
values
,
name
):
print
(
'Add const node: '
,
op
.
name
+
'/'
+
name
)
print
(
'Add const node: '
,
op
.
name
+
'/'
+
name
)
tensor
=
net_def
.
tensors
.
add
()
node_name
=
op
.
name
+
'/'
+
name
tensor
.
name
=
node_name
+
':0'
...
...
@@ -69,8 +80,8 @@ def add_shape_const_node(net_def, op, values, name):
def
convert_op_outputs
(
mace_op_def
,
tf_op
):
mace_op_def
.
output_type
.
extend
([
tf_dtype_2_mace_dtype
(
output
.
dtype
)
for
output
in
tf_op
.
outputs
])
mace_op_def
.
output_type
.
extend
(
[
tf_dtype_2_mace_dtype
(
output
.
dtype
)
for
output
in
tf_op
.
outputs
])
output_shapes
=
[]
for
output
in
tf_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
...
...
@@ -81,13 +92,13 @@ def convert_op_outputs(mace_op_def, tf_op):
def
convert_ops
(
unresolved_ops
,
resolved_ops
,
net_def
,
output_node
,
dsp_ops
):
first_op
=
unresolved_ops
[
0
]
print
(
'Op: '
,
first_op
.
name
,
first_op
.
type
,
first_op
.
outputs
[
0
].
shape
)
print
(
'Op: '
,
first_op
.
name
,
first_op
.
type
,
first_op
.
outputs
[
0
].
shape
)
if
first_op
.
name
in
resolved_ops
:
pass
elif
first_op
.
type
==
'Const'
:
print
(
'Add const node: '
,
first_op
.
name
)
print
(
'Add const node: '
,
first_op
.
name
)
tf_tensor
=
first_op
.
outputs
[
0
].
eval
()
tensor
=
net_def
.
tensors
.
add
()
tensor
.
name
=
first_op
.
outputs
[
0
].
name
...
...
@@ -112,8 +123,8 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
if
len
(
first_op
.
outputs
)
>
0
and
first_op
.
type
==
'Dequantize'
\
and
len
(
first_op
.
outputs
[
0
].
consumers
())
>
0
\
and
(
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'SpaceToBatchND'
\
or
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'BatchToSpaceND'
):
and
(
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'SpaceToBatchND'
or
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'BatchToSpaceND'
):
input_tensor
=
first_op
.
inputs
[
0
]
min_tensor
=
first_op
.
inputs
[
1
]
max_tensor
=
first_op
.
inputs
[
2
]
...
...
@@ -133,13 +144,17 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def
.
input
.
append
(
input_tensor
.
name
)
op_def
.
input
.
extend
([
t
.
name
for
t
in
s2b_op
.
inputs
[
1
:]])
op_def
.
input
.
extend
([
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
quantize_op
.
outputs
])
op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
out
)
for
out
in
quantize_op
.
outputs
])
convert_op_outputs
(
op_def
,
quantize_op
)
elif
len
(
first_op
.
outputs
)
>
0
and
first_op
.
type
==
'QuantizedReshape'
\
and
len
(
first_op
.
outputs
[
0
].
consumers
())
>
0
\
and
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'Dequantize'
\
and
len
(
first_op
.
outputs
[
0
].
consumers
()[
0
].
outputs
[
0
].
consumers
())
>
0
\
and
first_op
.
outputs
[
0
].
consumers
()[
0
].
outputs
[
0
].
consumers
()[
0
].
type
==
'Softmax'
:
elif
len
(
first_op
.
outputs
)
>
0
and
\
first_op
.
type
==
'QuantizedReshape'
and
\
len
(
first_op
.
outputs
[
0
].
consumers
())
>
0
and
\
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'Dequantize'
and
\
len
(
first_op
.
outputs
[
0
].
consumers
()[
0
].
outputs
[
0
].
consumers
())
\
>
0
and
\
first_op
.
outputs
[
0
].
consumers
()[
0
].
outputs
[
0
].
consumers
()[
0
].
type
\
==
'Softmax'
:
input_tensor
=
first_op
.
inputs
[
0
]
min_tensor
=
first_op
.
inputs
[
2
]
max_tensor
=
first_op
.
inputs
[
3
]
...
...
@@ -161,12 +176,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def
.
name
=
quantize_reshape_op
.
name
op_def
.
type
=
dsp_ops
.
map_nn_op
(
'QuantizedSoftmax'
)
op_def
.
input
.
extend
([
input_tensor
.
name
,
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
quantize_reshape_op
.
outputs
])
op_def
.
input
.
extend
(
[
input_tensor
.
name
,
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
out
)
for
out
in
quantize_reshape_op
.
outputs
])
convert_op_outputs
(
op_def
,
quantize_reshape_op
)
elif
len
(
first_op
.
outputs
)
>
0
and
first_op
.
type
==
'Dequantize'
\
and
len
(
first_op
.
outputs
[
0
].
consumers
())
>
0
\
and
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'Tanh'
:
elif
len
(
first_op
.
outputs
)
>
0
and
first_op
.
type
==
'Dequantize'
and
\
len
(
first_op
.
outputs
[
0
].
consumers
())
>
0
and
\
first_op
.
outputs
[
0
].
consumers
()[
0
].
type
==
'Tanh'
:
input_tensor
=
first_op
.
inputs
[
0
]
min_tensor
=
first_op
.
inputs
[
1
]
max_tensor
=
first_op
.
inputs
[
2
]
...
...
@@ -186,18 +203,24 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def
.
name
=
quantize_op
.
name
op_def
.
type
=
dsp_ops
.
map_nn_op
(
'Quantized'
+
tanh_op
.
type
)
op_def
.
input
.
extend
([
input_tensor
.
name
,
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
quantize_op
.
outputs
])
op_def
.
input
.
extend
(
[
input_tensor
.
name
,
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
out
)
for
out
in
quantize_op
.
outputs
])
convert_op_outputs
(
op_def
,
quantize_op
)
# tanh is last op
else
:
op_def
.
name
=
tanh_op
.
name
+
'/QuantizedTanh'
op_def
.
type
=
dsp_ops
.
map_nn_op
(
'Quantized'
+
tanh_op
.
type
)
op_def
.
input
.
extend
([
input_tensor
.
name
,
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
input_tensor
),
op_def
.
input
.
extend
(
[
input_tensor
.
name
,
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
input_tensor
),
max_elem_size
(
min_tensor
),
max_elem_size
(
max_tensor
)])
op_def
.
output_type
.
extend
([
mace_pb2
.
DT_UINT8
,
mace_pb2
.
DT_FLOAT
,
mace_pb2
.
DT_FLOAT
])
max_elem_size
(
max_tensor
)
])
op_def
.
output_type
.
extend
(
[
mace_pb2
.
DT_UINT8
,
mace_pb2
.
DT_FLOAT
,
mace_pb2
.
DT_FLOAT
])
output_shapes
=
[]
for
output
in
first_op
.
inputs
:
output_shape
=
mace_pb2
.
OutputShape
()
...
...
@@ -208,31 +231,39 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
new_tanh_op_def
=
net_def
.
op
.
add
()
new_tanh_op_def
.
name
=
tanh_op
.
name
new_tanh_op_def
.
type
=
dsp_ops
.
map_nn_op
(
'Dequantize'
)
new_tanh_op_def
.
input
.
extend
([
get_tensor_name_from_op
(
op_def
.
name
,
0
),
new_tanh_op_def
.
input
.
extend
([
get_tensor_name_from_op
(
op_def
.
name
,
0
),
get_tensor_name_from_op
(
op_def
.
name
,
1
),
get_tensor_name_from_op
(
op_def
.
name
,
2
)])
new_tanh_op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
tanh_op
.
outputs
[
0
])])
get_tensor_name_from_op
(
op_def
.
name
,
2
)
])
new_tanh_op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
tanh_op
.
outputs
[
0
])])
convert_op_outputs
(
new_tanh_op_def
,
tanh_op
)
elif
has_padding_and_strides
(
first_op
):
op_def
.
padding
=
padding_mode
[
first_op
.
get_attr
(
'padding'
)]
op_def
.
input
.
extend
([
t
.
name
for
t
in
first_op
.
inputs
])
if
'ksize'
in
first_op
.
node_def
.
attr
:
ksize
=
first_op
.
get_attr
(
'ksize'
)
ksize_tensor
=
add_shape_const_node
(
net_def
,
first_op
,
ksize
,
'ksize'
)
ksize_tensor
=
add_shape_const_node
(
net_def
,
first_op
,
ksize
,
'ksize'
)
op_def
.
input
.
extend
([
ksize_tensor
])
strides
=
first_op
.
get_attr
(
'strides'
)
strides_tensor
=
add_shape_const_node
(
net_def
,
first_op
,
strides
,
'strides'
)
strides_tensor
=
add_shape_const_node
(
net_def
,
first_op
,
strides
,
'strides'
)
op_def
.
input
.
extend
([
strides_tensor
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
convert_op_outputs
(
op_def
,
first_op
)
elif
is_node_flatten_reshape
(
first_op
):
op_def
.
type
=
'Flatten'
op_def
.
input
.
extend
([
t
.
name
for
t
in
first_op
.
inputs
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
convert_op_outputs
(
op_def
,
first_op
)
elif
dsp_ops
.
has_op
(
first_op
.
type
):
op_def
.
input
.
extend
([
t
.
name
for
t
in
first_op
.
inputs
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
op_def
.
out_max_byte_size
.
extend
(
[
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
convert_op_outputs
(
op_def
,
first_op
)
else
:
raise
Exception
(
'Unsupported op: '
,
first_op
)
...
...
@@ -241,12 +272,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
del
unresolved_ops
[
0
]
def
add_output_node
(
net_def
,
output_node
):
op_def
=
net_def
.
op
.
add
()
op_def
.
name
=
'__output__'
op_def
.
type
=
'OUTPUT'
op_def
.
input
.
extend
([
get_tensor_name_from_op
(
output_node
,
0
)])
def
reverse_batch_to_space_and_biasadd
(
net_def
):
tensor_map
=
{}
for
tensor
in
net_def
.
tensors
:
...
...
@@ -272,42 +305,65 @@ def reverse_batch_to_space_and_biasadd(net_def):
success
=
False
if
op
.
type
==
'Requantize_32to8'
:
biasadd_requantize_op
=
op
biasadd_op
=
get_node_from_map
(
op_map
,
biasadd_requantize_op
.
input
[
0
])
biasadd_op
=
get_node_from_map
(
op_map
,
biasadd_requantize_op
.
input
[
0
])
if
biasadd_op
.
type
==
'QuantizedBiasAdd_8p8to32'
:
b2s_op
=
get_node_from_map
(
op_map
,
biasadd_op
.
input
[
0
])
if
b2s_op
.
type
==
'QuantizedBatchToSpaceND_8'
:
conv_requantize_op
=
get_node_from_map
(
op_map
,
b2s_op
.
input
[
0
])
conv_op
=
get_node_from_map
(
op_map
,
conv_requantize_op
.
input
[
0
])
conv_requantize_op
=
get_node_from_map
(
op_map
,
b2s_op
.
input
[
0
])
conv_op
=
get_node_from_map
(
op_map
,
conv_requantize_op
.
input
[
0
])
if
conv_op
.
type
==
'QuantizedConv2d_8x8to32'
:
new_biasadd_op
=
mace_pb2
.
OperatorDef
()
new_biasadd_op
.
CopyFrom
(
biasadd_op
)
new_biasadd_op
.
input
[
0
]
=
get_tensor_name_from_op
(
conv_requantize_op
.
name
,
0
)
new_biasadd_op
.
input
[
2
]
=
get_tensor_name_from_op
(
conv_requantize_op
.
name
,
1
)
new_biasadd_op
.
input
[
3
]
=
get_tensor_name_from_op
(
conv_requantize_op
.
name
,
2
)
new_biasadd_op
.
out_max_byte_size
[
0
]
=
conv_requantize_op
.
out_max_byte_size
[
0
]
*
4
new_biasadd_op
.
input
[
0
]
=
get_tensor_name_from_op
(
conv_requantize_op
.
name
,
0
)
new_biasadd_op
.
input
[
2
]
=
get_tensor_name_from_op
(
conv_requantize_op
.
name
,
1
)
new_biasadd_op
.
input
[
3
]
=
get_tensor_name_from_op
(
conv_requantize_op
.
name
,
2
)
new_biasadd_op
.
out_max_byte_size
[
0
]
=
conv_requantize_op
.
out_max_byte_size
[
0
]
*
4
new_biasadd_requantize_op
=
mace_pb2
.
OperatorDef
()
new_biasadd_requantize_op
.
CopyFrom
(
biasadd_requantize_op
)
new_biasadd_requantize_op
.
out_max_byte_size
[
0
]
=
new_biasadd_op
.
out_max_byte_size
[
0
]
/
4
new_biasadd_requantize_op
.
CopyFrom
(
biasadd_requantize_op
)
new_biasadd_requantize_op
.
out_max_byte_size
[
0
]
=
new_biasadd_op
.
out_max_byte_size
[
0
]
/
4
new_b2s_op
=
mace_pb2
.
OperatorDef
()
new_b2s_op
.
CopyFrom
(
b2s_op
)
new_b2s_op
.
input
[
0
]
=
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
0
)
new_b2s_op
.
input
[
3
]
=
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
1
)
new_b2s_op
.
input
[
4
]
=
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
2
)
new_ops
.
extend
([
new_biasadd_op
,
new_biasadd_requantize_op
,
new_b2s_op
])
skip_ops
=
skip_ops
.
union
([
biasadd_op
.
name
,
biasadd_requantize_op
.
name
,
b2s_op
.
name
])
new_b2s_op
.
input
[
0
]
=
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
0
)
new_b2s_op
.
input
[
3
]
=
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
1
)
new_b2s_op
.
input
[
4
]
=
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
2
)
new_ops
.
extend
([
new_biasadd_op
,
new_biasadd_requantize_op
,
new_b2s_op
])
skip_ops
=
skip_ops
.
union
([
biasadd_op
.
name
,
biasadd_requantize_op
.
name
,
b2s_op
.
name
])
visited_ops
.
add
(
op
.
name
)
follow_ops
=
consumers
[
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
0
)]
follow_ops
=
consumers
[
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
0
)]
for
follow_op
in
follow_ops
:
new_follow_op
=
mace_pb2
.
OperatorDef
()
new_follow_op
.
CopyFrom
(
follow_op
)
for
i
in
xrange
(
len
(
follow_op
.
input
)):
for
k
in
xrange
(
3
):
if
new_follow_op
.
input
[
i
]
==
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
k
):
new_follow_op
.
input
[
i
]
=
get_tensor_name_from_op
(
b2s_op
.
name
,
k
)
if
new_follow_op
.
input
[
i
]
==
get_tensor_name_from_op
(
biasadd_requantize_op
.
name
,
k
):
new_follow_op
.
input
[
i
]
=
get_tensor_name_from_op
(
b2s_op
.
name
,
k
)
new_ops
.
append
(
new_follow_op
)
skip_ops
.
add
(
follow_op
.
name
)
visited_ops
.
add
(
follow_op
.
name
)
...
...
@@ -321,6 +377,7 @@ def reverse_batch_to_space_and_biasadd(net_def):
return
new_net_def
def
add_node_id
(
net_def
):
node_id_counter
=
0
node_id_map
=
{}
...
...
@@ -343,9 +400,12 @@ def add_node_id(net_def):
return
net_def
def
add_input_output_info
(
net_def
,
input_node
,
output_node
,
graph
,
dtype
):
input_tensor
=
graph
.
get_tensor_by_name
(
get_tensor_name_from_op
(
input_node
,
0
))
output_tensor
=
graph
.
get_tensor_by_name
(
get_tensor_name_from_op
(
output_node
,
0
))
input_tensor
=
graph
.
get_tensor_by_name
(
get_tensor_name_from_op
(
input_node
,
0
))
output_tensor
=
graph
.
get_tensor_by_name
(
get_tensor_name_from_op
(
output_node
,
0
))
input_info
=
net_def
.
input_info
.
add
()
input_info
.
dims
.
extend
(
input_tensor
.
shape
.
as_list
())
...
...
@@ -353,7 +413,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
if
dtype
==
mace_pb2
.
DT_UINT8
:
for
i
in
xrange
(
2
):
input_info
=
net_def
.
input_info
.
add
()
input_info
.
dims
.
extend
([
1
,
1
,
1
,
1
])
input_info
.
dims
.
extend
([
1
,
1
,
1
,
1
])
input_info
.
data_type
=
mace_pb2
.
DT_FLOAT
output_info
=
net_def
.
output_info
.
add
()
...
...
@@ -362,11 +422,12 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
if
dtype
==
mace_pb2
.
DT_UINT8
:
for
i
in
xrange
(
2
):
output_info
=
net_def
.
output_info
.
add
()
output_info
.
dims
.
extend
([
1
,
1
,
1
,
1
])
output_info
.
dims
.
extend
([
1
,
1
,
1
,
1
])
output_info
.
data_type
=
mace_pb2
.
DT_FLOAT
return
net_def
def
fuse_quantize
(
net_def
,
input_node
,
output_node
):
tensor_map
=
{}
for
tensor
in
net_def
.
tensors
:
...
...
@@ -397,18 +458,24 @@ def fuse_quantize(net_def, input_node, output_node):
elif
o
.
type
==
'Quantize'
:
quantize_op
=
o
if
quantize_op
is
not
None
:
minf_op
,
maxf_op
=
consumers
[
get_tensor_name_from_op
(
flatten_op
.
name
,
0
)]
skip_ops
=
skip_ops
.
union
([
flatten_op
.
name
,
minf_op
.
name
,
maxf_op
.
name
])
skip_tensors
=
skip_tensors
.
union
([
flatten_op
.
input
[
1
],
minf_op
.
input
[
1
],
maxf_op
.
input
[
1
]])
minf_op
,
maxf_op
=
consumers
[
get_tensor_name_from_op
(
flatten_op
.
name
,
0
)]
skip_ops
=
skip_ops
.
union
(
[
flatten_op
.
name
,
minf_op
.
name
,
maxf_op
.
name
])
skip_tensors
=
skip_tensors
.
union
(
[
flatten_op
.
input
[
1
],
minf_op
.
input
[
1
],
maxf_op
.
input
[
1
]])
quantize_op
.
type
=
'AutoQuantize'
del
quantize_op
.
input
[
1
:]
new_net_def
=
mace_pb2
.
NetDef
()
new_net_def
.
tensors
.
extend
([
tensor
for
tensor
in
net_def
.
tensors
if
tensor
.
name
not
in
skip_tensors
])
new_net_def
.
tensors
.
extend
([
tensor
for
tensor
in
net_def
.
tensors
if
tensor
.
name
not
in
skip_tensors
])
new_net_def
.
op
.
extend
([
op
for
op
in
net_def
.
op
if
op
.
name
not
in
skip_ops
])
new_net_def
.
op
.
extend
(
new_ops
)
return
new_net_def
def
convert_to_mace_pb
(
model_file
,
input_node
,
output_node
,
dsp_mode
):
"""
nnlib does not have batch norm, so use tensorflow optimizer to fold
...
...
@@ -432,12 +499,14 @@ def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
# convert const node
unresolved_ops
=
[
op
for
op
in
ops
if
op
.
type
==
'Const'
]
while
len
(
unresolved_ops
)
>
0
:
convert_ops
(
unresolved_ops
,
resolved_ops
,
net_def
,
output_node
,
dsp_ops
)
convert_ops
(
unresolved_ops
,
resolved_ops
,
net_def
,
output_node
,
dsp_ops
)
# convert op node
unresolved_ops
=
[
op
for
op
in
ops
if
op
.
type
!=
'Const'
]
while
len
(
unresolved_ops
)
>
0
:
convert_ops
(
unresolved_ops
,
resolved_ops
,
net_def
,
output_node
,
dsp_ops
)
convert_ops
(
unresolved_ops
,
resolved_ops
,
net_def
,
output_node
,
dsp_ops
)
add_output_node
(
net_def
,
output_node
)
net_def
=
reverse_batch_to_space_and_biasadd
(
net_def
)
...
...
@@ -447,11 +516,11 @@ def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
net_def_with_node_id
=
add_node_id
(
sorted_net_def
)
dtype
=
mace_pb2
.
DT_FLOAT
final_net_def
=
add_input_output_info
(
net_def_with_node_id
,
input_node
,
output_node
,
graph
,
dtype
)
final_net_def
=
add_input_output_info
(
net_def_with_node_id
,
input_node
,
output_node
,
graph
,
dtype
)
arg
=
final_net_def
.
arg
.
add
()
arg
.
name
=
'dsp_mode'
arg
.
i
=
dsp_mode
return
final_net_def
mace/python/tools/tf_ops_stats.py
浏览文件 @
58f2516e
...
...
@@ -10,18 +10,21 @@ from tensorflow import gfile
FLAGS
=
None
def
hist_inc
(
hist
,
key
):
if
key
in
hist
:
hist
[
key
]
+=
1
else
:
hist
[
key
]
=
1
def
to_int_list
(
long_list
):
int_list
=
[]
for
value
in
long_list
:
int_list
.
append
(
int
(
value
))
return
int_list
def
main
(
unused_args
):
if
not
FLAGS
.
input
or
not
gfile
.
Exists
(
FLAGS
.
input
):
print
(
'Input graph file '
+
FLAGS
.
input
+
' does not exist!'
)
...
...
@@ -49,7 +52,9 @@ def main(unused_args):
tensor
=
output
.
eval
()
tensor_shape
=
list
(
tensor
.
shape
)
tensor_shapes
[
tensor_name
]
=
tensor_shape
print
(
"Const %s: %s, %d"
%
(
tensor_name
,
tensor_shape
,
functools
.
reduce
(
operator
.
mul
,
tensor_shape
,
1
)))
print
(
"Const %s: %s, %d"
%
(
tensor_name
,
tensor_shape
,
functools
.
reduce
(
operator
.
mul
,
tensor_shape
,
1
)))
if
len
(
tensor_shape
)
==
1
and
tensor_shape
[
0
]
<
10
:
tensor_values
[
tensor_name
]
=
list
(
tensor
)
...
...
@@ -65,11 +70,16 @@ def main(unused_args):
if
input_name
.
endswith
(
'weights/read:0'
):
ksize
=
input
.
shape
.
as_list
()
break
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
ksize
=
tensor_shapes
[
input_name
]
break
print
(
'%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
,
op
.
inputs
[
0
].
shape
,
op
.
outputs
[
0
].
shape
))
key
=
'%s(padding=%s, strides=%s, ksize=%s, format=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
)
print
(
'%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
,
op
.
inputs
[
0
].
shape
,
op
.
outputs
[
0
].
shape
))
key
=
'%s(padding=%s, strides=%s, ksize=%s, format=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'FusedResizeAndPadConv2D'
]:
padding
=
op
.
get_attr
(
'padding'
)
...
...
@@ -78,20 +88,25 @@ def main(unused_args):
ksize
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
ksize
=
tensor_shapes
[
input_name
]
break
key
=
'%s(padding=%s, strides=%s, ksize=%s, resize_align_corners=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
resize_align_corners
)
key
=
'%s(padding=%s, strides=%s, ksize=%s, '
\
'resize_align_corners=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
resize_align_corners
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'ResizeBilinear'
]:
align_corners
=
op
.
get_attr
(
'align_corners'
)
size
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'size:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'size:0'
)
and
input_name
in
tensor_values
:
size
=
tensor_values
[
input_name
]
break
key
=
'%s(size=%s, align_corners=%s)'
%
(
op
.
type
,
size
,
align_corners
)
key
=
'%s(size=%s, align_corners=%s)'
%
(
op
.
type
,
size
,
align_corners
)
print
(
key
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'AvgPool'
,
'MaxPool'
]:
...
...
@@ -99,38 +114,47 @@ def main(unused_args):
strides
=
to_int_list
(
op
.
get_attr
(
'strides'
))
ksize
=
to_int_list
(
op
.
get_attr
(
'ksize'
))
data_format
=
op
.
get_attr
(
'data_format'
)
key
=
'%s(padding=%s, strides=%s, ksize=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
)
key
=
'%s(padding=%s, strides=%s, ksize=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'SpaceToBatchND'
,
'BatchToSpaceND'
]:
block_shape
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'block_shape:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'block_shape:0'
)
and
input_name
in
tensor_values
:
block_shape
=
tensor_values
[
input_name
]
break
paddings
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
paddings
=
tensor_values
[
input_name
]
break
crops
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'crops:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'crops:0'
)
and
input_name
in
tensor_values
:
paddings
=
tensor_values
[
input_name
]
break
if
op
.
type
==
'SpaceToBatchND'
:
key
=
'%s(block_shape=%s, paddings=%s)'
%
(
op
.
type
,
block_shape
,
paddings
)
key
=
'%s(block_shape=%s, paddings=%s)'
%
(
op
.
type
,
block_shape
,
paddings
)
else
:
key
=
'%s(block_shape=%s, crops=%s)'
%
(
op
.
type
,
block_shape
,
crops
)
key
=
'%s(block_shape=%s, crops=%s)'
%
(
op
.
type
,
block_shape
,
crops
)
print
(
key
)
hist_inc
(
stats
,
key
)
elif
op
.
type
==
'Pad'
:
paddings
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
paddings
=
tensor_values
[
input_name
]
break
key
=
'%s(paddings=%s)'
%
(
op
.
type
,
paddings
)
...
...
@@ -142,6 +166,7 @@ def main(unused_args):
for
key
,
value
in
sorted
(
six
.
iteritems
(
stats
)):
print
(
'%s: %d'
%
(
key
,
value
))
def
parse_args
():
'''Parses command line arguments.'''
parser
=
argparse
.
ArgumentParser
()
...
...
@@ -152,6 +177,7 @@ def parse_args():
help
=
'TensorFlow
\'
GraphDef
\'
file to load.'
)
return
parser
.
parse_known_args
()
if
__name__
==
'__main__'
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/bazel_adb_run.py
浏览文件 @
58f2516e
...
...
@@ -7,7 +7,6 @@
# --target=//mace/ops:ops_test
# --stdout_processor=stdout_processor
import
argparse
import
random
import
re
...
...
@@ -15,15 +14,18 @@ import sys
import
sh_commands
def
stdout_processor
(
stdout
,
device_properties
,
abi
):
pass
def
ops_test_stdout_processor
(
stdout
,
device_properties
,
abi
):
stdout_lines
=
stdout
.
split
(
"
\n
"
)
for
line
in
stdout_lines
:
if
"Aborted"
in
line
or
"FAILED"
in
line
:
raise
Exception
(
"Command failed"
)
def
ops_benchmark_stdout_processor
(
stdout
,
device_properties
,
abi
):
stdout_lines
=
stdout
.
split
(
"
\n
"
)
metrics
=
{}
...
...
@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
line
=
line
.
strip
()
parts
=
line
.
split
()
if
len
(
parts
)
==
5
and
parts
[
0
].
startswith
(
"BM_"
):
metrics
[
"%s.time_ms"
%
parts
[
0
]]
=
str
(
float
(
parts
[
1
])
/
1e6
)
metrics
[
"%s.time_ms"
%
parts
[
0
]]
=
str
(
float
(
parts
[
1
])
/
1e6
)
metrics
[
"%s.input_mb_per_sec"
%
parts
[
0
]]
=
parts
[
3
]
metrics
[
"%s.gmacc_per_sec"
%
parts
[
0
]]
=
parts
[
4
]
platform
=
device_properties
[
"ro.board.platform"
].
replace
(
" "
,
"-"
)
model
=
device_properties
[
"ro.product.model"
].
replace
(
" "
,
"-"
)
tags
=
{
"ro.board.platform"
:
platform
,
tags
=
{
"ro.board.platform"
:
platform
,
"ro.product.model"
:
model
,
"abi"
:
abi
}
sh_commands
.
falcon_push_metrics
(
metrics
,
tags
=
tags
,
endpoint
=
"mace_ops_benchmark"
)
"abi"
:
abi
}
sh_commands
.
falcon_push_metrics
(
metrics
,
tags
=
tags
,
endpoint
=
"mace_ops_benchmark"
)
def
parse_args
():
"""Parses command line arguments."""
...
...
@@ -57,22 +62,16 @@ def parse_args():
"--target_socs"
,
type
=
str
,
default
=
"all"
,
help
=
"SoCs(ro.board.platform) to build, comma seperated list or all/random"
)
help
=
"SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random"
)
parser
.
add_argument
(
"--target"
,
type
=
str
,
default
=
"//..."
,
help
=
"Bazel target to build"
)
"--target"
,
type
=
str
,
default
=
"//..."
,
help
=
"Bazel target to build"
)
parser
.
add_argument
(
"--run_target"
,
type
=
bool
,
default
=
False
,
help
=
"Whether to run the target"
)
parser
.
add_argument
(
"--args"
,
type
=
str
,
default
=
""
,
help
=
"Command args"
)
parser
.
add_argument
(
"--args"
,
type
=
str
,
default
=
""
,
help
=
"Command args"
)
parser
.
add_argument
(
"--stdout_processor"
,
type
=
str
,
...
...
@@ -80,6 +79,7 @@ def parse_args():
help
=
"Stdout processing function, default: stdout_processor"
)
return
parser
.
parse_known_args
()
def
main
(
unused_args
):
target_socs
=
None
if
FLAGS
.
target_socs
!=
"all"
and
FLAGS
.
target_socs
!=
"random"
:
...
...
@@ -101,17 +101,25 @@ def main(unused_args):
sh_commands
.
bazel_build
(
target
,
abi
=
target_abi
)
if
FLAGS
.
run_target
:
for
serialno
in
target_devices
:
if
target_abi
not
in
set
(
sh_commands
.
adb_supported_abis
(
serialno
)):
print
(
"Skip device %s which does not support ABI %s"
%
(
serialno
,
target_abi
))
if
target_abi
not
in
set
(
sh_commands
.
adb_supported_abis
(
serialno
)):
print
(
"Skip device %s which does not support ABI %s"
%
(
serialno
,
target_abi
))
continue
stdouts
=
sh_commands
.
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
stdouts
=
sh_commands
.
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
args
=
FLAGS
.
args
,
opencl_profiling
=
1
,
vlog_level
=
0
,
device_bin_path
=
"/data/local/tmp/mace"
,
out_of_range_check
=
1
)
device_properties
=
sh_commands
.
adb_getprop_by_serialno
(
serialno
)
globals
()[
FLAGS
.
stdout_processor
](
stdouts
,
device_properties
,
target_abi
)
device_properties
=
sh_commands
.
adb_getprop_by_serialno
(
serialno
)
globals
()[
FLAGS
.
stdout_processor
](
stdouts
,
device_properties
,
target_abi
)
if
__name__
==
"__main__"
:
FLAGS
,
unparsed
=
parse_args
()
...
...
tools/falcon_cli.py
浏览文件 @
58f2516e
#-*- coding:utf8 -*-
import
json
import
socket
import
itertools
import
json
,
socket
,
itertools
class
FalconCli
(
object
):
def
__init__
(
self
,
addr
,
debug
=
True
,
buf_size
=
1000
):
self
.
socket_
=
socket
.
create_connection
(
addr
)
self
.
stream
=
self
.
socket_
.
makefile
()
...
...
@@ -16,16 +16,19 @@ class FalconCli(object):
self
.
stream
.
close
()
@
classmethod
def
connect
(
cls
,
server
=
"transfer.falcon.miliao.srv"
,
port
=
8433
,
debug
=
True
,
buf_size
=
1000
):
def
connect
(
cls
,
server
=
"transfer.falcon.miliao.srv"
,
port
=
8433
,
debug
=
True
,
buf_size
=
1000
):
try
:
return
FalconCli
((
server
,
port
),
debug
,
buf_size
)
except
socket
.
error
,
exc
:
print
"error: connect to %s:%s error: %s"
%
(
server
,
port
,
exc
)
print
"error: connect to %s:%s error: %s"
%
(
server
,
port
,
exc
)
def
call
(
self
,
name
,
*
params
):
request
=
dict
(
id
=
next
(
self
.
id_counter
),
params
=
list
(
params
),
method
=
name
)
request
=
dict
(
id
=
next
(
self
.
id_counter
),
params
=
list
(
params
),
method
=
name
)
payload
=
json
.
dumps
(
request
).
encode
()
if
self
.
debug
:
print
"--> req:"
,
payload
...
...
@@ -49,7 +52,7 @@ class FalconCli(object):
resp
=
[]
while
True
:
buf
=
lines
[
s
:
s
+
self
.
buf_size
]
buf
=
lines
[
s
:
s
+
self
.
buf_size
]
s
=
s
+
self
.
buf_size
if
len
(
buf
)
==
0
:
break
...
...
@@ -57,4 +60,3 @@ class FalconCli(object):
resp
.
append
(
r
)
return
resp
tools/generate_data.py
浏览文件 @
58f2516e
...
...
@@ -11,13 +11,16 @@ import re
# --input_file input_file
#
def
generate_data
(
name
,
shape
):
np
.
random
.
seed
()
data
=
np
.
random
.
random
(
shape
)
*
2
-
1
input_file_name
=
FLAGS
.
input_file
+
"_"
+
re
.
sub
(
'[^0-9a-zA-Z]+'
,
'_'
,
name
)
input_file_name
=
FLAGS
.
input_file
+
"_"
+
re
.
sub
(
'[^0-9a-zA-Z]+'
,
'_'
,
name
)
print
'Generate input file: '
,
input_file_name
data
.
astype
(
np
.
float32
).
tofile
(
input_file_name
)
def
main
(
unused_args
):
input_names
=
[
name
for
name
in
FLAGS
.
input_node
.
split
(
','
)]
input_shapes
=
[
shape
for
shape
in
FLAGS
.
input_shape
.
split
(
':'
)]
...
...
@@ -27,29 +30,21 @@ def main(unused_args):
generate_data
(
input_names
[
i
],
shape
)
print
"Generate input file done."
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
parser
.
register
(
"type"
,
"bool"
,
lambda
v
:
v
.
lower
()
==
"true"
)
parser
.
add_argument
(
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
return
parser
.
parse_known_args
()
if
__name__
==
'__main__'
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/mace_tools.py
浏览文件 @
58f2516e
...
...
@@ -34,7 +34,8 @@ def run_command(command):
print
(
"Stderr msg:
\n
{}"
.
format
(
err
))
if
result
.
returncode
!=
0
:
raise
Exception
(
"Exit not 0 from bash with code: {}, command: {}"
.
format
(
raise
Exception
(
"Exit not 0 from bash with code: {}, command: {}"
.
format
(
result
.
returncode
,
command
))
...
...
@@ -63,10 +64,12 @@ def generate_version_code():
command
=
"bash tools/generate_version_code.sh"
run_command
(
command
)
def
generate_opencl_source_code
():
command
=
"bash tools/generate_opencl_code.sh source"
run_command
(
command
)
def
generate_opencl_binay_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
cl_bin_dirs
=
[]
for
d
in
model_output_dirs
:
...
...
@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
'binary'
,
target_soc
,
cl_bin_dirs_str
,
int
(
pull_or_not
))
run_command
(
command
)
def
generate_tuning_param_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
cl_bin_dirs
=
[]
for
d
in
model_output_dirs
:
...
...
@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
target_soc
,
cl_bin_dirs_str
,
int
(
pull_or_not
))
run_command
(
command
)
def
generate_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
generate_opencl_binay_code
(
target_soc
,
model_output_dirs
,
pull_or_not
)
generate_tuning_param_code
(
target_soc
,
model_output_dirs
,
pull_or_not
)
def
clear_env
(
target_soc
):
command
=
"bash tools/clear_env.sh {}"
.
format
(
target_soc
)
run_command
(
command
)
def
input_file_name
(
input_name
):
return
os
.
environ
[
'INPUT_FILE_NAME'
]
+
'_'
+
\
re
.
sub
(
'[^0-9a-zA-Z]+'
,
'_'
,
input_name
)
def
generate_random_input
(
target_soc
,
model_output_dir
,
input_names
,
input_files
):
def
generate_random_input
(
target_soc
,
model_output_dir
,
input_names
,
input_files
):
generate_data_or_not
=
True
command
=
"bash tools/validate_tools.sh {} {} {}"
.
format
(
target_soc
,
model_output_dir
,
int
(
generate_data_or_not
))
...
...
@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
else
:
input_name_list
.
append
(
input_names
)
if
len
(
input_file_list
)
!=
len
(
input_name_list
):
raise
Exception
(
'If input_files set, the input files should match the input names.'
)
raise
Exception
(
'If input_files set, the input files should '
'match the input names.'
)
for
i
in
range
(
len
(
input_file_list
)):
if
input_file_list
[
i
]
is
not
None
:
dst_input_file
=
model_output_dir
+
'/'
+
input_file_name
(
input_name_list
[
i
])
dst_input_file
=
model_output_dir
+
'/'
+
input_file_name
(
input_name_list
[
i
])
if
input_file_list
[
i
].
startswith
(
"http://"
)
or
\
input_file_list
[
i
].
startswith
(
"https://"
):
urllib
.
urlretrieve
(
input_file_list
[
i
],
dst_input_file
)
else
:
shutil
.
copy
(
input_file_list
[
i
],
dst_input_file
)
def
generate_model_code
():
command
=
"bash tools/generate_model_code.sh"
run_command
(
command
)
...
...
@@ -155,10 +166,17 @@ def tuning_run(model_name,
# TODO(yejianwu) refactoring the hackish code
stdout_buff
=
[]
process_output
=
sh_commands
.
make_output_processor
(
stdout_buff
)
p
=
sh
.
bash
(
"tools/tuning_run.sh"
,
target_soc
,
model_output_dir
,
running_round
,
int
(
tuning
),
restart_round
,
option_args
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
=
sh
.
bash
(
"tools/tuning_run.sh"
,
target_soc
,
model_output_dir
,
running_round
,
int
(
tuning
),
restart_round
,
option_args
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
.
wait
()
metrics
=
{}
for
line
in
stdout_buff
:
...
...
@@ -166,18 +184,23 @@ def tuning_run(model_name,
parts
=
line
.
split
()
if
len
(
parts
)
==
6
and
parts
[
0
].
startswith
(
"time"
):
metrics
[
"%s.create_net_ms"
%
model_name
]
=
str
(
float
(
parts
[
1
]))
metrics
[
"%s.mace_engine_ctor_ms"
%
model_name
]
=
str
(
float
(
parts
[
2
]))
metrics
[
"%s.mace_engine_ctor_ms"
%
model_name
]
=
str
(
float
(
parts
[
2
]))
metrics
[
"%s.init_ms"
%
model_name
]
=
str
(
float
(
parts
[
3
]))
metrics
[
"%s.warmup_ms"
%
model_name
]
=
str
(
float
(
parts
[
4
]))
if
float
(
parts
[
5
])
>
0
:
metrics
[
"%s.avg_latency_ms"
%
model_name
]
=
str
(
float
(
parts
[
5
]))
tags
=
{
"ro.board.platform"
:
target_soc
,
metrics
[
"%s.avg_latency_ms"
%
model_name
]
=
str
(
float
(
parts
[
5
]))
tags
=
{
"ro.board.platform"
:
target_soc
,
"abi"
:
target_abi
,
# "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
"round"
:
running_round
,
# TODO(yejianwu) change this to source/binary
"tuning"
:
tuning
}
sh_commands
.
falcon_push_metrics
(
metrics
,
endpoint
=
"mace_model_benchmark"
,
tags
=
tags
)
"tuning"
:
tuning
}
sh_commands
.
falcon_push_metrics
(
metrics
,
endpoint
=
"mace_model_benchmark"
,
tags
=
tags
)
def
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
=
''
):
command
=
"bash tools/benchmark.sh {} {}
\"
{}
\"
"
.
format
(
...
...
@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
def
run_model
(
model_name
,
target_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
running_round
,
restart_round
,
option_args
):
tuning_run
(
model_name
,
target_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
running_round
,
False
,
restart_round
,
option_args
)
model_output_dir
,
running_round
,
False
,
restart_round
,
option_args
)
def
generate_production_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
...
...
@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
build_production_code
()
model_output_dirs_str
=
","
.
join
(
model_output_dirs
)
command
=
"bash tools/merge_libs.sh {} {} {}"
.
format
(
target_soc
,
output_dir
,
model_output_dirs_str
)
command
=
"bash tools/merge_libs.sh {} {} {}"
.
format
(
target_soc
,
output_dir
,
model_output_dirs_str
)
run_command
(
command
)
...
...
@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
command
=
"bash tools/packaging_lib.sh {}"
.
format
(
output_dir
)
run_command
(
command
)
def
download_model_files
(
model_file_path
,
model_output_dir
,
weight_file_path
=
""
):
...
...
@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
if
weight_file_path
.
startswith
(
"http://"
)
or
\
weight_file_path
.
startswith
(
"https://"
):
os
.
environ
[
"WEIGHT_FILE_PATH"
]
=
model_output_dir
+
"/model.caffemodel"
urllib
.
urlretrieve
(
weight_file_path
,
os
.
environ
[
"WEIGHT_FILE_PATH"
])
os
.
environ
[
"WEIGHT_FILE_PATH"
]
=
model_output_dir
+
"/model.caffemodel"
urllib
.
urlretrieve
(
weight_file_path
,
os
.
environ
[
"WEIGHT_FILE_PATH"
])
def
md5sum
(
str
):
md5
=
hashlib
.
md5
()
...
...
@@ -306,7 +329,10 @@ def parse_args():
default
=
10
,
help
=
"The model throughput test running seconds."
)
parser
.
add_argument
(
"--restart_round"
,
type
=
int
,
default
=
1
,
help
=
"The model restart round."
)
"--restart_round"
,
type
=
int
,
default
=
1
,
help
=
"The model restart round."
)
parser
.
add_argument
(
"--tuning"
,
type
=
"bool"
,
default
=
"true"
,
help
=
"Tune opencl params."
)
parser
.
add_argument
(
...
...
@@ -321,14 +347,16 @@ def parse_args():
help
=
"SoCs to build, comma seperated list (getprop ro.board.platform)"
)
return
parser
.
parse_known_args
()
def
set_environment
(
configs
):
os
.
environ
[
"EMBED_MODEL_DATA"
]
=
str
(
configs
[
"embed_model_data"
])
os
.
environ
[
"VLOG_LEVEL"
]
=
str
(
configs
[
"vlog_level"
])
os
.
environ
[
"PROJECT_NAME"
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
FLAGS
.
config
))[
0
]
os
.
environ
[
"PROJECT_NAME"
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
FLAGS
.
config
))[
0
]
os
.
environ
[
'INPUT_FILE_NAME'
]
=
"model_input"
os
.
environ
[
'OUTPUT_FILE_NAME'
]
=
"model_out"
def
main
(
unused_args
):
configs
=
parse_model_configs
()
...
...
@@ -343,13 +371,16 @@ def main(unused_args):
if
not
os
.
path
.
exists
(
FLAGS
.
output_dir
):
os
.
makedirs
(
FLAGS
.
output_dir
)
elif
os
.
path
.
exists
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
"libmace"
)):
shutil
.
rmtree
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
os
.
makedirs
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
shutil
.
rmtree
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
os
.
makedirs
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
generate_version_code
()
generate_opencl_source_code
()
option_args
=
' '
.
join
([
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
option_args
=
' '
.
join
(
[
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
available_socs
=
sh_commands
.
adb_get_all_socs
()
target_socs
=
available_socs
...
...
@@ -362,10 +393,10 @@ def main(unused_args):
target_socs
=
target_socs
&
socs
missing_socs
=
socs
.
difference
(
target_socs
)
if
len
(
missing_socs
)
>
0
:
print
(
"Error: devices with SoCs are not connected %s"
%
missing_socs
)
print
(
"Error: devices with SoCs are not connected %s"
%
missing_socs
)
exit
(
1
)
for
target_soc
in
target_socs
:
for
target_abi
in
configs
[
"target_abis"
]:
global_runtime
=
get_global_runtime
(
configs
)
...
...
@@ -373,28 +404,27 @@ def main(unused_args):
os
.
environ
[
"TARGET_ABI"
]
=
target_abi
model_output_dirs
=
[]
for
model_name
in
configs
[
"models"
]:
print
'======================='
,
model_name
,
'====
==================='
print
'==================='
,
model_name
,
'
==================='
# Transfer params by environment
os
.
environ
[
"MODEL_TAG"
]
=
model_name
model_config
=
configs
[
"models"
][
model_name
]
input_file_list
=
model_config
.
get
(
"validation_inputs_data"
,
[])
input_file_list
=
model_config
.
get
(
"validation_inputs_data"
,
[])
for
key
in
model_config
:
if
key
in
[
'input_nodes'
,
'output_nodes'
]
and
isinstance
(
model_config
[
key
],
list
):
os
.
environ
[
key
.
upper
()]
=
","
.
join
(
model_config
[
key
])
elif
key
in
[
'input_shapes'
,
'output_shapes'
]
and
isinstance
(
model_config
[
key
],
list
):
elif
key
in
[
'input_shapes'
,
'output_shapes'
]
and
isinstance
(
model_config
[
key
],
list
):
os
.
environ
[
key
.
upper
()]
=
":"
.
join
(
model_config
[
key
])
else
:
os
.
environ
[
key
.
upper
()]
=
str
(
model_config
[
key
])
# Create model build directory
model_path_digest
=
md5sum
(
model_config
[
"model_file_path"
])
model_output_dir
=
"%s/%s/%s/%s/%s/%s/%s"
%
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
],
"build"
,
model_name
,
model_path_digest
,
target_soc
,
target_abi
)
model_output_dir
=
"%s/%s/%s/%s/%s/%s/%s"
%
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
],
"build"
,
model_name
,
model_path_digest
,
target_soc
,
target_abi
)
model_output_dirs
.
append
(
model_output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
...
...
@@ -404,22 +434,27 @@ def main(unused_args):
clear_env
(
target_soc
)
download_model_files
(
model_config
[
"model_file_path"
],
model_output_dir
,
model_config
.
get
(
"weight_file_path"
,
""
))
model_output_dir
,
model_config
.
get
(
"weight_file_path"
,
""
))
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
\
or
FLAGS
.
mode
==
"benchmark"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"run"
or
\
FLAGS
.
mode
==
"validate"
or
\
FLAGS
.
mode
==
"benchmark"
or
FLAGS
.
mode
==
"all"
:
generate_random_input
(
target_soc
,
model_output_dir
,
model_config
[
'input_nodes'
],
input_file_list
)
model_config
[
'input_nodes'
],
input_file_list
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
generate_model_code
()
build_mace_run_prod
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
tuning
)
target_soc
,
model_output_dir
,
FLAGS
.
tuning
)
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
run_model
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
option_args
)
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
\
FLAGS
.
mode
==
"all"
:
run_model
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
option_args
)
if
FLAGS
.
mode
==
"benchmark"
:
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
)
...
...
@@ -427,14 +462,18 @@ def main(unused_args):
if
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
validate_model
(
target_soc
,
model_output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"merge"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"merge"
or
\
FLAGS
.
mode
==
"all"
:
merge_libs_and_tuning_results
(
target_soc
,
FLAGS
.
output_dir
+
"/"
+
os
.
environ
[
"PROJECT_NAME"
],
target_soc
,
FLAGS
.
output_dir
+
"/"
+
os
.
environ
[
"PROJECT_NAME"
],
model_output_dirs
)
if
FLAGS
.
mode
==
"throughput_test"
:
merged_lib_file
=
FLAGS
.
output_dir
+
"/%s/%s/libmace_%s.%s.a"
%
\
(
os
.
environ
[
"PROJECT_NAME"
],
target_abi
,
os
.
environ
[
"PROJECT_NAME"
],
target_soc
)
merged_lib_file
=
FLAGS
.
output_dir
+
\
"/%s/%s/libmace_%s.%s.a"
%
\
(
os
.
environ
[
"PROJECT_NAME"
],
target_abi
,
os
.
environ
[
"PROJECT_NAME"
],
target_soc
)
generate_random_input
(
target_soc
,
FLAGS
.
output_dir
,
[],
[])
for
model_name
in
configs
[
"models"
]:
runtime
=
configs
[
"models"
][
model_name
][
"runtime"
]
...
...
@@ -449,4 +488,3 @@ def main(unused_args):
if
__name__
==
"__main__"
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/sh_commands.py
浏览文件 @
58f2516e
...
...
@@ -3,18 +3,22 @@ import re
import
time
import
falcon_cli
################################
# common
################################
def
strip_invalid_utf8
(
str
):
return
sh
.
iconv
(
str
,
"-c"
,
"-t"
,
"UTF-8"
)
def
make_output_processor
(
buff
):
def
process_output
(
line
):
print
(
line
.
strip
())
buff
.
append
(
line
)
return
process_output
################################
# adb commands
################################
...
...
@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
# Filter out last empty line
return
[
l
.
strip
()
for
l
in
stdout_str
.
split
(
'
\n
'
)
if
len
(
l
.
strip
())
>
0
]
def
adb_devices
(
target_socs
=
None
):
outputs
=
sh
.
grep
(
sh
.
adb
(
"devices"
),
"^[A-Za-z0-9]\+[[:space:]]\+device$"
)
raw_lists
=
sh
.
cut
(
outputs
,
"-f1"
)
device_ids
=
adb_split_stdout
(
raw_lists
)
if
target_socs
!=
None
:
if
target_socs
is
not
None
:
target_socs_set
=
set
(
target_socs
)
target_devices
=
[]
for
serialno
in
device_ids
:
...
...
@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
else
:
return
device_ids
def
adb_getprop_by_serialno
(
serialno
):
outputs
=
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"getprop"
)
raw_props
=
adb_split_stdout
(
outputs
)
...
...
@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
props
[
m
.
group
(
1
)]
=
m
.
group
(
2
)
return
props
def
adb_supported_abis
(
serialno
):
props
=
adb_getprop_by_serialno
(
serialno
)
abilist_str
=
props
[
"ro.product.cpu.abilist"
]
abis
=
[
abi
.
strip
()
for
abi
in
abilist_str
.
split
(
','
)]
return
abis
def
adb_get_all_socs
():
socs
=
[]
for
d
in
adb_devices
():
...
...
@@ -62,7 +70,10 @@ def adb_get_all_socs():
socs
.
append
(
props
[
"ro.board.platform"
])
return
set
(
socs
)
def
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
def
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
args
=
""
,
opencl_profiling
=
1
,
vlog_level
=
0
,
...
...
@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
host_bin_full_path
=
"%s/%s"
%
(
host_bin_path
,
bin_name
)
device_bin_full_path
=
"%s/%s"
%
(
device_bin_path
,
bin_name
)
props
=
adb_getprop_by_serialno
(
serialno
)
print
(
"====================================================================="
)
print
(
"====================================================================="
)
print
(
"Run on device: %s, %s, %s"
%
(
serialno
,
props
[
"ro.board.platform"
],
props
[
"ro.product.model"
]))
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"rm -rf %s"
%
device_bin_path
)
...
...
@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
print
(
"Push %s to %s"
%
(
host_bin_full_path
,
device_bin_full_path
))
sh
.
adb
(
"-s"
,
serialno
,
"push"
,
host_bin_full_path
,
device_bin_full_path
)
print
(
"Run %s"
%
device_bin_full_path
)
stdout_buff
=
[]
stdout_buff
=
[]
process_output
=
make_output_processor
(
stdout_buff
)
p
=
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s"
%
(
out_of_range_check
,
opencl_profiling
,
vlog_level
,
device_bin_full_path
,
args
),
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
=
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
"MACE_CPP_MIN_VLOG_LEVEL=%d %s %s"
%
(
out_of_range_check
,
opencl_profiling
,
vlog_level
,
device_bin_full_path
,
args
),
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
.
wait
()
return
""
.
join
(
stdout_buff
)
...
...
@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
################################
def
bazel_build
(
target
,
strip
=
"always"
,
abi
=
"armeabi-v7a"
):
print
(
"Build %s with ABI %s"
%
(
target
,
abi
))
stdout_buff
=
[]
stdout_buff
=
[]
process_output
=
make_output_processor
(
stdout_buff
)
p
=
sh
.
bazel
(
"build"
,
"-c"
,
"opt"
,
"--strip"
,
strip
,
p
=
sh
.
bazel
(
"build"
,
"-c"
,
"opt"
,
"--strip"
,
strip
,
"--verbose_failures"
,
target
,
"--crosstool_top=//external:android/crosstool"
,
...
...
@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
"--copt=-DMACE_DISABLE_NO_TUNING_WARNING"
,
"--copt=-Werror=return-type"
,
"--copt=-O3"
,
"--define"
,
"neon=true"
,
"--define"
,
"openmp=true"
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
"--define"
,
"neon=true"
,
"--define"
,
"openmp=true"
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
.
wait
()
return
""
.
join
(
stdout_buff
)
def
bazel_target_to_bin
(
target
):
# change //mace/a/b:c to bazel-bin/mace/a/b/c
prefix
,
bin_name
=
target
.
split
(
':'
)
...
...
@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
host_bin_path
=
"bazel-bin/%s"
%
prefix
return
host_bin_path
,
bin_name
################################
# mace commands
################################
# TODO this should be refactored
def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
    """Generate the encrypted OpenCL program source under ``codegen_path``.

    Creates ``<codegen_path>/opencl`` (``mkdir -p``) and then runs
    ``mace/python/tools/encrypt_opencl_codegen.py`` with the kernel directory
    ``./mace/kernels/opencl/cl/`` and the output file
    ``<codegen_path>/opencl/opencl_encrypt_program.cc``.

    Args:
        codegen_path: root directory for generated sources.
    """
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    # Single invocation: the script path is the first positional argument,
    # followed by its two command-line flags.
    sh.python(
        "mace/python/tools/encrypt_opencl_codegen.py",
        "--cl_kernel_dir=./mace/kernels/opencl/cl/",
        "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path)
def gen_mace_version(codegen_path="mace/codegen"):
    """Generate the version source file under ``codegen_path``.

    Ensures ``<codegen_path>/version`` exists (``mkdir -p``) and runs
    ``mace/tools/git/gen_version_source.sh`` with
    ``<codegen_path>/version/version.cc`` as its argument.
    """
    version_dir = "%s/version" % codegen_path
    version_source = "%s/version/version.cc" % codegen_path
    sh.mkdir("-p", version_dir)
    sh.bash("mace/tools/git/gen_version_source.sh", version_source)
def gen_compiled_opencl_source(codegen_path="mace/codegen"):
    """Generate the compiled OpenCL program source under ``codegen_path``.

    Creates ``<codegen_path>/opencl`` (``mkdir -p``) and then runs
    ``mace/python/tools/opencl_codegen.py`` writing to
    ``<codegen_path>/opencl/opencl_compiled_program.cc``.

    Args:
        codegen_path: root directory for generated sources.
    """
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    # Single invocation: script path first, then its output flag.
    sh.python(
        "mace/python/tools/opencl_codegen.py",
        "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path)
################################
# falcon
################################
...
...
@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
tags
=
tags
+
",%s=%s"
%
(
k
,
v
)
return
tags
def
falcon_push_metrics
(
metrics
,
endpoint
=
"mace_dev"
,
tags
=
{}):
cli
=
falcon_cli
.
FalconCli
.
connect
(
server
=
"transfer.falcon.miliao.srv"
,
port
=
8433
,
debug
=
False
)
cli
=
falcon_cli
.
FalconCli
.
connect
(
server
=
"transfer.falcon.miliao.srv"
,
port
=
8433
,
debug
=
False
)
ts
=
int
(
time
.
time
())
falcon_metrics
=
[{
"endpoint"
:
endpoint
,
...
...
@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
"counterType"
:
"GAUGE"
}
for
key
,
value
in
metrics
.
iteritems
()]
cli
.
update
(
falcon_metrics
)
tools/validate.py
浏览文件 @
58f2516e
...
...
@@ -20,29 +20,33 @@ from scipy import stats
# --input_shape 1,64,64,3 \
# --output_shape 1,64,64,2
def load_data(file):
    """Read ``file`` as a flat array of float32 values.

    Returns the result of ``np.fromfile`` with ``dtype=np.float32`` when
    ``file`` is an existing regular file; otherwise returns ``np.empty([0])``
    so callers can detect the missing data via ``size == 0``.
    """
    # Guard clause: anything that is not a regular file yields an empty array.
    if not os.path.isfile(file):
        return np.empty([0])
    return np.fromfile(file=file, dtype=np.float32)
def format_output_name(name):
    """Replace every run of non-alphanumeric characters in ``name`` with ``_``."""
    non_alnum_run = re.compile('[^0-9a-zA-Z]+')
    return non_alnum_run.sub('_', name)
def
compare_output
(
output_name
,
mace_out_value
,
out_value
):
if
mace_out_value
.
size
!=
0
:
out_value
=
out_value
.
reshape
(
-
1
)
mace_out_value
=
mace_out_value
.
reshape
(
-
1
)
assert
len
(
out_value
)
==
len
(
mace_out_value
)
similarity
=
(
1
-
spatial
.
distance
.
cosine
(
out_value
,
mace_out_value
))
print
output_name
,
'MACE VS'
,
FLAGS
.
platform
.
upper
(),
'similarity: '
,
similarity
print
output_name
,
'MACE VS'
,
FLAGS
.
platform
.
upper
(
),
'similarity: '
,
similarity
if
(
FLAGS
.
mace_runtime
==
"cpu"
and
similarity
>
0.999
)
or
\
(
FLAGS
.
mace_runtime
==
"neon"
and
similarity
>
0.999
)
or
\
(
FLAGS
.
mace_runtime
==
"gpu"
and
similarity
>
0.995
)
or
\
(
FLAGS
.
mace_runtime
==
"dsp"
and
similarity
>
0.930
):
print
'=======================Similarity Test Passed====
=================='
print
'===================Similarity Test Passed
=================='
else
:
print
'=======================Similarity Test Failed====
=================='
print
'===================Similarity Test Failed
=================='
sys
.
exit
(
-
1
)
else
:
print
'=======================Skip empty node==================='
...
...
@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
tf
.
import_graph_def
(
input_graph_def
,
name
=
""
)
input_dict
=
{}
for
i
in
range
(
len
(
input_names
)):
input_value
=
load_data
(
FLAGS
.
input_file
+
"_"
+
input_names
[
i
])
input_value
=
load_data
(
FLAGS
.
input_file
+
"_"
+
input_names
[
i
])
input_value
=
input_value
.
reshape
(
input_shapes
[
i
])
input_node
=
graph
.
get_tensor_by_name
(
input_names
[
i
]
+
':0'
)
input_node
=
graph
.
get_tensor_by_name
(
input_names
[
i
]
+
':0'
)
input_dict
[
input_node
]
=
input_value
output_nodes
=
[]
for
name
in
output_names
:
output_nodes
.
extend
([
graph
.
get_tensor_by_name
(
name
+
':0'
)])
output_nodes
.
extend
(
[
graph
.
get_tensor_by_name
(
name
+
':0'
)])
output_values
=
session
.
run
(
output_nodes
,
feed_dict
=
input_dict
)
for
i
in
range
(
len
(
output_names
)):
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
format_output_name
(
output_names
[
i
])
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
\
format_output_name
(
output_names
[
i
])
mace_out_value
=
load_data
(
output_file_name
)
compare_output
(
output_names
[
i
],
mace_out_value
,
output_values
[
i
])
compare_output
(
output_names
[
i
],
mace_out_value
,
output_values
[
i
])
def
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
):
def
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
):
os
.
environ
[
'GLOG_minloglevel'
]
=
'1'
# suppress Caffe verbose prints
import
caffe
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
...
...
@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for
i
in
range
(
len
(
input_names
)):
input_value
=
load_data
(
FLAGS
.
input_file
+
"_"
+
input_names
[
i
])
input_value
=
input_value
.
reshape
(
input_shapes
[
i
]).
transpose
((
0
,
3
,
1
,
2
))
input_value
=
input_value
.
reshape
(
input_shapes
[
i
]).
transpose
((
0
,
3
,
1
,
2
))
input_blob_name
=
input_names
[
i
]
try
:
if
input_names
[
i
]
in
net
.
top_names
:
...
...
@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for
i
in
range
(
len
(
output_names
)):
value
=
net
.
blobs
[
net
.
top_names
[
output_names
[
i
]][
0
]].
data
out_shape
=
output_shapes
[
i
]
out_shape
[
1
],
out_shape
[
2
],
out_shape
[
3
]
=
out_shape
[
3
],
out_shape
[
1
],
out_shape
[
2
]
out_shape
[
1
],
out_shape
[
2
],
out_shape
[
3
]
=
out_shape
[
3
],
out_shape
[
1
],
out_shape
[
2
]
value
=
value
.
reshape
(
out_shape
).
transpose
((
0
,
2
,
3
,
1
))
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
format_output_name
(
output_names
[
i
])
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
format_output_name
(
output_names
[
i
])
mace_out_value
=
load_data
(
output_file_name
)
compare_output
(
output_names
[
i
],
mace_out_value
,
value
)
def
main
(
unused_args
):
input_names
=
[
name
for
name
in
FLAGS
.
input_node
.
split
(
','
)]
input_shape_strs
=
[
shape
for
shape
in
FLAGS
.
input_shape
.
split
(
':'
)]
input_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
input_shape_strs
]
input_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
input_shape_strs
]
output_names
=
[
name
for
name
in
FLAGS
.
output_node
.
split
(
','
)]
assert
len
(
input_names
)
==
len
(
input_shapes
)
...
...
@@ -127,18 +143,18 @@ def main(unused_args):
validate_tf_model
(
input_names
,
input_shapes
,
output_names
)
elif
FLAGS
.
platform
==
'caffe'
:
output_shape_strs
=
[
shape
for
shape
in
FLAGS
.
output_shape
.
split
(
':'
)]
output_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
output_shape_strs
]
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
)
output_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
output_shape_strs
]
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
)
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
parser
.
register
(
"type"
,
"bool"
,
lambda
v
:
v
.
lower
()
==
"true"
)
parser
.
add_argument
(
"--platform"
,
type
=
str
,
default
=
""
,
help
=
"Tensorflow or Caffe."
)
"--platform"
,
type
=
str
,
default
=
""
,
help
=
"Tensorflow or Caffe."
)
parser
.
add_argument
(
"--model_file"
,
type
=
str
,
...
...
@@ -150,40 +166,22 @@ def parse_args():
default
=
""
,
help
=
"caffe model file to load."
)
parser
.
add_argument
(
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
parser
.
add_argument
(
"--mace_out_file"
,
type
=
str
,
default
=
""
,
help
=
"mace output file to load."
)
parser
.
add_argument
(
"--mace_runtime"
,
type
=
str
,
default
=
"gpu"
,
help
=
"mace runtime device."
)
"--mace_runtime"
,
type
=
str
,
default
=
"gpu"
,
help
=
"mace runtime device."
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
parser
.
add_argument
(
"--output_shape"
,
type
=
str
,
default
=
"1,64,64,2"
,
help
=
"output shape."
)
"--output_shape"
,
type
=
str
,
default
=
"1,64,64,2"
,
help
=
"output shape."
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
parser
.
add_argument
(
"--output_node"
,
type
=
str
,
default
=
"output_node"
,
help
=
"output node"
)
"--output_node"
,
type
=
str
,
default
=
"output_node"
,
help
=
"output node"
)
return
parser
.
parse_known_args
()
...
...
@@ -191,4 +189,3 @@ def parse_args():
if
__name__
==
'__main__'
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/wino_conv.py
浏览文件 @
58f2516e
...
...
@@ -11,12 +11,8 @@ G_T = {}
# f(2, 3)
# Transform matrices stored under key 4; each X / X_T pair holds a matrix
# and its transpose.
A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
A[4] = np.transpose(A_T[4])
B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
                   [0, 1, 0, -1]]).astype(np.float32)
B[4] = np.transpose(B_T[4])
G
[
4
]
=
np
.
array
([
[
1
,
0
,
0
],
...
...
@@ -44,45 +40,45 @@ B_T[6] = np.array([
]).
astype
(
np
.
float32
)
B[6] = np.transpose(B_T[6])
# Filter transform stored under key 6 (6 x 3) and its transpose.
G[6] = np.array([
    [1 / 4.0, 0, 0],
    [-1 / 6.0, -1 / 6.0, -1 / 6.0],
    [-1 / 6.0, 1 / 6.0, -1 / 6.0],
    [1 / 24.0, 1 / 12.0, 1 / 6.0],
    [1 / 24.0, -1 / 12.0, 1 / 6.0],
    [0, 0, 1],
]).astype(np.float32)
G_T[6] = np.transpose(G[6])

# f(6, 3)
# Matrices stored under key 8: A_T is 6 x 8, B_T is 8 x 8, G is 8 x 3.
# Fractions use float literals (e.g. ``1 / 2.``) so the division is
# floating-point under Python 2 as well.
A_T[8] = np.array([
    [1, 1, 1, 1, 1, 1, 1, 0],
    [0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0],
    [0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0],
    [0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0],
    [0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0],
    [0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1],
]).astype(np.float32)
A[8] = np.transpose(A_T[8])
B_T[8] = np.array([
    [1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0],
    [0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0],
    [0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0],
    [0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0],
    [0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0],
    [0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0],
    [0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0],
    [0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1],
]).astype(np.float32)
B[8] = np.transpose(B_T[8])
G[8] = np.array([
    [1, 0, 0],
    [-2 / 9., -2 / 9., -2 / 9.],
    [-2 / 9., 2 / 9., -2 / 9.],
    [1 / 90., 1 / 45., 2 / 45.],
    [1 / 90., -1 / 45., 2 / 45.],
    [32 / 45., 16 / 45., 8 / 45.],
    [32 / 45., -16 / 45., 8 / 45.],
    [0, 0, 1],
]).astype(np.float32)
G_T[8] = np.transpose(G[8])
...
...
@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
for
c
in
range
(
C
):
u
=
np
.
dot
(
np
.
dot
(
G
[
alpha
],
filter
[
k
,
c
,
:,
:]),
G_T
[
alpha
])
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
)
:
for
j
in
range
(
alpha
)
:
U
[(
i
*
alpha
+
j
)
*
K
+
k
,
c
]
=
u
[
i
,
j
]
print
'filter out: '
,
U
.
shape
...
...
@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
w_idx
=
t
%
rounded_w
h_start
=
h_idx
*
m
w_start
=
w_idx
*
m
h_end
=
min
(
h_start
+
alpha
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
alpha
,
input_shape
[
3
])
h_end
=
min
(
h_start
+
alpha
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
alpha
,
input_shape
[
3
])
d
=
np
.
zeros
((
alpha
,
alpha
))
d
[
0
:
h_end
-
h_start
,
0
:
w_end
-
w_start
]
=
\
input
[
n
,
c
,
h_start
:
h_end
,
w_start
:
w_end
]
v
=
np
.
dot
(
np
.
dot
(
B_T
[
alpha
],
d
),
B
[
alpha
])
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
V
[(
i
*
alpha
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
V
[(
i
*
alpha
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
tmp
=
V
.
reshape
(
alpha_square
,
C
,
P
,
1
)
print
'input out: '
,
tmp
.
shape
tmp
.
astype
(
np
.
float32
).
tofile
(
"C"
)
M
=
np
.
zeros
((
alpha_square
*
K
,
P
))
for
i
in
range
(
alpha_square
):
u
=
U
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
v
=
V
[
i
*
C
:
(
i
+
1
)
*
C
,
:]
M
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
=
np
.
dot
(
u
,
v
)
u
=
U
[
i
*
K
:(
i
+
1
)
*
K
,
:]
v
=
V
[
i
*
C
:(
i
+
1
)
*
C
,
:]
M
[
i
*
K
:(
i
+
1
)
*
K
,
:]
=
np
.
dot
(
u
,
v
)
print
'M shape: '
,
M
.
shape
M
.
astype
(
np
.
float32
).
tofile
(
"gemm"
)
...
...
@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
tm
=
np
.
zeros
((
alpha
,
alpha
))
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
tm
[
i
][
j
]
=
M
[(
i
*
alpha
+
j
)
*
K
+
k
,
b
]
tm
[
i
][
j
]
=
M
[(
i
*
alpha
+
j
)
*
K
+
k
,
b
]
y
=
np
.
dot
(
np
.
dot
(
A_T
[
alpha
],
tm
),
A
[
alpha
])
for
i
in
range
(
m
):
for
j
in
range
(
m
):
...
...
@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
return
res
def
tf_conv
(
input
,
filter
):
conv_op
=
tf
.
nn
.
conv2d
(
input
,
filter
,
[
1
,
1
,
1
,
1
],
'VALID'
)
with
tf
.
Session
()
as
sess
:
...
...
@@ -206,4 +203,3 @@ def main():
if
__name__
==
'__main__'
:
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录