慢慢CG / Mace — forked from Xiaomi / Mace (in sync with the upstream project)
Commit 58f2516e
Authored Apr 10, 2018 by 叶剑武

Merge branch 'pycodestyle' into 'master'
Enable python style check. See merge request !361

Parents: e54825c5, 6da30d22

Showing 22 changed files with 4594 additions and 4287 deletions.
.gitlab-ci.yml                                 +7     -1
docker/Dockerfile                              +2     -1
mace/python/tools/binary_codegen.py            +62    -61
mace/python/tools/caffe_converter_lib.py       +1098  -1024
mace/python/tools/convert_util.py              +0     -1
mace/python/tools/converter.py                 +149   -159
mace/python/tools/dsp_ops.py                   +60    -62
mace/python/tools/encrypt_opencl_codegen.py    +62    -58
mace/python/tools/graph_util.py                +7     -2
mace/python/tools/memory_optimizer.py          +123   -112
mace/python/tools/opencl_codegen.py            +77    -74
mace/python/tools/source_converter_lib.py      +176   -162
mace/python/tools/tf_converter_lib.py          +1162  -1125
mace/python/tools/tf_dsp_converter_lib.py      +472   -403
mace/python/tools/tf_ops_stats.py              +162   -136
tools/bazel_adb_run.py                         +99    -91
tools/falcon_cli.py                            +12    -10
tools/generate_data.py                         +30    -35
tools/mace_tools.py                            +378   -340
tools/sh_commands.py                           +152   -119
tools/validate.py                              +152   -155
tools/wino_conv.py                             +152   -156
.gitlab-ci.yml  (+7 -1)

 stages:
   - cpplint
+  - pycodestyle
   - ops_test
   - ops_benchmark
...
@@ -7,7 +8,12 @@ cpplint:
   stage: cpplint
   script:
     - curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
+    - python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
+
+pycodestyle:
+  stage: pycodestyle
+  script:
+    - pycodestyle $(find -name "*.py")

 ops_test:
   stage: ops_test
...
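The new CI stage simply runs the pycodestyle command over every .py file in the tree. For checking the same thing locally before pushing, a minimal sketch using pycodestyle's Python API is below; the file-collection and exit handling are illustrative, not part of this commit, and the glob pattern needs Python 3.5+ even though the repository code at the time targets Python 2.

    # Local reproduction of the new pycodestyle CI stage (illustrative sketch).
    # Assumes `pip install pycodestyle` and that it is run from the repo root.
    import glob
    import pycodestyle

    # Collect the same files the CI job targets: every *.py in the tree.
    py_files = glob.glob('**/*.py', recursive=True)

    style = pycodestyle.StyleGuide()       # default settings, like the CI job
    report = style.check_files(py_files)   # prints each violation to stdout

    if report.total_errors:
        raise SystemExit('%d pycodestyle violation(s) found' % report.total_errors)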
docker/Dockerfile  (+2 -1)

...
@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
     scipy \
     jinja2 \
     pyyaml \
-    sh
+    sh \
+    pycodestyle

 # Download tensorflow tools
 RUN wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph && \
...
mace/python/tools/binary_codegen.py  (+62 -61)

The hunks in this file (and in the remaining Python files below) are pycodestyle fixes: long statements are wrapped to the 80-column limit and blank lines are normalized; the logic is unchanged. Reconstructed, the code touched by @@ -27,28 +27,30 @@ reads:

 def generate_cpp_source():
...
     print "Generate binary from", binary_path

     idx = 0
     size, = struct.unpack("Q", binary_array[idx:idx + 8])
     idx += 8
     for _ in xrange(size):
         key_size, = struct.unpack("i", binary_array[idx:idx + 4])
         idx += 4
         key, = struct.unpack(str(key_size) + "s",
                              binary_array[idx:idx + key_size])
         idx += key_size
         params_size, = struct.unpack("i", binary_array[idx:idx + 4])
         idx += 4
         data_map[key] = []
         count = params_size / 4
         params = struct.unpack(str(count) + "i",
                                binary_array[idx:idx + params_size])
         for i in params:
             data_map[key].append(i)
         idx += params_size

     env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
     return env.get_template('str2vec_maps.cc.jinja2').render(
         maps=data_map,
         data_type='unsigned int',
         variable_name=FLAGS.variable_name
     )


 def main(unused_args):
     cpp_binary_source = generate_cpp_source()
...

The @@ -58,14 +60,12 @@ and @@ -75,7 +75,8 @@ hunks make the same kind of adjustments around main(), parse_args() and the parser.add_argument() calls for --binary_dirs, --binary_file_name, --output_path and --variable_name.
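The struct.unpack calls above imply a simple size-prefixed layout for the binary blob: a uint64 entry count, then per entry an int32 key length, the key bytes, an int32 payload size in bytes, and that many bytes of int32 parameters. The round-trip sketch below illustrates that inferred layout only; it is not an official format specification, it uses Python 3 bytes handling, and the helper names are made up.

    # Sketch of the layout parsed by generate_cpp_source(), inferred from the
    # unpack calls above (illustrative; not an official format definition).
    import struct

    def pack_entries(entries):
        """entries: dict mapping byte-string keys to lists of 32-bit ints."""
        blob = struct.pack("Q", len(entries))             # uint64: entry count
        for key, params in entries.items():
            blob += struct.pack("i", len(key)) + key       # int32 key length + key bytes
            blob += struct.pack("i", 4 * len(params))      # int32 payload size in bytes
            blob += struct.pack("%di" % len(params), *params)
        return blob

    def unpack_entries(blob):
        idx = 0
        count, = struct.unpack_from("Q", blob, idx)
        idx += 8
        data_map = {}
        for _ in range(count):
            key_size, = struct.unpack_from("i", blob, idx)
            idx += 4
            key = blob[idx:idx + key_size]
            idx += key_size
            params_size, = struct.unpack_from("i", blob, idx)
            idx += 4
            data_map[key] = list(
                struct.unpack_from("%di" % (params_size // 4), blob, idx))
            idx += params_size
        return data_map

    assert unpack_entries(pack_entries({b"conv1": [1, 2, 3]})) == {b"conv1": [1, 2, 3]}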
mace/python/tools/caffe_converter_lib.py  (+1098 -1024)

Every hunk in this file is a pycodestyle reflow: the module-level lookup tables (pooling_type_mode, buffer_type_map, data_type_map, activation_name_map), long conditional chains, raise Exception(...) messages and nested call expressions in the CaffeConverter methods (get_single_parent, add_stride_pad_kernel_arg, convert_winograd_conv, remove_unused_layers, CommonConvert, convert_to_mace_pb and others) are wrapped or lightly restructured to fit the 80-column limit, with explicit line continuations. No behavioral changes. Reconstructed, the most representative post-change fragments read:

@@ -5,32 +5,26 @@ import google.protobuf.text_format
 import numpy as np
 import math

 pooling_type_mode = {'AvgPool': 1, 'MaxPool': 2}

 buffer_type_map = {
     'CONV2D_FILTER': 0,
     'IN_OUT_CHANNEL': 1,
     'ARGUMENT': 2,
     'IN_OUT_HEIGHT': 3,
     'IN_OUT_WIDTH': 4,
     'WINOGRAD_FILTER': 5,
     'DW_CONV2D_FILTER': 6,
     'WEIGHT_HEIGHT': 7,
     'WEIGHT_WIDTH': 8,
 }

 data_type_map = {'DT_HALF': mace_pb2.DT_HALF, 'DT_FLOAT': mace_pb2.DT_FLOAT}

 activation_name_map = {
     'ReLU': 'RELU',
     'Sigmoid': 'SIGMOID',
     'TanH': 'TANH',
 }

 MACE_INPUT_NODE_NAME = "mace_input_node"
...
 OPENCL_IMAGE_MAX_SIZE = 16384
...
@@ -54,37 +49,52 @@ class Operator(object):
 class Shapes(object):
     @staticmethod
     def conv_pool_shape(input_shape, filter_shape, paddings, strides,
                         dilations, round_func, input_format='NHWC'):
         output_shape = np.zeros_like(input_shape)
         output_shape[0] = input_shape[0]
         if input_format == 'NHWC':
             # input format: NHWC, filter format: HWOI
             output_shape[1] = int(
                 round_func((input_shape[1] + paddings[0] - filter_shape[0] -
                             (filter_shape[0] - 1) *
                             (dilations[0] - 1)) / float(strides[0]))) + 1
             output_shape[2] = int(
                 round_func((input_shape[2] + paddings[1] - filter_shape[1] -
                             (filter_shape[1] - 1) *
                             (dilations[1] - 1)) / float(strides[1]))) + 1
             output_shape[3] = filter_shape[2]
         elif input_format == 'NCHW':
             # input format: NCHW, filter format: OIHW
             output_shape[1] = filter_shape[0]
             output_shape[2] = int(
                 round_func((input_shape[2] + paddings[0] - filter_shape[2] -
                             (filter_shape[2] - 1) *
                             (dilations[0] - 1)) / float(strides[0]))) + 1
             output_shape[3] = int(
                 round_func((input_shape[3] + paddings[1] - filter_shape[3] -
                             (filter_shape[3] - 1) *
                             (dilations[1] - 1)) / float(strides[1]))) + 1
         else:
             raise Exception("format %s is not supported" % input_format)

The remaining hunks (@@ -107,12 +117,19 @@ through @@ -1056,4 +1131,3 @@) apply the same wrapping to slice_shape, the buffer-to-image and image-to-buffer helpers, the winograd checks against OPENCL_IMAGE_MAX_SIZE, the fully-connected and pooling converters, the slice/concat converters, and the convert_to_mace_pb entry point.
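The conv_pool_shape arithmetic above is the standard dilated-convolution output-size formula: the kernel is first dilated to kernel + (kernel - 1) * (dilation - 1), then the usual floor/ceil rule is applied with the total (two-sided) padding. A small standalone check of one spatial axis is below; the function name and sample numbers are illustrative, not taken from the diff.

    import math

    def conv_output_dim(in_dim, kernel, pad, stride, dilation, round_func=math.floor):
        """Output size along one spatial axis, matching conv_pool_shape above.
        `pad` is the total padding added to the axis (both sides combined)."""
        effective_kernel = kernel + (kernel - 1) * (dilation - 1)
        return int(round_func((in_dim + pad - effective_kernel) / float(stride))) + 1

    # Example: 224-wide input, 3x3 kernel, total padding 2, stride 2, no dilation.
    print(conv_output_dim(224, 3, 2, 2, 1))  # -> 112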
mace/python/tools/convert_util.py  (+0 -1)

The single deletion removes the blank line at the end of the file (pycodestyle W391); the visible context is unchanged:

...
@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
     if not mace_dtype:
         raise Exception("Not supported tensorflow dtype: " + tf_dtype)
     return mace_dtype
mace/python/tools/converter.py
浏览文件 @
58f2516e
...
@@ -4,10 +4,14 @@ import hashlib
...
@@ -4,10 +4,14 @@ import hashlib
import
os.path
import
os.path
from
mace.python.tools
import
source_converter_lib
from
mace.python.tools
import
source_converter_lib
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
# --runtime dsp \
# --input_dim input_node,1,28,28,3
FLAGS
=
None
FLAGS
=
None
def
file_checksum
(
fname
):
def
file_checksum
(
fname
):
hash_func
=
hashlib
.
sha256
()
hash_func
=
hashlib
.
sha256
()
with
open
(
fname
,
"rb"
)
as
f
:
with
open
(
fname
,
"rb"
)
as
f
:
...
@@ -15,6 +19,7 @@ def file_checksum(fname):
...
@@ -15,6 +19,7 @@ def file_checksum(fname):
hash_func
.
update
(
chunk
)
hash_func
.
update
(
chunk
)
return
hash_func
.
hexdigest
()
return
hash_func
.
hexdigest
()
def
main
(
unused_args
):
def
main
(
unused_args
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
...
@@ -22,17 +27,21 @@ def main(unused_args):
...
@@ -22,17 +27,21 @@ def main(unused_args):
model_checksum
=
file_checksum
(
FLAGS
.
model_file
)
model_checksum
=
file_checksum
(
FLAGS
.
model_file
)
if
FLAGS
.
model_checksum
!=
""
and
FLAGS
.
model_checksum
!=
model_checksum
:
if
FLAGS
.
model_checksum
!=
""
and
FLAGS
.
model_checksum
!=
model_checksum
:
print
(
"Model checksum mismatch: %s != %s"
%
(
model_checksum
,
FLAGS
.
model_checksum
))
print
(
"Model checksum mismatch: %s != %s"
%
(
model_checksum
,
FLAGS
.
model_checksum
))
sys
.
exit
(
-
1
)
sys
.
exit
(
-
1
)
if
FLAGS
.
platform
==
'caffe'
:
if
FLAGS
.
platform
==
'caffe'
:
if
not
os
.
path
.
isfile
(
FLAGS
.
weight_file
):
if
not
os
.
path
.
isfile
(
FLAGS
.
weight_file
):
print
(
"Input weight file '"
+
FLAGS
.
weight_file
+
"' does not exist!"
)
print
(
"Input weight file '"
+
FLAGS
.
weight_file
+
"' does not exist!"
)
sys
.
exit
(
-
1
)
sys
.
exit
(
-
1
)
weight_checksum
=
file_checksum
(
FLAGS
.
weight_file
)
weight_checksum
=
file_checksum
(
FLAGS
.
weight_file
)
if
FLAGS
.
weight_checksum
!=
""
and
FLAGS
.
weight_checksum
!=
weight_checksum
:
if
FLAGS
.
weight_checksum
!=
""
and
\
print
(
"Weight checksum mismatch: %s != %s"
%
(
weight_checksum
,
FLAGS
.
weight_checksum
))
FLAGS
.
weight_checksum
!=
weight_checksum
:
print
(
"Weight checksum mismatch: %s != %s"
%
(
weight_checksum
,
FLAGS
.
weight_checksum
))
sys
.
exit
(
-
1
)
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
==
'dsp'
:
if
FLAGS
.
runtime
==
'dsp'
:
...
@@ -41,22 +50,27 @@ def main(unused_args):
...
@@ -41,22 +50,27 @@ def main(unused_args):
from
mace.python.tools
import
caffe_converter_lib
from
mace.python.tools
import
caffe_converter_lib
output_graph_def
=
caffe_converter_lib
.
convert_to_mace_pb
(
output_graph_def
=
caffe_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
elif
FLAGS
.
platform
==
'tensorflow'
:
elif
FLAGS
.
platform
==
'tensorflow'
:
if
FLAGS
.
runtime
==
'dsp'
:
if
FLAGS
.
runtime
==
'dsp'
:
from
mace.python.tools
import
tf_dsp_converter_lib
from
mace.python.tools
import
tf_dsp_converter_lib
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
else
:
else
:
from
mace.python.tools
import
tf_converter_lib
from
mace.python.tools
import
tf_converter_lib
output_graph_def
=
tf_converter_lib
.
convert_to_mace_pb
(
output_graph_def
=
tf_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
if
FLAGS
.
output_type
==
'source'
:
if
FLAGS
.
output_type
==
'source'
:
source_converter_lib
.
convert_to_source
(
output_graph_def
,
model_checksum
,
FLAGS
.
template
,
FLAGS
.
obfuscate
,
source_converter_lib
.
convert_to_source
(
FLAGS
.
model_tag
,
FLAGS
.
output
,
FLAGS
.
runtime
,
FLAGS
.
                output_graph_def, model_checksum, FLAGS.template,
                FLAGS.obfuscate, FLAGS.model_tag, FLAGS.output,
                FLAGS.runtime, FLAGS.embed_model_data)
        else:
            with open(FLAGS.output, "wb") as f:
                f.write(output_graph_def.SerializeToString())
@@ -65,6 +79,7 @@ def main(unused_args):
            f.write(str(output_graph_def))
    print("Model conversion is completed.")


def str2bool(v):
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
@@ -73,6 +88,7 @@ def str2bool(v):
    else:
        raise argparse.ArgumentTypeError('Boolean value expected.')


def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
@@ -81,12 +97,10 @@ def parse_args():
        "--model_file",
        type=str,
        default="",
-        help="TensorFlow \'GraphDef\' file to load, Caffe prototxt file to load.")
+        help="TensorFlow \'GraphDef\' file to load, "
+        "Caffe prototxt file to load.")
    parser.add_argument(
        "--weight_file", type=str, default="", help="Caffe data file to load.")
    parser.add_argument(
        "--model_checksum",
        type=str,
@@ -103,35 +117,23 @@ def parse_args():
        default="",
        help="File to save the output graph to.")
    parser.add_argument(
        "--runtime", type=str, default="cpu", help="Runtime: cpu/gpu/dsp")
    parser.add_argument(
        "--input_node",
        type=str,
        default="input_node",
        help="e.g., input_node")
    parser.add_argument(
        "--output_node", type=str, default="softmax", help="e.g., softmax")
    parser.add_argument(
        "--data_type",
        type=str,
        default='DT_FLOAT',
        help="e.g., DT_HALF/DT_FLOAT")
    parser.add_argument(
        "--output_type", type=str, default="pb", help="output type: source/pb")
    parser.add_argument(
        "--template", type=str, default="", help="template path")
    parser.add_argument(
        "--obfuscate",
        type=str2bool,
@@ -152,25 +154,13 @@ def parse_args():
        default=False,
        help="open winograd convolution or not")
    parser.add_argument(
        "--dsp_mode", type=int, default=0, help="dsp run mode, defalut=0")
    parser.add_argument(
        "--input_shape", type=str, default="", help="input shape.")
    parser.add_argument(
        "--platform", type=str, default="tensorflow", help="tensorflow/caffe")
    parser.add_argument(
        "--embed_model_data", type=str2bool, default=True, help="input shape.")
    return parser.parse_known_args()
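Note: the str2bool helper above is wired into parse_args as the type= converter for boolean flags such as --obfuscate and --embed_model_data. A minimal standalone sketch of the same pattern; the falsy branch of str2bool is not visible in this hunk and is assumed here for a self-contained example:

import argparse


def str2bool(v):
    # Truthy spellings accepted by the converter; the falsy list is assumed.
    if v.lower() in ('yes', 'true', 't', 'y', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected.')


parser = argparse.ArgumentParser()
parser.add_argument("--obfuscate", type=str2bool, default=False)
print(parser.parse_args(["--obfuscate", "yes"]).obfuscate)  # prints: True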
mace/python/tools/dsp_ops.py  View file @ 58f2516e
class DspOps(object):
    def __init__(self):
        self.dsp_ops = {
@@ -18,7 +17,7 @@ class DspOps(object):
            'QuantizedAvgPool': 'QuantizedAvgPool_8',
            'QuantizedConcat': 'QuantizedConcat_8',
            'QuantizedBiasAdd': 'QuantizedBiasAdd_8p8to32',
            'QuantizedResizeBilinear': 'QuantizedResizeBilinear_8',
            'QuantizedSpaceToBatchND': 'QuantizedSpaceToBatchND_8',
            'QuantizedBatchToSpaceND': 'QuantizedBatchToSpaceND_8',
            'QuantizedSoftmax': 'QuantizedSoftmax_8',
@@ -54,6 +53,7 @@ class DspOps(object):
            'Concat': 'Concat_f',
            'AddN': 'AddN_f',
        }

    def has_op(self, tf_op):
        return tf_op in self.dsp_ops
@@ -61,5 +61,3 @@ class DspOps(object):
        if tf_op not in self.dsp_ops:
            raise Exception('Could not map nn op for: ', tf_op)
        return self.dsp_ops[tf_op]
mace/python/tools/encrypt_opencl_codegen.py  View file @ 58f2516e
@@ -11,10 +11,13 @@ FLAGS = None
encrypt_lookup_table = "Xiaomi-AI-Platform-Mace"


def encrypt_code(code_str):
    encrypted_arr = []
    for i in range(len(code_str)):
        encrypted_char = hex(ord(code_str[i]) ^ ord(encrypt_lookup_table[i % len(encrypt_lookup_table)]))
        encrypted_arr.append(encrypted_char)
    return encrypted_arr
@@ -45,7 +48,8 @@ def main(unused_args):
        encrypted_code_maps[file_name[:-3]] = encrypted_code_arr

    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
    cpp_cl_encrypted_kernel = env.get_template('str2vec_maps.cc.jinja2').render(
        maps=encrypted_code_maps,
        data_type='unsigned char',
        variable_name='kEncryptedProgramMap')
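Note: encrypt_code above XORs each OpenCL source character against the repeating lookup string "Xiaomi-AI-Platform-Mace" and stores the result as hex literals for the generated C++ map. Since XOR is its own inverse, running the same loop over the encrypted values recovers the original text. A minimal round-trip sketch; the decrypt_code helper is illustrative and not part of the tool:

encrypt_lookup_table = "Xiaomi-AI-Platform-Mace"


def encrypt_code(code_str):
    # XOR every character with the lookup table, cycling through the key.
    return [hex(ord(code_str[i]) ^
                ord(encrypt_lookup_table[i % len(encrypt_lookup_table)]))
            for i in range(len(code_str))]


def decrypt_code(encrypted_arr):
    # Applying the same XOR again restores the original characters.
    return ''.join(chr(int(c, 16) ^
                       ord(encrypt_lookup_table[i % len(encrypt_lookup_table)]))
                   for i, c in enumerate(encrypted_arr))


assert decrypt_code(encrypt_code("__kernel void foo() {}")) == \
    "__kernel void foo() {}"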
mace/python/tools/graph_util.py  View file @ 58f2516e
@@ -2,18 +2,21 @@ import tensorflow as tf
from mace.proto import mace_pb2
from collections import OrderedDict


def sort_tf_node(node, nodes_map, ordered_nodes_map):
    if node.name not in ordered_nodes_map:
        for input_tensor_name in node.input:
            input_node_name = input_tensor_name.split(':')[
                0] if ':' in input_tensor_name else input_tensor_name
-            if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
+            if input_node_name not in nodes_map or \
+                    input_node_name in ordered_nodes_map:
                continue

            input_node = nodes_map[input_node_name]
            sort_tf_node(input_node, nodes_map, ordered_nodes_map)
        ordered_nodes_map[node.name] = node


def sort_tf_graph(graph_def):
    nodes_map = {}
    ordered_nodes_map = OrderedDict()
@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
        for input_tensor_name in node.input:
            input_node_name = input_tensor_name.split(':')[
                0] if ':' in input_tensor_name else input_tensor_name
-            if input_node_name not in nodes_map or input_node_name in ordered_nodes_map:
+            if input_node_name not in nodes_map or \
+                    input_node_name in ordered_nodes_map:
                continue

            input_node = nodes_map[input_node_name]
            sort_mace_node(input_node, nodes_map, ordered_nodes_map)
        ordered_nodes_map[node.name] = node


def sort_mace_graph(graph_def, output_name):
    nodes_map = {}
    ordered_nodes_map = OrderedDict()
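Note: sort_tf_node / sort_mace_node above do a depth-first walk over each node's inputs and append nodes to an OrderedDict in post-order, which yields a topological ordering of the graph. The same idea on a plain adjacency dict, as an illustration only; the toy graph and the topo_sort name are not from MACE:

from collections import OrderedDict


def topo_sort(node, deps, ordered):
    # Visit all dependencies first, then emit the node (post-order DFS).
    if node in ordered:
        return
    for dep in deps.get(node, []):
        topo_sort(dep, deps, ordered)
    ordered[node] = True


deps = {'output': ['conv'], 'conv': ['input', 'weights']}
ordered = OrderedDict()
topo_sort('output', deps, ordered)
print(list(ordered))  # ['input', 'weights', 'conv', 'output']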
mace/python/tools/memory_optimizer.py  View file @ 58f2516e
@@ -2,6 +2,7 @@ import sys
import operator
from mace.proto import mace_pb2


class MemoryOptimizer(object):
    def __init__(self, net_def):
        self.net_def = net_def
@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
        mem_size = [0, 0]
        if op_type == 'WinogradTransform' or op_type == 'MatMul':
            mem_size[0] = output_shape[2] * output_shape[3]
            mem_size[1] = output_shape[0] * int((output_shape[1] + 3) / 4)
        else:
            mem_size[0] = output_shape[2] * int((output_shape[3] + 3) / 4)
            mem_size[1] = output_shape[0] * output_shape[1]
        return mem_size
@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
            if self.is_buffer_image_op(op):
                continue
            if not op.output_shape:
-                print('WARNING: There is no output shape information to do memory optimization.')
+                print('WARNING: There is no output shape information to '
+                      'do memory optimization.')
                return
            if len(op.output_shape) != len(op.output):
-                print('WARNING: the number of output shape is not equal to the number of output.')
+                print('WARNING: the number of output shape is not equal to '
+                      'the number of output.')
                return
            for i in range(len(op.output)):
                op_mem_size = self.get_mem_size(op.type, op.output_shape[i].dims)
                mem_id = -1
                if len(self.idle_mem) > 0:
                    best_mem_candidate_id = -1
@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
                    best_mem_candidate_shape = []
                    for mid in self.idle_mem:
                        reuse_mem_size = self.mem_block[mid]
-                        resize_mem_size = [max(reuse_mem_size[0], op_mem_size[0]), max(reuse_mem_size[1], op_mem_size[1])]
+                        resize_mem_size = [
+                            max(reuse_mem_size[0], op_mem_size[0]),
+                            max(reuse_mem_size[1], op_mem_size[1])
+                        ]
                        delta_mem_area = self.mem_area(resize_mem_size) - self.mem_area(reuse_mem_size)
                        if delta_mem_area < best_mem_candidate_delta_area:
                            best_mem_candidate_id = mid
                            best_mem_candidate_delta_area = delta_mem_area
                            best_mem_candidate_shape = resize_mem_size

                    if best_mem_candidate_delta_area <= self.mem_area(op_mem_size):
                        # reuse
                        self.mem_block[best_mem_candidate_id] = best_mem_candidate_shape
                        mem_id = best_mem_candidate_id
                        self.idle_mem.remove(mem_id)
@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
            print mem, self.mem_block[mem]
            optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)

        print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size)


def optimize_memory(net_def):
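Note: the reuse loop above picks, among the idle image blocks, the one whose area grows the least when resized to cover the new op's requirement, and reuses it only if that growth is no larger than allocating the block from scratch. A small worked example of that arithmetic; the block sizes below are made up:

def mem_area(size):
    return size[0] * size[1]


op_mem_size = [128, 64]                    # required by the new op
idle_blocks = {0: [96, 80], 1: [200, 16]}  # currently idle image blocks

best_id, best_delta, best_shape = -1, float('inf'), None
for mid, reuse in idle_blocks.items():
    resized = [max(reuse[0], op_mem_size[0]), max(reuse[1], op_mem_size[1])]
    delta = mem_area(resized) - mem_area(reuse)
    if delta < best_delta:
        best_id, best_delta, best_shape = mid, delta, resized

# Reuse only if growing the old block is cheaper than a fresh allocation.
if best_delta <= mem_area(op_mem_size):
    print("reuse block", best_id, "resized to", best_shape)   # block 0, [128, 80]
else:
    print("allocate new block", op_mem_size)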
mace/python/tools/opencl_codegen.py  View file @ 58f2516e
@@ -27,37 +27,40 @@ def generate_cpp_source():
    binary_array = np.fromfile(f, dtype=np.uint8)

    idx = 0
    size, = struct.unpack("Q", binary_array[idx:idx + 8])
    idx += 8
    for _ in xrange(size):
        key_size, = struct.unpack("i", binary_array[idx:idx + 4])
        idx += 4
        key, = struct.unpack(str(key_size) + "s", binary_array[idx:idx + key_size])
        idx += key_size
        value_size, = struct.unpack("i", binary_array[idx:idx + 4])
        idx += 4
        maps[key] = []
-        value = struct.unpack(str(value_size) + "B",
-                              binary_array[idx:idx + value_size])
+        value = struct.unpack(
+            str(value_size) + "B",
+            binary_array[idx:idx + value_size])
        idx += value_size
        for ele in value:
            maps[key].append(hex(ele))

    cl_platform_info_path = os.path.join(binary_dir, FLAGS.platform_info_file_name)
    with open(cl_platform_info_path, 'r') as f:
        curr_platform_info = f.read()
    if platform_info != "":
        assert (curr_platform_info == platform_info)
    platform_info = curr_platform_info

    env = jinja2.Environment(loader=jinja2.FileSystemLoader(sys.path[0]))
    return env.get_template('opencl_compiled_kernel.cc.jinja2').render(
        maps=maps,
        data_type='unsigned char',
        variable_name='kCompiledProgramMap',
        platform_info=platform_info,
    )


def main(unused_args):
    cpp_cl_binary_source = generate_cpp_source()
@@ -90,7 +93,7 @@ def parse_args():
        "--output_path",
        type=str,
        default="./mace/examples/codegen/opencl/opencl_compiled_program.cc",
-        help="The path of generated C++ header file which contains cl binaries.")
+        help="The path of generated C++ header file for cl binaries.")
    return parser.parse_known_args()
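Note: generate_cpp_source above walks a packed binary file laid out as a uint64 entry count, then for each entry an int32 key length, the key bytes, an int32 value length, and the value bytes. A sketch of a writer for that layout, useful for sanity-checking the parser; the pack_program_map helper name and the sample data are illustrative:

import struct


def pack_program_map(progs):
    # progs: {key string: bytes}, serialized as
    # <uint64 count> then per entry <int32 klen><key><int32 vlen><value>.
    out = struct.pack("Q", len(progs))
    for key, value in progs.items():
        out += struct.pack("i", len(key)) + key.encode()
        out += struct.pack("i", len(value)) + bytes(value)
    return out


blob = pack_program_map({"conv_2d": b"\x01\x02\x03"})
size, = struct.unpack("Q", blob[:8])
print(size)  # 1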
mace/python/tools/source_converter_lib.py  View file @ 58f2516e
@@ -6,9 +6,9 @@ import hashlib
from mace.proto import mace_pb2
from jinja2 import Environment, FileSystemLoader

GENERATED_NAME = set()


def generate_obfuscated_name(namespace, name):
    md5 = hashlib.md5()
    md5.update(namespace)
@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
    GENERATED_NAME.add(name)
    return name


def generate_tensor_map(tensors):
    tensor_map = {}
    for t in tensors:
-        if not tensor_map.has_key(t.name):
+        if t.name not in tensor_map:
            tensor_map[t.name] = generate_obfuscated_name("tensor", t.name)
    return tensor_map


def generate_in_out_map(ops, tensor_map):
    in_out_map = {}
    for op in ops:
        op.name = generate_obfuscated_name("op", op.name)
        for input_name in op.input:
-            if not in_out_map.has_key(input_name):
-                if tensor_map.has_key(input_name):
+            if input_name not in in_out_map:
+                if input_name in tensor_map:
                    in_out_map[input_name] = tensor_map[input_name]
                else:
                    in_out_map[input_name] = generate_obfuscated_name("in", input_name)
        for output_name in op.output:
-            if not in_out_map.has_key(output_name):
-                if tensor_map.has_key(output_name):
+            if output_name not in in_out_map:
+                if output_name in tensor_map:
                    in_out_map[output_name] = tensor_map[output_name]
                else:
                    in_out_map[output_name] = generate_obfuscated_name("out", output_name)
    return in_out_map


def obfuscate_name(net_def):
    input_node = "mace_input_node"
    output_node = "mace_output_node"
@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
            if output_node not in op.output[i]:
                op.output[i] = in_out_map[op.output[i]]


def rename_tensor(net_def):
    tensor_map = {}
    for t in net_def.tensors:
-        if not tensor_map.has_key(t.name):
+        if t.name not in tensor_map:
            tensor_map[t.name] = "_" + t.name[:-2].replace("/", "_")
            t.name = tensor_map[t.name]
    for op in net_def.op:
        for i in range(len(op.input)):
-            if tensor_map.has_key(op.input[i]):
+            if op.input[i] in tensor_map:
                op.input[i] = tensor_map[op.input[i]]
        for i in range(len(op.output)):
-            if tensor_map.has_key(op.output[i]):
+            if op.output[i] in tensor_map:
                op.output[i] = tensor_map[op.output[i]]


class TensorInfo:
    def __init__(self, id, t, runtime):
        self.id = id
@@ -84,19 +91,26 @@ class TensorInfo:
        if t.data_type == mace_pb2.DT_FLOAT:
            if runtime == 'gpu':
                self.data_type = mace_pb2.DT_HALF
                self.data = bytearray(
                    np.array(t.float_data).astype(np.float16).tobytes())
            else:
                self.data_type = mace_pb2.DT_FLOAT
                self.data = bytearray(
                    np.array(t.float_data).astype(np.float32).tobytes())
        elif t.data_type == mace_pb2.DT_INT32:
            self.data = bytearray(
                np.array(t.int32_data).astype(np.int32).tobytes())
        elif t.data_type == mace_pb2.DT_UINT8:
            self.data = bytearray(
                np.array(t.int32_data).astype(np.uint8).tolist())


def stringfy(value):
    return ', '.join('"{0}"'.format(w) for w in value)


def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate,
                      model_tag, output, runtime, embed_model_data):
    if obfuscate:
        obfuscate_name(net_def)
    else:
@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    print template_dir
    # Create the jinja2 environment.
    j2_env = Environment(loader=FileSystemLoader(template_dir), trim_blocks=True)
    j2_env.filters['stringfy'] = stringfy
    output_dir = os.path.dirname(output) + '/'
    # generate tensor source files
@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
            model_data.extend(bytearray([0] * padding))
            offset += padding
        source = j2_env.get_template(template_name).render(
            tensor_info=tensor_info,
            tensor=t,
            tag=model_tag,
            runtime=runtime,
            offset=offset,
        )
        model_data.extend(tensor_info.data)
        offset += len(tensor_info.data)
@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    # generate tensor data
    template_name = 'tensor_data.jinja2'
    source = j2_env.get_template(template_name).render(
        tag=model_tag,
        embed_model_data=embed_model_data,
        model_data_size=offset,
        model_data=model_data
    )
    with open(output_dir + 'tensor_data' + '.cc', "wb") as f:
        f.write(source)
    if not embed_model_data:
@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    op_size = len(net_def.op)
    for start in range(0, op_size, 10):
        source = j2_env.get_template(template_name).render(
            start=start,
            end=min(start + 10, op_size),
            net=net_def,
            tag=model_tag,
            runtime=runtime,
        )
        with open(output_dir + 'op' + str(counter) + '.cc', "wb") as f:
            f.write(source)
@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
    # generate model source files
    template_name = 'model.jinja2'
-    tensors = [TensorInfo(i, net_def.tensors[i], runtime) for i in range(len(net_def.tensors))]
+    tensors = [
+        TensorInfo(i, net_def.tensors[i], runtime)
+        for i in range(len(net_def.tensors))
+    ]
    source = j2_env.get_template(template_name).render(
        tensors=tensors,
        net=net_def,
        tag=model_tag,
        runtime=runtime,
        model_pb_checksum=mode_pb_checksum
    )
    with open(output, "wb") as f:
        f.write(source)

    # generate model header file
    template_name = 'model_header.jinja2'
    source = j2_env.get_template(template_name).render(
        tag=model_tag, )
    with open(output_dir + model_tag + '.h', "wb") as f:
        f.write(source)
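Note: generate_obfuscated_name above hashes a namespace plus the original tensor/op name with MD5 and keeps results unique via the GENERATED_NAME set; the exact truncation of the digest lives in a part of the file this diff does not show. A standalone sketch of the same idea, assuming a short hex prefix is kept (the prefix length and the uniqueness fallback are assumptions):

import hashlib

GENERATED_NAME = set()


def obfuscated_name(namespace, name, length=8):
    # Hash namespace + name and take a short hex prefix (length is assumed).
    md5 = hashlib.md5()
    md5.update((namespace + name).encode())
    candidate = md5.hexdigest()[:length]
    while candidate in GENERATED_NAME:
        candidate += 'x'   # crude uniqueness fallback, for the sketch only
    GENERATED_NAME.add(candidate)
    return candidate


print(obfuscated_name("tensor", "conv1/weights:0"))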
mace/python/tools/tf_converter_lib.py
浏览文件 @
58f2516e
...
@@ -8,15 +8,8 @@ from mace.python.tools import memory_optimizer
...
@@ -8,15 +8,8 @@ from mace.python.tools import memory_optimizer
from
tensorflow.core.framework
import
graph_pb2
from
tensorflow.core.framework
import
graph_pb2
from
tensorflow.core.framework
import
tensor_shape_pb2
from
tensorflow.core.framework
import
tensor_shape_pb2
padding_mode
=
{
padding_mode
=
{
'VALID'
:
0
,
'SAME'
:
1
,
'FULL'
:
2
}
'VALID'
:
0
,
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
'SAME'
:
1
,
'FULL'
:
2
}
pooling_type_mode
=
{
'AvgPool'
:
1
,
'MaxPool'
:
2
}
# the order should be the same as
# the order should be the same as
# eltwise type's in mace/kernels/eltwise.h
# eltwise type's in mace/kernels/eltwise.h
...
@@ -34,25 +27,22 @@ math_type_mode = {
...
@@ -34,25 +27,22 @@ math_type_mode = {
}
}
buffer_type_map
=
{
buffer_type_map
=
{
'CONV2D_FILTER'
:
0
,
'CONV2D_FILTER'
:
0
,
'IN_OUT_CHANNEL'
:
1
,
'IN_OUT_CHANNEL'
:
1
,
'ARGUMENT'
:
2
,
'ARGUMENT'
:
2
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_HEIGHT'
:
3
,
'IN_OUT_WIDTH'
:
4
,
'IN_OUT_WIDTH'
:
4
,
'WINOGRAD_FILTER'
:
5
,
'WINOGRAD_FILTER'
:
5
,
'DW_CONV2D_FILTER'
:
6
,
'DW_CONV2D_FILTER'
:
6
,
}
}
data_type_map
=
{
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
activation_name_map
=
{
activation_name_map
=
{
'Relu'
:
'RELU'
,
'Relu'
:
'RELU'
,
'Sigmoid'
:
'SIGMOID'
,
'Sigmoid'
:
'SIGMOID'
,
'Tanh'
:
'TANH'
,
'Tanh'
:
'TANH'
,
'Relu6'
:
'RELUX'
'Relu6'
:
'RELUX'
}
}
BATCH_NORM_ORDER
=
[
"Add"
,
"Rsqrt"
,
"Mul"
,
"Mul"
,
"Mul"
,
"Sub"
,
"Add"
]
BATCH_NORM_ORDER
=
[
"Add"
,
"Rsqrt"
,
"Mul"
,
"Mul"
,
"Mul"
,
"Sub"
,
"Add"
]
...
@@ -62,12 +52,14 @@ MACE_OUTPUT_NODE_NAME = "mace_output_node"
...
@@ -62,12 +52,14 @@ MACE_OUTPUT_NODE_NAME = "mace_output_node"
OPENCL_IMAGE_MAX_SIZE
=
16384
OPENCL_IMAGE_MAX_SIZE
=
16384
def
get_input_tensor
(
op
,
index
):
def
get_input_tensor
(
op
,
index
):
input_tensor
=
op
.
inputs
[
index
]
input_tensor
=
op
.
inputs
[
index
]
if
input_tensor
.
op
.
type
==
'Reshape'
:
if
input_tensor
.
op
.
type
==
'Reshape'
:
input_tensor
=
get_input_tensor
(
input_tensor
.
op
,
0
)
input_tensor
=
get_input_tensor
(
input_tensor
.
op
,
0
)
return
input_tensor
return
input_tensor
class
TFConverter
(
object
):
class
TFConverter
(
object
):
def
__init__
(
self
,
tf_ops
,
net_def
,
dt
,
device
,
winograd
):
def
__init__
(
self
,
tf_ops
,
net_def
,
dt
,
device
,
winograd
):
self
.
net_def
=
net_def
self
.
net_def
=
net_def
...
@@ -139,7 +131,7 @@ class TFConverter(object):
...
@@ -139,7 +131,7 @@ class TFConverter(object):
op_def
.
name
=
name
op_def
.
name
=
name
op_def
.
type
=
'BufferToImage'
op_def
.
type
=
'BufferToImage'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
name
+
':0'
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
name
=
'buffer_type'
...
@@ -156,7 +148,7 @@ class TFConverter(object):
...
@@ -156,7 +148,7 @@ class TFConverter(object):
op_def
.
name
=
name
op_def
.
name
=
name
op_def
.
type
=
'Transpose'
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
input
.
extend
([
new_input_name
])
op_def
.
output
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
name
+
':0'
])
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
.
name
=
'dims'
dims_arg
.
name
=
'dims'
...
@@ -172,7 +164,7 @@ class TFConverter(object):
...
@@ -172,7 +164,7 @@ class TFConverter(object):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'ImageToBuffer'
op_def
.
type
=
'ImageToBuffer'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
op_def
.
output
.
extend
([
output_name
])
epsilon_arg
=
op_def
.
arg
.
add
()
epsilon_arg
=
op_def
.
arg
.
add
()
...
@@ -185,7 +177,7 @@ class TFConverter(object):
...
@@ -185,7 +177,7 @@ class TFConverter(object):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
=
self
.
net_def
.
op
.
add
()
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
name
=
output_name
[:
-
2
]
op_def
.
type
=
'Transpose'
op_def
.
type
=
'Transpose'
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
input
.
extend
([
name
+
':0'
])
op_def
.
output
.
extend
([
output_name
])
op_def
.
output
.
extend
([
output_name
])
dims_arg
=
op_def
.
arg
.
add
()
dims_arg
=
op_def
.
arg
.
add
()
...
@@ -237,7 +229,8 @@ class TFConverter(object):
...
@@ -237,7 +229,8 @@ class TFConverter(object):
tensor
=
self
.
net_def
.
tensors
.
add
()
tensor
=
self
.
net_def
.
tensors
.
add
()
tf_tensor
=
op
.
outputs
[
0
].
eval
()
tf_tensor
=
op
.
outputs
[
0
].
eval
()
if
output_name
in
self
.
transpose_filter_tensor
:
if
output_name
in
self
.
transpose_filter_tensor
:
tf_tensor
=
tf_tensor
.
transpose
(
self
.
transpose_filter_tensor
[
output_name
])
tf_tensor
=
tf_tensor
.
transpose
(
self
.
transpose_filter_tensor
[
output_name
])
if
output_name
in
self
.
reshape_tensor
:
if
output_name
in
self
.
reshape_tensor
:
tf_tensor
=
tf_tensor
.
reshape
(
self
.
reshape_tensor
[
output_name
])
tf_tensor
=
tf_tensor
.
reshape
(
self
.
reshape_tensor
[
output_name
])
tensor
.
name
=
op
.
outputs
[
0
].
name
tensor
.
name
=
op
.
outputs
[
0
].
name
...
@@ -262,9 +255,11 @@ class TFConverter(object):
...
@@ -262,9 +255,11 @@ class TFConverter(object):
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
if
len
(
output_shape
)
==
0
or
output_shape
[
0
]
is
None
:
if
len
(
output_shape
)
==
0
or
output_shape
[
0
]
is
None
:
return
False
return
False
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
return
self
.
winograd
and
op
.
type
!=
'DepthwiseConv2dNative'
and
self
.
device
==
'gpu'
and
\
output_shape
[
2
]
+
1
)
/
2
)
filter_shape
[
0
]
==
3
and
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
return
self
.
winograd
and
op
.
type
!=
'DepthwiseConv2dNative'
and
\
self
.
device
==
'gpu'
and
filter_shape
[
0
]
==
3
and
\
(
filter_shape
[
0
]
==
filter_shape
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
])
and
\
(
strides
[
0
]
==
1
)
and
(
strides
[
0
]
==
strides
[
1
])
and
\
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
2
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
(
16
*
filter_shape
[
3
]
<
OPENCL_IMAGE_MAX_SIZE
)
and
\
...
@@ -276,7 +271,8 @@ class TFConverter(object):
...
@@ -276,7 +271,8 @@ class TFConverter(object):
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
output_shape
=
op
.
outputs
[
0
].
shape
.
as_list
()
self
.
transpose_filter_tensor
[
filter_tensor
.
name
]
=
(
3
,
2
,
0
,
1
)
self
.
transpose_filter_tensor
[
filter_tensor
.
name
]
=
(
3
,
2
,
0
,
1
)
filter_name
=
self
.
add_buffer_to_image
(
op
.
inputs
[
1
].
name
,
"WINOGRAD_FILTER"
)
filter_name
=
self
.
add_buffer_to_image
(
op
.
inputs
[
1
].
name
,
"WINOGRAD_FILTER"
)
# Input transform
# Input transform
wt_op
=
mace_pb2
.
OperatorDef
()
wt_op
=
mace_pb2
.
OperatorDef
()
...
@@ -292,7 +288,8 @@ class TFConverter(object):
...
@@ -292,7 +288,8 @@ class TFConverter(object):
wt_output_name
=
wt_op
.
name
+
":0"
wt_output_name
=
wt_op
.
name
+
":0"
wt_op
.
output
.
extend
([
wt_output_name
])
wt_op
.
output
.
extend
([
wt_output_name
])
wt_output_shape
=
mace_pb2
.
OutputShape
()
wt_output_shape
=
mace_pb2
.
OutputShape
()
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_width
=
output_shape
[
0
]
*
((
output_shape
[
1
]
+
1
)
/
2
)
*
((
output_shape
[
2
]
+
1
)
/
2
)
wt_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
wt_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
2
],
wt_output_width
,
1
])
wt_op
.
output_shape
.
extend
([
wt_output_shape
])
wt_op
.
output_shape
.
extend
([
wt_output_shape
])
...
@@ -307,7 +304,8 @@ class TFConverter(object):
...
@@ -307,7 +304,8 @@ class TFConverter(object):
matmul_output_name
=
matmul_op
.
name
+
":0"
matmul_output_name
=
matmul_op
.
name
+
":0"
matmul_op
.
output
.
extend
([
matmul_output_name
])
matmul_op
.
output
.
extend
([
matmul_output_name
])
matmul_output_shape
=
mace_pb2
.
OutputShape
()
matmul_output_shape
=
mace_pb2
.
OutputShape
()
matmul_output_shape
.
dims
.
extend
([
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
matmul_output_shape
.
dims
.
extend
(
[
16
,
filter_shape
[
3
],
wt_output_width
,
1
])
matmul_op
.
output_shape
.
extend
([
matmul_output_shape
])
matmul_op
.
output_shape
.
extend
([
matmul_output_shape
])
# Inverse transform
# Inverse transform
...
@@ -331,15 +329,17 @@ class TFConverter(object):
...
@@ -331,15 +329,17 @@ class TFConverter(object):
final_op
=
op
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
if
len
(
self
.
tf_graph
[
op
.
name
]
)
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
iwt_op
.
input
.
extend
([
output_name
])
iwt_op
.
input
.
extend
([
output_name
])
final_op
=
bias_add_op
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
\
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
fused_act_arg
=
iwt_op
.
arg
.
add
()
fused_act_arg
=
iwt_op
.
arg
.
add
()
fused_act_arg
.
name
=
'activation'
fused_act_arg
.
name
=
'activation'
...
@@ -355,7 +355,6 @@ class TFConverter(object):
...
@@ -355,7 +355,6 @@ class TFConverter(object):
self
.
add_output_shape
(
final_op
.
outputs
,
iwt_op
)
self
.
add_output_shape
(
final_op
.
outputs
,
iwt_op
)
self
.
net_def
.
op
.
extend
([
wt_op
,
matmul_op
,
iwt_op
])
self
.
net_def
.
op
.
extend
([
wt_op
,
matmul_op
,
iwt_op
])
def
convert_conv2d
(
self
,
op
):
def
convert_conv2d
(
self
,
op
):
op_def
=
mace_pb2
.
OperatorDef
()
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
arg
=
op_def
.
arg
.
add
()
...
@@ -365,20 +364,28 @@ class TFConverter(object):
...
@@ -365,20 +364,28 @@ class TFConverter(object):
if
op
.
type
==
'DepthwiseConv2dNative'
:
if
op
.
type
==
'DepthwiseConv2dNative'
:
op_def
.
type
=
'DepthwiseConv2d'
op_def
.
type
=
'DepthwiseConv2d'
if
self
.
device
==
'neon'
:
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
else
:
else
:
op_def
.
type
=
op
.
type
op_def
.
type
=
op
.
type
if
self
.
device
==
'neon'
:
if
self
.
device
==
'neon'
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
3
,
2
,
0
,
1
)
else
:
else
:
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'gpu'
:
if
self
.
device
==
'gpu'
:
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
buffer_type
=
"DW_CONV2D_FILTER"
if
op_def
.
type
==
'DepthwiseConv2d'
else
"CONV2D_FILTER"
if
op_def
.
type
==
'DepthwiseConv2d'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
buffer_type
)
buffer_type
=
"DW_CONV2D_FILTER"
else
:
buffer_type
=
"CONV2D_FILTER"
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
buffer_type
)
op_def
.
input
.
extend
([
output_name
])
op_def
.
input
.
extend
([
output_name
])
else
:
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
i
).
name
for
i
in
range
(
len
(
op
.
inputs
))])
op_def
.
input
.
extend
(
[
get_input_tensor
(
op
,
i
).
name
for
i
in
range
(
len
(
op
.
inputs
))])
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding'
padding_arg
.
name
=
'padding'
...
@@ -395,18 +402,20 @@ class TFConverter(object):
...
@@ -395,18 +402,20 @@ class TFConverter(object):
final_op
=
op
final_op
=
op
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
resolved_ops
[
op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
if
len
(
self
.
tf_graph
.
get
(
op
.
name
,
[]))
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
bias_add_op
=
self
.
tf_graph
[
op
.
name
][
0
]
if
self
.
device
==
'gpu'
:
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
op_def
.
input
.
extend
([
output_name
])
else
:
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
final_op
=
bias_add_op
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
.
get
(
final_op
.
name
,
[]))
==
1
\
if
len
(
self
.
tf_graph
.
get
(
final_op
.
name
,
[]))
==
1
and
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
self
.
tf_graph
[
final_op
.
name
][
0
].
type
in
activation_name_map
:
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
activation_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
if
op_def
.
type
==
"Conv2D"
:
if
op_def
.
type
==
"Conv2D"
:
op_def
.
type
=
"FusedConv2D"
op_def
.
type
=
"FusedConv2D"
...
@@ -450,17 +459,16 @@ class TFConverter(object):
...
@@ -450,17 +459,16 @@ class TFConverter(object):
var_value
=
get_input_tensor
(
op
,
4
).
eval
().
astype
(
np
.
float32
)
var_value
=
get_input_tensor
(
op
,
4
).
eval
().
astype
(
np
.
float32
)
epsilon_value
=
op
.
get_attr
(
'epsilon'
)
epsilon_value
=
op
.
get_attr
(
'epsilon'
)
scale_value
=
(
scale_value
=
((
1.0
/
np
.
vectorize
(
math
.
sqrt
)
(
1.0
/
np
.
vectorize
(
math
.
sqrt
)(
var_value
+
epsilon_value
))
*
(
var_value
+
epsilon_value
))
*
gamma_value
)
gamma_value
)
offset_value
=
(
-
mean_value
*
scale_value
)
+
beta_value
offset_value
=
(
-
mean_value
*
scale_value
)
+
beta_value
idx
=
gamma_tensor
.
name
.
rfind
(
'/'
)
idx
=
gamma_tensor
.
name
.
rfind
(
'/'
)
name_prefix
=
gamma_tensor
.
name
[:
idx
]
+
'/'
name_prefix
=
gamma_tensor
.
name
[:
idx
]
+
'/'
input_names
=
[
name_prefix
+
'scale:0'
,
name_prefix
+
'offset:0'
]
input_names
=
[
name_prefix
+
'scale:0'
,
name_prefix
+
'offset:0'
]
self
.
add_tensor
(
input_names
[
0
],
gamma_value
.
sha
pe
,
self
.
add_tensor
(
input_names
[
0
],
gamma_value
.
shape
,
gamma_tensor
.
dty
pe
,
gamma_tensor
.
dtype
,
scale_value
)
scale_value
)
self
.
add_tensor
(
input_names
[
1
],
gamma_value
.
sha
pe
,
self
.
add_tensor
(
input_names
[
1
],
gamma_value
.
shape
,
gamma_tensor
.
dty
pe
,
gamma_tensor
.
dtype
,
offset_value
)
offset_value
)
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
if
self
.
device
==
'gpu'
:
if
self
.
device
==
'gpu'
:
...
@@ -495,14 +503,15 @@ class TFConverter(object):
...
@@ -495,14 +503,15 @@ class TFConverter(object):
bn_ops
=
[]
bn_ops
=
[]
bn_ops
.
append
(
op
)
bn_ops
.
append
(
op
)
for
i
in
range
(
1
,
3
):
for
i
in
range
(
1
,
3
):
if
len
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
])
==
1
\
if
len
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
])
==
1
and
\
and
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
i
]:
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
i
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
])
else
:
else
:
raise
Exception
(
'Invalid BatchNorm Op'
)
raise
Exception
(
'Invalid BatchNorm Op'
)
if
len
(
self
.
tf_graph
[
bn_ops
[
2
].
name
])
==
2
\
if
len
(
self
.
tf_graph
[
bn_ops
[
2
].
name
])
==
2
and
\
and
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
3
]
\
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
].
type
==
\
and
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
].
type
==
BATCH_NORM_ORDER
[
4
]:
BATCH_NORM_ORDER
[
3
]
and
\
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
].
type
==
BATCH_NORM_ORDER
[
4
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
])
else
:
else
:
...
@@ -682,7 +691,8 @@ class TFConverter(object):
...
@@ -682,7 +691,8 @@ class TFConverter(object):
op_def
.
output
.
extend
([
output
.
name
for
output
in
op
.
outputs
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
op
.
outputs
])
size_arg
=
op_def
.
arg
.
add
()
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'size'
size_arg
.
name
=
'size'
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
=
op_def
.
arg
.
add
()
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'align_corners'
size_arg
.
name
=
'align_corners'
size_arg
.
i
=
op
.
get_attr
(
'align_corners'
)
size_arg
.
i
=
op
.
get_attr
(
'align_corners'
)
...
@@ -712,7 +722,7 @@ class TFConverter(object):
...
@@ -712,7 +722,7 @@ class TFConverter(object):
else
:
else
:
op_def
.
type
=
"CWise"
op_def
.
type
=
"CWise"
x_value
=
0
x_value
=
0
if
len
(
input_tensor1
.
shape
)
==
4
:
if
len
(
input_tensor1
.
shape
)
==
4
:
op_def
.
input
.
extend
([
op
.
inputs
[
1
].
name
])
op_def
.
input
.
extend
([
op
.
inputs
[
1
].
name
])
x_value
=
get_input_tensor
(
op
,
0
).
eval
().
astype
(
np
.
float32
)
x_value
=
get_input_tensor
(
op
,
0
).
eval
().
astype
(
np
.
float32
)
else
:
else
:
...
@@ -752,7 +762,8 @@ class TFConverter(object):
...
@@ -752,7 +762,8 @@ class TFConverter(object):
op_def
.
type
=
"BiasAdd"
op_def
.
type
=
"BiasAdd"
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
if
self
.
device
==
'gpu'
:
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
op_def
.
input
.
extend
([
output_name
])
else
:
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
1
).
name
])
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
1
).
name
])
...
@@ -772,21 +783,24 @@ class TFConverter(object):
...
@@ -772,21 +783,24 @@ class TFConverter(object):
op_def
.
output
.
extend
([
output
.
name
for
output
in
op
.
outputs
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
op
.
outputs
])
size_arg
=
op_def
.
arg
.
add
()
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'block_shape'
size_arg
.
name
=
'block_shape'
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
=
op_def
.
arg
.
add
()
size_arg
=
op_def
.
arg
.
add
()
if
b2s
:
if
b2s
:
size_arg
.
name
=
'crops'
size_arg
.
name
=
'crops'
else
:
else
:
size_arg
.
name
=
'paddings'
size_arg
.
name
=
'paddings'
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
)
self
.
add_output_shape
(
op
.
outputs
,
op_def
)
self
.
add_output_shape
(
op
.
outputs
,
op_def
)
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
unused_tensor
.
add
(
get_input_tensor
(
op
,
1
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
op
,
1
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
op
,
2
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
op
,
2
).
name
)
def
is_atrous_conv2d
(
self
,
op
):
def
is_atrous_conv2d
(
self
,
op
):
return
op
.
type
==
'SpaceToBatchND'
and
\
return
op
.
type
==
'SpaceToBatchND'
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Conv2D'
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Conv2D'
def
convert_atrous_conv2d
(
self
,
op
):
def
convert_atrous_conv2d
(
self
,
op
):
op_def
=
mace_pb2
.
OperatorDef
()
op_def
=
mace_pb2
.
OperatorDef
()
...
@@ -796,10 +810,12 @@ class TFConverter(object):
...
@@ -796,10 +810,12 @@ class TFConverter(object):
conv_op
=
self
.
tf_graph
[
op
.
name
][
0
]
conv_op
=
self
.
tf_graph
[
op
.
name
][
0
]
op_def
.
name
=
conv_op
.
name
op_def
.
name
=
conv_op
.
name
op_def
.
type
=
conv_op
.
type
op_def
.
type
=
conv_op
.
type
self
.
transpose_filter_tensor
[
get_input_tensor
(
conv_op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
self
.
transpose_filter_tensor
[
get_input_tensor
(
conv_op
,
1
).
name
]
=
(
0
,
1
,
3
,
2
)
if
self
.
device
==
'gpu'
:
if
self
.
device
==
'gpu'
:
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
op_def
.
input
.
extend
([
op
.
inputs
[
0
].
name
])
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
conv_op
,
1
).
name
,
"CONV2D_FILTER"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
conv_op
,
1
).
name
,
"CONV2D_FILTER"
)
op_def
.
input
.
extend
([
output_name
])
op_def
.
input
.
extend
([
output_name
])
else
:
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
0
).
name
])
op_def
.
input
.
extend
([
get_input_tensor
(
op
,
0
).
name
])
...
@@ -807,7 +823,8 @@ class TFConverter(object):
...
@@ -807,7 +823,8 @@ class TFConverter(object):
dilation_arg
=
op_def
.
arg
.
add
()
dilation_arg
=
op_def
.
arg
.
add
()
dilation_arg
.
name
=
'dilations'
dilation_arg
.
name
=
'dilations'
dilation_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
dilation_arg
.
ints
.
extend
(
get_input_tensor
(
op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
=
op_def
.
arg
.
add
()
padding_arg
.
name
=
'padding'
padding_arg
.
name
=
'padding'
padding_values
=
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
padding_values
=
get_input_tensor
(
op
,
2
).
eval
().
astype
(
np
.
int32
).
flat
...
@@ -831,18 +848,20 @@ class TFConverter(object):
...
@@ -831,18 +848,20 @@ class TFConverter(object):
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
resolved_ops
[
op
.
name
]
=
1
self
.
resolved_ops
[
conv_op
.
name
]
=
1
self
.
resolved_ops
[
conv_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BiasAdd'
:
if
len
(
self
.
tf_graph
[
final_op
.
name
]
)
==
1
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BiasAdd'
:
bias_add_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
bias_add_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
if
self
.
device
==
'gpu'
:
if
self
.
device
==
'gpu'
:
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
output_name
=
self
.
add_buffer_to_image
(
get_input_tensor
(
bias_add_op
,
1
).
name
,
"ARGUMENT"
)
op_def
.
input
.
extend
([
output_name
])
op_def
.
input
.
extend
([
output_name
])
else
:
else
:
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
op_def
.
input
.
extend
([
get_input_tensor
(
bias_add_op
,
1
).
name
])
final_op
=
bias_add_op
final_op
=
bias_add_op
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
self
.
resolved_ops
[
bias_add_op
.
name
]
=
1
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
\
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BatchToSpaceND'
:
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'BatchToSpaceND'
:
final_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
final_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
self
.
resolved_ops
[
final_op
.
name
]
=
1
self
.
resolved_ops
[
final_op
.
name
]
=
1
self
.
unused_tensor
.
add
(
get_input_tensor
(
final_op
,
1
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
final_op
,
1
).
name
)
...
@@ -850,8 +869,8 @@ class TFConverter(object):
...
@@ -850,8 +869,8 @@ class TFConverter(object):
else
:
else
:
raise
Exception
(
'Convert atrous conv error: no BatchToSpaceND op'
)
raise
Exception
(
'Convert atrous conv error: no BatchToSpaceND op'
)
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
\
if
len
(
self
.
tf_graph
[
final_op
.
name
])
==
1
and
\
and
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'Relu'
:
self
.
tf_graph
[
final_op
.
name
][
0
].
type
==
'Relu'
:
relu_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
relu_op
=
self
.
tf_graph
[
final_op
.
name
][
0
]
op_def
.
type
=
"FusedConv2D"
op_def
.
type
=
"FusedConv2D"
fused_relu_arg
=
op_def
.
arg
.
add
()
fused_relu_arg
=
op_def
.
arg
.
add
()
...
@@ -866,8 +885,10 @@ class TFConverter(object):
...
@@ -866,8 +885,10 @@ class TFConverter(object):
def
is_softmax
(
self
,
op
):
def
is_softmax
(
self
,
op
):
return
op
.
type
==
'Softmax'
and
\
return
op
.
type
==
'Softmax'
and
\
len
(
self
.
tf_parents
[
op
.
name
])
==
1
and
self
.
tf_parents
[
op
.
name
][
0
].
type
==
'Reshape'
and
\
len
(
self
.
tf_parents
[
op
.
name
])
==
1
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Reshape'
self
.
tf_parents
[
op
.
name
][
0
].
type
==
'Reshape'
and
\
len
(
self
.
tf_graph
[
op
.
name
])
==
1
and
\
self
.
tf_graph
[
op
.
name
][
0
].
type
==
'Reshape'
def
convert_softmax
(
self
,
softmax_op
):
def
convert_softmax
(
self
,
softmax_op
):
op_def
=
self
.
net_def
.
op
.
add
()
op_def
=
self
.
net_def
.
op
.
add
()
...
@@ -890,7 +911,8 @@ class TFConverter(object):
...
@@ -890,7 +911,8 @@ class TFConverter(object):
children_ops
=
self
.
tf_graph
[
squeeze_op
.
name
]
children_ops
=
self
.
tf_graph
[
squeeze_op
.
name
]
print
children_ops
print
children_ops
if
len
(
children_ops
)
>
1
and
children_ops
[
0
].
type
==
'Shape'
:
if
len
(
children_ops
)
>
1
and
children_ops
[
0
].
type
==
'Shape'
:
self
.
unused_tensor
.
add
(
get_input_tensor
(
children_ops
[
1
],
0
).
name
)
self
.
unused_tensor
.
add
(
get_input_tensor
(
children_ops
[
1
],
0
).
name
)
self
.
resolved_ops
[
children_ops
[
1
].
name
]
=
1
self
.
resolved_ops
[
children_ops
[
1
].
name
]
=
1
else
:
else
:
op_def
.
input
.
extend
([
parent_reshape_op
.
inputs
[
0
].
name
])
op_def
.
input
.
extend
([
parent_reshape_op
.
inputs
[
0
].
name
])
...
@@ -999,11 +1021,13 @@ class TFConverter(object):
...
@@ -999,11 +1021,13 @@ class TFConverter(object):
self
.
convert_global_avg_pooling
(
op
)
self
.
convert_global_avg_pooling
(
op
)
self
.
unused_tensor
.
add
(
op
.
inputs
[
1
].
name
)
self
.
unused_tensor
.
add
(
op
.
inputs
[
1
].
name
)
else
:
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
))
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
#elif op.type in ['']:
op
.
type
))
# elif op.type in ['']:
# self.convert_normal_op(op)
# self.convert_normal_op(op)
else
:
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
op
.
name
,
op
.
type
                raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
        for op in self.tf_ops:
            if self.resolved_ops[op.name] == 1:
...
@@ -1011,7 +1035,8 @@ class TFConverter(object):
            elif op.type == 'Const':
                self.convert_tensor(op)
            else:
                raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type))
        if self.device == 'gpu':
            self.add_gpu_output_transform(output_nodes)
...
@@ -1026,6 +1051,7 @@ class TFConverter(object):
            if self.resolved_ops[key] != 1:
                print 'Unresolve Op: %s' % key


class Optimizer:
    def __init__(self, net_def, device):
        self.net_def = net_def
...
@@ -1056,14 +1082,17 @@ class Optimizer:
        for op in self.net_def.op:
            if op.name in resolved_ops:
                pass
            elif op.type == 'DepthwiseConv2d' and len(op.output) == 1 and \
                    self.mace_graph[op.output[0]][0].type == 'FoldedBatchNorm':
                depthwise_conv2d_op = op
                folded_bn_op = self.mace_graph[op.output[0]][0]
                weight_buffer_name = self.get_buffer_tensor_name(depthwise_conv2d_op.input[1])
                weight_tensor = self.tensor_map[weight_buffer_name]
                scale_buffer_name = self.get_buffer_tensor_name(
                    folded_bn_op.input[1])
                offset_buffer_name = self.get_buffer_tensor_name(
                    folded_bn_op.input[2])
                scale_tensor = self.tensor_map[scale_buffer_name]
                weight_shape = weight_tensor.dims
                idx = 0
...
@@ -1072,14 +1101,18 @@ class Optimizer:
                    for ic in range(weight_shape[1]):
                        for i in range(weight_shape[2]):
                            for j in range(weight_shape[3]):
                                weight_tensor.float_data[idx] *= scale_tensor.float_data[ic * weight_shape[0] + oc]
                                idx += 1
                else:  # HWIO
                    for i in range(weight_shape[0]):
                        for j in range(weight_shape[1]):
                            for ic in range(weight_shape[2]):
                                for oc in range(weight_shape[3]):
                                    weight_tensor.float_data[idx] *= scale_tensor.float_data[ic * weight_shape[3] + oc]
                                    idx += 1
                new_tensors.append(weight_tensor)
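The nested loops above implement the usual batch-norm folding identity: every weight that feeds a given output channel is multiplied by that channel's BN scale, so the batch norm collapses into the convolution. A minimal standalone sketch of the same arithmetic, with made-up shapes (a depthwise kernel in HWIO-like layout and one scale per output channel, indexed ic * multiplier + oc as in the loop above):

    import numpy as np

    # Hypothetical depthwise kernel (kh, kw, in_channels, multiplier) and a
    # per-output-channel BN scale of length in_channels * multiplier.
    kh, kw, ic, m = 3, 3, 8, 2
    weights = np.random.rand(kh, kw, ic, m).astype(np.float32)
    scale = np.random.rand(ic * m).astype(np.float32)

    # Loop form, mirroring the idx/float_data walk in the Optimizer pass.
    folded = weights.copy()
    for i in range(kh):
        for j in range(kw):
            for c in range(ic):
                for o in range(m):
                    folded[i, j, c, o] *= scale[c * m + o]

    # Vectorized equivalent: reshape the scale to (in_channels, multiplier)
    # and let broadcasting apply it over the spatial dimensions.
    assert np.allclose(folded, weights * scale.reshape(ic, m))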
...
@@ -1129,6 +1162,7 @@ class Optimizer:
        new_net = self.fold_batch_norm()
        return new_net


def add_shape_info(input_graph_def, input_nodes, input_shapes):
    inputs_replaced_graph = graph_pb2.GraphDef()
    for node in input_graph_def.node:
...
@@ -1138,7 +1172,8 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
            placeholder_node = copy.deepcopy(node)
            placeholder_node.attr.clear()
            placeholder_node.attr['shape'].shape.dim.extend([
                tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in input_shape
            ])
            placeholder_node.attr['dtype'].CopyFrom(node.attr['dtype'])
            inputs_replaced_graph.node.extend([placeholder_node])
...
@@ -1147,7 +1182,8 @@ def add_shape_info(input_graph_def, input_nodes, input_shapes):
    return inputs_replaced_graph


def convert_to_mace_pb(model_file, input_node, input_shape, output_node, data_type, device, winograd):
    net_def = mace_pb2.NetDef()
    dt = data_type_map[data_type]
...
@@ -1165,7 +1201,8 @@ def convert_to_mace_pb(model_file, input_node, input_shape, output_node, data_ty
    output_nodes = [x for x in output_node.split(',')]
    assert len(input_nodes) == len(input_shapes)
    input_graph_def = add_shape_info(input_graph_def, input_nodes, input_shapes)
    with tf.Session() as session:
        with session.graph.as_default() as graph:
            tf.import_graph_def(input_graph_def, name="")
...
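add_shape_info above rewrites each input Placeholder so it carries a static shape before the graph is re-imported. A minimal sketch of stamping a shape onto a NodeDef with the same proto calls; the node name, dtype and shape here are made up for illustration:

    from tensorflow.core.framework import (graph_pb2, node_def_pb2,
                                           tensor_shape_pb2, types_pb2)

    # Build a throwaway Placeholder NodeDef and give it a static shape the
    # same way add_shape_info fills placeholder_node.attr['shape'].
    placeholder_node = node_def_pb2.NodeDef(name='input_node', op='Placeholder')
    placeholder_node.attr['dtype'].type = types_pb2.DT_FLOAT
    placeholder_node.attr['shape'].shape.dim.extend([
        tensor_shape_pb2.TensorShapeProto.Dim(size=i) for i in [1, 64, 64, 3]
    ])

    graph_def = graph_pb2.GraphDef()
    graph_def.node.extend([placeholder_node])
    print(graph_def)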
mace/python/tools/tf_dsp_converter_lib.py
View file @ 58f2516e
...
@@ -6,8 +6,10 @@ from dsp_ops import DspOps
from mace.python.tools import graph_util
from mace.python.tools.convert_util import tf_dtype_2_mace_dtype

# converter --input ../libcv/quantized_model.pb \
# --output quantized_model_dsp.pb \
# --runtime dsp --input_node input_node \
# --output_node output_node

padding_mode = {
    'NA': 0,
...
@@ -18,24 +20,29 @@ padding_mode = {
    'SAME_CAFFE': 5
}


def get_tensor_name_from_op(op_name, port):
    return op_name + ':' + str(port)


def get_node_from_map(op_map, op_or_tensor_name):
    op_name = op_or_tensor_name.split(':')[0]
    return op_map[op_name]


def get_op_and_port_from_tensor(tensor_name):
    op, port = tensor_name.split(':')
    port = int(port)
    return op, port


def max_elem_size(tensor):
    if len(tensor.shape.as_list()) == 0:
        return tensor.dtype.size
    else:
        return reduce(mul, tensor.shape.as_list()) * tensor.dtype.size


def find_dtype(tensor_dtype):
    if tensor_dtype == tf.float32:
        return mace_pb2.DT_FLOAT
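max_elem_size above sizes an output buffer as the product of the tensor's static shape times the element size. A standalone sketch of the same computation, using plain shape lists instead of tf tensors (the shapes and dtype size are illustrative):

    from functools import reduce
    from operator import mul

    def max_elem_size(shape, dtype_size):
        # Scalar tensors have an empty shape list; they still hold one element.
        if len(shape) == 0:
            return dtype_size
        return reduce(mul, shape) * dtype_size

    # A float32 tensor of shape [1, 64, 64, 32] needs 1*64*64*32*4 bytes.
    print(max_elem_size([1, 64, 64, 32], 4))  # 524288
    print(max_elem_size([], 4))               # 4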
...
@@ -46,20 +53,24 @@ def find_dtype(tensor_dtype):
    else:
        raise Exception('Unsupported data type: ', tensor_dtype)


def has_padding_and_strides(op):
    return 'padding' in op.node_def.attr and 'strides' in op.node_def.attr


def is_node_flatten_reshape(op):
    return op.type == 'Reshape' and len(op.outputs[0].shape) == 1


def get_input_tensor(op, index):
    input_tensor = op.inputs[index]
    if input_tensor.op.type == 'Reshape':
        input_tensor = get_input_tensor(input_tensor.op, 0)
    return input_tensor


def add_shape_const_node(net_def, op, values, name):
    print('Add const node: ', op.name + '/' + name)
    tensor = net_def.tensors.add()
    node_name = op.name + '/' + name
    tensor.name = node_name + ':0'
...
@@ -69,8 +80,8 @@ def add_shape_const_node(net_def, op, values, name):

def convert_op_outputs(mace_op_def, tf_op):
    mace_op_def.output_type.extend(
        [tf_dtype_2_mace_dtype(output.dtype) for output in tf_op.outputs])
    output_shapes = []
    for output in tf_op.outputs:
        output_shape = mace_pb2.OutputShape()
...
@@ -81,13 +92,13 @@ def convert_op_outputs(mace_op_def, tf_op):

def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
    first_op = unresolved_ops[0]
    print('Op: ', first_op.name, first_op.type, first_op.outputs[0].shape)
    if first_op.name in resolved_ops:
        pass
    elif first_op.type == 'Const':
        print('Add const node: ', first_op.name)
        tf_tensor = first_op.outputs[0].eval()
        tensor = net_def.tensors.add()
        tensor.name = first_op.outputs[0].name
...
@@ -112,8 +123,8 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
    if len(first_op.outputs) > 0 and first_op.type == 'Dequantize' \
            and len(first_op.outputs[0].consumers()) > 0 \
            and (first_op.outputs[0].consumers()[0].type == 'SpaceToBatchND' or
                 first_op.outputs[0].consumers()[0].type == 'BatchToSpaceND'):
        input_tensor = first_op.inputs[0]
        min_tensor = first_op.inputs[1]
        max_tensor = first_op.inputs[2]
...
@@ -133,13 +144,17 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
        op_def.input.append(input_tensor.name)
        op_def.input.extend([t.name for t in s2b_op.inputs[1:]])
        op_def.input.extend([min_tensor.name, max_tensor.name])
        op_def.out_max_byte_size.extend(
            [max_elem_size(out) for out in quantize_op.outputs])
        convert_op_outputs(op_def, quantize_op)
    elif len(first_op.outputs) > 0 and \
            first_op.type == 'QuantizedReshape' and \
            len(first_op.outputs[0].consumers()) > 0 and \
            first_op.outputs[0].consumers()[0].type == 'Dequantize' and \
            len(first_op.outputs[0].consumers()[0].outputs[0].consumers()) \
            > 0 and \
            first_op.outputs[0].consumers()[0].outputs[0].consumers()[0].type \
            == 'Softmax':
        input_tensor = first_op.inputs[0]
        min_tensor = first_op.inputs[2]
        max_tensor = first_op.inputs[3]
...
@@ -161,12 +176,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
        op_def.name = quantize_reshape_op.name
        op_def.type = dsp_ops.map_nn_op('QuantizedSoftmax')
        op_def.input.extend(
            [input_tensor.name, min_tensor.name, max_tensor.name])
        op_def.out_max_byte_size.extend(
            [max_elem_size(out) for out in quantize_reshape_op.outputs])
        convert_op_outputs(op_def, quantize_reshape_op)
    elif len(first_op.outputs) > 0 and first_op.type == 'Dequantize' and \
            len(first_op.outputs[0].consumers()) > 0 and \
            first_op.outputs[0].consumers()[0].type == 'Tanh':
        input_tensor = first_op.inputs[0]
        min_tensor = first_op.inputs[1]
        max_tensor = first_op.inputs[2]
...
@@ -186,18 +203,24 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
            op_def.name = quantize_op.name
            op_def.type = dsp_ops.map_nn_op('Quantized' + tanh_op.type)
            op_def.input.extend(
                [input_tensor.name, min_tensor.name, max_tensor.name])
            op_def.out_max_byte_size.extend(
                [max_elem_size(out) for out in quantize_op.outputs])
            convert_op_outputs(op_def, quantize_op)
        # tanh is last op
        else:
            op_def.name = tanh_op.name + '/QuantizedTanh'
            op_def.type = dsp_ops.map_nn_op('Quantized' + tanh_op.type)
            op_def.input.extend(
                [input_tensor.name, min_tensor.name, max_tensor.name])
            op_def.out_max_byte_size.extend([max_elem_size(input_tensor),
                                             max_elem_size(min_tensor),
                                             max_elem_size(max_tensor)])
            op_def.output_type.extend(
                [mace_pb2.DT_UINT8, mace_pb2.DT_FLOAT, mace_pb2.DT_FLOAT])
            output_shapes = []
            for output in first_op.inputs:
                output_shape = mace_pb2.OutputShape()
...
@@ -208,31 +231,39 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
            new_tanh_op_def = net_def.op.add()
            new_tanh_op_def.name = tanh_op.name
            new_tanh_op_def.type = dsp_ops.map_nn_op('Dequantize')
            new_tanh_op_def.input.extend([get_tensor_name_from_op(op_def.name, 0),
                                          get_tensor_name_from_op(op_def.name, 1),
                                          get_tensor_name_from_op(op_def.name, 2)])
            new_tanh_op_def.out_max_byte_size.extend(
                [max_elem_size(tanh_op.outputs[0])])
            convert_op_outputs(new_tanh_op_def, tanh_op)
    elif has_padding_and_strides(first_op):
        op_def.padding = padding_mode[first_op.get_attr('padding')]
        op_def.input.extend([t.name for t in first_op.inputs])
        if 'ksize' in first_op.node_def.attr:
            ksize = first_op.get_attr('ksize')
            ksize_tensor = add_shape_const_node(net_def, first_op, ksize, 'ksize')
            op_def.input.extend([ksize_tensor])
        strides = first_op.get_attr('strides')
        strides_tensor = add_shape_const_node(net_def, first_op, strides, 'strides')
        op_def.input.extend([strides_tensor])
        op_def.out_max_byte_size.extend(
            [max_elem_size(out) for out in first_op.outputs])
        convert_op_outputs(op_def, first_op)
    elif is_node_flatten_reshape(first_op):
        op_def.type = 'Flatten'
        op_def.input.extend([t.name for t in first_op.inputs])
        op_def.out_max_byte_size.extend(
            [max_elem_size(out) for out in first_op.outputs])
        convert_op_outputs(op_def, first_op)
    elif dsp_ops.has_op(first_op.type):
        op_def.input.extend([t.name for t in first_op.inputs])
        op_def.out_max_byte_size.extend(
            [max_elem_size(out) for out in first_op.outputs])
        convert_op_outputs(op_def, first_op)
    else:
        raise Exception('Unsupported op: ', first_op)
...
@@ -241,12 +272,14 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
    del unresolved_ops[0]


def add_output_node(net_def, output_node):
    op_def = net_def.op.add()
    op_def.name = '__output__'
    op_def.type = 'OUTPUT'
    op_def.input.extend([get_tensor_name_from_op(output_node, 0)])


def reverse_batch_to_space_and_biasadd(net_def):
    tensor_map = {}
    for tensor in net_def.tensors:
...
@@ -272,42 +305,65 @@ def reverse_batch_to_space_and_biasadd(net_def):
        success = False
        if op.type == 'Requantize_32to8':
            biasadd_requantize_op = op
            biasadd_op = get_node_from_map(op_map, biasadd_requantize_op.input[0])
            if biasadd_op.type == 'QuantizedBiasAdd_8p8to32':
                b2s_op = get_node_from_map(op_map, biasadd_op.input[0])
                if b2s_op.type == 'QuantizedBatchToSpaceND_8':
                    conv_requantize_op = get_node_from_map(
                        op_map, b2s_op.input[0])
                    conv_op = get_node_from_map(op_map, conv_requantize_op.input[0])
                    if conv_op.type == 'QuantizedConv2d_8x8to32':
                        new_biasadd_op = mace_pb2.OperatorDef()
                        new_biasadd_op.CopyFrom(biasadd_op)
                        new_biasadd_op.input[0] = get_tensor_name_from_op(
                            conv_requantize_op.name, 0)
                        new_biasadd_op.input[2] = get_tensor_name_from_op(
                            conv_requantize_op.name, 1)
                        new_biasadd_op.input[3] = get_tensor_name_from_op(
                            conv_requantize_op.name, 2)
                        new_biasadd_op.out_max_byte_size[0] = conv_requantize_op.out_max_byte_size[0] * 4
                        new_biasadd_requantize_op = mace_pb2.OperatorDef()
                        new_biasadd_requantize_op.CopyFrom(biasadd_requantize_op)
                        new_biasadd_requantize_op.out_max_byte_size[0] = new_biasadd_op.out_max_byte_size[0] / 4
                        new_b2s_op = mace_pb2.OperatorDef()
                        new_b2s_op.CopyFrom(b2s_op)
                        new_b2s_op.input[0] = get_tensor_name_from_op(
                            biasadd_requantize_op.name, 0)
                        new_b2s_op.input[3] = get_tensor_name_from_op(
                            biasadd_requantize_op.name, 1)
                        new_b2s_op.input[4] = get_tensor_name_from_op(
                            biasadd_requantize_op.name, 2)
                        new_ops.extend([new_biasadd_op, new_biasadd_requantize_op, new_b2s_op])
                        skip_ops = skip_ops.union([biasadd_op.name, biasadd_requantize_op.name, b2s_op.name])
                        visited_ops.add(op.name)
                        follow_ops = consumers[get_tensor_name_from_op(biasadd_requantize_op.name, 0)]
                        for follow_op in follow_ops:
                            new_follow_op = mace_pb2.OperatorDef()
                            new_follow_op.CopyFrom(follow_op)
                            for i in xrange(len(follow_op.input)):
                                for k in xrange(3):
                                    if new_follow_op.input[i] == get_tensor_name_from_op(biasadd_requantize_op.name, k):
                                        new_follow_op.input[i] = get_tensor_name_from_op(b2s_op.name, k)
                            new_ops.append(new_follow_op)
                            skip_ops.add(follow_op.name)
                            visited_ops.add(follow_op.name)
...
@@ -321,6 +377,7 @@ def reverse_batch_to_space_and_biasadd(net_def):
    return new_net_def


def add_node_id(net_def):
    node_id_counter = 0
    node_id_map = {}
...
@@ -343,9 +400,12 @@ def add_node_id(net_def):
    return net_def


def add_input_output_info(net_def, input_node, output_node, graph, dtype):
    input_tensor = graph.get_tensor_by_name(
        get_tensor_name_from_op(input_node, 0))
    output_tensor = graph.get_tensor_by_name(
        get_tensor_name_from_op(output_node, 0))
    input_info = net_def.input_info.add()
    input_info.dims.extend(input_tensor.shape.as_list())
...
@@ -353,7 +413,7 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
    if dtype == mace_pb2.DT_UINT8:
        for i in xrange(2):
            input_info = net_def.input_info.add()
            input_info.dims.extend([1, 1, 1, 1])
            input_info.data_type = mace_pb2.DT_FLOAT
    output_info = net_def.output_info.add()
...
@@ -362,11 +422,12 @@ def add_input_output_info(net_def, input_node, output_node, graph, dtype):
    if dtype == mace_pb2.DT_UINT8:
        for i in xrange(2):
            output_info = net_def.output_info.add()
            output_info.dims.extend([1, 1, 1, 1])
            output_info.data_type = mace_pb2.DT_FLOAT
    return net_def


def fuse_quantize(net_def, input_node, output_node):
    tensor_map = {}
    for tensor in net_def.tensors:
...
@@ -397,18 +458,24 @@ def fuse_quantize(net_def, input_node, output_node):
            elif o.type == 'Quantize':
                quantize_op = o
        if quantize_op is not None:
            minf_op, maxf_op = consumers[get_tensor_name_from_op(
                flatten_op.name, 0)]
            skip_ops = skip_ops.union(
                [flatten_op.name, minf_op.name, maxf_op.name])
            skip_tensors = skip_tensors.union(
                [flatten_op.input[1], minf_op.input[1], maxf_op.input[1]])
            quantize_op.type = 'AutoQuantize'
            del quantize_op.input[1:]
    new_net_def = mace_pb2.NetDef()
    new_net_def.tensors.extend([tensor for tensor in net_def.tensors if tensor.name not in skip_tensors])
    new_net_def.op.extend([op for op in net_def.op if op.name not in skip_ops])
    new_net_def.op.extend(new_ops)
    return new_net_def


def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
    """
    nnlib does not have batch norm, so use tensorflow optimizer to fold
...
@@ -432,12 +499,14 @@ def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
        # convert const node
        unresolved_ops = [op for op in ops if op.type == 'Const']
        while len(unresolved_ops) > 0:
            convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops)
        # convert op node
        unresolved_ops = [op for op in ops if op.type != 'Const']
        while len(unresolved_ops) > 0:
            convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops)
        add_output_node(net_def, output_node)
        net_def = reverse_batch_to_space_and_biasadd(net_def)
...
@@ -447,11 +516,11 @@ def convert_to_mace_pb(model_file, input_node, output_node, dsp_mode):
        net_def_with_node_id = add_node_id(sorted_net_def)
        dtype = mace_pb2.DT_FLOAT
        final_net_def = add_input_output_info(net_def_with_node_id, input_node, output_node, graph, dtype)
        arg = final_net_def.arg.add()
        arg.name = 'dsp_mode'
        arg.i = dsp_mode
    return final_net_def
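The convert_to_mace_pb docstring above notes that nnlib has no batch-norm op, so batch norm has to be folded away before conversion. A hedged sketch of doing that offline with TensorFlow 1.x's graph-transform tool; the transform names are the commonly used ones, and the file paths and node names are placeholders rather than values taken from this repository:

    import tensorflow as tf
    from tensorflow.tools.graph_transforms import TransformGraph

    graph_def = tf.GraphDef()
    with tf.gfile.GFile('model.pb', 'rb') as f:      # placeholder path
        graph_def.ParseFromString(f.read())

    # Fold BatchNorm/FusedBatchNorm multipliers into the preceding conv
    # weights so the DSP converter only sees plain conv + bias.
    folded = TransformGraph(graph_def,
                            ['input_node'],          # placeholder input name
                            ['output_node'],         # placeholder output name
                            ['strip_unused_nodes',
                             'fold_old_batch_norms',
                             'fold_batch_norms'])

    with tf.gfile.GFile('model_folded.pb', 'wb') as f:
        f.write(folded.SerializeToString())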
mace/python/tools/tf_ops_stats.py
View file @ 58f2516e
...
@@ -10,18 +10,21 @@ from tensorflow import gfile
FLAGS = None


def hist_inc(hist, key):
    if key in hist:
        hist[key] += 1
    else:
        hist[key] = 1


def to_int_list(long_list):
    int_list = []
    for value in long_list:
        int_list.append(int(value))
    return int_list


def main(unused_args):
    if not FLAGS.input or not gfile.Exists(FLAGS.input):
        print('Input graph file ' + FLAGS.input + ' does not exist!')
...
@@ -49,7 +52,9 @@ def main(unused_args):
                tensor = output.eval()
                tensor_shape = list(tensor.shape)
                tensor_shapes[tensor_name] = tensor_shape
                print("Const %s: %s, %d" % (tensor_name, tensor_shape, functools.reduce(operator.mul, tensor_shape, 1)))
                if len(tensor_shape) == 1 and tensor_shape[0] < 10:
                    tensor_values[tensor_name] = list(tensor)
...
@@ -65,11 +70,16 @@ def main(unused_args):
                if input_name.endswith('weights/read:0'):
                    ksize = input.shape.as_list()
                    break
                if input_name.endswith('weights:0') and input_name in tensor_shapes:
                    ksize = tensor_shapes[input_name]
                    break
            print(
                '%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s' %
                (op.type, padding, strides, ksize, data_format,
                 op.inputs[0].shape, op.outputs[0].shape))
            key = '%s(padding=%s, strides=%s, ksize=%s, format=%s)' % (
                op.type, padding, strides, ksize, data_format)
            hist_inc(stats, key)
        elif op.type in ['FusedResizeAndPadConv2D']:
            padding = op.get_attr('padding')
...
@@ -78,20 +88,25 @@ def main(unused_args):
            ksize = 'Unknown'
            for input in op.inputs:
                input_name = input.name
                if input_name.endswith('weights:0') and input_name in tensor_shapes:
                    ksize = tensor_shapes[input_name]
                    break
            key = '%s(padding=%s, strides=%s, ksize=%s, ' \
                'resize_align_corners=%s)' % (op.type, padding, strides, ksize, resize_align_corners)
            hist_inc(stats, key)
        elif op.type in ['ResizeBilinear']:
            align_corners = op.get_attr('align_corners')
            size = 'Unknown'
            for input in op.inputs:
                input_name = input.name
                if input_name.endswith('size:0') and input_name in tensor_values:
                    size = tensor_values[input_name]
                    break
            key = '%s(size=%s, align_corners=%s)' % (op.type, size, align_corners)
            print(key)
            hist_inc(stats, key)
        elif op.type in ['AvgPool', 'MaxPool']:
...
@@ -99,38 +114,47 @@ def main(unused_args):
            strides = to_int_list(op.get_attr('strides'))
            ksize = to_int_list(op.get_attr('ksize'))
            data_format = op.get_attr('data_format')
            key = '%s(padding=%s, strides=%s, ksize=%s)' % (op.type, padding, strides, ksize)
            hist_inc(stats, key)
        elif op.type in ['SpaceToBatchND', 'BatchToSpaceND']:
            block_shape = 'Unknown'
            for input in op.inputs:
                input_name = input.name
                if input_name.endswith('block_shape:0') and input_name in tensor_values:
                    block_shape = tensor_values[input_name]
                    break
            paddings = 'Unknown'
            for input in op.inputs:
                input_name = input.name
                if input_name.endswith('paddings:0') and input_name in tensor_values:
                    paddings = tensor_values[input_name]
                    break
            crops = 'Unknown'
            for input in op.inputs:
                input_name = input.name
                if input_name.endswith('crops:0') and input_name in tensor_values:
                    paddings = tensor_values[input_name]
                    break
            if op.type == 'SpaceToBatchND':
                key = '%s(block_shape=%s, paddings=%s)' % (op.type, block_shape, paddings)
            else:
                key = '%s(block_shape=%s, crops=%s)' % (op.type, block_shape, crops)
            print(key)
            hist_inc(stats, key)
        elif op.type == 'Pad':
            paddings = 'Unknown'
            for input in op.inputs:
                input_name = input.name
                if input_name.endswith('paddings:0') and input_name in tensor_values:
                    paddings = tensor_values[input_name]
                    break
            key = '%s(paddings=%s)' % (op.type, paddings)
...
@@ -142,6 +166,7 @@ def main(unused_args):
    for key, value in sorted(six.iteritems(stats)):
        print('%s: %d' % (key, value))


def parse_args():
    '''Parses command line arguments.'''
    parser = argparse.ArgumentParser()
...
@@ -152,6 +177,7 @@ def parse_args():
        help='TensorFlow \'GraphDef\' file to load.')
    return parser.parse_known_args()


if __name__ == '__main__':
    FLAGS, unparsed = parse_args()
    main(unused_args=[sys.argv[0]] + unparsed)
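The stats pass above just histograms op "signatures" (type plus formatted attributes) with hist_inc. A tiny standalone illustration of that counting pattern, with made-up keys:

    def hist_inc(hist, key):
        if key in hist:
            hist[key] += 1
        else:
            hist[key] = 1

    stats = {}
    # Keys mimic the formatted signatures built in main(); values are counts.
    for key in ["Conv2D(padding=SAME, strides=[1, 1, 1, 1])",
                "MaxPool(padding=VALID, strides=[1, 2, 2, 1])",
                "Conv2D(padding=SAME, strides=[1, 1, 1, 1])"]:
        hist_inc(stats, key)

    for key, value in sorted(stats.items()):
        print('%s: %d' % (key, value))
    # Conv2D(padding=SAME, strides=[1, 1, 1, 1]): 2
    # MaxPool(padding=VALID, strides=[1, 2, 2, 1]): 1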
tools/bazel_adb_run.py
View file @ 58f2516e
...
@@ -7,7 +7,6 @@
# --target=//mace/ops:ops_test
# --stdout_processor=stdout_processor

import argparse
import random
import re
...
@@ -15,15 +14,18 @@ import sys
import sh_commands


def stdout_processor(stdout, device_properties, abi):
    pass


def ops_test_stdout_processor(stdout, device_properties, abi):
    stdout_lines = stdout.split("\n")
    for line in stdout_lines:
        if "Aborted" in line or "FAILED" in line:
            raise Exception("Command failed")


def ops_benchmark_stdout_processor(stdout, device_properties, abi):
    stdout_lines = stdout.split("\n")
    metrics = {}
...
@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
        line = line.strip()
        parts = line.split()
        if len(parts) == 5 and parts[0].startswith("BM_"):
            metrics["%s.time_ms" % parts[0]] = str(float(parts[1]) / 1e6)
            metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
            metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]
    platform = device_properties["ro.board.platform"].replace(" ", "-")
    model = device_properties["ro.product.model"].replace(" ", "-")
    tags = {"ro.board.platform": platform,
            "ro.product.model": model,
            "abi": abi
            }
    sh_commands.falcon_push_metrics(metrics, tags=tags,
                                    endpoint="mace_ops_benchmark")


def parse_args():
    """Parses command line arguments."""
...
@@ -57,22 +62,16 @@ def parse_args():
        "--target_socs",
        type=str,
        default="all",
        help="SoCs (ro.board.platform from getprop) to build, "
        "comma seperated list or all/random")
    parser.add_argument(
        "--target", type=str, default="//...", help="Bazel target to build")
    parser.add_argument(
        "--run_target",
        type=bool,
        default=False,
        help="Whether to run the target")
    parser.add_argument(
        "--args", type=str, default="", help="Command args")
    parser.add_argument(
        "--stdout_processor",
        type=str,
...
@@ -80,6 +79,7 @@ def parse_args():
        help="Stdout processing function, default: stdout_processor")
    return parser.parse_known_args()


def main(unused_args):
    target_socs = None
    if FLAGS.target_socs != "all" and FLAGS.target_socs != "random":
...
@@ -101,17 +101,25 @@ def main(unused_args):
            sh_commands.bazel_build(target, abi=target_abi)
            if FLAGS.run_target:
                for serialno in target_devices:
                    if target_abi not in set(
                            sh_commands.adb_supported_abis(serialno)):
                        print("Skip device %s which does not support ABI %s" %
                              (serialno, target_abi))
                        continue
                    stdouts = sh_commands.adb_run(serialno, host_bin_path, bin_name,
                                                  args=FLAGS.args,
                                                  opencl_profiling=1,
                                                  vlog_level=0,
                                                  device_bin_path="/data/local/tmp/mace",
                                                  out_of_range_check=1)
                    device_properties = sh_commands.adb_getprop_by_serialno(
                        serialno)
                    globals()[FLAGS.stdout_processor](stdouts, device_properties,
                                                      target_abi)


if __name__ == "__main__":
    FLAGS, unparsed = parse_args()
...
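ops_benchmark_stdout_processor above expects the usual benchmark output columns: a BM_* name, a time in nanoseconds, an iteration count, input MB/s and GMACC/s. A self-contained sketch of that parsing on a fabricated stdout dump (the benchmark names and numbers are made up):

    # Sample benchmark output; real lines come from the ops_benchmark binary.
    stdout = """
    BM_CONV_2D_1_64_64_32  1200000  583  52.1  3.4
    BM_RELU_1_64_64_32       90000  7777  210.0  0.0
    not a benchmark line
    """

    metrics = {}
    for line in stdout.split("\n"):
        parts = line.strip().split()
        if len(parts) == 5 and parts[0].startswith("BM_"):
            metrics["%s.time_ms" % parts[0]] = str(float(parts[1]) / 1e6)
            metrics["%s.input_mb_per_sec" % parts[0]] = parts[3]
            metrics["%s.gmacc_per_sec" % parts[0]] = parts[4]

    print(metrics["BM_CONV_2D_1_64_64_32.time_ms"])  # 1.2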
tools/falcon_cli.py
View file @ 58f2516e
#-*- coding:utf8 -*-

import json
import socket
import itertools


class FalconCli(object):
    def __init__(self, addr, debug=True, buf_size=1000):
        self.socket_ = socket.create_connection(addr)
        self.stream = self.socket_.makefile()
...
@@ -16,16 +16,19 @@ class FalconCli(object):
        self.stream.close()

    @classmethod
    def connect(cls, server="transfer.falcon.miliao.srv", port=8433, debug=True, buf_size=1000):
        try:
            return FalconCli((server, port), debug, buf_size)
        except socket.error, exc:
            print "error: connect to %s:%s error: %s" % (server, port, exc)

    def call(self, name, *params):
        request = dict(
            id=next(self.id_counter),
            params=list(params),
            method=name)
        payload = json.dumps(request).encode()
        if self.debug:
            print "--> req:", payload
...
@@ -49,7 +52,7 @@ class FalconCli(object):
        resp = []
        while True:
            buf = lines[s:s + self.buf_size]
            s = s + self.buf_size
            if len(buf) == 0:
                break
...
@@ -57,4 +60,3 @@ class FalconCli(object):
            resp.append(r)
        return resp
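FalconCli.call builds a plain JSON-RPC-style request (incrementing id, params list, method name) and ships it over the socket stream. A standalone sketch of just the payload construction, with no network I/O; the method name and metric values are illustrative only:

    import itertools
    import json

    id_counter = itertools.count()

    def build_request(name, *params):
        # Mirrors the dict built in FalconCli.call.
        request = dict(id=next(id_counter), params=list(params), method=name)
        return json.dumps(request).encode()

    payload = build_request("Transfer.Update",
                            [{"metric": "model.init_ms", "value": 42}])
    print(payload)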
tools/generate_data.py
View file @ 58f2516e
...
@@ -11,13 +11,16 @@ import re
# --input_file input_file
#


def generate_data(name, shape):
    np.random.seed()
    data = np.random.random(shape) * 2 - 1
    input_file_name = FLAGS.input_file + "_" + re.sub('[^0-9a-zA-Z]+', '_', name)
    print 'Generate input file: ', input_file_name
    data.astype(np.float32).tofile(input_file_name)


def main(unused_args):
    input_names = [name for name in FLAGS.input_node.split(',')]
    input_shapes = [shape for shape in FLAGS.input_shape.split(':')]
...
@@ -27,29 +30,21 @@ def main(unused_args):
        generate_data(input_names[i], shape)
    print "Generate input file done."


def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--input_file", type=str, default="", help="input file.")
    parser.add_argument(
        "--input_node", type=str, default="input_node", help="input node")
    parser.add_argument(
        "--input_shape", type=str, default="1,64,64,3", help="input shape.")
    return parser.parse_known_args()


if __name__ == '__main__':
    FLAGS, unparsed = parse_args()
    main(unused_args=[sys.argv[0]] + unparsed)
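generate_data above fills each input with uniform noise in [-1, 1) and dumps it as raw float32, one file per sanitized input name. A standalone sketch of the same idea with hypothetical names and shapes:

    import re
    import numpy as np

    def generate_data(prefix, name, shape):
        # Uniform noise in [-1, 1), written as raw float32 so the run and
        # validation tools can read it back with numpy.fromfile.
        data = np.random.random(shape) * 2 - 1
        file_name = prefix + "_" + re.sub('[^0-9a-zA-Z]+', '_', name)
        data.astype(np.float32).tofile(file_name)
        return file_name

    # Hypothetical input node and shape, mirroring --input_node/--input_shape.
    path = generate_data('/tmp/mace_input', 'input_node:0', [1, 64, 64, 3])
    loaded = np.fromfile(path, dtype=np.float32).reshape(1, 64, 64, 3)
    print(loaded.shape, loaded.min() >= -1, loaded.max() < 1)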
tools/mace_tools.py
View file @ 58f2516e
...
@@ -34,7 +34,8 @@ def run_command(command):
    print("Stderr msg:\n{}".format(err))
    if result.returncode != 0:
        raise Exception("Exit not 0 from bash with code: {}, command: {}".format(
            result.returncode, command))
...
@@ -63,10 +64,12 @@ def generate_version_code():
    command = "bash tools/generate_version_code.sh"
    run_command(command)


def generate_opencl_source_code():
    command = "bash tools/generate_opencl_code.sh source"
    run_command(command)


def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
    cl_bin_dirs = []
    for d in model_output_dirs:
...
@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
        'binary', target_soc, cl_bin_dirs_str, int(pull_or_not))
    run_command(command)


def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
    cl_bin_dirs = []
    for d in model_output_dirs:
...
@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
        target_soc, cl_bin_dirs_str, int(pull_or_not))
    run_command(command)


def generate_code(target_soc, model_output_dirs, pull_or_not):
    generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not)
    generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not)


def clear_env(target_soc):
    command = "bash tools/clear_env.sh {}".format(target_soc)
    run_command(command)


def input_file_name(input_name):
    return os.environ['INPUT_FILE_NAME'] + '_' + \
        re.sub('[^0-9a-zA-Z]+', '_', input_name)


def generate_random_input(target_soc, model_output_dir,
                          input_names, input_files):
    generate_data_or_not = True
    command = "bash tools/validate_tools.sh {} {} {}".format(
        target_soc, model_output_dir, int(generate_data_or_not))
...
@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
    else:
        input_name_list.append(input_names)
    if len(input_file_list) != len(input_name_list):
        raise Exception('If input_files set, the input files should '
                        'match the input names.')
    for i in range(len(input_file_list)):
        if input_file_list[i] is not None:
            dst_input_file = model_output_dir + '/' + input_file_name(input_name_list[i])
            if input_file_list[i].startswith("http://") or \
                    input_file_list[i].startswith("https://"):
                urllib.urlretrieve(input_file_list[i], dst_input_file)
            else:
                shutil.copy(input_file_list[i], dst_input_file)


def generate_model_code():
    command = "bash tools/generate_model_code.sh"
    run_command(command)
...
@@ -155,10 +166,17 @@ def tuning_run(model_name,
    # TODO(yejianwu) refactoring the hackish code
    stdout_buff = []
    process_output = sh_commands.make_output_processor(stdout_buff)
    p = sh.bash(
        "tools/tuning_run.sh",
        target_soc,
        model_output_dir,
        running_round,
        int(tuning),
        restart_round,
        option_args,
        _out=process_output,
        _bg=True,
        _err_to_out=True)
    p.wait()
    metrics = {}
    for line in stdout_buff:
...
@@ -166,18 +184,23 @@ def tuning_run(model_name,
        parts = line.split()
        if len(parts) == 6 and parts[0].startswith("time"):
            metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
            metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
            metrics["%s.init_ms" % model_name] = str(float(parts[3]))
            metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
            if float(parts[5]) > 0:
                metrics["%s.avg_latency_ms" % model_name] = str(
                    float(parts[5]))
    tags = {"ro.board.platform": target_soc,
            "abi": target_abi,
            # "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
            # TODO(yejianwu) change this to source/binary
            "round": running_round,
            "tuning": tuning
            }
    sh_commands.falcon_push_metrics(metrics, endpoint="mace_model_benchmark",
                                    tags=tags)
def
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
=
''
):
def
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
=
''
):
command
=
"bash tools/benchmark.sh {} {}
\"
{}
\"
"
.
format
(
command
=
"bash tools/benchmark.sh {} {}
\"
{}
\"
"
.
format
(
...
@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
...
@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
def
run_model
(
model_name
,
target_runtime
,
target_abi
,
target_soc
,
def
run_model
(
model_name
,
target_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
running_round
,
restart_round
,
option_args
):
model_output_dir
,
running_round
,
restart_round
,
option_args
):
tuning_run
(
model_name
,
target_runtime
,
target_abi
,
target_soc
,
tuning_run
(
model_name
,
target_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
running_round
,
False
,
model_output_dir
,
running_round
,
False
,
restart_round
,
restart_round
,
option_args
)
option_args
)
def
generate_production_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
def
generate_production_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
...
@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
...
@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
build_production_code
()
build_production_code
()
model_output_dirs_str
=
","
.
join
(
model_output_dirs
)
model_output_dirs_str
=
","
.
join
(
model_output_dirs
)
command
=
"bash tools/merge_libs.sh {} {} {}"
.
format
(
target_soc
,
output_dir
,
command
=
"bash tools/merge_libs.sh {} {} {}"
.
format
(
model_output_dirs_str
)
target_soc
,
output_dir
,
model_output_dirs_str
)
run_command
(
command
)
run_command
(
command
)
...
@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
...
@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
command
=
"bash tools/packaging_lib.sh {}"
.
format
(
output_dir
)
command
=
"bash tools/packaging_lib.sh {}"
.
format
(
output_dir
)
run_command
(
command
)
run_command
(
command
)
def
download_model_files
(
model_file_path
,
def
download_model_files
(
model_file_path
,
model_output_dir
,
model_output_dir
,
weight_file_path
=
""
):
weight_file_path
=
""
):
...
@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
...
@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
if
weight_file_path
.
startswith
(
"http://"
)
or
\
if
weight_file_path
.
startswith
(
"http://"
)
or
\
weight_file_path
.
startswith
(
"https://"
):
weight_file_path
.
startswith
(
"https://"
):
os
.
environ
[
os
.
environ
[
"WEIGHT_FILE_PATH"
]
=
model_output_dir
+
"/model.caffemodel"
"WEIGHT_FILE_PATH"
]
=
model_output_dir
+
"/model.caffemodel"
urllib
.
urlretrieve
(
weight_file_path
,
os
.
environ
[
"WEIGHT_FILE_PATH"
])
urllib
.
urlretrieve
(
weight_file_path
,
os
.
environ
[
"WEIGHT_FILE_PATH"
])
def
md5sum
(
str
):
def
md5sum
(
str
):
md5
=
hashlib
.
md5
()
md5
=
hashlib
.
md5
()
...
@@ -306,7 +329,10 @@ def parse_args():
...
@@ -306,7 +329,10 @@ def parse_args():
default
=
10
,
default
=
10
,
help
=
"The model throughput test running seconds."
)
help
=
"The model throughput test running seconds."
)
parser
.
add_argument
(
parser
.
add_argument
(
"--restart_round"
,
type
=
int
,
default
=
1
,
help
=
"The model restart round."
)
"--restart_round"
,
type
=
int
,
default
=
1
,
help
=
"The model restart round."
)
parser
.
add_argument
(
parser
.
add_argument
(
"--tuning"
,
type
=
"bool"
,
default
=
"true"
,
help
=
"Tune opencl params."
)
"--tuning"
,
type
=
"bool"
,
default
=
"true"
,
help
=
"Tune opencl params."
)
parser
.
add_argument
(
parser
.
add_argument
(
...
@@ -321,14 +347,16 @@ def parse_args():
...
@@ -321,14 +347,16 @@ def parse_args():
help
=
"SoCs to build, comma seperated list (getprop ro.board.platform)"
)
help
=
"SoCs to build, comma seperated list (getprop ro.board.platform)"
)
return
parser
.
parse_known_args
()
return
parser
.
parse_known_args
()
def
set_environment
(
configs
):
def
set_environment
(
configs
):
os
.
environ
[
"EMBED_MODEL_DATA"
]
=
str
(
configs
[
"embed_model_data"
])
os
.
environ
[
"EMBED_MODEL_DATA"
]
=
str
(
configs
[
"embed_model_data"
])
os
.
environ
[
"VLOG_LEVEL"
]
=
str
(
configs
[
"vlog_level"
])
os
.
environ
[
"VLOG_LEVEL"
]
=
str
(
configs
[
"vlog_level"
])
os
.
environ
[
"PROJECT_NAME"
]
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
os
.
environ
[
"PROJECT_NAME"
]
=
os
.
path
.
splitext
(
FLAGS
.
config
))[
0
]
os
.
path
.
basename
(
FLAGS
.
config
))[
0
]
os
.
environ
[
'INPUT_FILE_NAME'
]
=
"model_input"
os
.
environ
[
'INPUT_FILE_NAME'
]
=
"model_input"
os
.
environ
[
'OUTPUT_FILE_NAME'
]
=
"model_out"
os
.
environ
[
'OUTPUT_FILE_NAME'
]
=
"model_out"
def
main
(
unused_args
):
def
main
(
unused_args
):
configs
=
parse_model_configs
()
configs
=
parse_model_configs
()
...
@@ -343,13 +371,16 @@ def main(unused_args):
...
@@ -343,13 +371,16 @@ def main(unused_args):
if
not
os
.
path
.
exists
(
FLAGS
.
output_dir
):
if
not
os
.
path
.
exists
(
FLAGS
.
output_dir
):
os
.
makedirs
(
FLAGS
.
output_dir
)
os
.
makedirs
(
FLAGS
.
output_dir
)
elif
os
.
path
.
exists
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
"libmace"
)):
elif
os
.
path
.
exists
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
"libmace"
)):
shutil
.
rmtree
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
shutil
.
rmtree
(
os
.
makedirs
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
os
.
makedirs
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
generate_version_code
()
generate_version_code
()
generate_opencl_source_code
()
generate_opencl_source_code
()
option_args
=
' '
.
join
([
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
option_args
=
' '
.
join
(
[
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
available_socs
=
sh_commands
.
adb_get_all_socs
()
available_socs
=
sh_commands
.
adb_get_all_socs
()
target_socs
=
available_socs
target_socs
=
available_socs
...
@@ -362,10 +393,10 @@ def main(unused_args):
...
@@ -362,10 +393,10 @@ def main(unused_args):
target_socs
=
target_socs
&
socs
target_socs
=
target_socs
&
socs
missing_socs
=
socs
.
difference
(
target_socs
)
missing_socs
=
socs
.
difference
(
target_socs
)
if
len
(
missing_socs
)
>
0
:
if
len
(
missing_socs
)
>
0
:
print
(
"Error: devices with SoCs are not connected %s"
%
missing_socs
)
print
(
"Error: devices with SoCs are not connected %s"
%
missing_socs
)
exit
(
1
)
exit
(
1
)
for
target_soc
in
target_socs
:
for
target_soc
in
target_socs
:
for
target_abi
in
configs
[
"target_abis"
]:
for
target_abi
in
configs
[
"target_abis"
]:
global_runtime
=
get_global_runtime
(
configs
)
global_runtime
=
get_global_runtime
(
configs
)
...
@@ -373,28 +404,27 @@ def main(unused_args):
...
@@ -373,28 +404,27 @@ def main(unused_args):
os
.
environ
[
"TARGET_ABI"
]
=
target_abi
os
.
environ
[
"TARGET_ABI"
]
=
target_abi
model_output_dirs
=
[]
model_output_dirs
=
[]
for
model_name
in
configs
[
"models"
]:
for
model_name
in
configs
[
"models"
]:
print
'======================='
,
model_name
,
'====
==================='
print
'==================='
,
model_name
,
'
==================='
# Transfer params by environment
# Transfer params by environment
os
.
environ
[
"MODEL_TAG"
]
=
model_name
os
.
environ
[
"MODEL_TAG"
]
=
model_name
model_config
=
configs
[
"models"
][
model_name
]
model_config
=
configs
[
"models"
][
model_name
]
input_file_list
=
model_config
.
get
(
"validation_inputs_data"
,
[])
input_file_list
=
model_config
.
get
(
"validation_inputs_data"
,
[])
for
key
in
model_config
:
for
key
in
model_config
:
if
key
in
[
'input_nodes'
,
'output_nodes'
]
and
isinstance
(
if
key
in
[
'input_nodes'
,
'output_nodes'
]
and
isinstance
(
model_config
[
key
],
list
):
model_config
[
key
],
list
):
os
.
environ
[
key
.
upper
()]
=
","
.
join
(
model_config
[
key
])
os
.
environ
[
key
.
upper
()]
=
","
.
join
(
model_config
[
key
])
elif
key
in
[
'input_shapes'
,
'output_shapes'
]
and
isinstance
(
elif
key
in
[
'input_shapes'
,
'output_shapes'
model_config
[
key
],
list
):
]
and
isinstance
(
model_config
[
key
],
list
):
os
.
environ
[
key
.
upper
()]
=
":"
.
join
(
model_config
[
key
])
os
.
environ
[
key
.
upper
()]
=
":"
.
join
(
model_config
[
key
])
else
:
else
:
os
.
environ
[
key
.
upper
()]
=
str
(
model_config
[
key
])
os
.
environ
[
key
.
upper
()]
=
str
(
model_config
[
key
])
# Create model build directory
# Create model build directory
model_path_digest
=
md5sum
(
model_config
[
"model_file_path"
])
model_path_digest
=
md5sum
(
model_config
[
"model_file_path"
])
model_output_dir
=
"%s/%s/%s/%s/%s/%s/%s"
%
(
FLAGS
.
output_dir
,
model_output_dir
=
"%s/%s/%s/%s/%s/%s/%s"
%
(
os
.
environ
[
"PROJECT_NAME"
],
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
],
"build"
,
"build"
,
model_name
,
model_name
,
model_path_digest
,
target_soc
,
target_abi
)
model_path_digest
,
target_soc
,
target_abi
)
model_output_dirs
.
append
(
model_output_dir
)
model_output_dirs
.
append
(
model_output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
...
@@ -404,22 +434,27 @@ def main(unused_args):
...
@@ -404,22 +434,27 @@ def main(unused_args):
clear_env
(
target_soc
)
clear_env
(
target_soc
)
download_model_files
(
model_config
[
"model_file_path"
],
download_model_files
(
model_config
[
"model_file_path"
],
model_output_dir
,
model_config
.
get
(
"weight_file_path"
,
""
))
model_output_dir
,
model_config
.
get
(
"weight_file_path"
,
""
))
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
\
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"run"
or
\
or
FLAGS
.
mode
==
"benchmark"
or
FLAGS
.
mode
==
"all"
:
FLAGS
.
mode
==
"validate"
or
\
FLAGS
.
mode
==
"benchmark"
or
FLAGS
.
mode
==
"all"
:
generate_random_input
(
target_soc
,
model_output_dir
,
generate_random_input
(
target_soc
,
model_output_dir
,
model_config
[
'input_nodes'
],
input_file_list
)
model_config
[
'input_nodes'
],
input_file_list
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
generate_model_code
()
generate_model_code
()
build_mace_run_prod
(
model_name
,
global_runtime
,
target_abi
,
build_mace_run_prod
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
tuning
)
target_soc
,
model_output_dir
,
FLAGS
.
tuning
)
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
\
run_model
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
FLAGS
.
mode
==
"all"
:
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
run_model
(
model_name
,
global_runtime
,
target_abi
,
option_args
)
target_soc
,
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
option_args
)
if
FLAGS
.
mode
==
"benchmark"
:
if
FLAGS
.
mode
==
"benchmark"
:
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
)
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
)
...
@@ -427,14 +462,18 @@ def main(unused_args):
...
@@ -427,14 +462,18 @@ def main(unused_args):
if
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
validate_model
(
target_soc
,
model_output_dir
)
validate_model
(
target_soc
,
model_output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"merge"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"merge"
or
\
FLAGS
.
mode
==
"all"
:
merge_libs_and_tuning_results
(
merge_libs_and_tuning_results
(
target_soc
,
FLAGS
.
output_dir
+
"/"
+
os
.
environ
[
"PROJECT_NAME"
],
target_soc
,
FLAGS
.
output_dir
+
"/"
+
os
.
environ
[
"PROJECT_NAME"
],
model_output_dirs
)
model_output_dirs
)
if
FLAGS
.
mode
==
"throughput_test"
:
if
FLAGS
.
mode
==
"throughput_test"
:
merged_lib_file
=
FLAGS
.
output_dir
+
"/%s/%s/libmace_%s.%s.a"
%
\
merged_lib_file
=
FLAGS
.
output_dir
+
\
(
os
.
environ
[
"PROJECT_NAME"
],
target_abi
,
os
.
environ
[
"PROJECT_NAME"
],
target_soc
)
"/%s/%s/libmace_%s.%s.a"
%
\
(
os
.
environ
[
"PROJECT_NAME"
],
target_abi
,
os
.
environ
[
"PROJECT_NAME"
],
target_soc
)
generate_random_input
(
target_soc
,
FLAGS
.
output_dir
,
[],
[])
generate_random_input
(
target_soc
,
FLAGS
.
output_dir
,
[],
[])
for
model_name
in
configs
[
"models"
]:
for
model_name
in
configs
[
"models"
]:
runtime
=
configs
[
"models"
][
model_name
][
"runtime"
]
runtime
=
configs
[
"models"
][
model_name
][
"runtime"
]
...
@@ -449,4 +488,3 @@ def main(unused_args):
...
@@ -449,4 +488,3 @@ def main(unused_args):
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
FLAGS
,
unparsed
=
parse_args
()
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
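The tuning_run hunks above are easier to follow when the stdout-parsing step is pulled out on its own. Below is a minimal, self-contained sketch of that pattern; the sample output line and the model tag are invented for illustration, not taken from a real run.

# Hypothetical captured output; a real run appends lines via make_output_processor.
stdout_buff = ["time 12.0 15.5 30.2 8.1 3.4\n"]
model_name = "some_model"  # hypothetical model tag

metrics = {}
for line in stdout_buff:
    parts = line.split()
    # "time" lines carry five timing fields, all in milliseconds
    if len(parts) == 6 and parts[0].startswith("time"):
        metrics["%s.create_net_ms" % model_name] = str(float(parts[1]))
        metrics["%s.mace_engine_ctor_ms" % model_name] = str(float(parts[2]))
        metrics["%s.init_ms" % model_name] = str(float(parts[3]))
        metrics["%s.warmup_ms" % model_name] = str(float(parts[4]))
        if float(parts[5]) > 0:
            metrics["%s.avg_latency_ms" % model_name] = str(float(parts[5]))
print(metrics)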
tools/sh_commands.py    View file @ 58f2516e
...
@@ -3,18 +3,22 @@ import re
import time

import falcon_cli


################################
# common
################################
def strip_invalid_utf8(str):
    return sh.iconv(str, "-c", "-t", "UTF-8")


def make_output_processor(buff):
    def process_output(line):
        print(line.strip())
        buff.append(line)

    return process_output


################################
# adb commands
################################
...
@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
    # Filter out last empty line
    return [l.strip() for l in stdout_str.split('\n') if len(l.strip()) > 0]


def adb_devices(target_socs=None):
    outputs = sh.grep(sh.adb("devices"), "^[A-Za-z0-9]\+[[:space:]]\+device$")
    raw_lists = sh.cut(outputs, "-f1")
    device_ids = adb_split_stdout(raw_lists)
-   if target_socs != None:
+   if target_socs is not None:
        target_socs_set = set(target_socs)
        target_devices = []
        for serialno in device_ids:
...
@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
    else:
        return device_ids


def adb_getprop_by_serialno(serialno):
    outputs = sh.adb("-s", serialno, "shell", "getprop")
    raw_props = adb_split_stdout(outputs)
...
@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
            props[m.group(1)] = m.group(2)
    return props


def adb_supported_abis(serialno):
    props = adb_getprop_by_serialno(serialno)
    abilist_str = props["ro.product.cpu.abilist"]
    abis = [abi.strip() for abi in abilist_str.split(',')]
    return abis


def adb_get_all_socs():
    socs = []
    for d in adb_devices():
...
@@ -62,7 +70,10 @@ def adb_get_all_socs():
        socs.append(props["ro.board.platform"])
    return set(socs)


def adb_run(serialno, host_bin_path, bin_name,
            args="",
            opencl_profiling=1,
            vlog_level=0,
...
@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
    host_bin_full_path = "%s/%s" % (host_bin_path, bin_name)
    device_bin_full_path = "%s/%s" % (device_bin_path, bin_name)
    props = adb_getprop_by_serialno(serialno)
    print("=====================================================================")
    print("Run on device: %s, %s, %s" % (serialno, props["ro.board.platform"],
                                         props["ro.product.model"]))
    sh.adb("-s", serialno, "shell", "rm -rf %s" % device_bin_path)
...
@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
    print("Push %s to %s" % (host_bin_full_path, device_bin_full_path))
    sh.adb("-s", serialno, "push", host_bin_full_path, device_bin_full_path)
    print("Run %s" % device_bin_full_path)
    stdout_buff = []
    process_output = make_output_processor(stdout_buff)
-   p = sh.adb("-s", serialno, "shell",
-              "MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
-              (out_of_range_check, opencl_profiling, vlog_level, device_bin_full_path, args),
-              _out=process_output, _bg=True, _err_to_out=True)
+   p = sh.adb(
+       "-s",
+       serialno,
+       "shell",
+       "MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
+       "MACE_CPP_MIN_VLOG_LEVEL=%d %s %s" %
+       (out_of_range_check, opencl_profiling, vlog_level,
+        device_bin_full_path, args),
+       _out=process_output,
+       _bg=True,
+       _err_to_out=True)
    p.wait()
    return "".join(stdout_buff)
...
@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
################################
def bazel_build(target, strip="always", abi="armeabi-v7a"):
    print("Build %s with ABI %s" % (target, abi))
    stdout_buff = []
    process_output = make_output_processor(stdout_buff)
-   p = sh.bazel("build",
-                "-c", "opt",
-                "--strip", strip,
+   p = sh.bazel(
+       "build",
+       "-c",
+       "opt",
+       "--strip",
+       strip,
        "--verbose_failures",
        target,
        "--crosstool_top=//external:android/crosstool",
...
@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
        "--copt=-DMACE_DISABLE_NO_TUNING_WARNING",
        "--copt=-Werror=return-type",
        "--copt=-O3",
-       "--define", "neon=true",
-       "--define", "openmp=true",
-       _out=process_output, _bg=True, _err_to_out=True)
+       "--define",
+       "neon=true",
+       "--define",
+       "openmp=true",
+       _out=process_output,
+       _bg=True,
+       _err_to_out=True)
    p.wait()
    return "".join(stdout_buff)


def bazel_target_to_bin(target):
    # change //mace/a/b:c to bazel-bin/mace/a/b/c
    prefix, bin_name = target.split(':')
...
@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
    host_bin_path = "bazel-bin/%s" % prefix
    return host_bin_path, bin_name


################################
# mace commands
################################
# TODO this should be refactored
def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    sh.python("mace/python/tools/encrypt_opencl_codegen.py",
              "--cl_kernel_dir=./mace/kernels/opencl/cl/",
              "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path)


def gen_mace_version(codegen_path="mace/codegen"):
    sh.mkdir("-p", "%s/version" % codegen_path)
    sh.bash("mace/tools/git/gen_version_source.sh",
            "%s/version/version.cc" % codegen_path)


def gen_compiled_opencl_source(codegen_path="mace/codegen"):
    sh.mkdir("-p", "%s/opencl" % codegen_path)
    sh.python("mace/python/tools/opencl_codegen.py",
              "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path)


################################
# falcon
################################
...
@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
        tags = tags + ",%s=%s" % (k, v)
    return tags


def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
-   cli = falcon_cli.FalconCli.connect(server="transfer.falcon.miliao.srv",
-                                      port=8433,
-                                      debug=False)
+   cli = falcon_cli.FalconCli.connect(
+       server="transfer.falcon.miliao.srv", port=8433, debug=False)
    ts = int(time.time())
    falcon_metrics = [{
        "endpoint": endpoint,
...
@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
        "counterType": "GAUGE"
    } for key, value in metrics.iteritems()]
    cli.update(falcon_metrics)
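The adb_run and bazel_build changes above keep the same capture pattern throughout this file: a callback created by make_output_processor is handed to the sh library as _out, the command runs in the background, and stderr is merged into stdout. A minimal sketch of that pattern, assuming only that the echo binary is on PATH:

import sh

stdout_buff = []


def process_output(line):
    # echo each line as it arrives and keep a copy for later parsing
    print(line.strip())
    stdout_buff.append(line)


# run in the background, stream output through the callback, merge stderr
p = sh.echo("hello from sh", _out=process_output, _bg=True, _err_to_out=True)
p.wait()
print("captured: %s" % "".join(stdout_buff))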
tools/validate.py    View file @ 58f2516e
...
@@ -20,29 +20,33 @@ from scipy import stats
# --input_shape 1,64,64,3 \
# --output_shape 1,64,64,2


def load_data(file):
    if os.path.isfile(file):
        return np.fromfile(file=file, dtype=np.float32)
    else:
        return np.empty([0])


def format_output_name(name):
    return re.sub('[^0-9a-zA-Z]+', '_', name)


def compare_output(output_name, mace_out_value, out_value):
    if mace_out_value.size != 0:
        out_value = out_value.reshape(-1)
        mace_out_value = mace_out_value.reshape(-1)
        assert len(out_value) == len(mace_out_value)
        similarity = (1 - spatial.distance.cosine(out_value, mace_out_value))
-       print output_name, 'MACE VS', FLAGS.platform.upper(), 'similarity: ', similarity
+       print output_name, 'MACE VS', FLAGS.platform.upper(
+       ), 'similarity: ', similarity
        if (FLAGS.mace_runtime == "cpu" and similarity > 0.999) or \
                (FLAGS.mace_runtime == "neon" and similarity > 0.999) or \
                (FLAGS.mace_runtime == "gpu" and similarity > 0.995) or \
                (FLAGS.mace_runtime == "dsp" and similarity > 0.930):
-           print '=======================Similarity Test Passed======================'
+           print '===================Similarity Test Passed=================='
        else:
-           print '=======================Similarity Test Failed======================'
+           print '===================Similarity Test Failed=================='
            sys.exit(-1)
    else:
        print '=======================Skip empty node==================='
...
@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
            tf.import_graph_def(input_graph_def, name="")
            input_dict = {}
            for i in range(len(input_names)):
                input_value = load_data(FLAGS.input_file + "_" + input_names[i])
                input_value = input_value.reshape(input_shapes[i])
                input_node = graph.get_tensor_by_name(input_names[i] + ':0')
                input_dict[input_node] = input_value
            output_nodes = []
            for name in output_names:
-               output_nodes.extend([graph.get_tensor_by_name(name + ':0')])
+               output_nodes.extend(
+                   [graph.get_tensor_by_name(name + ':0')])
            output_values = session.run(output_nodes, feed_dict=input_dict)
            for i in range(len(output_names)):
-               output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
+               output_file_name = FLAGS.mace_out_file + "_" + \
+                   format_output_name(output_names[i])
                mace_out_value = load_data(output_file_name)
                compare_output(output_names[i], mace_out_value, output_values[i])


def validate_caffe_model(input_names, input_shapes, output_names, output_shapes):
    os.environ['GLOG_minloglevel'] = '1'  # suprress Caffe verbose prints
    import caffe
    if not os.path.isfile(FLAGS.model_file):
...
@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
    for i in range(len(input_names)):
        input_value = load_data(FLAGS.input_file + "_" + input_names[i])
        input_value = input_value.reshape(input_shapes[i]).transpose((0, 3, 1, 2))
        input_blob_name = input_names[i]
        try:
            if input_names[i] in net.top_names:
...
@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
    for i in range(len(output_names)):
        value = net.blobs[net.top_names[output_names[i]][0]].data
        out_shape = output_shapes[i]
        out_shape[1], out_shape[2], out_shape[3] = out_shape[3], out_shape[1], out_shape[2]
        value = value.reshape(out_shape).transpose((0, 2, 3, 1))
        output_file_name = FLAGS.mace_out_file + "_" + format_output_name(output_names[i])
        mace_out_value = load_data(output_file_name)
        compare_output(output_names[i], mace_out_value, value)


def main(unused_args):
    input_names = [name for name in FLAGS.input_node.split(',')]
    input_shape_strs = [shape for shape in FLAGS.input_shape.split(':')]
    input_shapes = [[int(x) for x in shape.split(',')] for shape in input_shape_strs]
    output_names = [name for name in FLAGS.output_node.split(',')]
    assert len(input_names) == len(input_shapes)
...
@@ -127,18 +143,18 @@ def main(unused_args):
        validate_tf_model(input_names, input_shapes, output_names)
    elif FLAGS.platform == 'caffe':
        output_shape_strs = [shape for shape in FLAGS.output_shape.split(':')]
-       output_shapes = [[int(x) for x in shape.split(',')] for shape in output_shape_strs]
+       output_shapes = [[int(x) for x in shape.split(',')]
+                        for shape in output_shape_strs]
        validate_caffe_model(input_names, input_shapes, output_names, output_shapes)


def parse_args():
    """Parses command line arguments."""
    parser = argparse.ArgumentParser()
    parser.register("type", "bool", lambda v: v.lower() == "true")
    parser.add_argument(
        "--platform",
        type=str, default="", help="Tensorflow or Caffe.")
    parser.add_argument(
        "--model_file",
        type=str,
...
@@ -150,40 +166,22 @@ def parse_args():
        default="",
        help="caffe model file to load.")
    parser.add_argument(
        "--input_file",
        type=str, default="", help="input file.")
    parser.add_argument(
        "--mace_out_file",
        type=str,
        default="",
        help="mace output file to load.")
    parser.add_argument(
        "--mace_runtime",
        type=str, default="gpu", help="mace runtime device.")
    parser.add_argument(
        "--input_shape",
        type=str, default="1,64,64,3", help="input shape.")
    parser.add_argument(
        "--output_shape",
        type=str, default="1,64,64,2", help="output shape.")
    parser.add_argument(
        "--input_node",
        type=str, default="input_node", help="input node")
    parser.add_argument(
        "--output_node",
        type=str, default="output_node", help="output node")
    return parser.parse_known_args()
...
@@ -191,4 +189,3 @@ def parse_args():
if __name__ == '__main__':
    FLAGS, unparsed = parse_args()
    main(unused_args=[sys.argv[0]] + unparsed)
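The compare_output changes above only re-wrap the print and banner lines; the check itself is still a cosine similarity between the flattened MACE output and the framework output. A small standalone sketch of that check, with the GPU threshold copied from the code above and the arrays invented for illustration:

import numpy as np
from scipy import spatial

mace_out_value = np.array([0.1, 0.2, 0.3, 0.4], dtype=np.float32)
out_value = np.array([0.1, 0.2, 0.31, 0.39], dtype=np.float32)

# 1.0 means identical direction; the GPU runtime is accepted above 0.995
similarity = 1 - spatial.distance.cosine(out_value.reshape(-1),
                                         mace_out_value.reshape(-1))
print('similarity:', similarity)
print('passed' if similarity > 0.995 else 'failed')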
tools/wino_conv.py    View file @ 58f2516e
...
@@ -11,12 +11,8 @@ G_T = {}
# f(2, 3)
A_T[4] = np.array([[1, 1, 1, 0], [0, 1, -1, -1]]).astype(np.float32)
A[4] = np.transpose(A_T[4])
-B_T[4] = np.array([
-    [1, 0, -1, 0],
-    [0, 1, 1, 0],
-    [0, -1, 1, 0],
-    [0, 1, 0, -1]
-]).astype(np.float32)
+B_T[4] = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
+                   [0, 1, 0, -1]]).astype(np.float32)
B[4] = np.transpose(B_T[4])
G[4] = np.array([
    [1, 0, 0],
...
@@ -44,45 +40,45 @@ B_T[6] = np.array([
]).astype(np.float32)
B[6] = np.transpose(B_T[6])
G[6] = np.array([
    [1 / 4.0, 0, 0],
    [-1 / 6.0, -1 / 6.0, -1 / 6.0],
    [-1 / 6.0, 1 / 6.0, -1 / 6.0],
    [1 / 24.0, 1 / 12.0, 1 / 6.0],
    [1 / 24.0, -1 / 12.0, 1 / 6.0],
    [0, 0, 1],
]).astype(np.float32)
G_T[6] = np.transpose(G[6])

# f(6, 3)
A_T[8] = np.array([
    [1, 1, 1, 1, 1, 1, 1, 0],
    [0, 1, -1, 2, -2, 1 / 2., -1 / 2., 0],
    [0, 1, 1, 4, 4, 1 / 4., 1 / 4., 0],
    [0, 1, -1, 8, -8, 1 / 8., -1 / 8., 0],
    [0, 1, 1, 16, 16, 1 / 16., 1 / 16., 0],
    [0, 1, -1, 32, -32, 1 / 32., -1 / 32., 1],
]).astype(np.float32)
A[8] = np.transpose(A_T[8])
B_T[8] = np.array([
    [1, 0, -21 / 4., 0, 21 / 4., 0, -1, 0],
    [0, 1, 1, -17 / 4., -17 / 4., 1, 1, 0],
    [0, -1, 1, 17 / 4., -17 / 4., -1, 1, 0],
    [0, 1 / 2., 1 / 4., -5 / 2., -5 / 4., 2, 1, 0],
    [0, -1 / 2., 1 / 4., 5 / 2., -5 / 4., -2, 1, 0],
    [0, 2, 4, -5 / 2., -5, 1 / 2., 1, 0],
    [0, -2, 4, 5 / 2., -5, -1 / 2., 1, 0],
    [0, -1, 0, 21 / 4., 0, -21 / 4., 0, 1],
]).astype(np.float32)
B[8] = np.transpose(B_T[8])
G[8] = np.array([
    [1, 0, 0],
    [-2 / 9., -2 / 9., -2 / 9.],
    [-2 / 9., 2 / 9., -2 / 9.],
    [1 / 90., 1 / 45., 2 / 45.],
    [1 / 90., -1 / 45., 2 / 45.],
    [32 / 45., 16 / 45., 8 / 45.],
    [32 / 45., -16 / 45., 8 / 45.],
    [0, 0, 1],
]).astype(np.float32)
G_T[8] = np.transpose(G[8])
...
@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
        for c in range(C):
            u = np.dot(np.dot(G[alpha], filter[k, c, :, :]), G_T[alpha])
            for i in range(alpha):
-               for j in range(alpha) :
+               for j in range(alpha):
                    U[(i * alpha + j) * K + k, c] = u[i, j]
    print 'filter out: ', U.shape
...
@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
            w_idx = t % rounded_w
            h_start = h_idx * m
            w_start = w_idx * m
            h_end = min(h_start + alpha, input_shape[2])
            w_end = min(w_start + alpha, input_shape[3])
            d = np.zeros((alpha, alpha))
            d[0:h_end - h_start, 0:w_end - w_start] = \
                input[n, c, h_start:h_end, w_start:w_end]
            v = np.dot(np.dot(B_T[alpha], d), B[alpha])
            for i in range(alpha):
                for j in range(alpha):
                    V[(i * alpha + j) * C + c, p] = v[i, j]
    tmp = V.reshape(alpha_square, C, P, 1)
    print 'input out: ', tmp.shape
    tmp.astype(np.float32).tofile("C")
    M = np.zeros((alpha_square * K, P))
    for i in range(alpha_square):
-       u = U[i * K : (i + 1) * K, :]
-       v = V[i * C : (i + 1) * C, :]
-       M[i * K : (i + 1) * K, :] = np.dot(u, v)
+       u = U[i * K:(i + 1) * K, :]
+       v = V[i * C:(i + 1) * C, :]
+       M[i * K:(i + 1) * K, :] = np.dot(u, v)
    print 'M shape: ', M.shape
    M.astype(np.float32).tofile("gemm")
...
@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
            tm = np.zeros((alpha, alpha))
            for i in range(alpha):
                for j in range(alpha):
                    tm[i][j] = M[(i * alpha + j) * K + k, b]
            y = np.dot(np.dot(A_T[alpha], tm), A[alpha])
            for i in range(m):
                for j in range(m):
...
@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
    return res


def tf_conv(input, filter):
    conv_op = tf.nn.conv2d(input, filter, [1, 1, 1, 1], 'VALID')
    with tf.Session() as sess:
...
@@ -206,4 +203,3 @@ def main():
if __name__ == '__main__':
    main()
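For reference, the A_T[4] and B_T[4] matrices at the top of this file are the standard Winograd F(2, 3) transforms. The one-dimensional sketch below checks them against a direct correlation; the G matrix here is the usual F(2, 3) filter transform and is assumed, since the excerpt above only shows its first row.

import numpy as np

A_T = np.array([[1, 1, 1, 0], [0, 1, -1, -1]], dtype=np.float32)
B_T = np.array([[1, 0, -1, 0], [0, 1, 1, 0], [0, -1, 1, 0],
                [0, 1, 0, -1]], dtype=np.float32)
# assumed standard F(2, 3) filter transform (only its first row is visible above)
G = np.array([[1, 0, 0], [0.5, 0.5, 0.5], [0.5, -0.5, 0.5], [0, 0, 1]],
             dtype=np.float32)

d = np.array([1., 2., 3., 4.], dtype=np.float32)   # 4-element input tile
g = np.array([1., 2., 3.], dtype=np.float32)       # 3-tap filter

# Winograd F(2, 3): two outputs per tile using 4 multiplies instead of 6
y = A_T.dot(G.dot(g) * B_T.dot(d))

# direct (valid) correlation for comparison
direct = np.array([np.dot(d[i:i + 3], g) for i in range(2)])
print(y, direct)  # both should be [14. 20.]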