Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
6da30d22
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
6da30d22
编写于
4月 10, 2018
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Enable python style check
上级
e54825c5
变更
22
展开全部
显示空白变更内容
内联
并排
Showing
22 changed file
with
4594 addition
and
4287 deletion
+4594
-4287
.gitlab-ci.yml
.gitlab-ci.yml
+7
-1
docker/Dockerfile
docker/Dockerfile
+2
-1
mace/python/tools/binary_codegen.py
mace/python/tools/binary_codegen.py
+62
-61
mace/python/tools/caffe_converter_lib.py
mace/python/tools/caffe_converter_lib.py
+1098
-1024
mace/python/tools/convert_util.py
mace/python/tools/convert_util.py
+0
-1
mace/python/tools/converter.py
mace/python/tools/converter.py
+149
-159
mace/python/tools/dsp_ops.py
mace/python/tools/dsp_ops.py
+60
-62
mace/python/tools/encrypt_opencl_codegen.py
mace/python/tools/encrypt_opencl_codegen.py
+62
-58
mace/python/tools/graph_util.py
mace/python/tools/graph_util.py
+7
-2
mace/python/tools/memory_optimizer.py
mace/python/tools/memory_optimizer.py
+123
-112
mace/python/tools/opencl_codegen.py
mace/python/tools/opencl_codegen.py
+77
-74
mace/python/tools/source_converter_lib.py
mace/python/tools/source_converter_lib.py
+176
-162
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+1162
-1125
mace/python/tools/tf_dsp_converter_lib.py
mace/python/tools/tf_dsp_converter_lib.py
+472
-403
mace/python/tools/tf_ops_stats.py
mace/python/tools/tf_ops_stats.py
+162
-136
tools/bazel_adb_run.py
tools/bazel_adb_run.py
+99
-91
tools/falcon_cli.py
tools/falcon_cli.py
+12
-10
tools/generate_data.py
tools/generate_data.py
+30
-35
tools/mace_tools.py
tools/mace_tools.py
+378
-340
tools/sh_commands.py
tools/sh_commands.py
+152
-119
tools/validate.py
tools/validate.py
+152
-155
tools/wino_conv.py
tools/wino_conv.py
+152
-156
未找到文件。
.gitlab-ci.yml
浏览文件 @
6da30d22
stages
:
-
cpplint
-
pycodestyle
-
ops_test
-
ops_benchmark
...
...
@@ -7,7 +8,12 @@ cpplint:
stage
:
cpplint
script
:
-
curl -o cpplint.py https://raw.githubusercontent.com/google/styleguide/gh-pages/cpplint/cpplint.py
-
python cpplint.py --linelength=80 --counting=detailed $(find mace -name *.h -or -name *.cc)
-
python cpplint.py --linelength=80 --counting=detailed $(find mace -name "*.h" -or -name "*.cc")
pycodestyle
:
stage
:
pycodestyle
script
:
-
pycodestyle $(find -name "*.py")
ops_test
:
stage
:
ops_test
...
...
docker/Dockerfile
浏览文件 @
6da30d22
...
...
@@ -113,7 +113,8 @@ RUN pip install -i http://pypi.douban.com/simple/ --trusted-host pypi.douban.com
scipy
\
jinja2
\
pyyaml
\
sh
sh
\
pycodestyle
# Download tensorflow tools
RUN
wget http://cnbj1-inner-fds.api.xiaomi.net/mace/tool/transform_graph
&&
\
...
...
mace/python/tools/binary_codegen.py
浏览文件 @
6da30d22
...
...
@@ -27,28 +27,30 @@ def generate_cpp_source():
print
"Generate binary from"
,
binary_path
idx
=
0
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
idx
+=
8
for
_
in
xrange
(
size
):
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
idx
+=
key_size
params_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
params_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
data_map
[
key
]
=
[]
count
=
params_size
/
4
params
=
struct
.
unpack
(
str
(
count
)
+
"i"
,
binary_array
[
idx
:
idx
+
params_size
])
params
=
struct
.
unpack
(
str
(
count
)
+
"i"
,
binary_array
[
idx
:
idx
+
params_size
])
for
i
in
params
:
data_map
[
key
].
append
(
i
)
idx
+=
params_size
env
=
jinja2
.
Environment
(
loader
=
jinja2
.
FileSystemLoader
(
sys
.
path
[
0
]))
return
env
.
get_template
(
'str2vec_maps.cc.jinja2'
).
render
(
maps
=
data_map
,
data_type
=
'unsigned int'
,
variable_name
=
FLAGS
.
variable_name
)
maps
=
data_map
,
data_type
=
'unsigned int'
,
variable_name
=
FLAGS
.
variable_name
)
def
main
(
unused_args
):
cpp_binary_source
=
generate_cpp_source
()
...
...
@@ -58,14 +60,12 @@ def main(unused_args):
w_file
.
write
(
cpp_binary_source
)
w_file
.
close
()
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
parser
.
add_argument
(
"--binary_dirs"
,
type
=
str
,
default
=
""
,
help
=
"The binaries file path."
)
"--binary_dirs"
,
type
=
str
,
default
=
""
,
help
=
"The binaries file path."
)
parser
.
add_argument
(
"--binary_file_name"
,
type
=
str
,
...
...
@@ -75,7 +75,8 @@ def parse_args():
"--output_path"
,
type
=
str
,
default
=
""
,
help
=
"The path of generated C++ source file which contains the binary."
)
help
=
"The path of generated C++ source file which contains the binary."
)
parser
.
add_argument
(
"--variable_name"
,
type
=
str
,
...
...
mace/python/tools/caffe_converter_lib.py
浏览文件 @
6da30d22
此差异已折叠。
点击以展开。
mace/python/tools/convert_util.py
浏览文件 @
6da30d22
...
...
@@ -26,4 +26,3 @@ def tf_dtype_2_mace_dtype(tf_dtype):
if
not
mace_dtype
:
raise
Exception
(
"Not supported tensorflow dtype: "
+
tf_dtype
)
return
mace_dtype
mace/python/tools/converter.py
浏览文件 @
6da30d22
...
...
@@ -4,10 +4,14 @@ import hashlib
import
os.path
from
mace.python.tools
import
source_converter_lib
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb --output quantized_test_dsp.pb --runtime dsp --input_dim input_node,1,28,28,3
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
# --output quantized_test_dsp.pb \
# --runtime dsp \
# --input_dim input_node,1,28,28,3
FLAGS
=
None
def
file_checksum
(
fname
):
hash_func
=
hashlib
.
sha256
()
with
open
(
fname
,
"rb"
)
as
f
:
...
...
@@ -15,6 +19,7 @@ def file_checksum(fname):
hash_func
.
update
(
chunk
)
return
hash_func
.
hexdigest
()
def
main
(
unused_args
):
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
print
(
"Input graph file '"
+
FLAGS
.
model_file
+
"' does not exist!"
)
...
...
@@ -22,17 +27,21 @@ def main(unused_args):
model_checksum
=
file_checksum
(
FLAGS
.
model_file
)
if
FLAGS
.
model_checksum
!=
""
and
FLAGS
.
model_checksum
!=
model_checksum
:
print
(
"Model checksum mismatch: %s != %s"
%
(
model_checksum
,
FLAGS
.
model_checksum
))
print
(
"Model checksum mismatch: %s != %s"
%
(
model_checksum
,
FLAGS
.
model_checksum
))
sys
.
exit
(
-
1
)
if
FLAGS
.
platform
==
'caffe'
:
if
not
os
.
path
.
isfile
(
FLAGS
.
weight_file
):
print
(
"Input weight file '"
+
FLAGS
.
weight_file
+
"' does not exist!"
)
print
(
"Input weight file '"
+
FLAGS
.
weight_file
+
"' does not exist!"
)
sys
.
exit
(
-
1
)
weight_checksum
=
file_checksum
(
FLAGS
.
weight_file
)
if
FLAGS
.
weight_checksum
!=
""
and
FLAGS
.
weight_checksum
!=
weight_checksum
:
print
(
"Weight checksum mismatch: %s != %s"
%
(
weight_checksum
,
FLAGS
.
weight_checksum
))
if
FLAGS
.
weight_checksum
!=
""
and
\
FLAGS
.
weight_checksum
!=
weight_checksum
:
print
(
"Weight checksum mismatch: %s != %s"
%
(
weight_checksum
,
FLAGS
.
weight_checksum
))
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
==
'dsp'
:
...
...
@@ -41,22 +50,27 @@ def main(unused_args):
from
mace.python.tools
import
caffe_converter_lib
output_graph_def
=
caffe_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
FLAGS
.
model_file
,
FLAGS
.
weight_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
elif
FLAGS
.
platform
==
'tensorflow'
:
if
FLAGS
.
runtime
==
'dsp'
:
from
mace.python.tools
import
tf_dsp_converter_lib
output_graph_def
=
tf_dsp_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
output_node
,
FLAGS
.
dsp_mode
)
else
:
from
mace.python.tools
import
tf_converter_lib
output_graph_def
=
tf_converter_lib
.
convert_to_mace_pb
(
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
FLAGS
.
model_file
,
FLAGS
.
input_node
,
FLAGS
.
input_shape
,
FLAGS
.
output_node
,
FLAGS
.
data_type
,
FLAGS
.
runtime
,
FLAGS
.
winograd
)
if
FLAGS
.
output_type
==
'source'
:
source_converter_lib
.
convert_to_source
(
output_graph_def
,
model_checksum
,
FLAGS
.
template
,
FLAGS
.
obfuscate
,
FLAGS
.
model_tag
,
FLAGS
.
output
,
FLAGS
.
runtime
,
FLAGS
.
embed_model_data
)
source_converter_lib
.
convert_to_source
(
output_graph_def
,
model_checksum
,
FLAGS
.
template
,
FLAGS
.
obfuscate
,
FLAGS
.
model_tag
,
FLAGS
.
output
,
FLAGS
.
runtime
,
FLAGS
.
embed_model_data
)
else
:
with
open
(
FLAGS
.
output
,
"wb"
)
as
f
:
f
.
write
(
output_graph_def
.
SerializeToString
())
...
...
@@ -65,6 +79,7 @@ def main(unused_args):
f
.
write
(
str
(
output_graph_def
))
print
(
"Model conversion is completed."
)
def
str2bool
(
v
):
if
v
.
lower
()
in
(
'yes'
,
'true'
,
't'
,
'y'
,
'1'
):
return
True
...
...
@@ -73,6 +88,7 @@ def str2bool(v):
else
:
raise
argparse
.
ArgumentTypeError
(
'Boolean value expected.'
)
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
...
...
@@ -81,12 +97,10 @@ def parse_args():
"--model_file"
,
type
=
str
,
default
=
""
,
help
=
"TensorFlow
\'
GraphDef
\'
file to load, Caffe prototxt file to load."
)
help
=
"TensorFlow
\'
GraphDef
\'
file to load, "
"Caffe prototxt file to load."
)
parser
.
add_argument
(
"--weight_file"
,
type
=
str
,
default
=
""
,
help
=
"Caffe data file to load."
)
"--weight_file"
,
type
=
str
,
default
=
""
,
help
=
"Caffe data file to load."
)
parser
.
add_argument
(
"--model_checksum"
,
type
=
str
,
...
...
@@ -103,35 +117,23 @@ def parse_args():
default
=
""
,
help
=
"File to save the output graph to."
)
parser
.
add_argument
(
"--runtime"
,
type
=
str
,
default
=
"cpu"
,
help
=
"Runtime: cpu/gpu/dsp"
)
"--runtime"
,
type
=
str
,
default
=
"cpu"
,
help
=
"Runtime: cpu/gpu/dsp"
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"e.g., input_node"
)
parser
.
add_argument
(
"--output_node"
,
type
=
str
,
default
=
"softmax"
,
help
=
"e.g., softmax"
)
"--output_node"
,
type
=
str
,
default
=
"softmax"
,
help
=
"e.g., softmax"
)
parser
.
add_argument
(
"--data_type"
,
type
=
str
,
default
=
'DT_FLOAT'
,
help
=
"e.g., DT_HALF/DT_FLOAT"
)
parser
.
add_argument
(
"--output_type"
,
type
=
str
,
default
=
"pb"
,
help
=
"output type: source/pb"
)
"--output_type"
,
type
=
str
,
default
=
"pb"
,
help
=
"output type: source/pb"
)
parser
.
add_argument
(
"--template"
,
type
=
str
,
default
=
""
,
help
=
"template path"
)
"--template"
,
type
=
str
,
default
=
""
,
help
=
"template path"
)
parser
.
add_argument
(
"--obfuscate"
,
type
=
str2bool
,
...
...
@@ -152,25 +154,13 @@ def parse_args():
default
=
False
,
help
=
"open winograd convolution or not"
)
parser
.
add_argument
(
"--dsp_mode"
,
type
=
int
,
default
=
0
,
help
=
"dsp run mode, defalut=0"
)
"--dsp_mode"
,
type
=
int
,
default
=
0
,
help
=
"dsp run mode, defalut=0"
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
""
,
help
=
"input shape."
)
"--input_shape"
,
type
=
str
,
default
=
""
,
help
=
"input shape."
)
parser
.
add_argument
(
"--platform"
,
type
=
str
,
default
=
"tensorflow"
,
help
=
"tensorflow/caffe"
)
"--platform"
,
type
=
str
,
default
=
"tensorflow"
,
help
=
"tensorflow/caffe"
)
parser
.
add_argument
(
"--embed_model_data"
,
type
=
str2bool
,
default
=
True
,
help
=
"input shape."
)
"--embed_model_data"
,
type
=
str2bool
,
default
=
True
,
help
=
"input shape."
)
return
parser
.
parse_known_args
()
...
...
mace/python/tools/dsp_ops.py
浏览文件 @
6da30d22
class
DspOps
(
object
):
def
__init__
(
self
):
self
.
dsp_ops
=
{
...
...
@@ -18,7 +17,7 @@ class DspOps(object):
'QuantizedAvgPool'
:
'QuantizedAvgPool_8'
,
'QuantizedConcat'
:
'QuantizedConcat_8'
,
'QuantizedBiasAdd'
:
'QuantizedBiasAdd_8p8to32'
,
'QuantizedResizeBilinear'
:
'QuantizedResizeBilinear_8'
,
'QuantizedResizeBilinear'
:
'QuantizedResizeBilinear_8'
,
'QuantizedSpaceToBatchND'
:
'QuantizedSpaceToBatchND_8'
,
'QuantizedBatchToSpaceND'
:
'QuantizedBatchToSpaceND_8'
,
'QuantizedSoftmax'
:
'QuantizedSoftmax_8'
,
...
...
@@ -54,6 +53,7 @@ class DspOps(object):
'Concat'
:
'Concat_f'
,
'AddN'
:
'AddN_f'
,
}
def
has_op
(
self
,
tf_op
):
return
tf_op
in
self
.
dsp_ops
...
...
@@ -61,5 +61,3 @@ class DspOps(object):
if
tf_op
not
in
self
.
dsp_ops
:
raise
Exception
(
'Could not map nn op for: '
,
tf_op
)
return
self
.
dsp_ops
[
tf_op
]
mace/python/tools/encrypt_opencl_codegen.py
浏览文件 @
6da30d22
...
...
@@ -11,10 +11,13 @@ FLAGS = None
encrypt_lookup_table
=
"Xiaomi-AI-Platform-Mace"
def
encrypt_code
(
code_str
):
encrypted_arr
=
[]
for
i
in
range
(
len
(
code_str
)):
encrypted_char
=
hex
(
ord
(
code_str
[
i
])
^
ord
(
encrypt_lookup_table
[
i
%
len
(
encrypt_lookup_table
)]))
encrypted_char
=
hex
(
ord
(
code_str
[
i
])
^
ord
(
encrypt_lookup_table
[
i
%
len
(
encrypt_lookup_table
)]))
encrypted_arr
.
append
(
encrypted_char
)
return
encrypted_arr
...
...
@@ -45,7 +48,8 @@ def main(unused_args):
encrypted_code_maps
[
file_name
[:
-
3
]]
=
encrypted_code_arr
env
=
jinja2
.
Environment
(
loader
=
jinja2
.
FileSystemLoader
(
sys
.
path
[
0
]))
cpp_cl_encrypted_kernel
=
env
.
get_template
(
'str2vec_maps.cc.jinja2'
).
render
(
cpp_cl_encrypted_kernel
=
env
.
get_template
(
'str2vec_maps.cc.jinja2'
).
render
(
maps
=
encrypted_code_maps
,
data_type
=
'unsigned char'
,
variable_name
=
'kEncryptedProgramMap'
)
...
...
mace/python/tools/graph_util.py
浏览文件 @
6da30d22
...
...
@@ -2,18 +2,21 @@ import tensorflow as tf
from
mace.proto
import
mace_pb2
from
collections
import
OrderedDict
def
sort_tf_node
(
node
,
nodes_map
,
ordered_nodes_map
):
if
node
.
name
not
in
ordered_nodes_map
:
for
input_tensor_name
in
node
.
input
:
input_node_name
=
input_tensor_name
.
split
(
':'
)[
0
]
if
':'
in
input_tensor_name
else
input_tensor_name
if
input_node_name
not
in
nodes_map
or
input_node_name
in
ordered_nodes_map
:
if
input_node_name
not
in
nodes_map
or
\
input_node_name
in
ordered_nodes_map
:
continue
input_node
=
nodes_map
[
input_node_name
]
sort_tf_node
(
input_node
,
nodes_map
,
ordered_nodes_map
)
ordered_nodes_map
[
node
.
name
]
=
node
def
sort_tf_graph
(
graph_def
):
nodes_map
=
{}
ordered_nodes_map
=
OrderedDict
()
...
...
@@ -31,13 +34,15 @@ def sort_mace_node(node, nodes_map, ordered_nodes_map):
for
input_tensor_name
in
node
.
input
:
input_node_name
=
input_tensor_name
.
split
(
':'
)[
0
]
if
':'
in
input_tensor_name
else
input_tensor_name
if
input_node_name
not
in
nodes_map
or
input_node_name
in
ordered_nodes_map
:
if
input_node_name
not
in
nodes_map
or
\
input_node_name
in
ordered_nodes_map
:
continue
input_node
=
nodes_map
[
input_node_name
]
sort_mace_node
(
input_node
,
nodes_map
,
ordered_nodes_map
)
ordered_nodes_map
[
node
.
name
]
=
node
def
sort_mace_graph
(
graph_def
,
output_name
):
nodes_map
=
{}
ordered_nodes_map
=
OrderedDict
()
...
...
mace/python/tools/memory_optimizer.py
浏览文件 @
6da30d22
...
...
@@ -2,6 +2,7 @@ import sys
import
operator
from
mace.proto
import
mace_pb2
class
MemoryOptimizer
(
object
):
def
__init__
(
self
,
net_def
):
self
.
net_def
=
net_def
...
...
@@ -37,9 +38,9 @@ class MemoryOptimizer(object):
mem_size
=
[
0
,
0
]
if
op_type
==
'WinogradTransform'
or
op_type
==
'MatMul'
:
mem_size
[
0
]
=
output_shape
[
2
]
*
output_shape
[
3
]
mem_size
[
1
]
=
output_shape
[
0
]
*
int
((
output_shape
[
1
]
+
3
)
/
4
)
mem_size
[
1
]
=
output_shape
[
0
]
*
int
((
output_shape
[
1
]
+
3
)
/
4
)
else
:
mem_size
[
0
]
=
output_shape
[
2
]
*
int
((
output_shape
[
3
]
+
3
)
/
4
)
mem_size
[
0
]
=
output_shape
[
2
]
*
int
((
output_shape
[
3
]
+
3
)
/
4
)
mem_size
[
1
]
=
output_shape
[
0
]
*
output_shape
[
1
]
return
mem_size
...
...
@@ -51,13 +52,16 @@ class MemoryOptimizer(object):
if
self
.
is_buffer_image_op
(
op
):
continue
if
not
op
.
output_shape
:
print
(
'WARNING: There is no output shape information to do memory optimization.'
)
print
(
'WARNING: There is no output shape information to '
'do memory optimization.'
)
return
if
len
(
op
.
output_shape
)
!=
len
(
op
.
output
):
print
(
'WARNING: the number of output shape is not equal to the number of output.'
)
print
(
'WARNING: the number of output shape is not equal to '
'the number of output.'
)
return
for
i
in
range
(
len
(
op
.
output
)):
op_mem_size
=
self
.
get_mem_size
(
op
.
type
,
op
.
output_shape
[
i
].
dims
)
op_mem_size
=
self
.
get_mem_size
(
op
.
type
,
op
.
output_shape
[
i
].
dims
)
mem_id
=
-
1
if
len
(
self
.
idle_mem
)
>
0
:
best_mem_candidate_id
=
-
1
...
...
@@ -65,16 +69,22 @@ class MemoryOptimizer(object):
best_mem_candidate_shape
=
[]
for
mid
in
self
.
idle_mem
:
reuse_mem_size
=
self
.
mem_block
[
mid
]
resize_mem_size
=
[
max
(
reuse_mem_size
[
0
],
op_mem_size
[
0
]),
max
(
reuse_mem_size
[
1
],
op_mem_size
[
1
])]
delta_mem_area
=
self
.
mem_area
(
resize_mem_size
)
-
self
.
mem_area
(
reuse_mem_size
)
resize_mem_size
=
[
max
(
reuse_mem_size
[
0
],
op_mem_size
[
0
]),
max
(
reuse_mem_size
[
1
],
op_mem_size
[
1
])
]
delta_mem_area
=
self
.
mem_area
(
resize_mem_size
)
-
self
.
mem_area
(
reuse_mem_size
)
if
delta_mem_area
<
best_mem_candidate_delta_area
:
best_mem_candidate_id
=
mid
best_mem_candidate_delta_area
=
delta_mem_area
best_mem_candidate_shape
=
resize_mem_size
if
best_mem_candidate_delta_area
<=
self
.
mem_area
(
op_mem_size
):
if
best_mem_candidate_delta_area
<=
self
.
mem_area
(
op_mem_size
):
# reuse
self
.
mem_block
[
best_mem_candidate_id
]
=
best_mem_candidate_shape
self
.
mem_block
[
best_mem_candidate_id
]
=
best_mem_candidate_shape
mem_id
=
best_mem_candidate_id
self
.
idle_mem
.
remove
(
mem_id
)
...
...
@@ -113,7 +123,8 @@ class MemoryOptimizer(object):
print
mem
,
self
.
mem_block
[
mem
]
optimized_mem_size
+=
reduce
(
operator
.
mul
,
self
.
mem_block
[
mem
],
4
)
print
(
'origin mem: %d, optimized mem: %d'
,
origin_mem_size
,
optimized_mem_size
)
print
(
'origin mem: %d, optimized mem: %d'
,
origin_mem_size
,
optimized_mem_size
)
def
optimize_memory
(
net_def
):
...
...
mace/python/tools/opencl_codegen.py
浏览文件 @
6da30d22
...
...
@@ -27,37 +27,40 @@ def generate_cpp_source():
binary_array
=
np
.
fromfile
(
f
,
dtype
=
np
.
uint8
)
idx
=
0
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
size
,
=
struct
.
unpack
(
"Q"
,
binary_array
[
idx
:
idx
+
8
])
idx
+=
8
for
_
in
xrange
(
size
):
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
key_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
key
,
=
struct
.
unpack
(
str
(
key_size
)
+
"s"
,
binary_array
[
idx
:
idx
+
key_size
])
idx
+=
key_size
value_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
value_size
,
=
struct
.
unpack
(
"i"
,
binary_array
[
idx
:
idx
+
4
])
idx
+=
4
maps
[
key
]
=
[]
value
=
struct
.
unpack
(
str
(
value_size
)
+
"B"
,
binary_array
[
idx
:
idx
+
value_size
])
value
=
struct
.
unpack
(
str
(
value_size
)
+
"B"
,
binary_array
[
idx
:
idx
+
value_size
])
idx
+=
value_size
for
ele
in
value
:
maps
[
key
].
append
(
hex
(
ele
))
cl_platform_info_path
=
os
.
path
.
join
(
binary_dir
,
FLAGS
.
platform_info_file_name
)
cl_platform_info_path
=
os
.
path
.
join
(
binary_dir
,
FLAGS
.
platform_info_file_name
)
with
open
(
cl_platform_info_path
,
'r'
)
as
f
:
curr_platform_info
=
f
.
read
()
if
platform_info
!=
""
:
assert
(
curr_platform_info
==
platform_info
)
assert
(
curr_platform_info
==
platform_info
)
platform_info
=
curr_platform_info
env
=
jinja2
.
Environment
(
loader
=
jinja2
.
FileSystemLoader
(
sys
.
path
[
0
]))
return
env
.
get_template
(
'opencl_compiled_kernel.cc.jinja2'
).
render
(
maps
=
maps
,
data_type
=
'unsigned char'
,
variable_name
=
'kCompiledProgramMap'
,
platform_info
=
platform_info
,
maps
=
maps
,
data_type
=
'unsigned char'
,
variable_name
=
'kCompiledProgramMap'
,
platform_info
=
platform_info
,
)
def
main
(
unused_args
):
cpp_cl_binary_source
=
generate_cpp_source
()
...
...
@@ -90,7 +93,7 @@ def parse_args():
"--output_path"
,
type
=
str
,
default
=
"./mace/examples/codegen/opencl/opencl_compiled_program.cc"
,
help
=
"The path of generated C++ header file which contains
cl binaries."
)
help
=
"The path of generated C++ header file for
cl binaries."
)
return
parser
.
parse_known_args
()
...
...
mace/python/tools/source_converter_lib.py
浏览文件 @
6da30d22
...
...
@@ -6,9 +6,9 @@ import hashlib
from
mace.proto
import
mace_pb2
from
jinja2
import
Environment
,
FileSystemLoader
GENERATED_NAME
=
set
()
def
generate_obfuscated_name
(
namespace
,
name
):
md5
=
hashlib
.
md5
()
md5
.
update
(
namespace
)
...
...
@@ -22,31 +22,36 @@ def generate_obfuscated_name(namespace, name):
GENERATED_NAME
.
add
(
name
)
return
name
def
generate_tensor_map
(
tensors
):
tensor_map
=
{}
for
t
in
tensors
:
if
not
tensor_map
.
has_key
(
t
.
name
)
:
if
t
.
name
not
in
tensor_map
:
tensor_map
[
t
.
name
]
=
generate_obfuscated_name
(
"tensor"
,
t
.
name
)
return
tensor_map
def
generate_in_out_map
(
ops
,
tensor_map
):
in_out_map
=
{}
for
op
in
ops
:
op
.
name
=
generate_obfuscated_name
(
"op"
,
op
.
name
)
for
input_name
in
op
.
input
:
if
not
in_out_map
.
has_key
(
input_name
)
:
if
tensor_map
.
has_key
(
input_name
)
:
if
input_name
not
in
in_out_map
:
if
input_name
in
tensor_map
:
in_out_map
[
input_name
]
=
tensor_map
[
input_name
]
else
:
in_out_map
[
input_name
]
=
generate_obfuscated_name
(
"in"
,
input_name
)
in_out_map
[
input_name
]
=
generate_obfuscated_name
(
"in"
,
input_name
)
for
output_name
in
op
.
output
:
if
not
in_out_map
.
has_key
(
output_name
)
:
if
tensor_map
.
has_key
(
output_name
)
:
if
output_name
not
in
in_out_map
:
if
output_name
in
tensor_map
:
in_out_map
[
output_name
]
=
tensor_map
[
output_name
]
else
:
in_out_map
[
output_name
]
=
generate_obfuscated_name
(
"out"
,
output_name
)
in_out_map
[
output_name
]
=
generate_obfuscated_name
(
"out"
,
output_name
)
return
in_out_map
def
obfuscate_name
(
net_def
):
input_node
=
"mace_input_node"
output_node
=
"mace_output_node"
...
...
@@ -63,20 +68,22 @@ def obfuscate_name(net_def):
if
output_node
not
in
op
.
output
[
i
]:
op
.
output
[
i
]
=
in_out_map
[
op
.
output
[
i
]]
def
rename_tensor
(
net_def
):
tensor_map
=
{}
for
t
in
net_def
.
tensors
:
if
not
tensor_map
.
has_key
(
t
.
name
)
:
if
t
.
name
not
in
tensor_map
:
tensor_map
[
t
.
name
]
=
"_"
+
t
.
name
[:
-
2
].
replace
(
"/"
,
"_"
)
t
.
name
=
tensor_map
[
t
.
name
]
for
op
in
net_def
.
op
:
for
i
in
range
(
len
(
op
.
input
)):
if
tensor_map
.
has_key
(
op
.
input
[
i
])
:
if
op
.
input
[
i
]
in
tensor_map
:
op
.
input
[
i
]
=
tensor_map
[
op
.
input
[
i
]]
for
i
in
range
(
len
(
op
.
output
)):
if
tensor_map
.
has_key
(
op
.
output
[
i
])
:
if
op
.
output
[
i
]
in
tensor_map
:
op
.
output
[
i
]
=
tensor_map
[
op
.
output
[
i
]]
class
TensorInfo
:
def
__init__
(
self
,
id
,
t
,
runtime
):
self
.
id
=
id
...
...
@@ -84,19 +91,26 @@ class TensorInfo:
if
t
.
data_type
==
mace_pb2
.
DT_FLOAT
:
if
runtime
==
'gpu'
:
self
.
data_type
=
mace_pb2
.
DT_HALF
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float16
).
tobytes
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float16
).
tobytes
())
else
:
self
.
data_type
=
mace_pb2
.
DT_FLOAT
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float32
).
tobytes
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
float_data
).
astype
(
np
.
float32
).
tobytes
())
elif
t
.
data_type
==
mace_pb2
.
DT_INT32
:
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
int32
).
tobytes
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
int32
).
tobytes
())
elif
t
.
data_type
==
mace_pb2
.
DT_UINT8
:
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
self
.
data
=
bytearray
(
np
.
array
(
t
.
int32_data
).
astype
(
np
.
uint8
).
tolist
())
def
stringfy
(
value
):
return
', '
.
join
(
'"{0}"'
.
format
(
w
)
for
w
in
value
)
def
convert_to_source
(
net_def
,
mode_pb_checksum
,
template_dir
,
obfuscate
,
model_tag
,
output
,
runtime
,
embed_model_data
):
def
convert_to_source
(
net_def
,
mode_pb_checksum
,
template_dir
,
obfuscate
,
model_tag
,
output
,
runtime
,
embed_model_data
):
if
obfuscate
:
obfuscate_name
(
net_def
)
else
:
...
...
@@ -106,7 +120,8 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
print
template_dir
# Create the jinja2 environment.
j2_env
=
Environment
(
loader
=
FileSystemLoader
(
template_dir
),
trim_blocks
=
True
)
j2_env
=
Environment
(
loader
=
FileSystemLoader
(
template_dir
),
trim_blocks
=
True
)
j2_env
.
filters
[
'stringfy'
]
=
stringfy
output_dir
=
os
.
path
.
dirname
(
output
)
+
'/'
# generate tensor source files
...
...
@@ -122,11 +137,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
model_data
.
extend
(
bytearray
([
0
]
*
padding
))
offset
+=
padding
source
=
j2_env
.
get_template
(
template_name
).
render
(
tensor_info
=
tensor_info
,
tensor
=
t
,
tag
=
model_tag
,
runtime
=
runtime
,
offset
=
offset
,
tensor_info
=
tensor_info
,
tensor
=
t
,
tag
=
model_tag
,
runtime
=
runtime
,
offset
=
offset
,
)
model_data
.
extend
(
tensor_info
.
data
)
offset
+=
len
(
tensor_info
.
data
)
...
...
@@ -137,11 +152,10 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate tensor data
template_name
=
'tensor_data.jinja2'
source
=
j2_env
.
get_template
(
template_name
).
render
(
tag
=
model_tag
,
embed_model_data
=
embed_model_data
,
model_data_size
=
offset
,
model_data
=
model_data
)
tag
=
model_tag
,
embed_model_data
=
embed_model_data
,
model_data_size
=
offset
,
model_data
=
model_data
)
with
open
(
output_dir
+
'tensor_data'
+
'.cc'
,
"wb"
)
as
f
:
f
.
write
(
source
)
if
not
embed_model_data
:
...
...
@@ -155,11 +169,11 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
op_size
=
len
(
net_def
.
op
)
for
start
in
range
(
0
,
op_size
,
10
):
source
=
j2_env
.
get_template
(
template_name
).
render
(
start
=
start
,
end
=
min
(
start
+
10
,
op_size
),
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
start
=
start
,
end
=
min
(
start
+
10
,
op_size
),
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
)
with
open
(
output_dir
+
'op'
+
str
(
counter
)
+
'.cc'
,
"wb"
)
as
f
:
f
.
write
(
source
)
...
...
@@ -167,21 +181,21 @@ def convert_to_source(net_def, mode_pb_checksum, template_dir, obfuscate, model_
# generate model source files
template_name
=
'model.jinja2'
tensors
=
[
TensorInfo
(
i
,
net_def
.
tensors
[
i
],
runtime
)
for
i
in
range
(
len
(
net_def
.
tensors
))]
tensors
=
[
TensorInfo
(
i
,
net_def
.
tensors
[
i
],
runtime
)
for
i
in
range
(
len
(
net_def
.
tensors
))
]
source
=
j2_env
.
get_template
(
template_name
).
render
(
tensors
=
tensors
,
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
model_pb_checksum
=
mode_pb_checksum
)
tensors
=
tensors
,
net
=
net_def
,
tag
=
model_tag
,
runtime
=
runtime
,
model_pb_checksum
=
mode_pb_checksum
)
with
open
(
output
,
"wb"
)
as
f
:
f
.
write
(
source
)
# generate model header file
template_name
=
'model_header.jinja2'
source
=
j2_env
.
get_template
(
template_name
).
render
(
tag
=
model_tag
,
)
source
=
j2_env
.
get_template
(
template_name
).
render
(
tag
=
model_tag
,
)
with
open
(
output_dir
+
model_tag
+
'.h'
,
"wb"
)
as
f
:
f
.
write
(
source
)
mace/python/tools/tf_converter_lib.py
浏览文件 @
6da30d22
此差异已折叠。
点击以展开。
mace/python/tools/tf_dsp_converter_lib.py
浏览文件 @
6da30d22
此差异已折叠。
点击以展开。
mace/python/tools/tf_ops_stats.py
浏览文件 @
6da30d22
...
...
@@ -10,18 +10,21 @@ from tensorflow import gfile
FLAGS
=
None
def
hist_inc
(
hist
,
key
):
if
key
in
hist
:
hist
[
key
]
+=
1
else
:
hist
[
key
]
=
1
def
to_int_list
(
long_list
):
int_list
=
[]
for
value
in
long_list
:
int_list
.
append
(
int
(
value
))
return
int_list
def
main
(
unused_args
):
if
not
FLAGS
.
input
or
not
gfile
.
Exists
(
FLAGS
.
input
):
print
(
'Input graph file '
+
FLAGS
.
input
+
' does not exist!'
)
...
...
@@ -49,7 +52,9 @@ def main(unused_args):
tensor
=
output
.
eval
()
tensor_shape
=
list
(
tensor
.
shape
)
tensor_shapes
[
tensor_name
]
=
tensor_shape
print
(
"Const %s: %s, %d"
%
(
tensor_name
,
tensor_shape
,
functools
.
reduce
(
operator
.
mul
,
tensor_shape
,
1
)))
print
(
"Const %s: %s, %d"
%
(
tensor_name
,
tensor_shape
,
functools
.
reduce
(
operator
.
mul
,
tensor_shape
,
1
)))
if
len
(
tensor_shape
)
==
1
and
tensor_shape
[
0
]
<
10
:
tensor_values
[
tensor_name
]
=
list
(
tensor
)
...
...
@@ -65,11 +70,16 @@ def main(unused_args):
if
input_name
.
endswith
(
'weights/read:0'
):
ksize
=
input
.
shape
.
as_list
()
break
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
ksize
=
tensor_shapes
[
input_name
]
break
print
(
'%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
,
op
.
inputs
[
0
].
shape
,
op
.
outputs
[
0
].
shape
))
key
=
'%s(padding=%s, strides=%s, ksize=%s, format=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
)
print
(
'%s(padding=%s, strides=%s, ksize=%s, format=%s) %s => %s'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
,
op
.
inputs
[
0
].
shape
,
op
.
outputs
[
0
].
shape
))
key
=
'%s(padding=%s, strides=%s, ksize=%s, format=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
data_format
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'FusedResizeAndPadConv2D'
]:
padding
=
op
.
get_attr
(
'padding'
)
...
...
@@ -78,20 +88,25 @@ def main(unused_args):
ksize
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
if
input_name
.
endswith
(
'weights:0'
)
and
input_name
in
tensor_shapes
:
ksize
=
tensor_shapes
[
input_name
]
break
key
=
'%s(padding=%s, strides=%s, ksize=%s, resize_align_corners=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
resize_align_corners
)
key
=
'%s(padding=%s, strides=%s, ksize=%s, '
\
'resize_align_corners=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
,
resize_align_corners
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'ResizeBilinear'
]:
align_corners
=
op
.
get_attr
(
'align_corners'
)
size
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'size:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'size:0'
)
and
input_name
in
tensor_values
:
size
=
tensor_values
[
input_name
]
break
key
=
'%s(size=%s, align_corners=%s)'
%
(
op
.
type
,
size
,
align_corners
)
key
=
'%s(size=%s, align_corners=%s)'
%
(
op
.
type
,
size
,
align_corners
)
print
(
key
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'AvgPool'
,
'MaxPool'
]:
...
...
@@ -99,38 +114,47 @@ def main(unused_args):
strides
=
to_int_list
(
op
.
get_attr
(
'strides'
))
ksize
=
to_int_list
(
op
.
get_attr
(
'ksize'
))
data_format
=
op
.
get_attr
(
'data_format'
)
key
=
'%s(padding=%s, strides=%s, ksize=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
)
key
=
'%s(padding=%s, strides=%s, ksize=%s)'
%
(
op
.
type
,
padding
,
strides
,
ksize
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'SpaceToBatchND'
,
'BatchToSpaceND'
]:
block_shape
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'block_shape:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'block_shape:0'
)
and
input_name
in
tensor_values
:
block_shape
=
tensor_values
[
input_name
]
break
paddings
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
paddings
=
tensor_values
[
input_name
]
break
crops
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'crops:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'crops:0'
)
and
input_name
in
tensor_values
:
paddings
=
tensor_values
[
input_name
]
break
if
op
.
type
==
'SpaceToBatchND'
:
key
=
'%s(block_shape=%s, paddings=%s)'
%
(
op
.
type
,
block_shape
,
paddings
)
key
=
'%s(block_shape=%s, paddings=%s)'
%
(
op
.
type
,
block_shape
,
paddings
)
else
:
key
=
'%s(block_shape=%s, crops=%s)'
%
(
op
.
type
,
block_shape
,
crops
)
key
=
'%s(block_shape=%s, crops=%s)'
%
(
op
.
type
,
block_shape
,
crops
)
print
(
key
)
hist_inc
(
stats
,
key
)
elif
op
.
type
==
'Pad'
:
paddings
=
'Unknown'
for
input
in
op
.
inputs
:
input_name
=
input
.
name
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
if
input_name
.
endswith
(
'paddings:0'
)
and
input_name
in
tensor_values
:
paddings
=
tensor_values
[
input_name
]
break
key
=
'%s(paddings=%s)'
%
(
op
.
type
,
paddings
)
...
...
@@ -142,6 +166,7 @@ def main(unused_args):
for
key
,
value
in
sorted
(
six
.
iteritems
(
stats
)):
print
(
'%s: %d'
%
(
key
,
value
))
def
parse_args
():
'''Parses command line arguments.'''
parser
=
argparse
.
ArgumentParser
()
...
...
@@ -152,6 +177,7 @@ def parse_args():
help
=
'TensorFlow
\'
GraphDef
\'
file to load.'
)
return
parser
.
parse_known_args
()
if
__name__
==
'__main__'
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/bazel_adb_run.py
浏览文件 @
6da30d22
...
...
@@ -7,7 +7,6 @@
# --target=//mace/ops:ops_test
# --stdout_processor=stdout_processor
import
argparse
import
random
import
re
...
...
@@ -15,15 +14,18 @@ import sys
import
sh_commands
def stdout_processor(stdout, device_properties, abi):
    """Default stdout handler: ignore every argument and do nothing.

    Shares the (stdout, device_properties, abi) signature of the other
    *_stdout_processor functions in this file.
    """
    return None
def ops_test_stdout_processor(stdout, device_properties, abi):
    """Fail when the captured test output reports an abort or a failure.

    Args:
        stdout: captured output as one string; it is split on "\n".
        device_properties: not used by this processor.
        abi: not used by this processor.

    Raises:
        Exception: with message "Command failed" as soon as a line contains
            "Aborted" or "FAILED".
    """
    failure_markers = ("Aborted", "FAILED")
    for text in stdout.split("\n"):
        if any(marker in text for marker in failure_markers):
            raise Exception("Command failed")
def
ops_benchmark_stdout_processor
(
stdout
,
device_properties
,
abi
):
stdout_lines
=
stdout
.
split
(
"
\n
"
)
metrics
=
{}
...
...
@@ -33,17 +35,20 @@ def ops_benchmark_stdout_processor(stdout, device_properties, abi):
line
=
line
.
strip
()
parts
=
line
.
split
()
if
len
(
parts
)
==
5
and
parts
[
0
].
startswith
(
"BM_"
):
metrics
[
"%s.time_ms"
%
parts
[
0
]]
=
str
(
float
(
parts
[
1
])
/
1e6
)
metrics
[
"%s.time_ms"
%
parts
[
0
]]
=
str
(
float
(
parts
[
1
])
/
1e6
)
metrics
[
"%s.input_mb_per_sec"
%
parts
[
0
]]
=
parts
[
3
]
metrics
[
"%s.gmacc_per_sec"
%
parts
[
0
]]
=
parts
[
4
]
platform
=
device_properties
[
"ro.board.platform"
].
replace
(
" "
,
"-"
)
model
=
device_properties
[
"ro.product.model"
].
replace
(
" "
,
"-"
)
tags
=
{
"ro.board.platform"
:
platform
,
tags
=
{
"ro.board.platform"
:
platform
,
"ro.product.model"
:
model
,
"abi"
:
abi
}
sh_commands
.
falcon_push_metrics
(
metrics
,
tags
=
tags
,
endpoint
=
"mace_ops_benchmark"
)
"abi"
:
abi
}
sh_commands
.
falcon_push_metrics
(
metrics
,
tags
=
tags
,
endpoint
=
"mace_ops_benchmark"
)
def
parse_args
():
"""Parses command line arguments."""
...
...
@@ -57,22 +62,16 @@ def parse_args():
"--target_socs"
,
type
=
str
,
default
=
"all"
,
help
=
"SoCs(ro.board.platform) to build, comma seperated list or all/random"
)
help
=
"SoCs (ro.board.platform from getprop) to build, "
"comma seperated list or all/random"
)
parser
.
add_argument
(
"--target"
,
type
=
str
,
default
=
"//..."
,
help
=
"Bazel target to build"
)
"--target"
,
type
=
str
,
default
=
"//..."
,
help
=
"Bazel target to build"
)
parser
.
add_argument
(
"--run_target"
,
type
=
bool
,
default
=
False
,
help
=
"Whether to run the target"
)
parser
.
add_argument
(
"--args"
,
type
=
str
,
default
=
""
,
help
=
"Command args"
)
parser
.
add_argument
(
"--args"
,
type
=
str
,
default
=
""
,
help
=
"Command args"
)
parser
.
add_argument
(
"--stdout_processor"
,
type
=
str
,
...
...
@@ -80,6 +79,7 @@ def parse_args():
help
=
"Stdout processing function, default: stdout_processor"
)
return
parser
.
parse_known_args
()
def
main
(
unused_args
):
target_socs
=
None
if
FLAGS
.
target_socs
!=
"all"
and
FLAGS
.
target_socs
!=
"random"
:
...
...
@@ -101,17 +101,25 @@ def main(unused_args):
sh_commands
.
bazel_build
(
target
,
abi
=
target_abi
)
if
FLAGS
.
run_target
:
for
serialno
in
target_devices
:
if
target_abi
not
in
set
(
sh_commands
.
adb_supported_abis
(
serialno
)):
print
(
"Skip device %s which does not support ABI %s"
%
(
serialno
,
target_abi
))
if
target_abi
not
in
set
(
sh_commands
.
adb_supported_abis
(
serialno
)):
print
(
"Skip device %s which does not support ABI %s"
%
(
serialno
,
target_abi
))
continue
stdouts
=
sh_commands
.
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
stdouts
=
sh_commands
.
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
args
=
FLAGS
.
args
,
opencl_profiling
=
1
,
vlog_level
=
0
,
device_bin_path
=
"/data/local/tmp/mace"
,
out_of_range_check
=
1
)
device_properties
=
sh_commands
.
adb_getprop_by_serialno
(
serialno
)
globals
()[
FLAGS
.
stdout_processor
](
stdouts
,
device_properties
,
target_abi
)
device_properties
=
sh_commands
.
adb_getprop_by_serialno
(
serialno
)
globals
()[
FLAGS
.
stdout_processor
](
stdouts
,
device_properties
,
target_abi
)
if
__name__
==
"__main__"
:
FLAGS
,
unparsed
=
parse_args
()
...
...
tools/falcon_cli.py
浏览文件 @
6da30d22
#-*- coding:utf8 -*-
import
json
import
socket
import
itertools
import
json
,
socket
,
itertools
class
FalconCli
(
object
):
def
__init__
(
self
,
addr
,
debug
=
True
,
buf_size
=
1000
):
self
.
socket_
=
socket
.
create_connection
(
addr
)
self
.
stream
=
self
.
socket_
.
makefile
()
...
...
@@ -16,16 +16,19 @@ class FalconCli(object):
self
.
stream
.
close
()
@classmethod
def connect(cls, server="transfer.falcon.miliao.srv", port=8433,
            debug=True, buf_size=1000):
    """Create a client connected to a Falcon transfer server.

    Args:
        server: host name of the transfer server.
        port: TCP port of the transfer server.
        debug: forwarded to the constructor.
        buf_size: forwarded to the constructor.

    Returns:
        A new client instance, or None when the connection attempt raises
        socket.error (the error is printed, not re-raised).
    """
    try:
        # Use cls rather than the hard-coded class name so subclasses get
        # an instance of their own type.
        return cls((server, port), debug, buf_size)
    except socket.error as exc:
        # Best effort on purpose: report the failure and fall through so the
        # caller receives None. "as" syntax and the single-argument print
        # call are valid on both Python 2.6+ and Python 3.
        print("error: connect to %s:%s error: %s" % (server, port, exc))
def
call
(
self
,
name
,
*
params
):
request
=
dict
(
id
=
next
(
self
.
id_counter
),
params
=
list
(
params
),
method
=
name
)
request
=
dict
(
id
=
next
(
self
.
id_counter
),
params
=
list
(
params
),
method
=
name
)
payload
=
json
.
dumps
(
request
).
encode
()
if
self
.
debug
:
print
"--> req:"
,
payload
...
...
@@ -49,7 +52,7 @@ class FalconCli(object):
resp
=
[]
while
True
:
buf
=
lines
[
s
:
s
+
self
.
buf_size
]
buf
=
lines
[
s
:
s
+
self
.
buf_size
]
s
=
s
+
self
.
buf_size
if
len
(
buf
)
==
0
:
break
...
...
@@ -57,4 +60,3 @@ class FalconCli(object):
resp
.
append
(
r
)
return
resp
tools/generate_data.py
浏览文件 @
6da30d22
...
...
@@ -11,13 +11,16 @@ import re
# --input_file input_file
#
def generate_data(name, shape):
    """Write random float32 data for one input tensor to a raw binary file.

    The values are np.random.random(shape) * 2 - 1. The output path is
    FLAGS.input_file + "_" + `name` with every run of characters outside
    [0-9a-zA-Z] replaced by a single "_".

    Args:
        name: input tensor name, used to derive the file name suffix.
        shape: passed unchanged to np.random.random.
    """
    # Re-seed NumPy's global RNG without an explicit seed value.
    np.random.seed()
    data = np.random.random(shape) * 2 - 1
    input_file_name = FLAGS.input_file + "_" + \
        re.sub('[^0-9a-zA-Z]+', '_', name)
    # One pre-joined argument prints the same text on Python 2 and 3
    # (the old `print a, b` statement added the second space itself).
    print('Generate input file:  ' + input_file_name)
    data.astype(np.float32).tofile(input_file_name)
def
main
(
unused_args
):
input_names
=
[
name
for
name
in
FLAGS
.
input_node
.
split
(
','
)]
input_shapes
=
[
shape
for
shape
in
FLAGS
.
input_shape
.
split
(
':'
)]
...
...
@@ -27,29 +30,21 @@ def main(unused_args):
generate_data
(
input_names
[
i
],
shape
)
print
"Generate input file done."
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
parser
.
register
(
"type"
,
"bool"
,
lambda
v
:
v
.
lower
()
==
"true"
)
parser
.
add_argument
(
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
return
parser
.
parse_known_args
()
if
__name__
==
'__main__'
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/mace_tools.py
浏览文件 @
6da30d22
...
...
@@ -34,7 +34,8 @@ def run_command(command):
print
(
"Stderr msg:
\n
{}"
.
format
(
err
))
if
result
.
returncode
!=
0
:
raise
Exception
(
"Exit not 0 from bash with code: {}, command: {}"
.
format
(
raise
Exception
(
"Exit not 0 from bash with code: {}, command: {}"
.
format
(
result
.
returncode
,
command
))
...
...
@@ -63,10 +64,12 @@ def generate_version_code():
command
=
"bash tools/generate_version_code.sh"
run_command
(
command
)
def generate_opencl_source_code():
    """Run tools/generate_opencl_code.sh with the "source" argument."""
    run_command("bash tools/generate_opencl_code.sh source")
def
generate_opencl_binay_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
cl_bin_dirs
=
[]
for
d
in
model_output_dirs
:
...
...
@@ -79,6 +82,7 @@ def generate_opencl_binay_code(target_soc, model_output_dirs, pull_or_not):
'binary'
,
target_soc
,
cl_bin_dirs_str
,
int
(
pull_or_not
))
run_command
(
command
)
def
generate_tuning_param_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
cl_bin_dirs
=
[]
for
d
in
model_output_dirs
:
...
...
@@ -91,20 +95,24 @@ def generate_tuning_param_code(target_soc, model_output_dirs, pull_or_not):
target_soc
,
cl_bin_dirs_str
,
int
(
pull_or_not
))
run_command
(
command
)
def generate_code(target_soc, model_output_dirs, pull_or_not):
    """Generate the OpenCL binary code, then the tuning parameter code.

    The three arguments are forwarded unchanged, in order, to
    generate_opencl_binay_code and then generate_tuning_param_code.
    """
    shared_args = (target_soc, model_output_dirs, pull_or_not)
    generate_opencl_binay_code(*shared_args)
    generate_tuning_param_code(*shared_args)
def clear_env(target_soc):
    """Run tools/clear_env.sh, passing `target_soc` as its only argument."""
    run_command("bash tools/clear_env.sh {}".format(target_soc))
def input_file_name(input_name):
    """Return the data file name used for one model input.

    The result is the INPUT_FILE_NAME environment variable, an underscore,
    and `input_name` with every run of characters outside [0-9a-zA-Z]
    replaced by a single "_".

    Raises:
        KeyError: if INPUT_FILE_NAME is not set in the environment.
    """
    prefix = os.environ['INPUT_FILE_NAME']
    sanitized = re.sub('[^0-9a-zA-Z]+', '_', input_name)
    return prefix + '_' + sanitized
def
generate_random_input
(
target_soc
,
model_output_dir
,
input_names
,
input_files
):
def
generate_random_input
(
target_soc
,
model_output_dir
,
input_names
,
input_files
):
generate_data_or_not
=
True
command
=
"bash tools/validate_tools.sh {} {} {}"
.
format
(
target_soc
,
model_output_dir
,
int
(
generate_data_or_not
))
...
...
@@ -122,16 +130,19 @@ def generate_random_input(target_soc, model_output_dir,
else
:
input_name_list
.
append
(
input_names
)
if
len
(
input_file_list
)
!=
len
(
input_name_list
):
raise
Exception
(
'If input_files set, the input files should match the input names.'
)
raise
Exception
(
'If input_files set, the input files should '
'match the input names.'
)
for
i
in
range
(
len
(
input_file_list
)):
if
input_file_list
[
i
]
is
not
None
:
dst_input_file
=
model_output_dir
+
'/'
+
input_file_name
(
input_name_list
[
i
])
dst_input_file
=
model_output_dir
+
'/'
+
input_file_name
(
input_name_list
[
i
])
if
input_file_list
[
i
].
startswith
(
"http://"
)
or
\
input_file_list
[
i
].
startswith
(
"https://"
):
urllib
.
urlretrieve
(
input_file_list
[
i
],
dst_input_file
)
else
:
shutil
.
copy
(
input_file_list
[
i
],
dst_input_file
)
def
generate_model_code
():
command
=
"bash tools/generate_model_code.sh"
run_command
(
command
)
...
...
@@ -155,10 +166,17 @@ def tuning_run(model_name,
# TODO(yejianwu) refactoring the hackish code
stdout_buff
=
[]
process_output
=
sh_commands
.
make_output_processor
(
stdout_buff
)
p
=
sh
.
bash
(
"tools/tuning_run.sh"
,
target_soc
,
model_output_dir
,
running_round
,
int
(
tuning
),
restart_round
,
option_args
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
=
sh
.
bash
(
"tools/tuning_run.sh"
,
target_soc
,
model_output_dir
,
running_round
,
int
(
tuning
),
restart_round
,
option_args
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
.
wait
()
metrics
=
{}
for
line
in
stdout_buff
:
...
...
@@ -166,18 +184,23 @@ def tuning_run(model_name,
parts
=
line
.
split
()
if
len
(
parts
)
==
6
and
parts
[
0
].
startswith
(
"time"
):
metrics
[
"%s.create_net_ms"
%
model_name
]
=
str
(
float
(
parts
[
1
]))
metrics
[
"%s.mace_engine_ctor_ms"
%
model_name
]
=
str
(
float
(
parts
[
2
]))
metrics
[
"%s.mace_engine_ctor_ms"
%
model_name
]
=
str
(
float
(
parts
[
2
]))
metrics
[
"%s.init_ms"
%
model_name
]
=
str
(
float
(
parts
[
3
]))
metrics
[
"%s.warmup_ms"
%
model_name
]
=
str
(
float
(
parts
[
4
]))
if
float
(
parts
[
5
])
>
0
:
metrics
[
"%s.avg_latency_ms"
%
model_name
]
=
str
(
float
(
parts
[
5
]))
tags
=
{
"ro.board.platform"
:
target_soc
,
metrics
[
"%s.avg_latency_ms"
%
model_name
]
=
str
(
float
(
parts
[
5
]))
tags
=
{
"ro.board.platform"
:
target_soc
,
"abi"
:
target_abi
,
# "runtime": target_runtime, # TODO(yejianwu) Add the actual runtime
"round"
:
running_round
,
# TODO(yejianwu) change this to source/binary
"tuning"
:
tuning
}
sh_commands
.
falcon_push_metrics
(
metrics
,
endpoint
=
"mace_model_benchmark"
,
tags
=
tags
)
"tuning"
:
tuning
}
sh_commands
.
falcon_push_metrics
(
metrics
,
endpoint
=
"mace_model_benchmark"
,
tags
=
tags
)
def
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
=
''
):
command
=
"bash tools/benchmark.sh {} {}
\"
{}
\"
"
.
format
(
...
...
@@ -188,8 +211,8 @@ def benchmark_model(target_soc, model_output_dir, option_args=''):
def run_model(model_name, target_runtime, target_abi, target_soc,
              model_output_dir, running_round, restart_round, option_args):
    """Run a model through tuning_run with the tuning argument set to False.

    Every other argument is forwarded to tuning_run unchanged.
    """
    tuning_enabled = False
    tuning_run(model_name, target_runtime, target_abi, target_soc,
               model_output_dir, running_round, tuning_enabled,
               restart_round, option_args)
def
generate_production_code
(
target_soc
,
model_output_dirs
,
pull_or_not
):
...
...
@@ -251,8 +274,8 @@ def merge_libs_and_tuning_results(target_soc, output_dir, model_output_dirs):
build_production_code
()
model_output_dirs_str
=
","
.
join
(
model_output_dirs
)
command
=
"bash tools/merge_libs.sh {} {} {}"
.
format
(
target_soc
,
output_dir
,
model_output_dirs_str
)
command
=
"bash tools/merge_libs.sh {} {} {}"
.
format
(
target_soc
,
output_dir
,
model_output_dirs_str
)
run_command
(
command
)
...
...
@@ -260,6 +283,7 @@ def packaging_lib_file(output_dir):
command
=
"bash tools/packaging_lib.sh {}"
.
format
(
output_dir
)
run_command
(
command
)
def
download_model_files
(
model_file_path
,
model_output_dir
,
weight_file_path
=
""
):
...
...
@@ -270,10 +294,9 @@ def download_model_files(model_file_path,
if
weight_file_path
.
startswith
(
"http://"
)
or
\
weight_file_path
.
startswith
(
"https://"
):
os
.
environ
[
"WEIGHT_FILE_PATH"
]
=
model_output_dir
+
"/model.caffemodel"
urllib
.
urlretrieve
(
weight_file_path
,
os
.
environ
[
"WEIGHT_FILE_PATH"
])
os
.
environ
[
"WEIGHT_FILE_PATH"
]
=
model_output_dir
+
"/model.caffemodel"
urllib
.
urlretrieve
(
weight_file_path
,
os
.
environ
[
"WEIGHT_FILE_PATH"
])
def
md5sum
(
str
):
md5
=
hashlib
.
md5
()
...
...
@@ -306,7 +329,10 @@ def parse_args():
default
=
10
,
help
=
"The model throughput test running seconds."
)
parser
.
add_argument
(
"--restart_round"
,
type
=
int
,
default
=
1
,
help
=
"The model restart round."
)
"--restart_round"
,
type
=
int
,
default
=
1
,
help
=
"The model restart round."
)
parser
.
add_argument
(
"--tuning"
,
type
=
"bool"
,
default
=
"true"
,
help
=
"Tune opencl params."
)
parser
.
add_argument
(
...
...
@@ -321,14 +347,16 @@ def parse_args():
help
=
"SoCs to build, comma seperated list (getprop ro.board.platform)"
)
return
parser
.
parse_known_args
()
def set_environment(configs):
    """Export build settings as environment variables.

    Sets EMBED_MODEL_DATA and VLOG_LEVEL from `configs`, PROJECT_NAME from
    the base name of FLAGS.config without its extension, and the fixed
    INPUT_FILE_NAME / OUTPUT_FILE_NAME prefixes.

    Args:
        configs: mapping with "embed_model_data" and "vlog_level" entries.
    """
    environ = os.environ
    environ["EMBED_MODEL_DATA"] = str(configs["embed_model_data"])
    environ["VLOG_LEVEL"] = str(configs["vlog_level"])
    config_file = os.path.basename(FLAGS.config)
    project_name = os.path.splitext(config_file)[0]
    environ["PROJECT_NAME"] = project_name
    environ['INPUT_FILE_NAME'] = "model_input"
    environ['OUTPUT_FILE_NAME'] = "model_out"
def
main
(
unused_args
):
configs
=
parse_model_configs
()
...
...
@@ -343,13 +371,16 @@ def main(unused_args):
if
not
os
.
path
.
exists
(
FLAGS
.
output_dir
):
os
.
makedirs
(
FLAGS
.
output_dir
)
elif
os
.
path
.
exists
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
"libmace"
)):
shutil
.
rmtree
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
os
.
makedirs
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
shutil
.
rmtree
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
os
.
makedirs
(
os
.
path
.
join
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
]))
generate_version_code
()
generate_opencl_source_code
()
option_args
=
' '
.
join
([
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
option_args
=
' '
.
join
(
[
arg
for
arg
in
unused_args
if
arg
.
startswith
(
'--'
)])
available_socs
=
sh_commands
.
adb_get_all_socs
()
target_socs
=
available_socs
...
...
@@ -362,10 +393,10 @@ def main(unused_args):
target_socs
=
target_socs
&
socs
missing_socs
=
socs
.
difference
(
target_socs
)
if
len
(
missing_socs
)
>
0
:
print
(
"Error: devices with SoCs are not connected %s"
%
missing_socs
)
print
(
"Error: devices with SoCs are not connected %s"
%
missing_socs
)
exit
(
1
)
for
target_soc
in
target_socs
:
for
target_abi
in
configs
[
"target_abis"
]:
global_runtime
=
get_global_runtime
(
configs
)
...
...
@@ -373,28 +404,27 @@ def main(unused_args):
os
.
environ
[
"TARGET_ABI"
]
=
target_abi
model_output_dirs
=
[]
for
model_name
in
configs
[
"models"
]:
print
'======================='
,
model_name
,
'====
==================='
print
'==================='
,
model_name
,
'
==================='
# Transfer params by environment
os
.
environ
[
"MODEL_TAG"
]
=
model_name
model_config
=
configs
[
"models"
][
model_name
]
input_file_list
=
model_config
.
get
(
"validation_inputs_data"
,
[])
input_file_list
=
model_config
.
get
(
"validation_inputs_data"
,
[])
for
key
in
model_config
:
if
key
in
[
'input_nodes'
,
'output_nodes'
]
and
isinstance
(
model_config
[
key
],
list
):
os
.
environ
[
key
.
upper
()]
=
","
.
join
(
model_config
[
key
])
elif
key
in
[
'input_shapes'
,
'output_shapes'
]
and
isinstance
(
model_config
[
key
],
list
):
elif
key
in
[
'input_shapes'
,
'output_shapes'
]
and
isinstance
(
model_config
[
key
],
list
):
os
.
environ
[
key
.
upper
()]
=
":"
.
join
(
model_config
[
key
])
else
:
os
.
environ
[
key
.
upper
()]
=
str
(
model_config
[
key
])
# Create model build directory
model_path_digest
=
md5sum
(
model_config
[
"model_file_path"
])
model_output_dir
=
"%s/%s/%s/%s/%s/%s/%s"
%
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
],
"build"
,
model_name
,
model_path_digest
,
target_soc
,
target_abi
)
model_output_dir
=
"%s/%s/%s/%s/%s/%s/%s"
%
(
FLAGS
.
output_dir
,
os
.
environ
[
"PROJECT_NAME"
],
"build"
,
model_name
,
model_path_digest
,
target_soc
,
target_abi
)
model_output_dirs
.
append
(
model_output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
...
...
@@ -404,22 +434,27 @@ def main(unused_args):
clear_env
(
target_soc
)
download_model_files
(
model_config
[
"model_file_path"
],
model_output_dir
,
model_config
.
get
(
"weight_file_path"
,
""
))
model_output_dir
,
model_config
.
get
(
"weight_file_path"
,
""
))
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
\
or
FLAGS
.
mode
==
"benchmark"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"run"
or
\
FLAGS
.
mode
==
"validate"
or
\
FLAGS
.
mode
==
"benchmark"
or
FLAGS
.
mode
==
"all"
:
generate_random_input
(
target_soc
,
model_output_dir
,
model_config
[
'input_nodes'
],
input_file_list
)
model_config
[
'input_nodes'
],
input_file_list
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
generate_model_code
()
build_mace_run_prod
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
tuning
)
target_soc
,
model_output_dir
,
FLAGS
.
tuning
)
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
run_model
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
option_args
)
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
\
FLAGS
.
mode
==
"all"
:
run_model
(
model_name
,
global_runtime
,
target_abi
,
target_soc
,
model_output_dir
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
option_args
)
if
FLAGS
.
mode
==
"benchmark"
:
benchmark_model
(
target_soc
,
model_output_dir
,
option_args
)
...
...
@@ -427,14 +462,18 @@ def main(unused_args):
if
FLAGS
.
mode
==
"validate"
or
FLAGS
.
mode
==
"all"
:
validate_model
(
target_soc
,
model_output_dir
)
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"merge"
or
FLAGS
.
mode
==
"all"
:
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"merge"
or
\
FLAGS
.
mode
==
"all"
:
merge_libs_and_tuning_results
(
target_soc
,
FLAGS
.
output_dir
+
"/"
+
os
.
environ
[
"PROJECT_NAME"
],
target_soc
,
FLAGS
.
output_dir
+
"/"
+
os
.
environ
[
"PROJECT_NAME"
],
model_output_dirs
)
if
FLAGS
.
mode
==
"throughput_test"
:
merged_lib_file
=
FLAGS
.
output_dir
+
"/%s/%s/libmace_%s.%s.a"
%
\
(
os
.
environ
[
"PROJECT_NAME"
],
target_abi
,
os
.
environ
[
"PROJECT_NAME"
],
target_soc
)
merged_lib_file
=
FLAGS
.
output_dir
+
\
"/%s/%s/libmace_%s.%s.a"
%
\
(
os
.
environ
[
"PROJECT_NAME"
],
target_abi
,
os
.
environ
[
"PROJECT_NAME"
],
target_soc
)
generate_random_input
(
target_soc
,
FLAGS
.
output_dir
,
[],
[])
for
model_name
in
configs
[
"models"
]:
runtime
=
configs
[
"models"
][
model_name
][
"runtime"
]
...
...
@@ -449,4 +488,3 @@ def main(unused_args):
if
__name__
==
"__main__"
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/sh_commands.py
浏览文件 @
6da30d22
...
...
@@ -3,18 +3,22 @@ import re
import
time
import
falcon_cli
################################
# common
################################
def strip_invalid_utf8(str):
    """Pass `str` through `iconv -c -t UTF-8` via the sh module.

    Returns whatever sh.iconv returns for that invocation.
    """
    iconv_options = ("-c", "-t", "UTF-8")
    return sh.iconv(str, *iconv_options)
def make_output_processor(buff):
    """Build a per-line output callback that echoes and records each line.

    Args:
        buff: list that receives every line given to the callback, unmodified.

    Returns:
        A one-argument function. Each call prints the line with surrounding
        whitespace stripped and appends the original line to `buff`.
    """
    def process_output(line):
        stripped = line.strip()
        print(stripped)
        buff.append(line)

    return process_output
################################
# adb commands
################################
...
...
@@ -23,11 +27,12 @@ def adb_split_stdout(stdout_str):
# Filter out last empty line
return
[
l
.
strip
()
for
l
in
stdout_str
.
split
(
'
\n
'
)
if
len
(
l
.
strip
())
>
0
]
def
adb_devices
(
target_socs
=
None
):
outputs
=
sh
.
grep
(
sh
.
adb
(
"devices"
),
"^[A-Za-z0-9]\+[[:space:]]\+device$"
)
raw_lists
=
sh
.
cut
(
outputs
,
"-f1"
)
device_ids
=
adb_split_stdout
(
raw_lists
)
if
target_socs
!=
None
:
if
target_socs
is
not
None
:
target_socs_set
=
set
(
target_socs
)
target_devices
=
[]
for
serialno
in
device_ids
:
...
...
@@ -38,6 +43,7 @@ def adb_devices(target_socs=None):
else
:
return
device_ids
def
adb_getprop_by_serialno
(
serialno
):
outputs
=
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"getprop"
)
raw_props
=
adb_split_stdout
(
outputs
)
...
...
@@ -49,12 +55,14 @@ def adb_getprop_by_serialno(serialno):
props
[
m
.
group
(
1
)]
=
m
.
group
(
2
)
return
props
def adb_supported_abis(serialno):
    """Return the ABIs listed in a device's "ro.product.cpu.abilist" property.

    Args:
        serialno: device serial number passed to adb_getprop_by_serialno.

    Returns:
        List of the comma-separated property entries, each stripped of
        surrounding whitespace.
    """
    raw_abilist = adb_getprop_by_serialno(serialno)["ro.product.cpu.abilist"]
    stripped_abis = []
    for entry in raw_abilist.split(','):
        stripped_abis.append(entry.strip())
    return stripped_abis
def
adb_get_all_socs
():
socs
=
[]
for
d
in
adb_devices
():
...
...
@@ -62,7 +70,10 @@ def adb_get_all_socs():
socs
.
append
(
props
[
"ro.board.platform"
])
return
set
(
socs
)
def
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
def
adb_run
(
serialno
,
host_bin_path
,
bin_name
,
args
=
""
,
opencl_profiling
=
1
,
vlog_level
=
0
,
...
...
@@ -71,7 +82,9 @@ def adb_run(serialno, host_bin_path, bin_name,
host_bin_full_path
=
"%s/%s"
%
(
host_bin_path
,
bin_name
)
device_bin_full_path
=
"%s/%s"
%
(
device_bin_path
,
bin_name
)
props
=
adb_getprop_by_serialno
(
serialno
)
print
(
"====================================================================="
)
print
(
"====================================================================="
)
print
(
"Run on device: %s, %s, %s"
%
(
serialno
,
props
[
"ro.board.platform"
],
props
[
"ro.product.model"
]))
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"rm -rf %s"
%
device_bin_path
)
...
...
@@ -79,12 +92,19 @@ def adb_run(serialno, host_bin_path, bin_name,
print
(
"Push %s to %s"
%
(
host_bin_full_path
,
device_bin_full_path
))
sh
.
adb
(
"-s"
,
serialno
,
"push"
,
host_bin_full_path
,
device_bin_full_path
)
print
(
"Run %s"
%
device_bin_full_path
)
stdout_buff
=
[]
stdout_buff
=
[]
process_output
=
make_output_processor
(
stdout_buff
)
p
=
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d MACE_CPP_MIN_VLOG_LEVEL=%d %s %s"
%
(
out_of_range_check
,
opencl_profiling
,
vlog_level
,
device_bin_full_path
,
args
),
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
=
sh
.
adb
(
"-s"
,
serialno
,
"shell"
,
"MACE_OUT_OF_RANGE_CHECK=%d MACE_OPENCL_PROFILING=%d "
"MACE_CPP_MIN_VLOG_LEVEL=%d %s %s"
%
(
out_of_range_check
,
opencl_profiling
,
vlog_level
,
device_bin_full_path
,
args
),
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
.
wait
()
return
""
.
join
(
stdout_buff
)
...
...
@@ -94,11 +114,14 @@ def adb_run(serialno, host_bin_path, bin_name,
################################
def
bazel_build
(
target
,
strip
=
"always"
,
abi
=
"armeabi-v7a"
):
print
(
"Build %s with ABI %s"
%
(
target
,
abi
))
stdout_buff
=
[]
stdout_buff
=
[]
process_output
=
make_output_processor
(
stdout_buff
)
p
=
sh
.
bazel
(
"build"
,
"-c"
,
"opt"
,
"--strip"
,
strip
,
p
=
sh
.
bazel
(
"build"
,
"-c"
,
"opt"
,
"--strip"
,
strip
,
"--verbose_failures"
,
target
,
"--crosstool_top=//external:android/crosstool"
,
...
...
@@ -109,12 +132,17 @@ def bazel_build(target, strip="always", abi="armeabi-v7a"):
"--copt=-DMACE_DISABLE_NO_TUNING_WARNING"
,
"--copt=-Werror=return-type"
,
"--copt=-O3"
,
"--define"
,
"neon=true"
,
"--define"
,
"openmp=true"
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
"--define"
,
"neon=true"
,
"--define"
,
"openmp=true"
,
_out
=
process_output
,
_bg
=
True
,
_err_to_out
=
True
)
p
.
wait
()
return
""
.
join
(
stdout_buff
)
def
bazel_target_to_bin
(
target
):
# change //mace/a/b:c to bazel-bin/mace/a/b/c
prefix
,
bin_name
=
target
.
split
(
':'
)
...
...
@@ -124,26 +152,32 @@ def bazel_target_to_bin(target):
host_bin_path
=
"bazel-bin/%s"
%
prefix
return
host_bin_path
,
bin_name
################################
# mace commands
################################
# TODO this should be refactored
def gen_encrypted_opencl_source(codegen_path="mace/codegen"):
    """Create <codegen_path>/opencl and run the encrypt_opencl_codegen.py script.

    The script is given ./mace/kernels/opencl/cl/ as its kernel directory and
    <codegen_path>/opencl/opencl_encrypt_program.cc as its output path.
    """
    opencl_dir = "%s/opencl" % codegen_path
    sh.mkdir("-p", opencl_dir)
    output_flag = \
        "--output_path=%s/opencl/opencl_encrypt_program.cc" % codegen_path
    sh.python("mace/python/tools/encrypt_opencl_codegen.py",
              "--cl_kernel_dir=./mace/kernels/opencl/cl/",
              output_flag)
def gen_mace_version(codegen_path="mace/codegen"):
    """Create <codegen_path>/version and run gen_version_source.sh.

    The script receives <codegen_path>/version/version.cc as its argument.
    """
    version_dir = "%s/version" % codegen_path
    sh.mkdir("-p", version_dir)
    version_source = "%s/version/version.cc" % codegen_path
    sh.bash("mace/tools/git/gen_version_source.sh", version_source)
def gen_compiled_opencl_source(codegen_path="mace/codegen"):
    """Create <codegen_path>/opencl and run the opencl_codegen.py script.

    The script is given <codegen_path>/opencl/opencl_compiled_program.cc as
    its output path.
    """
    opencl_dir = "%s/opencl" % codegen_path
    sh.mkdir("-p", opencl_dir)
    output_flag = \
        "--output_path=%s/opencl/opencl_compiled_program.cc" % codegen_path
    sh.python("mace/python/tools/opencl_codegen.py", output_flag)
################################
# falcon
################################
...
...
@@ -156,10 +190,10 @@ def falcon_tags(tags_dict):
tags
=
tags
+
",%s=%s"
%
(
k
,
v
)
return
tags
def
falcon_push_metrics
(
metrics
,
endpoint
=
"mace_dev"
,
tags
=
{}):
cli
=
falcon_cli
.
FalconCli
.
connect
(
server
=
"transfer.falcon.miliao.srv"
,
port
=
8433
,
debug
=
False
)
cli
=
falcon_cli
.
FalconCli
.
connect
(
server
=
"transfer.falcon.miliao.srv"
,
port
=
8433
,
debug
=
False
)
ts
=
int
(
time
.
time
())
falcon_metrics
=
[{
"endpoint"
:
endpoint
,
...
...
@@ -171,4 +205,3 @@ def falcon_push_metrics(metrics, endpoint="mace_dev", tags={}):
"counterType"
:
"GAUGE"
}
for
key
,
value
in
metrics
.
iteritems
()]
cli
.
update
(
falcon_metrics
)
tools/validate.py
浏览文件 @
6da30d22
...
...
@@ -20,29 +20,33 @@ from scipy import stats
# --input_shape 1,64,64,3 \
# --output_shape 1,64,64,2
def load_data(file):
    """Read a raw binary file as a flat float32 NumPy array.

    Args:
        file: path of the file to read.

    Returns:
        The array from np.fromfile(..., dtype=np.float32), or np.empty([0])
        when `file` is not an existing regular file.
    """
    if not os.path.isfile(file):
        return np.empty([0])
    return np.fromfile(file=file, dtype=np.float32)
def format_output_name(name):
    """Replace every run of characters outside [0-9a-zA-Z] in `name` with "_"."""
    non_alnum_run = '[^0-9a-zA-Z]+'
    return re.sub(non_alnum_run, '_', name)
def
compare_output
(
output_name
,
mace_out_value
,
out_value
):
if
mace_out_value
.
size
!=
0
:
out_value
=
out_value
.
reshape
(
-
1
)
mace_out_value
=
mace_out_value
.
reshape
(
-
1
)
assert
len
(
out_value
)
==
len
(
mace_out_value
)
similarity
=
(
1
-
spatial
.
distance
.
cosine
(
out_value
,
mace_out_value
))
print
output_name
,
'MACE VS'
,
FLAGS
.
platform
.
upper
(),
'similarity: '
,
similarity
print
output_name
,
'MACE VS'
,
FLAGS
.
platform
.
upper
(
),
'similarity: '
,
similarity
if
(
FLAGS
.
mace_runtime
==
"cpu"
and
similarity
>
0.999
)
or
\
(
FLAGS
.
mace_runtime
==
"neon"
and
similarity
>
0.999
)
or
\
(
FLAGS
.
mace_runtime
==
"gpu"
and
similarity
>
0.995
)
or
\
(
FLAGS
.
mace_runtime
==
"dsp"
and
similarity
>
0.930
):
print
'=======================Similarity Test Passed====
=================='
print
'===================Similarity Test Passed
=================='
else
:
print
'=======================Similarity Test Failed====
=================='
print
'===================Similarity Test Failed
=================='
sys
.
exit
(
-
1
)
else
:
print
'=======================Skip empty node==================='
...
...
@@ -66,21 +70,28 @@ def validate_tf_model(input_names, input_shapes, output_names):
tf
.
import_graph_def
(
input_graph_def
,
name
=
""
)
input_dict
=
{}
for
i
in
range
(
len
(
input_names
)):
input_value
=
load_data
(
FLAGS
.
input_file
+
"_"
+
input_names
[
i
])
input_value
=
load_data
(
FLAGS
.
input_file
+
"_"
+
input_names
[
i
])
input_value
=
input_value
.
reshape
(
input_shapes
[
i
])
input_node
=
graph
.
get_tensor_by_name
(
input_names
[
i
]
+
':0'
)
input_node
=
graph
.
get_tensor_by_name
(
input_names
[
i
]
+
':0'
)
input_dict
[
input_node
]
=
input_value
output_nodes
=
[]
for
name
in
output_names
:
output_nodes
.
extend
([
graph
.
get_tensor_by_name
(
name
+
':0'
)])
output_nodes
.
extend
(
[
graph
.
get_tensor_by_name
(
name
+
':0'
)])
output_values
=
session
.
run
(
output_nodes
,
feed_dict
=
input_dict
)
for
i
in
range
(
len
(
output_names
)):
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
format_output_name
(
output_names
[
i
])
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
\
format_output_name
(
output_names
[
i
])
mace_out_value
=
load_data
(
output_file_name
)
compare_output
(
output_names
[
i
],
mace_out_value
,
output_values
[
i
])
compare_output
(
output_names
[
i
],
mace_out_value
,
output_values
[
i
])
def
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
):
def
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
):
os
.
environ
[
'GLOG_minloglevel'
]
=
'1'
# suppress Caffe verbose prints
import
caffe
if
not
os
.
path
.
isfile
(
FLAGS
.
model_file
):
...
...
@@ -96,7 +107,8 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for
i
in
range
(
len
(
input_names
)):
input_value
=
load_data
(
FLAGS
.
input_file
+
"_"
+
input_names
[
i
])
input_value
=
input_value
.
reshape
(
input_shapes
[
i
]).
transpose
((
0
,
3
,
1
,
2
))
input_value
=
input_value
.
reshape
(
input_shapes
[
i
]).
transpose
((
0
,
3
,
1
,
2
))
input_blob_name
=
input_names
[
i
]
try
:
if
input_names
[
i
]
in
net
.
top_names
:
...
...
@@ -110,16 +122,20 @@ def validate_caffe_model(input_names, input_shapes, output_names, output_shapes)
for
i
in
range
(
len
(
output_names
)):
value
=
net
.
blobs
[
net
.
top_names
[
output_names
[
i
]][
0
]].
data
out_shape
=
output_shapes
[
i
]
out_shape
[
1
],
out_shape
[
2
],
out_shape
[
3
]
=
out_shape
[
3
],
out_shape
[
1
],
out_shape
[
2
]
out_shape
[
1
],
out_shape
[
2
],
out_shape
[
3
]
=
out_shape
[
3
],
out_shape
[
1
],
out_shape
[
2
]
value
=
value
.
reshape
(
out_shape
).
transpose
((
0
,
2
,
3
,
1
))
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
format_output_name
(
output_names
[
i
])
output_file_name
=
FLAGS
.
mace_out_file
+
"_"
+
format_output_name
(
output_names
[
i
])
mace_out_value
=
load_data
(
output_file_name
)
compare_output
(
output_names
[
i
],
mace_out_value
,
value
)
def
main
(
unused_args
):
input_names
=
[
name
for
name
in
FLAGS
.
input_node
.
split
(
','
)]
input_shape_strs
=
[
shape
for
shape
in
FLAGS
.
input_shape
.
split
(
':'
)]
input_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
input_shape_strs
]
input_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
input_shape_strs
]
output_names
=
[
name
for
name
in
FLAGS
.
output_node
.
split
(
','
)]
assert
len
(
input_names
)
==
len
(
input_shapes
)
...
...
@@ -127,18 +143,18 @@ def main(unused_args):
validate_tf_model
(
input_names
,
input_shapes
,
output_names
)
elif
FLAGS
.
platform
==
'caffe'
:
output_shape_strs
=
[
shape
for
shape
in
FLAGS
.
output_shape
.
split
(
':'
)]
output_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
output_shape_strs
]
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
)
output_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
output_shape_strs
]
validate_caffe_model
(
input_names
,
input_shapes
,
output_names
,
output_shapes
)
def
parse_args
():
"""Parses command line arguments."""
parser
=
argparse
.
ArgumentParser
()
parser
.
register
(
"type"
,
"bool"
,
lambda
v
:
v
.
lower
()
==
"true"
)
parser
.
add_argument
(
"--platform"
,
type
=
str
,
default
=
""
,
help
=
"Tensorflow or Caffe."
)
"--platform"
,
type
=
str
,
default
=
""
,
help
=
"Tensorflow or Caffe."
)
parser
.
add_argument
(
"--model_file"
,
type
=
str
,
...
...
@@ -150,40 +166,22 @@ def parse_args():
default
=
""
,
help
=
"caffe model file to load."
)
parser
.
add_argument
(
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
"--input_file"
,
type
=
str
,
default
=
""
,
help
=
"input file."
)
parser
.
add_argument
(
"--mace_out_file"
,
type
=
str
,
default
=
""
,
help
=
"mace output file to load."
)
parser
.
add_argument
(
"--mace_runtime"
,
type
=
str
,
default
=
"gpu"
,
help
=
"mace runtime device."
)
"--mace_runtime"
,
type
=
str
,
default
=
"gpu"
,
help
=
"mace runtime device."
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
parser
.
add_argument
(
"--output_shape"
,
type
=
str
,
default
=
"1,64,64,2"
,
help
=
"output shape."
)
"--output_shape"
,
type
=
str
,
default
=
"1,64,64,2"
,
help
=
"output shape."
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
"--input_node"
,
type
=
str
,
default
=
"input_node"
,
help
=
"input node"
)
parser
.
add_argument
(
"--output_node"
,
type
=
str
,
default
=
"output_node"
,
help
=
"output node"
)
"--output_node"
,
type
=
str
,
default
=
"output_node"
,
help
=
"output node"
)
return
parser
.
parse_known_args
()
...
...
@@ -191,4 +189,3 @@ def parse_args():
if
__name__
==
'__main__'
:
FLAGS
,
unparsed
=
parse_args
()
main
(
unused_args
=
[
sys
.
argv
[
0
]]
+
unparsed
)
tools/wino_conv.py
浏览文件 @
6da30d22
...
...
@@ -11,12 +11,8 @@ G_T = {}
# f(2, 3)
A_T
[
4
]
=
np
.
array
([[
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
-
1
]]).
astype
(
np
.
float32
)
A
[
4
]
=
np
.
transpose
(
A_T
[
4
])
B_T
[
4
]
=
np
.
array
([
[
1
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
0
],
[
0
,
1
,
0
,
-
1
]
]).
astype
(
np
.
float32
)
B_T
[
4
]
=
np
.
array
([[
1
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
0
],
[
0
,
1
,
0
,
-
1
]]).
astype
(
np
.
float32
)
B
[
4
]
=
np
.
transpose
(
B_T
[
4
])
G
[
4
]
=
np
.
array
([
[
1
,
0
,
0
],
...
...
@@ -44,45 +40,45 @@ B_T[6] = np.array([
]).
astype
(
np
.
float32
)
B
[
6
]
=
np
.
transpose
(
B_T
[
6
])
G
[
6
]
=
np
.
array
([
[
1
/
4.0
,
0
,
0
],
[
-
1
/
6.0
,
-
1
/
6.0
,
-
1
/
6.0
],
[
-
1
/
6.0
,
1
/
6.0
,
-
1
/
6.0
],
[
1
/
24.0
,
1
/
12.0
,
1
/
6.0
],
[
1
/
24.0
,
-
1
/
12.0
,
1
/
6.0
],
[
0
,
0
,
1
],
[
1
/
4.0
,
0
,
0
],
[
-
1
/
6.0
,
-
1
/
6.0
,
-
1
/
6.0
],
[
-
1
/
6.0
,
1
/
6.0
,
-
1
/
6.0
],
[
1
/
24.0
,
1
/
12.0
,
1
/
6.0
],
[
1
/
24.0
,
-
1
/
12.0
,
1
/
6.0
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
[
6
]
=
np
.
transpose
(
G
[
6
])
# f(6, 3)
A_T
[
8
]
=
np
.
array
([
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
2
,
-
2
,
1
/
2.
,
-
1
/
2.
,
0
],
[
0
,
1
,
1
,
4
,
4
,
1
/
4.
,
1
/
4.
,
0
],
[
0
,
1
,
-
1
,
8
,
-
8
,
1
/
8.
,
-
1
/
8.
,
0
],
[
0
,
1
,
1
,
16
,
16
,
1
/
16.
,
1
/
16.
,
0
],
[
0
,
1
,
-
1
,
32
,
-
32
,
1
/
32.
,
-
1
/
32.
,
1
],
[
1
,
1
,
1
,
1
,
1
,
1
,
1
,
0
],
[
0
,
1
,
-
1
,
2
,
-
2
,
1
/
2.
,
-
1
/
2.
,
0
],
[
0
,
1
,
1
,
4
,
4
,
1
/
4.
,
1
/
4.
,
0
],
[
0
,
1
,
-
1
,
8
,
-
8
,
1
/
8.
,
-
1
/
8.
,
0
],
[
0
,
1
,
1
,
16
,
16
,
1
/
16.
,
1
/
16.
,
0
],
[
0
,
1
,
-
1
,
32
,
-
32
,
1
/
32.
,
-
1
/
32.
,
1
],
]).
astype
(
np
.
float32
)
A
[
8
]
=
np
.
transpose
(
A_T
[
8
])
B_T
[
8
]
=
np
.
array
([
[
1
,
0
,
-
21
/
4.
,
0
,
21
/
4.
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
-
17
/
4.
,
-
17
/
4.
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
17
/
4.
,
-
17
/
4.
,
-
1
,
1
,
0
],
[
0
,
1
/
2.
,
1
/
4.
,
-
5
/
2.
,
-
5
/
4.
,
2
,
1
,
0
],
[
0
,
-
1
/
2.
,
1
/
4.
,
5
/
2.
,
-
5
/
4.
,
-
2
,
1
,
0
],
[
0
,
2
,
4
,
-
5
/
2.
,
-
5
,
1
/
2.
,
1
,
0
],
[
0
,
-
2
,
4
,
5
/
2.
,
-
5
,
-
1
/
2.
,
1
,
0
],
[
0
,
-
1
,
0
,
21
/
4.
,
0
,
-
21
/
4.
,
0
,
1
],
[
1
,
0
,
-
21
/
4.
,
0
,
21
/
4.
,
0
,
-
1
,
0
],
[
0
,
1
,
1
,
-
17
/
4.
,
-
17
/
4.
,
1
,
1
,
0
],
[
0
,
-
1
,
1
,
17
/
4.
,
-
17
/
4.
,
-
1
,
1
,
0
],
[
0
,
1
/
2.
,
1
/
4.
,
-
5
/
2.
,
-
5
/
4.
,
2
,
1
,
0
],
[
0
,
-
1
/
2.
,
1
/
4.
,
5
/
2.
,
-
5
/
4.
,
-
2
,
1
,
0
],
[
0
,
2
,
4
,
-
5
/
2.
,
-
5
,
1
/
2.
,
1
,
0
],
[
0
,
-
2
,
4
,
5
/
2.
,
-
5
,
-
1
/
2.
,
1
,
0
],
[
0
,
-
1
,
0
,
21
/
4.
,
0
,
-
21
/
4.
,
0
,
1
],
]).
astype
(
np
.
float32
)
B
[
8
]
=
np
.
transpose
(
B_T
[
8
])
G
[
8
]
=
np
.
array
([
[
1
,
0
,
0
],
[
-
2
/
9.
,
-
2
/
9.
,
-
2
/
9.
],
[
-
2
/
9.
,
2
/
9.
,
-
2
/
9.
],
[
1
/
90.
,
1
/
45.
,
2
/
45.
],
[
1
/
90.
,
-
1
/
45.
,
2
/
45.
],
[
32
/
45.
,
16
/
45.
,
8
/
45.
],
[
32
/
45.
,
-
16
/
45.
,
8
/
45.
],
[
0
,
0
,
1
],
[
1
,
0
,
0
],
[
-
2
/
9.
,
-
2
/
9.
,
-
2
/
9.
],
[
-
2
/
9.
,
2
/
9.
,
-
2
/
9.
],
[
1
/
90.
,
1
/
45.
,
2
/
45.
],
[
1
/
90.
,
-
1
/
45.
,
2
/
45.
],
[
32
/
45.
,
16
/
45.
,
8
/
45.
],
[
32
/
45.
,
-
16
/
45.
,
8
/
45.
],
[
0
,
0
,
1
],
]).
astype
(
np
.
float32
)
G_T
[
8
]
=
np
.
transpose
(
G
[
8
])
...
...
@@ -112,7 +108,7 @@ def winograd_conv(m, r, input, filter):
for
c
in
range
(
C
):
u
=
np
.
dot
(
np
.
dot
(
G
[
alpha
],
filter
[
k
,
c
,
:,
:]),
G_T
[
alpha
])
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
)
:
for
j
in
range
(
alpha
)
:
U
[(
i
*
alpha
+
j
)
*
K
+
k
,
c
]
=
u
[
i
,
j
]
print
'filter out: '
,
U
.
shape
...
...
@@ -129,24 +125,24 @@ def winograd_conv(m, r, input, filter):
w_idx
=
t
%
rounded_w
h_start
=
h_idx
*
m
w_start
=
w_idx
*
m
h_end
=
min
(
h_start
+
alpha
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
alpha
,
input_shape
[
3
])
h_end
=
min
(
h_start
+
alpha
,
input_shape
[
2
])
w_end
=
min
(
w_start
+
alpha
,
input_shape
[
3
])
d
=
np
.
zeros
((
alpha
,
alpha
))
d
[
0
:
h_end
-
h_start
,
0
:
w_end
-
w_start
]
=
\
input
[
n
,
c
,
h_start
:
h_end
,
w_start
:
w_end
]
v
=
np
.
dot
(
np
.
dot
(
B_T
[
alpha
],
d
),
B
[
alpha
])
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
V
[(
i
*
alpha
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
V
[(
i
*
alpha
+
j
)
*
C
+
c
,
p
]
=
v
[
i
,
j
]
tmp
=
V
.
reshape
(
alpha_square
,
C
,
P
,
1
)
print
'input out: '
,
tmp
.
shape
tmp
.
astype
(
np
.
float32
).
tofile
(
"C"
)
M
=
np
.
zeros
((
alpha_square
*
K
,
P
))
for
i
in
range
(
alpha_square
):
u
=
U
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
v
=
V
[
i
*
C
:
(
i
+
1
)
*
C
,
:]
M
[
i
*
K
:
(
i
+
1
)
*
K
,
:]
=
np
.
dot
(
u
,
v
)
u
=
U
[
i
*
K
:(
i
+
1
)
*
K
,
:]
v
=
V
[
i
*
C
:(
i
+
1
)
*
C
,
:]
M
[
i
*
K
:(
i
+
1
)
*
K
,
:]
=
np
.
dot
(
u
,
v
)
print
'M shape: '
,
M
.
shape
M
.
astype
(
np
.
float32
).
tofile
(
"gemm"
)
...
...
@@ -156,7 +152,7 @@ def winograd_conv(m, r, input, filter):
tm
=
np
.
zeros
((
alpha
,
alpha
))
for
i
in
range
(
alpha
):
for
j
in
range
(
alpha
):
tm
[
i
][
j
]
=
M
[(
i
*
alpha
+
j
)
*
K
+
k
,
b
]
tm
[
i
][
j
]
=
M
[(
i
*
alpha
+
j
)
*
K
+
k
,
b
]
y
=
np
.
dot
(
np
.
dot
(
A_T
[
alpha
],
tm
),
A
[
alpha
])
for
i
in
range
(
m
):
for
j
in
range
(
m
):
...
...
@@ -173,6 +169,7 @@ def winograd_conv(m, r, input, filter):
return
res
def
tf_conv
(
input
,
filter
):
conv_op
=
tf
.
nn
.
conv2d
(
input
,
filter
,
[
1
,
1
,
1
,
1
],
'VALID'
)
with
tf
.
Session
()
as
sess
:
...
...
@@ -206,4 +203,3 @@ def main():
if
__name__
==
'__main__'
:
main
()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录