Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
孤小逸
Mace
提交
1617b83f
Mace
项目概览
孤小逸
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
1617b83f
编写于
5月 16, 2018
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Support build once then run on cpu or gpu.
上级
9716a876
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
393 addition
and
219 deletion
+393
-219
mace/core/net.cc
mace/core/net.cc
+13
-7
mace/core/workspace.cc
mace/core/workspace.cc
+23
-10
mace/python/tools/converter.py
mace/python/tools/converter.py
+53
-22
mace/python/tools/converter_tool/base_converter.py
mace/python/tools/converter_tool/base_converter.py
+67
-0
mace/python/tools/converter_tool/caffe_converter.py
mace/python/tools/converter_tool/caffe_converter.py
+15
-4
mace/python/tools/converter_tool/shape_inference.py
mace/python/tools/converter_tool/shape_inference.py
+15
-0
mace/python/tools/converter_tool/tensorflow_converter.py
mace/python/tools/converter_tool/tensorflow_converter.py
+15
-6
mace/python/tools/converter_tool/transformer.py
mace/python/tools/converter_tool/transformer.py
+112
-106
mace/python/tools/source_converter_lib.py
mace/python/tools/source_converter_lib.py
+0
-1
mace/test/mace_api_mt_test.cc
mace/test/mace_api_mt_test.cc
+16
-3
mace/test/mace_api_test.cc
mace/test/mace_api_test.cc
+16
-4
tools/mace_tools.py
tools/mace_tools.py
+30
-37
tools/sh_commands.py
tools/sh_commands.py
+5
-6
tools/validate.py
tools/validate.py
+13
-13
未找到文件。
mace/core/net.cc
浏览文件 @
1617b83f
...
...
@@ -40,13 +40,19 @@ SerialNet::SerialNet(const std::shared_ptr<const OperatorRegistry> op_registry,
MACE_LATENCY_LOGGER
(
1
,
"Constructing SerialNet "
,
net_def
->
name
());
for
(
int
idx
=
0
;
idx
<
net_def
->
op_size
();
++
idx
)
{
const
auto
&
operator_def
=
net_def
->
op
(
idx
);
VLOG
(
3
)
<<
"Creating operator "
<<
operator_def
.
name
()
<<
"("
<<
operator_def
.
type
()
<<
")"
;
OperatorDef
temp_def
(
operator_def
);
std
::
unique_ptr
<
OperatorBase
>
op
(
op_registry
->
CreateOperator
(
temp_def
,
ws
,
type
,
mode
));
if
(
op
)
{
operators_
.
emplace_back
(
std
::
move
(
op
));
// TODO(liuqi): refactor based on PB
const
int
op_device
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
operator_def
,
"device"
,
-
1
);
if
(
op_device
==
type
)
{
VLOG
(
3
)
<<
"Creating operator "
<<
operator_def
.
name
()
<<
"("
<<
operator_def
.
type
()
<<
")"
;
OperatorDef
temp_def
(
operator_def
);
std
::
unique_ptr
<
OperatorBase
>
op
(
op_registry
->
CreateOperator
(
temp_def
,
ws
,
type
,
mode
));
if
(
op
)
{
operators_
.
emplace_back
(
std
::
move
(
op
));
}
}
}
}
...
...
mace/core/workspace.cc
浏览文件 @
1617b83f
...
...
@@ -136,7 +136,11 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
// As DSP may have different data output type for each op,
// we stick to the same concept.
for
(
auto
&
op
:
net_def
.
op
())
{
if
(
!
op
.
mem_id
().
empty
())
{
// TODO(liuqi): refactor based on PB
const
int
op_device
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
op
,
"device"
,
-
1
);
if
(
op_device
==
device_type
&&
!
op
.
mem_id
().
empty
())
{
const
DataType
op_dtype
=
static_cast
<
DataType
>
(
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
op
,
"T"
,
static_cast
<
int
>
(
DT_FLOAT
)));
...
...
@@ -150,20 +154,29 @@ void Workspace::CreateOutputTensorBuffer(const NetDef &net_def,
MACE_CHECK
(
dtype
!=
DataType
::
DT_INVALID
,
"data type is invalid."
);
for
(
auto
&
mem_block
:
net_def
.
mem_arena
().
mem_block
())
{
if
(
device_type
==
DeviceType
::
GPU
)
{
std
::
unique_ptr
<
BufferBase
>
image_buf
(
new
Image
({
mem_block
.
x
(),
mem_block
.
y
()},
dtype
));
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
image_buf
));
// TODO(liuqi): refactor based on PB
if
(
mem_block
.
mem_id
()
>=
20000
)
{
std
::
unique_ptr
<
BufferBase
>
image_buf
(
new
Image
({
mem_block
.
x
(),
mem_block
.
y
()},
dtype
));
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
image_buf
));
}
}
else
{
std
::
unique_ptr
<
BufferBase
>
tensor_buf
(
new
Buffer
(
GetDeviceAllocator
(
device_type
),
mem_block
.
x
()));
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
tensor_buf
));
if
(
mem_block
.
mem_id
()
<
20000
)
{
std
::
unique_ptr
<
BufferBase
>
tensor_buf
(
new
Buffer
(
GetDeviceAllocator
(
device_type
),
mem_block
.
x
()));
preallocated_allocator_
.
SetBuffer
(
mem_block
.
mem_id
(),
std
::
move
(
tensor_buf
));
}
}
}
VLOG
(
3
)
<<
"Preallocate buffer to tensors"
;
for
(
auto
&
op
:
net_def
.
op
())
{
if
(
!
op
.
mem_id
().
empty
())
{
// TODO(liuqi): refactor based on PB
const
int
op_device
=
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
op
,
"device"
,
-
1
);
if
(
op_device
==
device_type
&&
!
op
.
mem_id
().
empty
())
{
auto
mem_ids
=
op
.
mem_id
();
int
count
=
mem_ids
.
size
();
for
(
int
i
=
0
;
i
<
count
;
++
i
)
{
...
...
mace/python/tools/converter.py
浏览文件 @
1617b83f
...
...
@@ -16,6 +16,7 @@ import argparse
import
sys
import
hashlib
import
os.path
import
copy
from
mace.proto
import
mace_pb2
from
mace.python.tools
import
tf_dsp_converter_lib
...
...
@@ -25,6 +26,7 @@ from mace.python.tools.converter_tool import base_converter as cvt
from
mace.python.tools.converter_tool
import
tensorflow_converter
from
mace.python.tools.converter_tool
import
caffe_converter
from
mace.python.tools.converter_tool
import
transformer
from
mace.python.tools.convert_util
import
mace_check
# ./bazel-bin/mace/python/tools/tf_converter --model_file quantized_test.pb \
...
...
@@ -34,11 +36,14 @@ from mace.python.tools.converter_tool import transformer
FLAGS
=
None
data_type_map
=
{
'DT_HALF'
:
mace_pb2
.
DT_HALF
,
'DT_FLOAT'
:
mace_pb2
.
DT_FLOAT
}
device_type_map
=
{
'cpu'
:
mace_pb2
.
CPU
,
'gpu'
:
mace_pb2
.
GPU
,
'dsp'
:
mace_pb2
.
HEXAGON
}
device_data_type_map
=
{
mace_pb2
.
CPU
:
mace_pb2
.
DT_FLOAT
,
mace_pb2
.
GPU
:
mace_pb2
.
DT_HALF
,
mace_pb2
.
HEXAGON
:
mace_pb2
.
DT_UINT8
}
def
file_checksum
(
fname
):
...
...
@@ -81,7 +86,7 @@ def main(unused_args):
if
FLAGS
.
platform
not
in
[
'tensorflow'
,
'caffe'
]:
print
(
"platform %s is not supported."
%
FLAGS
.
platform
)
sys
.
exit
(
-
1
)
if
FLAGS
.
runtime
not
in
[
'cpu'
,
'gpu'
,
'dsp'
]:
if
FLAGS
.
runtime
not
in
[
'cpu'
,
'gpu'
,
'dsp'
,
''
]:
print
(
"runtime %s is not supported."
%
FLAGS
.
runtime
)
sys
.
exit
(
-
1
)
...
...
@@ -95,8 +100,6 @@ def main(unused_args):
sys
.
exit
(
-
1
)
else
:
option
=
cvt
.
ConverterOption
()
option
.
data_type
=
data_type_map
[
FLAGS
.
data_type
]
option
.
device
=
device_type_map
[
FLAGS
.
runtime
]
option
.
winograd_enabled
=
bool
(
FLAGS
.
winograd
)
input_node_names
=
FLAGS
.
input_node
.
split
(
','
)
...
...
@@ -117,8 +120,8 @@ def main(unused_args):
print
(
"Convert model to mace model."
)
if
FLAGS
.
platform
==
'tensorflow'
:
converter
=
tensorflow_converter
.
TensorflowConverter
(
option
,
FLAGS
.
model_file
)
# noqa
converter
=
tensorflow_converter
.
TensorflowConverter
(
option
,
FLAGS
.
model_file
)
elif
FLAGS
.
platform
==
'caffe'
:
converter
=
caffe_converter
.
CaffeConverter
(
option
,
FLAGS
.
model_file
,
...
...
@@ -126,16 +129,49 @@ def main(unused_args):
output_graph_def
=
converter
.
run
()
print
(
"Transform model to one that can better run on device."
)
# TODO(liuqi/liyin): transform gpu/cpu and merge their ops
mace_transformer
=
transformer
.
Transformer
(
option
,
output_graph_def
)
output_graph_def
=
mace_transformer
.
run
()
if
not
FLAGS
.
runtime
:
cpu_graph_def
=
copy
.
deepcopy
(
output_graph_def
)
option
.
device
=
mace_pb2
.
CPU
option
.
data_type
=
device_data_type_map
[
mace_pb2
.
CPU
]
option
.
disable_transpose_filters
()
mace_cpu_transformer
=
transformer
.
Transformer
(
option
,
cpu_graph_def
)
cpu_graph_def
=
mace_cpu_transformer
.
run
()
print
"start optimize cpu memory."
memory_optimizer
.
optimize_cpu_memory
(
cpu_graph_def
)
print
"CPU memory optimization done."
print
"start optimize memory."
if
FLAGS
.
runtime
==
'gpu'
:
memory_optimizer
.
optimize_gpu_memory
(
output_graph_def
)
elif
FLAGS
.
runtime
==
'cpu'
:
memory_optimizer
.
optimize_cpu_memory
(
output_graph_def
)
print
"Memory optimization done."
option
.
device
=
mace_pb2
.
GPU
option
.
data_type
=
device_data_type_map
[
mace_pb2
.
GPU
]
option
.
enable_transpose_filters
()
mace_gpu_transformer
=
transformer
.
Transformer
(
option
,
output_graph_def
)
output_gpu_graph_def
=
mace_gpu_transformer
.
run
()
print
"start optimize gpu memory."
memory_optimizer
.
optimize_gpu_memory
(
output_gpu_graph_def
)
print
"GPU memory optimization done."
print
"Merge cpu and gpu ops together"
output_graph_def
.
op
.
extend
(
cpu_graph_def
.
op
)
output_graph_def
.
mem_arena
.
mem_block
.
extend
(
cpu_graph_def
.
mem_arena
.
mem_block
)
print
"Merge done"
else
:
option
.
device
=
device_type_map
[
FLAGS
.
runtime
]
option
.
data_type
=
device_data_type_map
[
option
.
device
]
mace_transformer
=
transformer
.
Transformer
(
option
,
output_graph_def
)
output_graph_def
=
mace_transformer
.
run
()
print
"start optimize memory."
if
FLAGS
.
runtime
==
'gpu'
:
memory_optimizer
.
optimize_gpu_memory
(
output_graph_def
)
elif
FLAGS
.
runtime
==
'cpu'
:
memory_optimizer
.
optimize_cpu_memory
(
output_graph_def
)
else
:
mace_check
(
False
,
"runtime only support [gpu|cpu|dsp]"
)
print
"Memory optimization done."
if
FLAGS
.
output_type
==
'source'
:
source_converter_lib
.
convert_to_source
(
...
...
@@ -188,7 +224,7 @@ def parse_args():
default
=
""
,
help
=
"File to save the output graph to."
)
parser
.
add_argument
(
"--runtime"
,
type
=
str
,
default
=
"
cpu
"
,
help
=
"Runtime: cpu/gpu/dsp"
)
"--runtime"
,
type
=
str
,
default
=
""
,
help
=
"Runtime: cpu/gpu/dsp"
)
parser
.
add_argument
(
"--input_node"
,
type
=
str
,
...
...
@@ -196,11 +232,6 @@ def parse_args():
help
=
"e.g., input_node"
)
parser
.
add_argument
(
"--output_node"
,
type
=
str
,
default
=
"softmax"
,
help
=
"e.g., softmax"
)
parser
.
add_argument
(
"--data_type"
,
type
=
str
,
default
=
'DT_FLOAT'
,
help
=
"e.g., DT_HALF/DT_FLOAT"
)
parser
.
add_argument
(
"--output_type"
,
type
=
str
,
default
=
"pb"
,
help
=
"output type: source/pb"
)
parser
.
add_argument
(
...
...
mace/python/tools/converter_tool/base_converter.py
浏览文件 @
1617b83f
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
enum
import
Enum
from
mace.proto
import
mace_pb2
...
...
@@ -117,6 +132,27 @@ class MaceKeyword(object):
mace_axis_str
=
'axis'
mace_shape_str
=
'shape'
mace_winograd_filter_transformed
=
'is_filter_transformed'
mace_device
=
'device'
class
TransformerRule
(
Enum
):
REMOVE_IDENTITY_OP
=
0
TRANSFORM_GLOBAL_POOLING
=
1
FOLD_SOFTMAX
=
2
FOLD_BATCHNORM
=
3
,
FOLD_CONV_AND_BN
=
4
,
FOLD_DEPTHWISE_CONV_AND_BN
=
5
,
TRANSFORM_GPU_WINOGRAD
=
6
,
TRANSFORM_ADD_TO_BIASADD
=
7
,
FOLD_BIASADD
=
8
,
FOLD_ACTIVATION
=
9
,
TRANSPOSE_FILTERS
=
10
,
RESHAPE_FC_WEIGHT
=
11
,
TRANSPOSE_DATA_FORMAT
=
12
,
TRANSFORM_GLOBAL_CONV_TO_FC
=
13
,
TRANSFORM_BUFFER_IMAGE
=
14
,
ADD_DEVICE_AND_DATA_TYPE
=
15
,
SORT_BY_EXECUTION
=
16
class
ConverterInterface
(
object
):
...
...
@@ -162,6 +198,25 @@ class ConverterOption(object):
self
.
_data_type
=
mace_pb2
.
DT_FLOAT
self
.
_device
=
mace_pb2
.
CPU
self
.
_winograd_enabled
=
False
self
.
_transformer_option
=
[
TransformerRule
.
REMOVE_IDENTITY_OP
,
TransformerRule
.
TRANSFORM_GLOBAL_POOLING
,
TransformerRule
.
FOLD_SOFTMAX
,
TransformerRule
.
FOLD_BATCHNORM
,
TransformerRule
.
FOLD_CONV_AND_BN
,
TransformerRule
.
FOLD_DEPTHWISE_CONV_AND_BN
,
TransformerRule
.
TRANSFORM_GPU_WINOGRAD
,
TransformerRule
.
TRANSFORM_ADD_TO_BIASADD
,
TransformerRule
.
FOLD_BIASADD
,
TransformerRule
.
FOLD_ACTIVATION
,
TransformerRule
.
TRANSPOSE_FILTERS
,
TransformerRule
.
RESHAPE_FC_WEIGHT
,
TransformerRule
.
TRANSPOSE_DATA_FORMAT
,
TransformerRule
.
TRANSFORM_GLOBAL_CONV_TO_FC
,
TransformerRule
.
TRANSFORM_BUFFER_IMAGE
,
TransformerRule
.
ADD_DEVICE_AND_DATA_TYPE
,
TransformerRule
.
SORT_BY_EXECUTION
,
]
@
property
def
input_nodes
(
self
):
...
...
@@ -183,6 +238,10 @@ class ConverterOption(object):
def
winograd_enabled
(
self
):
return
self
.
_winograd_enabled
@
property
def
transformer_option
(
self
):
return
self
.
_transformer_option
@
input_nodes
.
setter
def
input_nodes
(
self
,
input_nodes
):
for
node
in
input_nodes
:
...
...
@@ -211,6 +270,14 @@ class ConverterOption(object):
def
winograd_enabled
(
self
,
winograd_enabled
):
self
.
_winograd_enabled
=
winograd_enabled
def
disable_transpose_filters
(
self
):
if
TransformerRule
.
TRANSPOSE_FILTERS
in
self
.
_transformer_option
:
self
.
_transformer_option
.
remove
(
TransformerRule
.
TRANSPOSE_FILTERS
)
def
enable_transpose_filters
(
self
):
if
TransformerRule
.
TRANSPOSE_FILTERS
not
in
self
.
_transformer_option
:
self
.
_transformer_option
.
append
(
TransformerRule
.
TRANSPOSE_FILTERS
)
class
ConverterUtil
(
object
):
@
staticmethod
...
...
mace/python/tools/converter_tool/caffe_converter.py
浏览文件 @
1617b83f
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
numpy
as
np
import
google.protobuf.text_format
...
...
@@ -325,10 +340,6 @@ class CaffeConverter(base_converter.ConverterInterface):
op
.
input
.
extend
(
caffe_op
.
layer
.
bottom
)
op
.
output
.
extend
(
caffe_op
.
layer
.
top
)
data_type_arg
=
op
.
arg
.
add
()
data_type_arg
.
name
=
'T'
data_type_arg
.
i
=
self
.
_option
.
data_type
ConverterUtil
.
add_data_format_arg
(
op
,
DataFormat
.
NCHW
)
return
op
...
...
mace/python/tools/converter_tool/shape_inference.py
浏览文件 @
1617b83f
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
numpy
as
np
...
...
mace/python/tools/converter_tool/tensorflow_converter.py
浏览文件 @
1617b83f
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
numpy
as
np
import
tensorflow
as
tf
...
...
@@ -197,11 +212,6 @@ class TensorflowConverter(base_converter.ConverterInterface):
for
tf_output
in
tf_op
.
outputs
:
output_shape
=
op
.
output_shape
.
add
()
output_shape
.
dims
.
extend
(
tf_output
.
shape
.
as_list
())
op
.
output_type
.
append
(
self
.
_option
.
data_type
)
data_type_arg
=
op
.
arg
.
add
()
data_type_arg
.
name
=
'T'
data_type_arg
.
i
=
self
.
_option
.
data_type
ConverterUtil
.
add_data_format_arg
(
op
,
DataFormat
.
NHWC
)
...
...
@@ -289,7 +299,6 @@ class TensorflowConverter(base_converter.ConverterInterface):
op
.
input
.
extend
([
scale_name
,
offset_name
])
del
op
.
output
[
1
:]
del
op
.
output_shape
[
1
:]
del
op
.
output_type
[
1
:]
def
convert_pooling
(
self
,
tf_op
):
op
=
self
.
convert_general_op
(
tf_op
)
...
...
mace/python/tools/converter_tool/transformer.py
浏览文件 @
1617b83f
# Copyright 2018 Xiaomi, Inc. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
enum
import
numpy
as
np
...
...
@@ -11,6 +26,7 @@ from mace.python.tools.converter_tool.base_converter import FilterFormat
from
mace.python.tools.converter_tool.base_converter
import
MaceOp
from
mace.python.tools.converter_tool.base_converter
import
MaceKeyword
from
mace.python.tools.converter_tool.base_converter
import
ConverterUtil
from
mace.python.tools.converter_tool.base_converter
import
TransformerRule
from
mace.python.tools.convert_util
import
mace_check
OPENCL_IMAGE_MAX_SIZE
=
16384
...
...
@@ -36,23 +52,52 @@ class Transformer(base_converter.ConverterInterface):
def
__init__
(
self
,
option
,
model
):
# DO NOT reorder the following transformers
self
.
_registered_transformers
=
[
self
.
remove_identity_op
,
self
.
transform_global_pooling
,
self
.
fold_softmax
,
self
.
fold_batchnorm
,
self
.
fold_conv_and_bn
,
# data_format related
self
.
fold_depthwise_conv_and_bn
,
# data_format related
self
.
transform_gpu_winograd
,
# data_format related
self
.
transform_add_to_biasadd
,
self
.
fold_biasadd
,
self
.
fold_activation
,
self
.
transpose_filters
,
self
.
transpose_data_format
,
self
.
transform_global_conv_to_fc
,
self
.
transform_buffer_image
,
self
.
sort_by_execution
,
self
.
_registered_transformers_order
=
[
TransformerRule
.
REMOVE_IDENTITY_OP
,
TransformerRule
.
TRANSFORM_GLOBAL_POOLING
,
TransformerRule
.
FOLD_SOFTMAX
,
TransformerRule
.
FOLD_BATCHNORM
,
TransformerRule
.
FOLD_CONV_AND_BN
,
TransformerRule
.
FOLD_DEPTHWISE_CONV_AND_BN
,
TransformerRule
.
TRANSFORM_GPU_WINOGRAD
,
TransformerRule
.
TRANSFORM_ADD_TO_BIASADD
,
TransformerRule
.
FOLD_BIASADD
,
TransformerRule
.
FOLD_ACTIVATION
,
TransformerRule
.
TRANSPOSE_FILTERS
,
TransformerRule
.
RESHAPE_FC_WEIGHT
,
TransformerRule
.
TRANSPOSE_DATA_FORMAT
,
TransformerRule
.
TRANSFORM_GLOBAL_CONV_TO_FC
,
TransformerRule
.
TRANSFORM_BUFFER_IMAGE
,
TransformerRule
.
ADD_DEVICE_AND_DATA_TYPE
,
TransformerRule
.
SORT_BY_EXECUTION
,
]
self
.
_registered_transformers
=
{
TransformerRule
.
REMOVE_IDENTITY_OP
:
self
.
remove_identity_op
,
TransformerRule
.
TRANSFORM_GLOBAL_POOLING
:
self
.
transform_global_pooling
,
TransformerRule
.
FOLD_SOFTMAX
:
self
.
fold_softmax
,
TransformerRule
.
FOLD_BATCHNORM
:
self
.
fold_batchnorm
,
TransformerRule
.
FOLD_CONV_AND_BN
:
self
.
fold_conv_and_bn
,
# data_format related
TransformerRule
.
FOLD_DEPTHWISE_CONV_AND_BN
:
self
.
fold_depthwise_conv_and_bn
,
# data_format related
TransformerRule
.
TRANSFORM_GPU_WINOGRAD
:
self
.
transform_gpu_winograd
,
# data_format related
TransformerRule
.
TRANSFORM_ADD_TO_BIASADD
:
self
.
transform_add_to_biasadd
,
TransformerRule
.
FOLD_BIASADD
:
self
.
fold_biasadd
,
TransformerRule
.
FOLD_ACTIVATION
:
self
.
fold_activation
,
TransformerRule
.
TRANSPOSE_FILTERS
:
self
.
transpose_filters
,
TransformerRule
.
RESHAPE_FC_WEIGHT
:
self
.
reshape_fc_weight
,
TransformerRule
.
TRANSPOSE_DATA_FORMAT
:
self
.
transpose_data_format
,
TransformerRule
.
TRANSFORM_GLOBAL_CONV_TO_FC
:
self
.
transform_global_conv_to_fc
,
TransformerRule
.
TRANSFORM_BUFFER_IMAGE
:
self
.
transform_buffer_image
,
TransformerRule
.
ADD_DEVICE_AND_DATA_TYPE
:
self
.
add_device_and_data_type
,
TransformerRule
.
SORT_BY_EXECUTION
:
self
.
sort_by_execution
,
}
self
.
_option
=
option
self
.
_model
=
model
...
...
@@ -67,12 +112,14 @@ class Transformer(base_converter.ConverterInterface):
self
.
_target_data_format
=
DataFormat
.
NCHW
def
run
(
self
):
for
transformer
in
self
.
_registered_transformers
:
while
True
:
self
.
construct_ops_and_consumers
()
changed
=
transformer
()
if
not
changed
:
break
for
key
in
self
.
_registered_transformers_order
:
if
key
in
self
.
_option
.
transformer_option
:
transformer
=
self
.
_registered_transformers
[
key
]
while
True
:
self
.
construct_ops_and_consumers
()
changed
=
transformer
()
if
not
changed
:
break
return
self
.
_model
...
...
@@ -404,19 +451,16 @@ class Transformer(base_converter.ConverterInterface):
wt_output_shape
.
dims
.
extend
(
[
16
,
in_channels
,
wt_output_width
,
1
])
arg
=
wt_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
if
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_padding_str
)
\
is
not
None
:
padding_arg
=
wt_op
.
arg
.
add
()
padding_arg
.
name
=
MaceKeyword
.
mace_padding_str
padding_arg
.
i
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_padding_str
).
i
# noqa
elif
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_padding_values_str
)
is
not
None
:
# noqa
padding_arg
.
i
=
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_padding_str
).
i
elif
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_padding_values_str
)
\
is
not
None
:
padding_arg
=
wt_op
.
arg
.
add
()
padding_arg
.
name
=
MaceKeyword
.
mace_padding_values_str
padding_arg
.
ints
.
extend
(
ConverterUtil
.
get_arg
(
...
...
@@ -432,9 +476,6 @@ class Transformer(base_converter.ConverterInterface):
matmul_output_shape
.
dims
.
extend
(
[
16
,
out_channels
,
wt_output_width
,
1
])
arg
=
matmul_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
arg
=
matmul_op
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_winograd_filter_transformed
arg
.
i
=
1
...
...
@@ -451,9 +492,6 @@ class Transformer(base_converter.ConverterInterface):
iwt_output_shape
=
iwt_op
.
output_shape
.
add
()
iwt_output_shape
.
dims
.
extend
(
op
.
output_shape
[
0
].
dims
)
arg
=
iwt_op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
batch_arg
=
iwt_op
.
arg
.
add
()
batch_arg
.
name
=
'batch'
batch_arg
.
i
=
batch
...
...
@@ -618,10 +656,6 @@ class Transformer(base_converter.ConverterInterface):
dims_arg
.
name
=
MaceKeyword
.
mace_dims_str
dims_arg
.
ints
.
extend
([
0
,
3
,
1
,
2
])
arg
=
op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
for
output_node
in
self
.
_option
.
output_nodes
.
values
():
output_name
=
MaceKeyword
.
mace_output_node_name
\
+
'_'
+
output_node
.
name
...
...
@@ -639,75 +673,43 @@ class Transformer(base_converter.ConverterInterface):
dims_arg
.
name
=
MaceKeyword
.
mace_dims_str
dims_arg
.
ints
.
extend
([
0
,
2
,
3
,
1
])
arg
=
op
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
return
False
def
transpose_filters
(
self
):
net
=
self
.
_model
filter_format
=
self
.
filter_format
()
# TODO(liyin/liuqi): remove this if-condition after combine cpu/gpu
if
self
.
_option
.
device
==
mace_pb2
.
CPU
:
print
(
"Transpose filters to OIHW"
)
# transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM)
if
filter_format
==
FilterFormat
.
HWIO
:
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
Conv2D
.
name
\
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
\
or
op
.
type
==
MaceOp
.
DepthwiseConv2d
.
name
:
if
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_winograd_filter_transformed
)
is
None
:
# noqa
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter
.
dims
)
filter_data
=
filter_data
.
transpose
(
3
,
2
,
0
,
1
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
self
.
set_filter_format
(
FilterFormat
.
OIHW
)
elif
self
.
_option
.
device
==
mace_pb2
.
GPU
:
# TODO(liyin/liuqi): remove this whole logic after combine cpu/gpu
print
(
"Transpose filters to HWOI/HWIM"
)
print
(
"Transpose filters to OIHW"
)
# transpose filter to OIHW/MIHW for tensorflow (HWIO/HWIM)
if
filter_format
==
FilterFormat
.
HWIO
:
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
Conv2D
.
name
\
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
\
or
op
.
type
==
MaceOp
.
DepthwiseConv2d
.
name
:
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter
.
dims
)
# transpose filter to HWOI/HWIM for
# tensorflow and caffe (OIHW/MIHW)
if
filter_format
==
FilterFormat
.
HWIO
\
and
(
op
.
type
==
MaceOp
.
Conv2D
.
name
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
):
filter_data
=
filter_data
.
transpose
(
0
,
1
,
3
,
2
)
if
ConverterUtil
.
get_arg
(
op
,
MaceKeyword
.
mace_winograd_filter_transformed
)
\
is
None
:
filter
=
self
.
_consts
[
op
.
input
[
1
]]
filter_data
=
np
.
array
(
filter
.
float_data
).
reshape
(
filter
.
dims
)
filter_data
=
filter_data
.
transpose
(
3
,
2
,
0
,
1
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
elif
filter_format
==
FilterFormat
.
OIHW
:
if
op
.
type
==
MaceOp
.
Conv2D
.
name
\
or
op
.
type
==
MaceOp
.
Deconv2D
.
name
:
filter_data
=
filter_data
.
transpose
(
2
,
3
,
0
,
1
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
elif
op
.
type
==
MaceOp
.
DepthwiseConv2d
.
name
:
filter_data
=
filter_data
.
transpose
(
2
,
3
,
1
,
0
)
filter
.
float_data
[:]
=
filter_data
.
flat
filter
.
dims
[:]
=
filter_data
.
shape
if
op
.
type
==
MaceOp
.
FullyConnected
.
name
:
weight
=
self
.
_consts
[
op
.
input
[
1
]]
input_shape
=
list
(
self
.
_producer
[
op
.
input
[
0
]]
.
output_shape
[
0
].
dims
)
weight_shape
=
[
weight
.
dims
[
0
]]
+
input_shape
[
1
:]
# OCHW -> OHWC
weight_data
=
np
.
array
(
weight
.
float_data
).
reshape
(
weight_shape
)
weight_data
=
weight_data
.
transpose
(
0
,
2
,
3
,
1
)
weight
.
float_data
[:]
=
weight_data
.
flat
self
.
set_filter_format
(
FilterFormat
.
HWOI
)
self
.
set_filter_format
(
FilterFormat
.
OIHW
)
return
False
def
reshape_fc_weight
(
self
):
net
=
self
.
_model
for
op
in
net
.
op
:
if
op
.
type
==
MaceOp
.
FullyConnected
.
name
:
weight
=
self
.
_consts
[
op
.
input
[
1
]]
# NCHW
input_shape
=
list
(
self
.
_producer
[
op
.
input
[
0
]]
.
output_shape
[
0
].
dims
)
weight_shape
=
[
weight
.
dims
[
0
]]
+
input_shape
[
1
:]
del
weight
.
dims
[:]
weight
.
dims
.
extend
(
weight_shape
)
return
False
...
...
@@ -727,9 +729,6 @@ class Transformer(base_converter.ConverterInterface):
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_mode
arg
.
i
=
0
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
op
.
input
[
input_idx
]
=
output_name
...
...
@@ -788,9 +787,6 @@ class Transformer(base_converter.ConverterInterface):
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_buffer_type
arg
.
i
=
OpenCLBufferType
.
IN_OUT_CHANNEL
.
value
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
for
output_node
in
self
.
_option
.
output_nodes
.
values
():
output_name
=
MaceKeyword
.
mace_output_node_name
\
...
...
@@ -806,9 +802,6 @@ class Transformer(base_converter.ConverterInterface):
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_buffer_type
arg
.
i
=
OpenCLBufferType
.
IN_OUT_CHANNEL
.
value
arg
=
op_def
.
arg
.
add
()
arg
.
name
=
'T'
arg
.
i
=
self
.
_option
.
data_type
return
False
...
...
@@ -885,6 +878,19 @@ class Transformer(base_converter.ConverterInterface):
in_channels
*
filter_width
*
filter_height
][:]
def
add_device_and_data_type
(
self
):
# TODO(liuqi) add device definition in OperatorDef
net
=
self
.
_model
for
op
in
net
.
op
:
arg
=
op
.
arg
.
add
()
arg
.
name
=
MaceKeyword
.
mace_device
arg
.
i
=
self
.
_option
.
device
data_type_arg
=
op
.
arg
.
add
()
data_type_arg
.
name
=
'T'
data_type_arg
.
i
=
self
.
_option
.
data_type
return
False
def
sort_dfs
(
self
,
op
,
visited
,
sorted_nodes
):
visited
.
update
([
op
.
name
])
if
len
(
op
.
input
)
>
0
:
...
...
mace/python/tools/source_converter_lib.py
浏览文件 @
1617b83f
...
...
@@ -167,7 +167,6 @@ def convert_to_source(net_def, model_checksum, weight_checksum, template_dir,
tensor_info
=
tensor_info
,
tensor
=
t
,
tag
=
model_tag
,
runtime
=
runtime
,
offset
=
offset
,
)
model_data
.
extend
(
tensor_info
.
data
)
...
...
mace/test/mace_api_mt_test.cc
浏览文件 @
1617b83f
...
...
@@ -55,6 +55,7 @@ void BufferToImage(const std::string &input_name,
const
std
::
string
&
output_name
,
const
int
buffer_type
,
const
std
::
vector
<
int
>
&
mem_ids
,
const
DeviceType
device_type
,
NetDef
*
net_def
,
const
int
mode
=
NetMode
::
NORMAL
)
{
OperatorDef
operator_def
;
...
...
@@ -64,6 +65,7 @@ void BufferToImage(const std::string &input_name,
.
Output
(
output_name
)
.
AddIntArg
(
"buffer_type"
,
buffer_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
AddIntArg
(
"mode"
,
mode
)
.
Finalize
(
&
operator_def
);
...
...
@@ -76,6 +78,7 @@ template <typename T>
void
ImageToBuffer
(
const
std
::
string
&
input_name
,
const
std
::
string
&
output_name
,
const
int
buffer_type
,
const
DeviceType
device_type
,
NetDef
*
net_def
)
{
OperatorDef
operator_def
;
...
...
@@ -84,6 +87,7 @@ void ImageToBuffer(const std::string &input_name,
.
Output
(
output_name
)
.
AddIntArg
(
"buffer_type"
,
buffer_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
Finalize
(
&
operator_def
);
net_def
->
add_op
()
->
CopyFrom
(
operator_def
);
...
...
@@ -94,6 +98,7 @@ void Conv3x3(const std::string &input_name,
const
std
::
string
&
filter_name
,
const
std
::
string
&
output_name
,
const
std
::
vector
<
int
>
&
mem_ids
,
const
DeviceType
device_type
,
NetDef
*
net_def
)
{
OperatorDef
operator_def
;
ops
::
test
::
OpDefBuilder
(
"Conv2D"
,
"Conv2dOp"
)
...
...
@@ -104,6 +109,7 @@ void Conv3x3(const std::string &input_name,
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
Finalize
(
&
operator_def
);
operator_def
.
set_mem_id
(
mem_ids
);
...
...
@@ -113,6 +119,7 @@ void Conv3x3(const std::string &input_name,
template
<
typename
T
>
void
Relu
(
const
std
::
string
&
input_name
,
const
std
::
string
&
output_name
,
const
DeviceType
device_type
,
NetDef
*
net_def
)
{
OperatorDef
operator_def
;
ops
::
test
::
OpDefBuilder
(
"Activation"
,
"ReluTest"
)
...
...
@@ -120,6 +127,7 @@ void Relu(const std::string &input_name,
.
Output
(
output_name
)
.
AddStringArg
(
"activation"
,
"RELU"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
Finalize
(
&
operator_def
);
net_def
->
add_op
()
->
CopyFrom
(
operator_def
);
...
...
@@ -195,7 +203,8 @@ std::map<std::string, int> AddMemoryOptimization(
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
output_shapes
,
NetDef
*
net_def
)
{
std
::
map
<
std
::
string
,
int
>
res
;
int
mem_id
=
0
;
// TODO(liuqi) refactor based on PB
int
mem_id
=
20000
;
size_t
input_shape_size
=
input_shapes
.
size
();
uint32_t
in_mem_block_x
=
0
;
uint32_t
in_mem_block_y
=
0
;
...
...
@@ -269,21 +278,25 @@ void MaceRunFunc(const int in_out_size) {
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
mace
::
kernels
::
IN_OUT_CHANNEL
,
{
mem_map
[
input_names
[
i
]]},
device
,
&
net_def
);
}
BufferToImage
<
half
>
(
filter_tensor_name
,
filter_tensor_img_name
,
mace
::
kernels
::
CONV2D_FILTER
,
{},
mace
::
kernels
::
CONV2D_FILTER
,
{},
device
,
&
net_def
,
NetMode
::
INIT
);
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
Conv3x3
<
half
>
(
input_names
[
i
],
filter_tensor_img_name
,
output_names
[
i
],
{
mem_map
[
output_names
[
i
]]},
device
,
&
net_def
);
}
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output_names
[
i
]);
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
mace
::
kernels
::
IN_OUT_CHANNEL
,
device
,
&
net_def
);
}
const
std
::
string
file_path
=
"/data/local/tmp/mace"
;
...
...
mace/test/mace_api_test.cc
浏览文件 @
1617b83f
...
...
@@ -65,6 +65,7 @@ void BufferToImage(const std::string &input_name,
const
std
::
string
&
output_name
,
const
int
buffer_type
,
const
std
::
vector
<
int
>
&
mem_ids
,
const
DeviceType
device_type
,
NetDef
*
net_def
,
const
int
mode
=
NetMode
::
NORMAL
)
{
OperatorDef
operator_def
;
...
...
@@ -74,6 +75,7 @@ void BufferToImage(const std::string &input_name,
.
Output
(
output_name
)
.
AddIntArg
(
"buffer_type"
,
buffer_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
AddIntArg
(
"mode"
,
mode
)
.
Finalize
(
&
operator_def
);
...
...
@@ -86,6 +88,7 @@ template <typename T>
void
ImageToBuffer
(
const
std
::
string
&
input_name
,
const
std
::
string
&
output_name
,
const
int
buffer_type
,
const
DeviceType
device_type
,
NetDef
*
net_def
)
{
OperatorDef
operator_def
;
...
...
@@ -94,6 +97,7 @@ void ImageToBuffer(const std::string &input_name,
.
Output
(
output_name
)
.
AddIntArg
(
"buffer_type"
,
buffer_type
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
Finalize
(
&
operator_def
);
net_def
->
add_op
()
->
CopyFrom
(
operator_def
);
...
...
@@ -104,6 +108,7 @@ void Conv3x3(const std::string &input_name,
const
std
::
string
&
filter_name
,
const
std
::
string
&
output_name
,
const
std
::
vector
<
int
>
&
mem_ids
,
const
DeviceType
device_type
,
NetDef
*
net_def
)
{
OperatorDef
operator_def
;
ops
::
test
::
OpDefBuilder
(
"Conv2D"
,
"Conv2dOp"
)
...
...
@@ -114,6 +119,7 @@ void Conv3x3(const std::string &input_name,
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
Finalize
(
&
operator_def
);
operator_def
.
set_mem_id
(
mem_ids
);
...
...
@@ -123,6 +129,7 @@ void Conv3x3(const std::string &input_name,
template
<
typename
T
>
void
Relu
(
const
std
::
string
&
input_name
,
const
std
::
string
&
output_name
,
const
DeviceType
device_type
,
NetDef
*
net_def
)
{
OperatorDef
operator_def
;
ops
::
test
::
OpDefBuilder
(
"Activation"
,
"ReluTest"
)
...
...
@@ -130,6 +137,7 @@ void Relu(const std::string &input_name,
.
Output
(
output_name
)
.
AddStringArg
(
"activation"
,
"RELU"
)
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
AddIntArg
(
"device"
,
static_cast
<
int
>
(
device_type
))
.
Finalize
(
&
operator_def
);
net_def
->
add_op
()
->
CopyFrom
(
operator_def
);
...
...
@@ -205,7 +213,8 @@ std::map<std::string, int> AddMemoryOptimization(
const
std
::
vector
<
std
::
vector
<
int64_t
>>
&
output_shapes
,
NetDef
*
net_def
)
{
std
::
map
<
std
::
string
,
int
>
res
;
int
mem_id
=
0
;
// TODO(liuqi) refactor based on PB
int
mem_id
=
20000
;
size_t
input_shape_size
=
input_shapes
.
size
();
uint32_t
in_mem_block_x
=
0
;
uint32_t
in_mem_block_y
=
0
;
...
...
@@ -279,21 +288,24 @@ void MaceRun(const int in_out_size,
BufferToImage
<
half
>
(
input_name
,
input_names
[
i
],
mace
::
kernels
::
IN_OUT_CHANNEL
,
{
mem_map
[
input_names
[
i
]]},
device
,
&
net_def
);
}
BufferToImage
<
half
>
(
filter_tensor_name
,
filter_tensor_img_name
,
mace
::
kernels
::
CONV2D_FILTER
,
{},
mace
::
kernels
::
CONV2D_FILTER
,
{},
device
,
&
net_def
,
NetMode
::
INIT
);
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
Conv3x3
<
half
>
(
input_names
[
i
],
filter_tensor_img_name
,
output_names
[
i
],
{
mem_map
[
output_names
[
i
]]},
&
net_def
);
device
,
&
net_def
);
}
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
++
i
)
{
std
::
string
output_name
=
MakeString
(
"mace_output_node_"
,
output_names
[
i
]);
ImageToBuffer
<
float
>
(
output_names
[
i
],
output_name
,
mace
::
kernels
::
IN_OUT_CHANNEL
,
&
net_def
);
mace
::
kernels
::
IN_OUT_CHANNEL
,
device
,
&
net_def
);
}
MaceEngine
engine
(
&
net_def
,
device
,
input_names
,
output_names
);
...
...
tools/mace_tools.py
浏览文件 @
1617b83f
...
...
@@ -62,27 +62,23 @@ def get_target_socs(configs):
return
target_socs
def
get_data_and_device_type
(
runtime
):
data_type
=
""
def
parse_device_type
(
runtime
):
device_type
=
""
if
runtime
==
"dsp"
:
data_type
=
"DT_UINT8"
device_type
=
"HEXAGON"
elif
runtime
==
"gpu"
:
data_type
=
"DT_HALF"
device_type
=
"GPU"
elif
runtime
==
"cpu"
:
data_type
=
"DT_FLOAT"
device_type
=
"CPU"
return
d
ata_type
,
d
evice_type
return
device_type
def
get_hexagon_mode
(
configs
):
runtime_list
=
[]
for
model_name
in
configs
[
"models"
]:
model_runtime
=
configs
[
"models"
][
model_name
]
[
"runtime"
]
model_runtime
=
configs
[
"models"
][
model_name
]
.
get
(
"runtime"
,
""
)
runtime_list
.
append
(
model_runtime
.
lower
())
global_runtime
=
""
...
...
@@ -114,7 +110,7 @@ def model_benchmark_stdout_processor(stdout,
abi
,
serialno
,
model_name
,
runtim
e
):
device_typ
e
):
metrics
=
[
0
]
*
3
for
line
in
stdout
.
split
(
'
\n
'
):
line
=
line
.
strip
()
...
...
@@ -138,14 +134,14 @@ def model_benchmark_stdout_processor(stdout,
f
.
write
(
"model_name,device_name,soc,abi,runtime,"
"init,warmup,run_avg
\n
"
)
data_str
=
"{model_name},{device_name},{soc},{abi},{
runtim
e},"
\
data_str
=
"{model_name},{device_name},{soc},{abi},{
device_typ
e},"
\
"{init},{warmup},{run_avg}
\n
"
\
.
format
(
model_name
=
model_name
,
device_name
=
device_name
,
soc
=
target_soc
,
abi
=
abi
,
runtime
=
runtim
e
,
device_type
=
device_typ
e
,
init
=
metrics
[
0
],
warmup
=
metrics
[
1
],
run_avg
=
metrics
[
2
]
...
...
@@ -154,8 +150,7 @@ def model_benchmark_stdout_processor(stdout,
f
.
write
(
data_str
)
def
tuning_run
(
runtime
,
target_abi
,
def
tuning_run
(
target_abi
,
serialno
,
vlog_level
,
embed_model_data
,
...
...
@@ -205,7 +200,7 @@ def tuning_run(runtime,
if
running_round
>
0
and
FLAGS
.
collect_report
:
model_benchmark_stdout_processor
(
stdout
,
target_abi
,
serialno
,
model_name
,
runtim
e
)
stdout
,
target_abi
,
serialno
,
model_name
,
device_typ
e
)
def
build_mace_run_prod
(
hexagon_mode
,
runtime
,
target_abi
,
...
...
@@ -222,7 +217,7 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
strip
=
"never"
debug
=
True
if
runtime
==
"gpu"
:
if
not
runtime
or
runtime
==
"gpu"
:
gen_opencl_and_tuning_code
(
target_abi
,
serialno
,
[],
False
)
sh_commands
.
bazel_build
(
mace_run_target
,
...
...
@@ -234,19 +229,14 @@ def build_mace_run_prod(hexagon_mode, runtime, target_abi,
sh_commands
.
update_mace_run_lib
(
model_output_dir
,
model_name
,
embed_model_data
)
tuning_run
(
runtime
,
target_abi
,
serialno
,
vlog_level
,
embed_model_data
,
device_type
=
parse_device_type
(
"gpu"
)
tuning_run
(
target_abi
,
serialno
,
vlog_level
,
embed_model_data
,
model_output_dir
,
input_nodes
,
output_nodes
,
input_shapes
,
output_shapes
,
model_name
,
device_type
,
running_round
=
0
,
restart_round
=
1
,
out_of_range_check
=
False
,
phone_data_dir
=
phone_data_dir
,
tuning
=
tuning
,
limit_opencl_kernel_time
=
limit_opencl_kernel_time
)
tuning_run
(
runtime
,
target_abi
,
serialno
,
vlog_level
,
embed_model_data
,
model_output_dir
,
input_nodes
,
output_nodes
,
input_shapes
,
output_shapes
,
model_name
,
device_type
,
running_round
=
0
,
restart_round
=
1
,
out_of_range_check
=
True
,
phone_data_dir
=
phone_data_dir
,
tuning
=
False
)
gen_opencl_and_tuning_code
(
target_abi
,
serialno
,
[
model_output_dir
],
True
)
sh_commands
.
bazel_build
(
...
...
@@ -391,8 +381,7 @@ def parse_model_configs():
print
(
"'platform' must be 'tensorflow' or 'caffe'"
)
exit
(
1
)
for
key
in
[
"model_file_path"
,
"model_sha256_checksum"
,
"runtime"
]:
for
key
in
[
"model_file_path"
,
"model_sha256_checksum"
]:
value
=
model_config
.
get
(
key
,
""
)
if
value
==
""
:
print
(
"CONFIG ERROR:"
)
...
...
@@ -529,6 +518,11 @@ def parse_args():
type
=
str
,
default
=
""
,
help
=
"Valgrind command args."
)
parser
.
add_argument
(
"--validation_runtime"
,
type
=
str
,
default
=
"cpu"
,
help
=
"validation runtime."
)
return
parser
.
parse_known_args
()
...
...
@@ -541,9 +535,11 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
print
'==================='
,
model_name
,
'==================='
model_config
=
configs
[
"models"
][
model_name
]
input_file_list
=
model_config
[
"validation_inputs_data"
]
data_type
,
device_type
=
get_data_and_device_type
(
model_config
[
"runtime"
])
model_runtime
=
model_config
.
get
(
"runtime"
,
""
)
model_device_type
=
parse_device_type
(
model_runtime
)
run_device_type
=
model_device_type
if
not
run_device_type
:
run_device_type
=
parse_device_type
(
FLAGS
.
validation_runtime
)
# Create model build directory
model_path_digest
=
md5sum
(
model_config
[
"model_file_path"
])
model_output_base_dir
=
"%s/%s/%s/%s/%s"
%
(
...
...
@@ -581,7 +577,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
if
FLAGS
.
mode
==
"build"
or
FLAGS
.
mode
==
"all"
:
build_mace_run_prod
(
hexagon_mode
,
model_
config
[
"runtime"
]
,
model_
runtime
,
target_abi
,
serialno
,
vlog_level
,
...
...
@@ -592,7 +588,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config
[
"input_shapes"
],
model_config
[
"output_shapes"
],
model_name
,
device_type
,
model_
device_type
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
FLAGS
.
tuning
,
...
...
@@ -607,8 +603,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
if
FLAGS
.
mode
==
"run"
or
FLAGS
.
mode
==
"validate"
or
\
FLAGS
.
mode
==
"all"
:
tuning_run
(
model_config
[
"runtime"
],
target_abi
,
tuning_run
(
target_abi
,
serialno
,
vlog_level
,
embed_model_data
,
...
...
@@ -618,7 +613,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config
[
"input_shapes"
],
model_config
[
"output_shapes"
],
model_name
,
device_type
,
run_
device_type
,
FLAGS
.
round
,
FLAGS
.
restart_round
,
FLAGS
.
out_of_range_check
,
...
...
@@ -641,7 +636,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_config
[
"input_shapes"
],
model_config
[
"output_shapes"
],
model_name
,
device_type
,
run_
device_type
,
phone_data_dir
,
FLAGS
.
omp_num_threads
,
FLAGS
.
cpu_affinity_policy
,
...
...
@@ -654,7 +649,7 @@ def process_models(project_name, configs, embed_model_data, vlog_level,
model_file_path
,
weight_file_path
,
model_config
[
"platform"
],
model_config
[
"runtime"
]
,
run_device_type
,
model_config
[
"input_nodes"
],
model_config
[
"output_nodes"
],
model_config
[
"input_shapes"
],
...
...
@@ -746,8 +741,7 @@ def main(unused_args):
for
model_name
in
configs
[
"models"
]:
print
'==================='
,
model_name
,
'==================='
model_config
=
configs
[
"models"
][
model_name
]
data_type
,
device_type
=
get_data_and_device_type
(
model_config
[
"runtime"
])
runtime
=
model_config
.
get
(
"runtime"
,
""
)
# Create model build directory
model_path_digest
=
md5sum
(
model_config
[
"model_file_path"
])
...
...
@@ -778,8 +772,7 @@ def main(unused_args):
model_config
[
"model_sha256_checksum"
],
","
.
join
(
model_config
[
"input_nodes"
]),
","
.
join
(
model_config
[
"output_nodes"
]),
data_type
,
model_config
[
"runtime"
],
runtime
,
model_name
,
":"
.
join
(
model_config
[
"input_shapes"
]),
model_config
[
"dsp_mode"
],
...
...
tools/sh_commands.py
浏览文件 @
1617b83f
...
...
@@ -465,7 +465,6 @@ def gen_model_code(model_codegen_dir,
model_sha256_checksum
,
input_nodes
,
output_nodes
,
data_type
,
runtime
,
model_tag
,
input_shapes
,
...
...
@@ -489,7 +488,6 @@ def gen_model_code(model_codegen_dir,
"--output=%s"
%
model_codegen_dir
+
"/model.cc"
,
"--input_node=%s"
%
input_nodes
,
"--output_node=%s"
%
output_nodes
,
"--data_type=%s"
%
data_type
,
"--runtime=%s"
%
runtime
,
"--output_type=source"
,
"--template=%s"
%
"mace/python/tools"
,
...
...
@@ -703,7 +701,7 @@ def validate_model(abi,
model_file_path
,
weight_file_path
,
platform
,
runtim
e
,
device_typ
e
,
input_nodes
,
output_nodes
,
input_shapes
,
...
...
@@ -727,7 +725,7 @@ def validate_model(abi,
if
platform
==
"tensorflow"
:
validate
(
platform
,
model_file_path
,
""
,
"%s/%s"
%
(
model_output_dir
,
input_file_name
),
"%s/%s"
%
(
model_output_dir
,
output_file_name
),
runtim
e
,
"%s/%s"
%
(
model_output_dir
,
output_file_name
),
device_typ
e
,
":"
.
join
(
input_shapes
),
":"
.
join
(
output_shapes
),
","
.
join
(
input_nodes
),
","
.
join
(
output_nodes
))
elif
platform
==
"caffe"
:
...
...
@@ -743,7 +741,8 @@ def validate_model(abi,
logger
.
error
(
'There is no caffe python module.'
)
validate
(
platform
,
model_file_path
,
weight_file_path
,
"%s/%s"
%
(
model_output_dir
,
input_file_name
),
"%s/%s"
%
(
model_output_dir
,
output_file_name
),
runtime
,
"%s/%s"
%
(
model_output_dir
,
output_file_name
),
device_type
,
":"
.
join
(
input_shapes
),
":"
.
join
(
output_shapes
),
","
.
join
(
input_nodes
),
","
.
join
(
output_nodes
))
elif
caffe_env
==
common
.
CaffeEnvType
.
DOCKER
:
...
...
@@ -806,7 +805,7 @@ def validate_model(abi,
"--weight_file=/mace/%s"
%
weight_file_name
,
"--input_file=/mace/%s"
%
input_file_name
,
"--mace_out_file=/mace/%s"
%
output_file_name
,
"--
mace_runtime=%s"
%
runtim
e
,
"--
device_type=%s"
%
device_typ
e
,
"--input_node=%s"
%
","
.
join
(
input_nodes
),
"--output_node=%s"
%
","
.
join
(
output_nodes
),
"--input_shape=%s"
%
":"
.
join
(
input_shapes
),
...
...
tools/validate.py
浏览文件 @
1617b83f
...
...
@@ -44,7 +44,7 @@ def load_data(file):
return
np
.
empty
([
0
])
def
compare_output
(
platform
,
mace_runtim
e
,
output_name
,
mace_out_value
,
def
compare_output
(
platform
,
device_typ
e
,
output_name
,
mace_out_value
,
out_value
):
if
mace_out_value
.
size
!=
0
:
out_value
=
out_value
.
reshape
(
-
1
)
...
...
@@ -53,9 +53,9 @@ def compare_output(platform, mace_runtime, output_name, mace_out_value,
similarity
=
(
1
-
spatial
.
distance
.
cosine
(
out_value
,
mace_out_value
))
print
output_name
,
'MACE VS'
,
platform
.
upper
(
),
'similarity: '
,
similarity
if
(
mace_runtime
==
"cpu
"
and
similarity
>
0.999
)
or
\
(
mace_runtime
==
"gpu
"
and
similarity
>
0.995
)
or
\
(
mace_runtime
==
"dsp
"
and
similarity
>
0.930
):
if
(
device_type
==
"CPU
"
and
similarity
>
0.999
)
or
\
(
device_type
==
"GPU
"
and
similarity
>
0.995
)
or
\
(
device_type
==
"HEXAGON
"
and
similarity
>
0.930
):
print
'===================Similarity Test Passed=================='
else
:
print
'===================Similarity Test Failed=================='
...
...
@@ -65,7 +65,7 @@ def compare_output(platform, mace_runtime, output_name, mace_out_value,
sys
.
exit
(
-
1
)
def
validate_tf_model
(
platform
,
mace_runtim
e
,
model_file
,
input_file
,
def
validate_tf_model
(
platform
,
device_typ
e
,
model_file
,
input_file
,
mace_out_file
,
input_names
,
input_shapes
,
output_names
):
import
tensorflow
as
tf
if
not
os
.
path
.
isfile
(
model_file
):
...
...
@@ -100,11 +100,11 @@ def validate_tf_model(platform, mace_runtime, model_file, input_file,
output_file_name
=
common
.
formatted_file_name
(
mace_out_file
,
output_names
[
i
])
mace_out_value
=
load_data
(
output_file_name
)
compare_output
(
platform
,
mace_runtim
e
,
output_names
[
i
],
compare_output
(
platform
,
device_typ
e
,
output_names
[
i
],
mace_out_value
,
output_values
[
i
])
def
validate_caffe_model
(
platform
,
mace_runtim
e
,
model_file
,
input_file
,
def
validate_caffe_model
(
platform
,
device_typ
e
,
model_file
,
input_file
,
mace_out_file
,
weight_file
,
input_names
,
input_shapes
,
output_names
,
output_shapes
):
os
.
environ
[
'GLOG_minloglevel'
]
=
'1'
# suprress Caffe verbose prints
...
...
@@ -144,12 +144,12 @@ def validate_caffe_model(platform, mace_runtime, model_file, input_file,
output_file_name
=
common
.
formatted_file_name
(
mace_out_file
,
output_names
[
i
])
mace_out_value
=
load_data
(
output_file_name
)
compare_output
(
platform
,
mace_runtim
e
,
output_names
[
i
],
mace_out_value
,
compare_output
(
platform
,
device_typ
e
,
output_names
[
i
],
mace_out_value
,
value
)
def
validate
(
platform
,
model_file
,
weight_file
,
input_file
,
mace_out_file
,
mace_runtim
e
,
input_shape
,
output_shape
,
input_node
,
output_node
):
device_typ
e
,
input_shape
,
output_shape
,
input_node
,
output_node
):
input_names
=
[
name
for
name
in
input_node
.
split
(
','
)]
input_shape_strs
=
[
shape
for
shape
in
input_shape
.
split
(
':'
)]
input_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
...
...
@@ -158,14 +158,14 @@ def validate(platform, model_file, weight_file, input_file, mace_out_file,
assert
len
(
input_names
)
==
len
(
input_shapes
)
if
platform
==
'tensorflow'
:
validate_tf_model
(
platform
,
mace_runtim
e
,
model_file
,
input_file
,
validate_tf_model
(
platform
,
device_typ
e
,
model_file
,
input_file
,
mace_out_file
,
input_names
,
input_shapes
,
output_names
)
elif
platform
==
'caffe'
:
output_shape_strs
=
[
shape
for
shape
in
output_shape
.
split
(
':'
)]
output_shapes
=
[[
int
(
x
)
for
x
in
shape
.
split
(
','
)]
for
shape
in
output_shape_strs
]
validate_caffe_model
(
platform
,
mace_runtim
e
,
model_file
,
input_file
,
validate_caffe_model
(
platform
,
device_typ
e
,
model_file
,
input_file
,
mace_out_file
,
weight_file
,
input_names
,
input_shapes
,
output_names
,
output_shapes
)
...
...
@@ -194,7 +194,7 @@ def parse_args():
default
=
""
,
help
=
"mace output file to load."
)
parser
.
add_argument
(
"--
mace_runtime"
,
type
=
str
,
default
=
"gpu
"
,
help
=
"mace runtime device."
)
"--
device_type"
,
type
=
str
,
default
=
"
"
,
help
=
"mace runtime device."
)
parser
.
add_argument
(
"--input_shape"
,
type
=
str
,
default
=
"1,64,64,3"
,
help
=
"input shape."
)
parser
.
add_argument
(
...
...
@@ -214,7 +214,7 @@ if __name__ == '__main__':
FLAGS
.
weight_file
,
FLAGS
.
input_file
,
FLAGS
.
mace_out_file
,
FLAGS
.
mace_runtim
e
,
FLAGS
.
device_typ
e
,
FLAGS
.
input_shape
,
FLAGS
.
output_shape
,
FLAGS
.
input_node
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录