Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
冰之2023
Mace
提交
d24d459e
Mace
项目概览
冰之2023
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
提交
d24d459e
编写于
12月 08, 2017
作者:
L
liuqi
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
merge memory_optimizer code to tf_converter_lib.
上级
66a68689
变更
3
隐藏空白更改
内联
并排
Showing
3 changed files
with
102 additions
and
115 deletions
+102
-115
mace/python/tools/memory_optimizer.py
mace/python/tools/memory_optimizer.py
+0
-102
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+100
-6
tools/validate_gcn.sh
tools/validate_gcn.sh
+2
-7
未找到文件。
mace/python/tools/memory_optimizer.py
已删除
100644 → 0
浏览文件 @
66a68689
import operator
import sys
from functools import reduce

from mace.proto import mace_pb2
class MemoryOptimizer(object):
    """Greedy planner that lets op output tensors share memory blocks.

    Walks the ops of a ``mace_pb2.NetDef`` in graph order, assigning each
    op's output a block id.  A block is returned to the free pool once every
    consumer of the tensor stored in it has been visited (simple reference
    counting), so tensors with disjoint lifetimes reuse the same block.
    """

    def __init__(self, net_def):
        """Build the consumer map and per-tensor reference counts.

        Args:
            net_def: a mace_pb2.NetDef whose ops carry output_shape info.
        """
        self.net_def = net_def
        self.idle_mem = set()       # block ids currently free for reuse
        self.op_mem = {}            # tensor name -> mem_id
        self.mem_block = {}         # mem_id -> [x, y] block size
        self.total_mem_count = 0
        self.ref_counter = {}       # tensor name -> outstanding consumers

        # Map every input tensor name to the ops that consume it.
        consumers = {}
        for op in net_def.op:
            if self.is_buffer_image_op(op):
                continue
            for ipt in op.input:
                if ipt not in consumers:
                    consumers[ipt] = []
                consumers[ipt].append(op)
        # Only reference-count each op's own output tensor; outputs nobody
        # consumes (graph results) start at 0 and are never recycled.
        for op in net_def.op:
            if self.is_buffer_image_op(op):
                continue
            tensor_name = self._op_to_tensor(op)
            if tensor_name in consumers:
                self.ref_counter[tensor_name] = len(consumers[tensor_name])
            else:
                self.ref_counter[tensor_name] = 0

    def _op_to_tensor(self, op):
        # Convention: an op's (single) output tensor is named "<op name>:0".
        return op.name + ':0'

    def is_buffer_image_op(self, op):
        # Buffer<->image conversion ops are excluded from memory planning.
        return op.type == 'BufferToImage' or op.type == 'ImageToBuffer'

    def optimize(self):
        """Assign mem ids to ops and record block sizes in the net.

        Mutates ``self.net_def`` in place: sets ``op.mem_id`` on every
        planned op and extends ``net_def.mem_arena.mem_block`` with the
        final [x, y] sizes.
        """
        for op in self.net_def.op:
            if self.is_buffer_image_op(op):
                continue
            if not self.idle_mem:
                # No reusable block available: allocate a fresh id.
                mem_id = self.total_mem_count
                self.total_mem_count += 1
            else:
                # Reuse a released block.
                mem_id = self.idle_mem.pop()
            op.mem_id = mem_id
            self.op_mem[self._op_to_tensor(op)] = mem_id
            if mem_id not in self.mem_block:
                self.mem_block[mem_id] = [0, 0]
            # Grow the block to fit this output.  Sizes come from
            # output_shape[0].dims; presumably NHWC packed 4 channels per
            # image pixel -- TODO confirm the exact layout.
            mem_size = self.mem_block[mem_id]
            mem_size[1] = max(mem_size[1],
                              op.output_shape[0].dims[0] *
                              op.output_shape[0].dims[1])
            # '//' keeps the Python 2 integer-floor semantics of '/' here.
            mem_size[0] = max(mem_size[0],
                              op.output_shape[0].dims[2] *
                              (op.output_shape[0].dims[3] + 3) // 4)
            # De-reference input tensors; a tensor with no remaining
            # consumers releases its block back to the pool.
            for ipt in op.input:
                if ipt in self.ref_counter:
                    self.ref_counter[ipt] -= 1
                    if self.ref_counter[ipt] == 0:
                        self.idle_mem.add(self.op_mem[ipt])
                    elif self.ref_counter[ipt] < 0:
                        raise Exception('ref count is less than 0')

        for mem in self.mem_block:
            # BUG FIX: the original read the module-global `net_def`, which
            # exists only when run as a script; use the instance's net_def.
            arena = self.net_def.mem_arena
            block = arena.mem_block.add()
            block.mem_id = mem
            block.x = self.mem_block[mem][0]
            block.y = self.mem_block[mem][1]

        # BUG FIX: the originals passed the values as extra print arguments
        # (printing a tuple) instead of %-interpolating them.
        print('total op: %d' % len(self.net_def.op))
        origin_mem_size = 0
        optimized_mem_size = 0
        for op in self.net_def.op:
            if self.is_buffer_image_op(op):
                continue
            origin_mem_size += reduce(operator.mul, op.output_shape[0].dims, 1)
        for mem in self.mem_block:
            # [x, y] block size times 4 channels per image pixel.
            optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)
        print('origin mem: %d, optimized mem: %d' %
              (origin_mem_size, optimized_mem_size))
if __name__ == '__main__':
    # CLI: memory_optimizer <input_model.pb> <optimized_output.pb>
    in_path, out_path = sys.argv[1], sys.argv[2]

    net_def = mace_pb2.NetDef()
    with open(in_path, "rb") as model_file:
        net_def.ParseFromString(model_file.read())
        optimizer = MemoryOptimizer(net_def)
        optimizer.optimize()

    # Serialized optimized model.
    with open(out_path, "wb") as out_file:
        out_file.write(net_def.SerializeToString())
    # Human-readable dump alongside it, with the weight tensors stripped.
    with open(out_path + '_txt', "wb") as txt_file:
        net_def.ClearField('tensors')
        txt_file.write(str(net_def))
mace/python/tools/tf_converter_lib.py
浏览文件 @
d24d459e
import
operator
import
sys
from
mace.proto
import
mace_pb2
import
tensorflow
as
tf
import
numpy
as
np
...
...
@@ -44,13 +46,18 @@ class TFConverter(object):
self
.
tf_graph
=
{}
self
.
resolved_ops
=
{}
self
.
idle_mem
=
set
()
self
.
op_mem
=
{}
# op_name->mem_id
self
.
mem_block
=
{}
# mem_id->[x, y]
self
.
total_mem_count
=
0
self
.
ref_counter
=
{}
for
op
in
tf_ops
:
self
.
resolved_ops
[
op
.
name
]
=
0
for
input
in
op
.
inputs
:
input_name
=
input
.
name
[:
-
2
]
if
input_name
not
in
self
.
tf_graph
:
self
.
tf_graph
[
input_name
]
=
[]
print
input_name
self
.
tf_graph
[
input_name
].
append
(
op
)
def
add_buffer_to_image
(
self
,
input_name
,
input_type
):
...
...
@@ -104,7 +111,7 @@ class TFConverter(object):
def
add_output_shape
(
outputs
,
op
):
output_shapes
=
[]
for
output
in
outputs
:
if
output
.
shape
is
not
None
and
not
output
.
shap
e
:
if
output
.
shape
.
num_elements
()
is
not
Non
e
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
...
...
@@ -209,12 +216,21 @@ class TFConverter(object):
def
convert_batchnorm
(
self
,
op
):
bn_ops
=
[]
bn_ops
.
append
(
op
)
for
i
in
range
(
1
,
7
):
for
i
in
range
(
1
,
3
):
if
len
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
])
==
1
\
and
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
i
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
i
-
1
].
name
][
0
])
else
:
raise
Exception
(
'Invalid BatchNorm Op'
)
if
len
(
self
.
tf_graph
[
bn_ops
[
2
].
name
])
==
2
\
and
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
].
type
==
BATCH_NORM_ORDER
[
3
]
\
and
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
].
type
==
BATCH_NORM_ORDER
[
4
]:
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
0
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
2
].
name
][
1
])
else
:
raise
Exception
(
'Invalid BatchNorm Op'
)
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
4
].
name
][
0
])
bn_ops
.
append
(
self
.
tf_graph
[
bn_ops
[
3
].
name
][
0
])
op_def
=
mace_pb2
.
OperatorDef
()
arg
=
op_def
.
arg
.
add
()
...
...
@@ -246,7 +262,7 @@ class TFConverter(object):
data_format_arg
.
s
=
'NHWC'
self
.
net_def
.
op
.
extend
([
op_def
])
for
i
in
range
(
1
,
7
):
for
i
in
range
(
0
,
7
):
self
.
resolved_ops
[
bn_ops
[
i
].
name
]
=
1
def
convert_pooling
(
self
,
op
):
...
...
@@ -408,6 +424,83 @@ class TFConverter(object):
if
self
.
resolved_ops
[
key
]
!=
1
:
print
'Unresolve Op: %s'
%
key
@staticmethod
def _op_to_tensor(op):
    # Convention: an op's (single) output tensor is named "<op name>:0".
    return op.name + ':0'
@staticmethod
def is_buffer_image_op(op):
    # Buffer<->image conversion ops are excluded from memory planning.
    return op.type == 'BufferToImage' or op.type == 'ImageToBuffer'
def optimize(self):
    """Greedily assign shared memory blocks to op output tensors.

    Builds a consumer map and per-tensor reference counts, then walks the
    ops in order: each op's output takes a free block from ``idle_mem``
    (or a brand-new one), and a block returns to ``idle_mem`` once every
    consumer of the tensor it holds has been visited.  The final block
    sizes are appended to ``self.net_def.mem_arena``.

    Mutates ``self.net_def`` in place (``op.mem_id`` and ``mem_arena``).
    Returns early, without planning, if any op lacks output-shape info.
    """
    # Map each input tensor name to the list of ops consuming it.
    consumers = {}
    for op in self.net_def.op:
        if self.is_buffer_image_op(op):
            continue
        for ipt in op.input:
            if ipt not in consumers:
                consumers[ipt] = []
            consumers[ipt].append(op)
    # only ref op's output tensor
    for op in self.net_def.op:
        if self.is_buffer_image_op(op):
            continue
        tensor_name = self._op_to_tensor(op)
        if tensor_name in consumers:
            self.ref_counter[tensor_name] = len(consumers[tensor_name])
        else:
            # Unconsumed outputs (graph results) are never recycled.
            self.ref_counter[tensor_name] = 0
    for op in self.net_def.op:
        if self.is_buffer_image_op(op):
            continue
        if not op.output_shape:
            print "Op %s don't have output shape information, No way to optimize memory." % op.name
            return
        if len(self.idle_mem) == 0:
            # allocate new mem
            mem_id = self.total_mem_count
            self.total_mem_count += 1
        else:
            # reuse mem
            mem_id = self.idle_mem.pop()
        op.mem_id = mem_id
        self.op_mem[self._op_to_tensor(op)] = mem_id
        if mem_id not in self.mem_block:
            self.mem_block[mem_id] = [0, 0]
        # Grow the block to fit this output.  Sizes come from
        # output_shape[0].dims; presumably NHWC with channels packed
        # 4 per image pixel -- TODO confirm the exact layout.
        mem_size = self.mem_block[mem_id]
        mem_size[1] = max(mem_size[1], op.output_shape[0].dims[0] * op.output_shape[0].dims[1])
        mem_size[0] = max(mem_size[0], op.output_shape[0].dims[2] * (op.output_shape[0].dims[3] + 3) / 4)
        # de-ref input tensor mem
        for ipt in op.input:
            if ipt in self.ref_counter:
                self.ref_counter[ipt] -= 1
                if self.ref_counter[ipt] == 0:
                    # Last consumer seen: block becomes reusable.
                    self.idle_mem.add(self.op_mem[ipt])
                elif self.ref_counter[ipt] < 0:
                    raise Exception('ref count is less than 0')
    # Publish the final block sizes into the net's memory arena.
    for mem in self.mem_block:
        arena = self.net_def.mem_arena
        block = arena.mem_block.add()
        block.mem_id = mem
        block.x = self.mem_block[mem][0]
        block.y = self.mem_block[mem][1]
    # NOTE(review): these print() calls pass the value as a second
    # argument instead of %-interpolating, so Python 2 prints a tuple --
    # looks like a bug; intended form is  print('... %d' % value).
    print('total op: %d', len(self.net_def.op))
    origin_mem_size = 0
    optimized_mem_size = 0
    for op in self.net_def.op:
        if self.is_buffer_image_op(op):
            continue
        origin_mem_size += reduce(operator.mul, op.output_shape[0].dims, 1)
    for mem in self.mem_block:
        # [x, y] block size times 4 channels per image pixel.
        optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)
    print('origin mem: %d, optimized mem: %d', origin_mem_size, optimized_mem_size)
def
convert_to_mace_pb
(
input_graph_def
,
input_node
,
output_node
,
data_type
,
device
):
net_def
=
mace_pb2
.
NetDef
()
dt
=
data_type_map
[
data_type
]
...
...
@@ -418,7 +511,8 @@ def convert_to_mace_pb(input_graph_def, input_node, output_node, data_type, devi
ops
=
graph
.
get_operations
()
converter
=
TFConverter
(
ops
,
net_def
,
dt
,
device
)
converter
.
convert
(
input_node
,
output_node
)
print
"PB Parsed."
print
"PB Converted, start optimize memory."
converter
.
optimize
()
print
"Memory optimization done."
return
net_def
tools/validate_gcn.sh
浏览文件 @
d24d459e
...
...
@@ -13,7 +13,6 @@ fi
TF_MODEL_FILE_PATH
=
$1
MODEL_DIR
=
$(
dirname
${
TF_MODEL_FILE_PATH
}
)
MACE_MODEL_NAME
=
'mace_model.pb'
MACE_OPT_MODEL_NAME
=
'mace_opt_model.pb'
INPUT_FILE_NAME
=
'model_input'
OUTPUT_FILE_NAME
=
'gcn.out'
OUTPUT_LIST_FILE
=
'gcn.list'
...
...
@@ -36,10 +35,6 @@ bazel-bin/mace/python/tools/tf_converter --input=${TF_MODEL_FILE_PATH} \
--output_node
=
GCN/br_result_2/fcn_br
\
--data_type
=
DT_HALF
\
--runtime
=
gpu
bazel build mace/python/tools:memory_optimizer
bazel-bin/mace/python/tools/memory_optimizer
${
MODEL_DIR
}
/
${
MACE_MODEL_NAME
}
\
${
MODEL_DIR
}
/
${
MACE_OPT_MODEL_NAME
}
# Step 3: Run model on the phone
echo
"Step 3: Run model on the phone"
...
...
@@ -51,7 +46,7 @@ bazel build -c opt --strip always mace/examples:mace_run \
adb shell
"mkdir -p
${
PHONE_DATA_DIR
}
"
adb shell
"mkdir -p
${
KERNEL_DIR
}
"
adb push mace/kernels/opencl/cl/
*
${
KERNEL_DIR
}
adb push
${
MODEL_DIR
}
/
${
MACE_
OPT_
MODEL_NAME
}
${
PHONE_DATA_DIR
}
adb push
${
MODEL_DIR
}
/
${
MACE_MODEL_NAME
}
${
PHONE_DATA_DIR
}
adb push
${
MODEL_DIR
}
/
${
INPUT_FILE_NAME
}
${
PHONE_DATA_DIR
}
adb push bazel-bin/mace/examples/mace_run
${
PHONE_DATA_DIR
}
...
...
@@ -62,7 +57,7 @@ adb </dev/null shell MACE_CPP_MIN_VLOG_LEVEL=0 \
MACE_KERNEL_PATH
=
$KERNEL_DIR
\
OMP_NUM_THREADS
=
$num_threads
\
${
PHONE_DATA_DIR
}
/mace_run
\
--model
=
${
PHONE_DATA_DIR
}
/
${
MACE_
OPT_
MODEL_NAME
}
\
--model
=
${
PHONE_DATA_DIR
}
/
${
MACE_MODEL_NAME
}
\
--input
=
mace_input_node
\
--output
=
mace_output_node
\
--input_shape
=
"1,
${
IMAGE_SIZE
}
,
${
IMAGE_SIZE
}
,3"
\
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录