Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
慢慢CG
Mace
提交
1d924255
Mace
项目概览
慢慢CG
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
1d924255
编写于
12月 11, 2017
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'master' of v9.git.n.xiaomi.com:deep-learning/mace into gen_opencl_kernel_binary
上级
154a47dc
800eb69a
变更
12
隐藏空白更改
内联
并排
Showing
12 changed file
with
271 addition
and
73 deletion
+271
-73
mace/core/operator.h
mace/core/operator.h
+7
-2
mace/core/tensor.h
mace/core/tensor.h
+22
-5
mace/core/workspace.cc
mace/core/workspace.cc
+29
-1
mace/core/workspace.h
mace/core/workspace.h
+7
-1
mace/examples/mace_run.cc
mace/examples/mace_run.cc
+3
-0
mace/proto/mace.proto
mace/proto/mace.proto
+4
-0
mace/python/tools/BUILD
mace/python/tools/BUILD
+12
-1
mace/python/tools/convert_util.py
mace/python/tools/convert_util.py
+29
-0
mace/python/tools/memory_optimizer.py
mace/python/tools/memory_optimizer.py
+102
-0
mace/python/tools/tf_converter_lib.py
mace/python/tools/tf_converter_lib.py
+24
-56
mace/python/tools/tf_dsp_converter_lib.py
mace/python/tools/tf_dsp_converter_lib.py
+21
-1
tools/validate_gcn.sh
tools/validate_gcn.sh
+11
-6
未找到文件。
mace/core/operator.h
浏览文件 @
1d924255
...
...
@@ -91,8 +91,13 @@ class Operator : public OperatorBase {
}
for
(
const
string
&
output_str
:
operator_def
.
output
())
{
outputs_
.
push_back
(
MACE_CHECK_NOTNULL
(
ws
->
CreateTensor
(
output_str
,
GetDeviceAllocator
(
D
),
DataTypeToEnum
<
T
>::
v
())));
if
(
ws
->
HasTensor
(
output_str
))
{
Tensor
*
found_tensor
=
ws
->
GetTensor
(
output_str
);
outputs_
.
push_back
(
ws
->
GetTensor
(
output_str
));
}
else
{
outputs_
.
push_back
(
MACE_CHECK_NOTNULL
(
ws
->
CreateTensor
(
output_str
,
GetDeviceAllocator
(
D
),
DataTypeToEnum
<
T
>::
v
())));
}
}
}
virtual
bool
Run
()
override
=
0
;
...
...
mace/core/tensor.h
浏览文件 @
1d924255
...
...
@@ -199,14 +199,20 @@ class Tensor {
size_
=
size
;
MACE_CHECK
(
data_
==
nullptr
,
"Buffer must be unmapped before resize"
);
if
(
is_image_
)
{
alloc_
->
DeleteImage
(
buffer_
);
}
else
{
if
(
is_image_
&&
!
image_shape_
.
empty
())
{
MACE_ASSERT
(
image_shape_
.
size
()
==
2
&&
image_shape_
[
0
]
>=
image_shape
[
0
]
||
image_shape_
[
1
]
>=
image_shape
[
1
],
"image shape not large enough"
);
}
if
(
!
is_image_
&&
buffer_
!=
nullptr
)
{
alloc_
->
Delete
(
buffer_
);
}
is_image_
=
true
;
image_shape_
=
image_shape
;
buffer_
=
alloc_
->
NewImage
(
image_shape
,
dtype_
);
if
(
image_shape_
.
empty
())
{
image_shape_
=
image_shape
;
buffer_
=
alloc_
->
NewImage
(
image_shape
,
dtype_
);
}
}
}
...
...
@@ -226,6 +232,17 @@ class Tensor {
}
}
inline
void
AllocateImageMemory
(
const
std
::
vector
<
size_t
>
&
image_shape
)
{
is_image_
=
true
;
if
(
image_shape_
!=
image_shape
)
{
if
(
buffer_
!=
nullptr
)
{
alloc_
->
DeleteImage
(
buffer_
);
}
image_shape_
=
image_shape
;
buffer_
=
alloc_
->
NewImage
(
image_shape
,
dtype_
);
}
}
template
<
typename
T
>
inline
void
Copy
(
const
T
*
src
,
index_t
size
)
{
MACE_CHECK
(
size
==
size_
,
"copy src and dst with different size."
);
...
...
mace/core/workspace.cc
浏览文件 @
1d924255
...
...
@@ -3,8 +3,8 @@
//
#include "mace/core/workspace.h"
#include "mace/core/common.h"
#include "mace/core/serializer.h"
#include "mace/core/proto_utils.h"
namespace
mace
{
...
...
@@ -63,6 +63,34 @@ void Workspace::LoadModelTensor(const NetDef &net_def, DeviceType type) {
tensor_map_
[
tensor_proto
.
name
()]
=
serializer
.
Deserialize
(
tensor_proto
,
type
);
}
if
(
type
==
DeviceType
::
OPENCL
)
{
CreateImageOutputTensor
(
net_def
);
}
}
void
Workspace
::
CreateImageOutputTensor
(
const
NetDef
&
net_def
)
{
if
(
!
net_def
.
has_mem_arena
()
||
net_def
.
mem_arena
().
mem_block_size
()
==
0
)
{
return
;
}
std
::
map
<
std
::
string
,
std
::
shared_ptr
<
Tensor
>>
mem_tensor_map
;
const
DataType
dtype
=
static_cast
<
DataType
>
(
ArgumentHelper
::
GetSingleArgument
<
OperatorDef
,
int
>
(
net_def
.
op
(
0
),
"T"
,
static_cast
<
int
>
(
DT_FLOAT
)));
for
(
auto
&
mem_block
:
net_def
.
mem_arena
().
mem_block
())
{
string
mem_block_name
=
MemBlockName
(
mem_block
.
mem_id
());
mem_tensor_map
[
mem_block_name
].
reset
(
new
Tensor
(
GetDeviceAllocator
(
DeviceType
::
OPENCL
),
dtype
));
mem_tensor_map
[
mem_block_name
]
->
AllocateImageMemory
({
mem_block
.
x
(),
mem_block
.
y
()});
}
for
(
auto
&
op
:
net_def
.
op
())
{
if
(
op
.
has_mem_id
())
{
tensor_map_
[
op
.
output
(
0
)]
=
mem_tensor_map
[
MemBlockName
(
op
.
mem_id
())];
}
}
}
}
// namespace mace
\ No newline at end of file
mace/core/workspace.h
浏览文件 @
1d924255
...
...
@@ -13,7 +13,7 @@ namespace mace {
class
Workspace
{
public:
typedef
map
<
string
,
unique
_ptr
<
Tensor
>>
TensorMap
;
typedef
map
<
string
,
std
::
shared
_ptr
<
Tensor
>>
TensorMap
;
Workspace
()
{}
...
...
@@ -33,7 +33,13 @@ class Workspace {
void
LoadModelTensor
(
const
NetDef
&
net_def
,
DeviceType
type
);
// Returns the name used for the memory block with id `mem_id`, produced by
// passing the "mem_block_" prefix and the id to internal::MakeString.
inline std::string MemBlockName(int mem_id) const {
  std::string block_name = internal::MakeString("mem_block_", mem_id);
  return block_name;
}
private:
void
CreateImageOutputTensor
(
const
NetDef
&
net_def
);
TensorMap
tensor_map_
;
DISABLE_COPY_AND_ASSIGN
(
Workspace
);
...
...
mace/examples/mace_run.cc
浏览文件 @
1d924255
...
...
@@ -101,9 +101,12 @@ int main(int argc, char **argv) {
}
// Init model
VLOG
(
0
)
<<
"Run init"
;
auto
net
=
CreateNet
(
net_def
,
&
ws
,
device_type
,
NetMode
::
INIT
);
net
->
Run
();
VLOG
(
0
)
<<
"Run model"
;
// run model
net
=
CreateNet
(
net_def
,
&
ws
,
device_type
);
...
...
mace/proto/mace.proto
浏览文件 @
1d924255
...
...
@@ -83,6 +83,7 @@ message OperatorDef {
optional
string
type
=
4
;
repeated
Argument
arg
=
5
;
repeated
OutputShape
output_shape
=
6
;
repeated
DataType
output_type
=
7
;
// Memory optimization: only support one single output op
optional
int32
mem_id
=
10
[
default
=
-
1
];
...
...
@@ -128,6 +129,9 @@ message NetDef {
repeated
Argument
arg
=
4
;
repeated
TensorProto
tensors
=
5
;
// for mem optimization
optional
MemoryArena
mem_arena
=
10
;
// for hexagon mace-nnlib
repeated
InputInfo
input_info
=
100
;
repeated
OutputInfo
output_info
=
101
;
...
...
mace/python/tools/BUILD
浏览文件 @
1d924255
py_library
(
name
=
"tf_converter_lib"
,
srcs
=
[
"convert_util.py"
,
"graph_util.py"
,
"tf_converter_lib.py"
,
"tf_dsp_converter_lib.py"
,
"graph_util.py"
],
],
srcs_version
=
"PY2AND3"
,
deps
=
[
"//mace/proto:mace_py"
,
...
...
@@ -20,6 +22,15 @@ py_binary(
],
)
py_binary
(
name
=
"memory_optimizer"
,
srcs
=
[
"memory_optimizer.py"
],
srcs_version
=
"PY2AND3"
,
deps
=
[
"//mace/proto:mace_py"
,
],
)
py_binary
(
name
=
"tf_ops_stats"
,
srcs
=
[
"tf_ops_stats.py"
],
...
...
mace/python/tools/convert_util.py
0 → 100644
浏览文件 @
1d924255
import
tensorflow
as
tf
from
mace.proto
import
mace_pb2
# Lookup table from TensorFlow dtypes to MACE DataType enum values.
# Quantized TensorFlow dtypes map to the same MACE value as the plain
# integer dtype listed next to them.
TF_DTYPE_2_MACE_DTYPE_MAP = {
    # floating point
    tf.float32: mace_pb2.DT_FLOAT,
    tf.double: mace_pb2.DT_DOUBLE,
    tf.half: mace_pb2.DT_HALF,
    # signed integers
    tf.int64: mace_pb2.DT_INT64,
    tf.int32: mace_pb2.DT_INT32,
    tf.qint32: mace_pb2.DT_INT32,
    tf.int16: mace_pb2.DT_INT16,
    tf.qint16: mace_pb2.DT_INT16,
    tf.int8: mace_pb2.DT_INT8,
    tf.qint8: mace_pb2.DT_INT8,
    # unsigned integers
    tf.quint16: mace_pb2.DT_UINT16,
    tf.uint16: mace_pb2.DT_UINT16,
    tf.quint8: mace_pb2.DT_UINT8,
    tf.uint8: mace_pb2.DT_UINT8,
    # other
    tf.string: mace_pb2.DT_STRING,
    tf.bool: mace_pb2.DT_BOOL,
}
def tf_dtype_2_mace_dtype(tf_dtype):
    """Translate a TensorFlow dtype into the matching MACE DataType value.

    Args:
        tf_dtype: key to look up in ``TF_DTYPE_2_MACE_DTYPE_MAP``.

    Returns:
        The value registered for ``tf_dtype`` in the map.

    Raises:
        Exception: if ``tf_dtype`` has no entry in the map.
    """
    mace_dtype = TF_DTYPE_2_MACE_DTYPE_MAP.get(tf_dtype)
    # Test for None explicitly so a mapped value that happens to be falsy
    # (for example an enum numbered 0) is not mistaken for a missing entry.
    if mace_dtype is None:
        # Use %-formatting: concatenating a str with a non-str dtype object
        # raises TypeError and would mask the intended message.
        raise Exception("Not supported tensorflow dtype: %s" % (tf_dtype,))
    return mace_dtype
mace/python/tools/memory_optimizer.py
0 → 100644
浏览文件 @
1d924255
import
sys
import
operator
from
mace.proto
import
mace_pb2
class MemoryOptimizer(object):
    """Assigns reusable memory-block ids to the outputs of a net's ops.

    Ops of type 'BufferToImage' / 'ImageToBuffer' are ignored throughout.
    For every other op the optimizer tracks how many ops consume its output
    tensor (named ``<op.name>:0``); once all consumers have been visited the
    tensor's memory block is returned to a pool of idle blocks and may be
    handed to a later op.

    Attributes set by ``__init__``:
        net_def: the net definition being optimized (modified in place).
        idle_mem: set of mem ids that are currently free for reuse.
        op_mem: output tensor name -> mem id assigned to it.
        mem_block: mem id -> [x, y] size, the maximum over all users.
        total_mem_count: number of distinct mem ids handed out so far.
        ref_counter: output tensor name -> number of not-yet-visited consumers.
    """

    def __init__(self, net_def):
        self.net_def = net_def
        self.idle_mem = set()
        self.op_mem = {}  # output tensor name -> mem_id
        self.mem_block = {}  # mem_id -> [x, y]
        self.total_mem_count = 0
        self.ref_counter = {}

        # Collect, for every input name, the ops that read it.
        consumers = {}
        for op in net_def.op:
            if self.is_buffer_image_op(op):
                continue
            for ipt in op.input:
                consumers.setdefault(ipt, []).append(op)

        # Only tensors produced by (non buffer/image) ops are ref-counted.
        for op in net_def.op:
            if self.is_buffer_image_op(op):
                continue
            tensor_name = self._op_to_tensor(op)
            self.ref_counter[tensor_name] = len(consumers.get(tensor_name, []))

    def _op_to_tensor(self, op):
        """Return the name of the op's first output tensor (``name + ':0'``)."""
        return op.name + ':0'

    def is_buffer_image_op(self, op):
        """Return True for 'BufferToImage' and 'ImageToBuffer' ops."""
        return op.type == 'BufferToImage' or op.type == 'ImageToBuffer'

    def optimize(self):
        """Assign ``mem_id`` to each op and record block sizes in the arena.

        Walks the ops in their stored order, sets ``op.mem_id``, grows the
        per-block [x, y] size to fit each user, appends one ``mem_block``
        entry per block to ``self.net_def.mem_arena`` and prints a summary.

        Raises:
            Exception: if a tensor is consumed more often than counted.
        """
        # `reduce` is a builtin only on Python 2; functools has it on both.
        from functools import reduce

        for op in self.net_def.op:
            if self.is_buffer_image_op(op):
                continue
            if len(self.idle_mem) == 0:
                # allocate new mem
                mem_id = self.total_mem_count
                self.total_mem_count += 1
            else:
                # reuse mem
                mem_id = self.idle_mem.pop()

            op.mem_id = mem_id
            self.op_mem[self._op_to_tensor(op)] = mem_id
            mem_size = self.mem_block.setdefault(mem_id, [0, 0])
            dims = op.output_shape[0].dims
            mem_size[1] = max(mem_size[1], dims[0] * dims[1])
            # Floor division keeps the size an integer on Python 3 as well;
            # grouping is unchanged: (dims[2] * (dims[3] + 3)) // 4.
            mem_size[0] = max(mem_size[0], dims[2] * (dims[3] + 3) // 4)

            # de-ref input tensor mem
            for ipt in op.input:
                if ipt in self.ref_counter:
                    self.ref_counter[ipt] -= 1
                    if self.ref_counter[ipt] == 0:
                        self.idle_mem.add(self.op_mem[ipt])
                    elif self.ref_counter[ipt] < 0:
                        raise Exception('ref count is less than 0')

        # Write to the net held by this instance, not to a module global.
        for mem in self.mem_block:
            arena = self.net_def.mem_arena
            block = arena.mem_block.add()
            block.mem_id = mem
            block.x = self.mem_block[mem][0]
            block.y = self.mem_block[mem][1]

        print('total op: %d' % len(self.net_def.op))

        origin_mem_size = 0
        optimized_mem_size = 0
        for op in self.net_def.op:
            if self.is_buffer_image_op(op):
                continue
            origin_mem_size += reduce(operator.mul, op.output_shape[0].dims, 1)
        for mem in self.mem_block:
            # Initial value 4 multiplies x * y by four.
            optimized_mem_size += reduce(operator.mul, self.mem_block[mem], 4)

        print('origin mem: %d, optimized mem: %d'
              % (origin_mem_size, optimized_mem_size))
if __name__ == '__main__':
    # Command line: <input model file> <output model file>.
    # Reads a serialized NetDef, runs the memory optimizer on it, then writes
    # the optimized binary model plus a text dump (without tensors) next to it.
    if len(sys.argv) < 3:
        sys.stderr.write(
            'Usage: %s model_file opt_model_file\n' % sys.argv[0])
        sys.exit(1)
    model_file = sys.argv[1]
    opt_model_file = sys.argv[2]

    with open(model_file, "rb") as f:
        net_def = mace_pb2.NetDef()
        net_def.ParseFromString(f.read())
    optimizer = MemoryOptimizer(net_def)
    optimizer.optimize()

    with open(opt_model_file, "wb") as f:
        f.write(net_def.SerializeToString())

    # str(net_def) is text, so the dump is opened in text mode; writing a str
    # to a "wb" handle raises TypeError on Python 3.
    with open(opt_model_file + '_txt', "w") as f:
        net_def.ClearField('tensors')
        f.write(str(net_def))
mace/python/tools/tf_converter_lib.py
浏览文件 @
1d924255
from
mace.proto
import
mace_pb2
import
tensorflow
as
tf
import
numpy
as
np
from
mace.python.tools.convert_util
import
tf_dtype_2_mace_dtype
# TODO: support the NCHW format; only NHWC is supported for now.
padding_mode
=
{
...
...
@@ -110,6 +111,19 @@ def add_output_transform(name, net_def):
epsilon_arg
.
name
=
'buffer_type'
epsilon_arg
.
i
=
buffer_type_map
[
'IN_OUT'
]
def convert_op_outputs(mace_op_def, tf_op):
    """Copy output names, dtypes and shapes from a TF op onto a MACE op def.

    Args:
        mace_op_def: operator definition whose ``output``, ``output_type``
            and ``output_shape`` repeated fields are extended.
        tf_op: TensorFlow op whose ``outputs`` are read.
    """
    tf_outputs = tf_op.outputs

    mace_op_def.output.extend([tensor.name for tensor in tf_outputs])
    mace_op_def.output_type.extend(
        [tf_dtype_2_mace_dtype(tensor.dtype) for tensor in tf_outputs])

    def _shape_message(tensor):
        # One OutputShape message holding the tensor's shape as a list.
        shape_msg = mace_pb2.OutputShape()
        shape_msg.dims.extend(tensor.shape.as_list())
        return shape_msg

    mace_op_def.output_shape.extend(
        [_shape_message(tensor) for tensor in tf_outputs])
def
convert_ops
(
unresolved_ops
,
dt
,
net_def
,
device
):
ops_count
=
len
(
unresolved_ops
)
resolved_count
=
1
...
...
@@ -171,13 +185,7 @@ def convert_ops(unresolved_ops, dt, net_def, device):
final_op
=
relu_op
resolved_count
=
4
op_def
.
output
.
extend
([
output
.
name
for
output
in
final_op
.
outputs
])
output_shapes
=
[]
for
output
in
final_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
final_op
)
elif
first_op
.
type
==
'FusedBatchNorm'
:
op_def
.
name
=
first_op
.
name
...
...
@@ -225,26 +233,15 @@ def convert_ops(unresolved_ops, dt, net_def, device):
op_def
.
name
=
first_op
.
name
[:
-
4
]
# remove /add
op_def
.
type
=
'BatchNorm'
op_def
.
input
.
extend
([
input_name
,
gamma
,
beta
,
mean
,
variance
,
epsilon
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
add_1_op
.
outputs
])
output_shapes
=
[]
for
output
in
add_1_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
add_1_op
)
resolved_count
=
7
elif
first_op
.
type
==
'Relu6'
:
op_def
.
name
=
first_op
.
name
op_def
.
type
=
'Relu'
op_def
.
input
.
extend
([
input
.
name
for
input
in
first_op
.
inputs
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
first_op
.
outputs
])
output_shapes
=
[]
for
output
in
first_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
first_op
)
max_limit_arg
=
op_def
.
arg
.
add
()
max_limit_arg
.
name
=
'max_limit'
max_limit_arg
.
f
=
6
...
...
@@ -252,13 +249,8 @@ def convert_ops(unresolved_ops, dt, net_def, device):
op_def
.
name
=
first_op
.
name
op_def
.
type
=
'Pooling'
op_def
.
input
.
extend
([
input
.
name
for
input
in
first_op
.
inputs
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
first_op
.
outputs
])
output_shapes
=
[]
for
output
in
first_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
first_op
)
pooling_type_arg
=
op_def
.
arg
.
add
()
pooling_type_arg
.
name
=
'pooling_type'
pooling_type_arg
.
i
=
pooling_type_mode
[
first_op
.
type
]
...
...
@@ -278,55 +270,31 @@ def convert_ops(unresolved_ops, dt, net_def, device):
op_def
.
name
=
first_op
.
name
op_def
.
type
=
"AddN"
op_def
.
input
.
extend
([
input
.
name
for
input
in
first_op
.
inputs
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
first_op
.
outputs
])
output_shapes
=
[]
for
output
in
first_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
first_op
)
elif
first_op
.
type
==
'ConcatV2'
:
op_def
.
name
=
first_op
.
name
op_def
.
type
=
"Concat"
op_def
.
input
.
extend
([
first_op
.
inputs
[
i
].
name
for
i
in
xrange
(
2
)])
op_def
.
output
.
extend
([
output
.
name
for
output
in
first_op
.
outputs
])
axis_arg
=
op_def
.
arg
.
add
()
axis_arg
.
name
=
'axis'
axis_arg
.
i
=
get_input_tensor
(
first_op
,
2
).
eval
().
astype
(
np
.
int32
)
output_shapes
=
[]
for
output
in
first_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
first_op
)
elif
first_op
.
type
==
'ResizeBilinear'
:
op_def
.
name
=
first_op
.
name
op_def
.
type
=
"ResizeBilinear"
op_def
.
input
.
extend
([
first_op
.
inputs
[
0
].
name
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
first_op
.
outputs
])
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'size'
size_arg
.
ints
.
extend
(
get_input_tensor
(
first_op
,
1
).
eval
().
astype
(
np
.
int32
).
flat
)
size_arg
=
op_def
.
arg
.
add
()
size_arg
.
name
=
'align_corners'
size_arg
.
i
=
first_op
.
get_attr
(
'align_corners'
)
output_shapes
=
[]
for
output
in
first_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
first_op
)
elif
first_op
.
type
in
[
'Relu'
,
'SpaceToBatchND'
,
'BatchToSpaceND'
,
'BiasAdd'
]:
op_def
.
name
=
first_op
.
name
op_def
.
type
=
first_op
.
type
op_def
.
input
.
extend
([
input
.
name
for
input
in
first_op
.
inputs
])
op_def
.
output
.
extend
([
output
.
name
for
output
in
first_op
.
outputs
])
output_shapes
=
[]
for
output
in
first_op
.
outputs
:
output_shape
=
mace_pb2
.
OutputShape
()
output_shape
.
dims
.
extend
(
output
.
shape
.
as_list
())
output_shapes
.
append
(
output_shape
)
op_def
.
output_shape
.
extend
(
output_shapes
)
convert_op_outputs
(
op_def
,
first_op
)
else
:
raise
Exception
(
'Unknown Op: %s, type: %s'
%
(
first_op
.
name
,
first_op
.
type
))
pass
...
...
mace/python/tools/tf_dsp_converter_lib.py
浏览文件 @
1d924255
...
...
@@ -3,6 +3,7 @@ import tensorflow as tf
from
operator
import
mul
from
dsp_ops
import
DspOps
from
mace.python.tools
import
graph_util
from
mace.python.tools.convert_util
import
tf_dtype_2_mace_dtype
# converter --input ../libcv/quantized_icnet.pb --output quantized_icnet_dsp.pb \
# --runtime dsp --input_node input_node --output_node output_node
...
...
@@ -65,6 +66,18 @@ def add_shape_const_node(net_def, op, values, name):
tensor
.
dims
.
extend
(
values
)
return
tensor
.
name
def convert_op_outputs(mace_op_def, tf_op):
    """Record the dtype and shape of every TF op output on a MACE op def.

    Args:
        mace_op_def: operator definition whose ``output_type`` and
            ``output_shape`` repeated fields are extended.
        tf_op: TensorFlow op whose ``outputs`` are read.
    """
    produced = tf_op.outputs

    dtypes = [tf_dtype_2_mace_dtype(tensor.dtype) for tensor in produced]
    mace_op_def.output_type.extend(dtypes)

    shape_messages = []
    for tensor in produced:
        message = mace_pb2.OutputShape()
        message.dims.extend(tensor.shape.as_list())
        shape_messages.append(message)
    mace_op_def.output_shape.extend(shape_messages)
def
convert_ops
(
unresolved_ops
,
resolved_ops
,
net_def
,
output_node
,
dsp_ops
):
first_op
=
unresolved_ops
[
0
]
print
(
'Op: '
,
first_op
.
name
,
first_op
.
type
,
first_op
.
outputs
[
0
].
shape
)
...
...
@@ -120,6 +133,7 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
op_def
.
input
.
extend
([
t
.
name
for
t
in
s2b_op
.
inputs
[
1
:]])
op_def
.
input
.
extend
([
min_tensor
.
name
,
max_tensor
.
name
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
quantize_op
.
outputs
])
convert_op_outputs
(
op_def
,
quantize_op
)
elif
has_padding_and_strides
(
first_op
):
op_def
.
padding
=
padding_mode
[
first_op
.
get_attr
(
'padding'
)]
op_def
.
input
.
extend
([
t
.
name
for
t
in
first_op
.
inputs
])
...
...
@@ -131,13 +145,15 @@ def convert_ops(unresolved_ops, resolved_ops, net_def, output_node, dsp_ops):
strides_tensor
=
add_shape_const_node
(
net_def
,
first_op
,
strides
,
'strides'
)
op_def
.
input
.
extend
([
strides_tensor
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
convert_op_outputs
(
op_def
,
first_op
)
elif
is_node_flatten_reshape
(
first_op
):
op_def
.
type
=
'Flatten'
op_def
.
input
.
extend
([
t
.
name
for
t
in
first_op
.
inputs
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
]
)
convert_op_outputs
(
op_def
,
first_op
)
elif
dsp_ops
.
has_op
(
first_op
.
type
):
op_def
.
input
.
extend
([
t
.
name
for
t
in
first_op
.
inputs
])
op_def
.
out_max_byte_size
.
extend
([
max_elem_size
(
out
)
for
out
in
first_op
.
outputs
])
convert_op_outputs
(
op_def
,
first_op
)
else
:
raise
Exception
(
'Unsupported op: '
,
first_op
)
...
...
@@ -311,6 +327,10 @@ def strip_input_quantize_and_output_dequantize(net_def, input_node, output_node)
new_input_op
.
padding
=
input_op
.
padding
new_input_op
.
out_max_byte_size
.
extend
([
input_op
.
out_max_byte_size
[
0
]
/
4
,
4
,
4
])
new_ops
.
append
(
new_input_op
)
new_input_op
.
output_shape
.
extend
([
input_op
.
output_shape
[
0
],
minf_op
.
output_shape
[
0
],
maxf_op
.
output_shape
[
0
]])
new_input_op
.
output_type
.
extend
([
input_op
.
output_type
[
0
],
mace_pb2
.
DT_FLOAT
,
mace_pb2
.
DT_FLOAT
])
for
follow_op
in
consumers
[
get_tensor_name_from_op
(
quantize_op
.
name
,
0
)]:
new_follow_op
=
mace_pb2
.
OperatorDef
()
new_follow_op
.
CopyFrom
(
follow_op
)
...
...
tools/validate_gcn.sh
浏览文件 @
1d924255
#!/bin/bash
# Must run at root dir of mace project.
set
+x
Usage
()
{
echo
'Usage: bash tools/validate_gcn.sh tf_model_file'
}
...
...
@@ -13,6 +13,7 @@ fi
TF_MODEL_FILE_PATH
=
$1
MODEL_DIR
=
$(
dirname
${
TF_MODEL_FILE_PATH
}
)
MACE_MODEL_NAME
=
'mace_model.pb'
MACE_OPT_MODEL_NAME
=
'mace_opt_model.pb'
INPUT_FILE_NAME
=
'model_input'
OUTPUT_FILE_NAME
=
'gcn.out'
OUTPUT_LIST_FILE
=
'gcn.list'
...
...
@@ -26,14 +27,17 @@ python tools/validate.py --generate_data true --random_seed 1 \
--input_shape
=
512,512,3
# Step 2: convert tf model to mace model
echo
"Step 2: convert tf model to mace model"
echo
"Step 2: convert tf model to mace model
and optimize memory
"
bazel build //mace/python/tools:tf_converter
bazel-bin/mace/python/tools/tf_converter
--input
=
${
TF_MODEL_FILE_PATH
}
\
--output
=
${
MODEL_DIR
}
/
${
MACE_MODEL_NAME
}
\
--input_node
=
input
\
--output_node
=
GCN/br_result_2/fcn_br
\
--data_type
=
DT_HALF
\
--data_type
=
DT_HALF
\
--runtime
=
gpu
bazel build mace/python/tools:memory_optimizer
bazel-bin/mace/python/tools/memory_optimizer
${
MODEL_DIR
}
/
${
MACE_MODEL_NAME
}
\
${
MODEL_DIR
}
/
${
MACE_OPT_MODEL_NAME
}
# Step 3: Run model on the phone
...
...
@@ -46,7 +50,7 @@ bazel build -c opt --strip always mace/examples:mace_run \
adb shell
"mkdir -p
${
PHONE_DATA_DIR
}
"
adb shell
"mkdir -p
${
KERNEL_DIR
}
"
adb push mace/kernels/opencl/cl/
*
${
KERNEL_DIR
}
adb push
${
MODEL_DIR
}
/
${
MACE_MODEL_NAME
}
${
PHONE_DATA_DIR
}
adb push
${
MODEL_DIR
}
/
${
MACE_
OPT_
MODEL_NAME
}
${
PHONE_DATA_DIR
}
adb push
${
MODEL_DIR
}
/
${
INPUT_FILE_NAME
}
${
PHONE_DATA_DIR
}
adb push bazel-bin/mace/examples/mace_run
${
PHONE_DATA_DIR
}
...
...
@@ -56,13 +60,14 @@ adb </dev/null shell MACE_RUN_PARAMETER_PATH=${PHONE_DATA_DIR}/mace_run.config \
MACE_KERNEL_PATH
=
$KERNEL_DIR
\
OMP_NUM_THREADS
=
$num_threads
\
${
PHONE_DATA_DIR
}
/mace_run
\
--model
=
${
PHONE_DATA_DIR
}
/
${
MACE_MODEL_NAME
}
\
--model
=
${
PHONE_DATA_DIR
}
/
${
MACE_
OPT_
MODEL_NAME
}
\
--input
=
mace_input_node
\
--output
=
mace_output_node
\
--input_shape
=
1,512,512,3
\
--input_file
=
${
PHONE_DATA_DIR
}
/
${
INPUT_FILE_NAME
}
\
--output_file
=
${
PHONE_DATA_DIR
}
/
${
OUTPUT_FILE_NAME
}
\
--device
=
OPENCL
--device
=
OPENCL
\
--round
=
1
# Step 4: pull the mace run result.
echo
"Step 4: pull the mace run result."
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录