Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
毕竟曾有刹那
Mace
提交
316c810e
Mace
项目概览
毕竟曾有刹那
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
316c810e
编写于
10月 31, 2018
作者:
B
Bin Li
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Optimize Hexagon converter and wrapper
上级
659e93ba
变更
4
隐藏空白更改
内联
并排
Showing
4 changed files
with
150 additions
and
93 deletions
+150
-93
mace/core/runtime/hexagon/hexagon_control_wrapper.cc
mace/core/runtime/hexagon/hexagon_control_wrapper.cc
+132
-65
mace/core/runtime/hexagon/hexagon_control_wrapper.h
mace/core/runtime/hexagon/hexagon_control_wrapper.h
+3
-4
mace/libmace/mace.cc
mace/libmace/mace.cc
+1
-1
mace/python/tools/converter_tool/hexagon_converter.py
mace/python/tools/converter_tool/hexagon_converter.py
+14
-23
未找到文件。
mace/core/runtime/hexagon/hexagon_control_wrapper.cc
浏览文件 @
316c810e
...
...
@@ -13,6 +13,8 @@
// limitations under the License.
#include <sys/time.h>
#include <algorithm>
#include <iomanip>
#include <thread> // NOLINT(build/c++11)
#include <vector>
#include <unordered_map>
...
...
@@ -43,6 +45,53 @@ enum {
NN_GRAPH_PERFEVENT_UTIME
=
5
,
};
namespace
{
// Bundles the three scalar side-band values kept per graph input/output:
// a minimum, a maximum and a needs-quantization flag.
//
// The fields stay plain `float` / `int` members because AddInputMetadata()
// and AddOutputMetadata() record the address and sizeof of an individual
// field inside a tensor descriptor.
//
// Every field has a default initializer so that a default-initialized
// instance never exposes indeterminate values before Init() is called.
struct InputOutputMetadata {
  // Overwrites all three fields in one call.
  //   min_val            - value stored in `min_val`
  //   max_val            - value stored in `max_val`
  //   needs_quantization - value stored in `needs_quantization`
  void Init(float min_val, float max_val, int needs_quantization) {
    this->min_val = min_val;
    this->max_val = max_val;
    this->needs_quantization = needs_quantization;
  }

  float min_val = 0.0f;
  float max_val = 0.0f;
  int needs_quantization = 0;
};
template
<
typename
T
>
void
AddInputMetadata
(
const
T
&
data
,
hexagon_nn_tensordef
*
tensor
)
{
tensor
->
batches
=
1
;
tensor
->
height
=
1
;
tensor
->
width
=
1
;
tensor
->
depth
=
1
;
tensor
->
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
&
data
));
tensor
->
dataLen
=
sizeof
(
data
);
tensor
->
data_valid_len
=
sizeof
(
data
);
tensor
->
unused
=
0
;
}
template
<
typename
T
>
void
AddOutputMetadata
(
const
T
&
data
,
hexagon_nn_tensordef
*
tensor
)
{
tensor
->
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
&
data
));
tensor
->
dataLen
=
sizeof
(
data
);
}
// Formats `v` by streaming it with operator<< and returns the resulting
// text.
template <typename IntType>
std::string IntToString(const IntType v) {
  std::ostringstream out;
  out << v;
  const std::string text = out.str();
  return text;
}
// Formats `v` in fixed-point notation with exactly `precision` digits after
// the decimal point and returns the resulting text.
template <typename FloatType>
std::string FloatToString(const FloatType v, const int32_t precision) {
  std::ostringstream out;
  // Same effect as streaming std::fixed followed by std::setprecision().
  out.setf(std::ios_base::fixed, std::ios_base::floatfield);
  out.precision(precision);
  out << v;
  return out.str();
}
}
// namespace
int
HexagonControlWrapper
::
GetVersion
()
{
int
version
;
MACE_CHECK
(
hexagon_nn_version
(
&
version
)
==
0
,
"get version error"
);
...
...
@@ -299,9 +348,15 @@ void HexagonControlWrapper::GetPerfInfo() {
std
::
unordered_map
<
uint32_t
,
float
>
node_id_counters
;
std
::
unordered_map
<
std
::
string
,
std
::
pair
<
int
,
float
>>
node_type_counters
;
std
::
vector
<
std
::
string
>
node_types
;
float
total_duration
=
0.0
;
VLOG
(
1
)
<<
"items: "
<<
n_items
;
std
::
string
run_order_title
=
"Sort by Run Order"
;
const
std
::
vector
<
std
::
string
>
run_order_header
=
{
"Node Id"
,
"Node Type"
,
"Node Type Id"
,
"Executions"
,
"Duration(ms)"
};
std
::
vector
<
std
::
vector
<
std
::
string
>>
run_order_data
;
for
(
unsigned
int
i
=
0
;
i
<
n_items
;
++
i
)
{
unsigned
int
node_id
=
perf_info
[
i
].
node_id
;
unsigned
int
node_type_id
=
perf_info
[
i
].
node_type
;
...
...
@@ -313,27 +368,48 @@ void HexagonControlWrapper::GetPerfInfo() {
char
node_type_buf
[
MACE_MAX_NODE
];
hexagon_nn_op_id_to_name
(
node_type_id
,
node_type_buf
,
MACE_MAX_NODE
);
std
::
string
node_type
(
node_type_buf
);
LOG
(
INFO
)
<<
"node id: "
<<
perf_info
[
i
].
node_id
<<
", node type: "
<<
node_type
<<
", node type id: "
<<
node_type_id
<<
", executions: "
<<
perf_info
[
i
].
executions
<<
", duration: "
<<
node_id_counters
[
node_id
];
if
(
node_type
.
compare
(
"Const"
)
==
0
)
continue
;
std
::
vector
<
std
::
string
>
tuple
;
tuple
.
push_back
(
IntToString
(
perf_info
[
i
].
node_id
));
tuple
.
push_back
(
node_type
);
tuple
.
push_back
(
IntToString
(
node_type_id
));
tuple
.
push_back
(
IntToString
(
perf_info
[
i
].
executions
));
tuple
.
push_back
(
FloatToString
(
node_id_counters
[
node_id
]
/
1000.0
f
,
3
));
run_order_data
.
emplace_back
(
tuple
);
if
(
node_type_counters
.
find
(
node_type
)
==
node_type_counters
.
end
())
{
node_type_counters
[
node_type
]
=
{
0
,
0.0
};
node_types
.
push_back
(
node_type
);
}
++
node_type_counters
[
node_type
].
first
;
node_type_counters
[
node_type
].
second
+=
node_id_counters
[
node_id
];
if
(
node_type
.
compare
(
"Const"
)
!=
0
)
{
total_duration
+=
node_id_counters
[
node_id
];
}
total_duration
+=
node_id_counters
[
node_id
];
}
for
(
auto
&
node_type_counter
:
node_type_counters
)
{
LOG
(
INFO
)
<<
"node type: "
<<
node_type_counter
.
first
<<
", time: "
<<
node_type_counter
.
second
.
first
<<
", duration: "
<<
node_type_counter
.
second
.
second
;
std
::
sort
(
node_types
.
begin
(),
node_types
.
end
(),
[
&
](
const
std
::
string
&
lhs
,
const
std
::
string
&
rhs
)
{
return
node_type_counters
[
lhs
].
second
>
node_type_counters
[
rhs
].
second
;
});
std
::
string
duration_title
=
"Sort by Duration"
;
const
std
::
vector
<
std
::
string
>
duration_header
=
{
"Node Type"
,
"Times"
,
"Duration(ms)"
};
std
::
vector
<
std
::
vector
<
std
::
string
>>
duration_data
;
for
(
auto
&
node_type
:
node_types
)
{
auto
node_type_counter
=
node_type_counters
[
node_type
];
std
::
vector
<
std
::
string
>
tuple
;
tuple
.
push_back
(
node_type
);
tuple
.
push_back
(
IntToString
(
node_type_counter
.
first
));
tuple
.
push_back
(
FloatToString
(
node_type_counter
.
second
/
1000.0
f
,
3
));
duration_data
.
emplace_back
(
tuple
);
}
LOG
(
INFO
)
<<
mace
::
string_util
::
StringFormatter
::
Table
(
run_order_title
,
run_order_header
,
run_order_data
);
LOG
(
INFO
)
<<
mace
::
string_util
::
StringFormatter
::
Table
(
duration_title
,
duration_header
,
duration_data
);
LOG
(
INFO
)
<<
"total duration: "
<<
std
::
fixed
<<
total_duration
;
}
...
...
@@ -382,45 +458,64 @@ bool HexagonControlWrapper::ExecuteGraph(const Tensor &input_tensor,
}
bool
HexagonControlWrapper
::
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
>
&
input_tensors
,
std
::
vector
<
Tensor
>
*
output_tensors
)
{
const
std
::
vector
<
Tensor
*
>
&
input_tensors
,
std
::
vector
<
Tensor
*
>
*
output_tensors
)
{
LOG
(
INFO
)
<<
"Execute graph new: "
<<
nn_id_
;
uint32_t
num_inputs
=
static_cast
<
uint32_t
>
(
input_tensors
.
size
());
uint32_t
num_outputs
=
static_cast
<
uint32_t
>
(
output_tensors
->
size
());
MACE_ASSERT
(
num_inputs_
==
num_inputs
,
"Wrong inputs num"
);
MACE_ASSERT
(
num_outputs_
==
num_outputs
,
"Wrong outputs num"
);
hexagon_nn_tensordef
*
inputs
=
new
hexagon_nn_tensordef
[
num_inputs
];
hexagon_nn_tensordef
*
outputs
=
new
hexagon_nn_tensordef
[
num_outputs
];
std
::
vector
<
hexagon_nn_tensordef
>
inputs
(
num_inputs
*
NUM_METADATA
);
std
::
vector
<
hexagon_nn_tensordef
>
outputs
(
num_outputs
*
NUM_METADATA
);
std
::
vector
<
InputOutputMetadata
>
input_metadata
(
num_inputs
);
std
::
vector
<
InputOutputMetadata
>
output_metadata
(
num_outputs
);
for
(
size_t
i
=
0
;
i
<
num_inputs
;
++
i
)
{
std
::
vector
<
index_t
>
input_shape
=
input_tensors
[
i
].
shape
();
inputs
[
i
].
batches
=
static_cast
<
uint32_t
>
(
input_shape
[
0
]);
inputs
[
i
].
height
=
static_cast
<
uint32_t
>
(
input_shape
[
1
]);
inputs
[
i
].
width
=
static_cast
<
uint32_t
>
(
input_shape
[
2
]);
inputs
[
i
].
depth
=
static_cast
<
uint32_t
>
(
input_shape
[
3
]);
inputs
[
i
].
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensors
[
i
].
raw_data
()));
inputs
[
i
].
dataLen
=
static_cast
<
int
>
(
input_tensors
[
i
].
raw_size
());
inputs
[
i
].
data_valid_len
=
static_cast
<
uint32_t
>
(
input_tensors
[
i
].
raw_size
());
inputs
[
i
].
unused
=
0
;
std
::
vector
<
index_t
>
input_shape
=
input_tensors
[
i
]
->
shape
();
size_t
index
=
i
*
NUM_METADATA
;
inputs
[
index
].
batches
=
static_cast
<
uint32_t
>
(
input_shape
[
0
]);
inputs
[
index
].
height
=
static_cast
<
uint32_t
>
(
input_shape
[
1
]);
inputs
[
index
].
width
=
static_cast
<
uint32_t
>
(
input_shape
[
2
]);
inputs
[
index
].
depth
=
static_cast
<
uint32_t
>
(
input_shape
[
3
]);
inputs
[
index
].
data
=
const_cast
<
unsigned
char
*>
(
reinterpret_cast
<
const
unsigned
char
*>
(
input_tensors
[
i
]
->
raw_data
()));
inputs
[
index
].
dataLen
=
static_cast
<
int
>
(
input_tensors
[
i
]
->
raw_size
());
inputs
[
index
].
data_valid_len
=
static_cast
<
uint32_t
>
(
input_tensors
[
i
]
->
raw_size
());
inputs
[
index
].
unused
=
0
;
input_metadata
[
i
].
Init
(
.0
f
,
.0
f
,
1
);
AddInputMetadata
(
input_metadata
[
i
].
min_val
,
&
inputs
[
index
+
1
]);
AddInputMetadata
(
input_metadata
[
i
].
max_val
,
&
inputs
[
index
+
2
]);
AddInputMetadata
(
input_metadata
[
i
].
needs_quantization
,
&
inputs
[
index
+
3
]);
}
for
(
size_t
i
=
0
;
i
<
num_outputs
;
++
i
)
{
(
*
output_tensors
)[
i
].
SetDtype
(
output_data_types_
[
i
]);
(
*
output_tensors
)[
i
].
Resize
(
output_shapes_
[
i
]);
outputs
[
i
].
data
=
reinterpret_cast
<
unsigned
char
*>
(
(
*
output_tensors
)[
i
].
raw_mutable_data
());
outputs
[
i
].
dataLen
=
static_cast
<
int
>
((
*
output_tensors
)[
i
].
raw_size
());
size_t
index
=
i
*
NUM_METADATA
;
(
*
output_tensors
)[
i
]
->
SetDtype
(
output_data_types_
[
i
]);
(
*
output_tensors
)[
i
]
->
Resize
(
output_shapes_
[
i
]);
outputs
[
index
].
data
=
reinterpret_cast
<
unsigned
char
*>
(
(
*
output_tensors
)[
i
]
->
raw_mutable_data
());
outputs
[
index
].
dataLen
=
static_cast
<
int
>
((
*
output_tensors
)[
i
]
->
raw_size
());
output_metadata
[
i
].
Init
(
.0
f
,
.0
f
,
1
);
AddOutputMetadata
(
output_metadata
[
i
].
min_val
,
&
outputs
[
index
+
1
]);
AddOutputMetadata
(
output_metadata
[
i
].
max_val
,
&
outputs
[
index
+
2
]);
AddOutputMetadata
(
output_metadata
[
i
].
needs_quantization
,
&
outputs
[
index
+
3
]);
}
int
res
=
hexagon_nn_execute_new
(
nn_id_
,
inputs
,
num_inputs
,
outputs
,
num_outputs
);
hexagon_nn_execute_new
(
nn_id_
,
inputs
.
data
(),
num_inputs
*
NUM_METADATA
,
outputs
.
data
(),
num_outputs
*
NUM_METADATA
);
for
(
size_t
i
=
0
;
i
<
num_outputs
;
++
i
)
{
std
::
vector
<
uint32_t
>
output_shape
{
outputs
[
i
].
batches
,
outputs
[
i
].
height
,
outputs
[
i
].
width
,
outputs
[
i
].
depth
};
size_t
index
=
i
*
NUM_METADATA
;
std
::
vector
<
uint32_t
>
output_shape
{
outputs
[
index
].
batches
,
outputs
[
index
].
height
,
outputs
[
index
].
width
,
outputs
[
index
].
depth
};
MACE_ASSERT
(
output_shape
.
size
()
==
output_shapes_
[
i
].
size
(),
"wrong output shape inferred"
);
for
(
size_t
j
=
0
;
j
<
output_shape
.
size
();
++
j
)
{
...
...
@@ -428,40 +523,12 @@ bool HexagonControlWrapper::ExecuteGraphNew(
==
output_shapes_
[
i
][
j
],
"wrong output shape inferred"
);
}
MACE_ASSERT
(
static_cast
<
index_t
>
(
outputs
[
i
].
data_valid_len
)
==
(
*
output_tensors
)[
i
]
.
raw_size
(),
MACE_ASSERT
(
static_cast
<
index_t
>
(
outputs
[
i
ndex
].
data_valid_len
)
==
(
*
output_tensors
)[
i
]
->
raw_size
(),
"wrong output bytes inferred."
);
}
delete
[]
inputs
;
delete
[]
outputs
;
return
res
==
0
;
}
bool
HexagonControlWrapper
::
ExecuteGraphPreQuantize
(
const
Tensor
&
input_tensor
,
Tensor
*
output_tensor
)
{
std
::
vector
<
Tensor
>
input_tensors
(
3
);
std
::
vector
<
Tensor
>
output_tensors
(
3
);
input_tensors
[
0
].
SetDtype
(
DT_UINT8
);
output_tensors
[
0
].
SetDtype
(
DT_UINT8
);
input_tensors
[
0
].
ResizeLike
(
input_tensor
);
input_tensors
[
1
].
Resize
({
1
,
1
,
1
,
1
});
float
*
min_in_data
=
input_tensors
[
1
].
mutable_data
<
float
>
();
input_tensors
[
2
].
Resize
({
1
,
1
,
1
,
1
});
float
*
max_in_data
=
input_tensors
[
2
].
mutable_data
<
float
>
();
quantizer_
.
Quantize
(
input_tensor
,
&
input_tensors
[
0
],
min_in_data
,
max_in_data
);
if
(
!
ExecuteGraphNew
(
input_tensors
,
&
output_tensors
))
{
return
false
;
}
output_tensor
->
ResizeLike
(
output_tensors
[
0
]);
const
float
*
min_out_data
=
output_tensors
[
1
].
data
<
float
>
();
const
float
*
max_out_data
=
output_tensors
[
2
].
data
<
float
>
();
quantizer_
.
DeQuantize
(
output_tensors
[
0
],
*
min_out_data
,
*
max_out_data
,
output_tensor
);
return
true
;
}
}
// namespace mace
mace/core/runtime/hexagon/hexagon_control_wrapper.h
浏览文件 @
316c810e
...
...
@@ -33,10 +33,8 @@ class HexagonControlWrapper {
bool
Finalize
();
bool
SetupGraph
(
const
NetDef
&
net_def
,
const
unsigned
char
*
model_data
);
bool
ExecuteGraph
(
const
Tensor
&
input_tensor
,
Tensor
*
output_tensor
);
bool
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
>
&
input_tensors
,
std
::
vector
<
Tensor
>
*
output_tensors
);
bool
ExecuteGraphPreQuantize
(
const
Tensor
&
input_tensor
,
Tensor
*
output_tensor
);
bool
ExecuteGraphNew
(
const
std
::
vector
<
Tensor
*>
&
input_tensors
,
std
::
vector
<
Tensor
*>
*
output_tensors
);
bool
TeardownGraph
();
void
PrintLog
();
...
...
@@ -47,6 +45,7 @@ class HexagonControlWrapper {
private:
static
constexpr
int
NODE_ID_OFFSET
=
10000
;
static
constexpr
int
NUM_METADATA
=
4
;
// Returns `nodeid` shifted up by NODE_ID_OFFSET.
inline uint32_t node_id(uint32_t nodeid) {
  const uint32_t shifted = NODE_ID_OFFSET + nodeid;
  return shifted;
}
...
...
mace/libmace/mace.cc
浏览文件 @
316c810e
...
...
@@ -567,7 +567,7 @@ MaceStatus MaceEngine::Impl::Run(
if
(
device_type_
==
HEXAGON
)
{
MACE_CHECK
(
input_tensors
.
size
()
==
1
&&
output_tensors
.
size
()
==
1
,
"HEXAGON not support multiple inputs and outputs yet."
);
hexagon_controller_
->
ExecuteGraph
(
*
input_tensors
[
0
],
output_tensors
[
0
]
);
hexagon_controller_
->
ExecuteGraph
New
(
input_tensors
,
&
output_tensors
);
}
else
{
#endif
MACE_RETURN_IF_ERROR
(
net_
->
Run
(
run_metadata
));
...
...
mace/python/tools/converter_tool/hexagon_converter.py
浏览文件 @
316c810e
...
...
@@ -31,10 +31,8 @@ from operator import mul
class
HexagonOps
(
object
):
def
__init__
(
self
):
self
.
hexagon_ops
=
{
'INPUT'
:
'INPUT'
,
'OUTPUT'
:
'OUTPUT'
,
'Quantize'
:
'Quantize'
,
'Dequantize'
:
'Dequantize'
,
'Quantize'
:
'QuantizeINPUT_f_to_8'
,
'Dequantize'
:
'DequantizeOUTPUT_8tof'
,
'Concat'
:
'QuantizedConcat_8'
,
'Conv2D'
:
'Supernode_8x8p32to8'
,
'DepthwiseConv2d'
:
'DepthwiseSupernode_8x8p32to8'
,
...
...
@@ -78,6 +76,10 @@ def get_op_and_port_from_tensor(tensor_name):
return
op
,
port
def normalize_name(name):
    """Return `name` with every ':' replaced by '_'."""
    pieces = name.split(':')
    return '_'.join(pieces)
class
HexagonConverter
(
base_converter
.
ConverterInterface
):
def
__init__
(
self
,
option
,
model
,
quantize_activation_info
):
self
.
_option
=
option
...
...
@@ -99,7 +101,10 @@ class HexagonConverter(base_converter.ConverterInterface):
self
.
add_input_output_node
()
self
.
_model
=
graph_util
.
sort_mace_graph
(
self
.
_model
,
'__output__'
)
output_name
=
MaceKeyword
.
mace_output_node_name
+
'_'
\
+
self
.
_option
.
output_nodes
.
values
()[
0
].
name
output_name
=
normalize_name
(
output_name
)
self
.
_model
=
graph_util
.
sort_mace_graph
(
self
.
_model
,
output_name
)
self
.
add_node_id
()
...
...
@@ -293,23 +298,9 @@ class HexagonConverter(base_converter.ConverterInterface):
def
add_input_output_node
(
self
):
input_node
=
self
.
_option
.
input_nodes
.
values
()[
0
]
op_def
=
self
.
_model
.
op
.
add
()
op_def
.
name
=
'__input__'
op_def
.
type
=
'INPUT'
shape
=
op_def
.
output_shape
.
add
()
shape
.
dims
.
extend
(
input_node
.
shape
)
op_def
.
output_type
.
extend
([
mace_pb2
.
DT_FLOAT
])
out_max_byte_size
=
reduce
(
mul
,
shape
.
dims
)
op_def
.
out_max_byte_size
.
extend
([
out_max_byte_size
])
for
op
in
self
.
_model
.
op
:
if
op
.
name
==
input_node
.
name
:
del
op
.
input
[
0
]
input_name
=
op_def
.
name
+
':0'
op
.
input
.
extend
([
input_name
])
self
.
_consts
[
input_name
]
=
\
self
.
_quantize_activation_info
[
input_node
.
name
]
self
.
add_min_max_const_node
(
op
,
input_name
)
del
self
.
_consts
[
input_name
]
break
output_node
=
None
...
...
@@ -317,6 +308,7 @@ class HexagonConverter(base_converter.ConverterInterface):
output_name
=
self
.
_option
.
output_nodes
.
values
()[
0
].
name
else
:
output_name
=
self
.
_option
.
check_nodes
.
values
()[
0
].
name
output_name
=
normalize_name
(
output_name
)
for
op
in
self
.
_model
.
op
:
if
op
.
name
.
startswith
(
MaceKeyword
.
mace_output_node_name
)
\
and
op
.
name
.
find
(
output_name
)
!=
-
1
:
...
...
@@ -324,10 +316,9 @@ class HexagonConverter(base_converter.ConverterInterface):
break
mace_check
(
output_node
is
not
None
,
"mace_output_node_* not found."
)
op_def
=
self
.
_model
.
op
.
add
()
op_def
.
name
=
'__output__'
op_def
.
type
=
'OUTPUT'
op_def
.
input
.
extend
([
get_tensor_name_from_op
(
output_node
.
name
,
0
)])
del
output_node
.
output_shape
[:]
del
output_node
.
output_type
[:]
del
output_node
.
out_max_byte_size
[:]
def
add_node_id
(
self
):
node_id_counter
=
0
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录